dcnum 0.23.2__tar.gz → 0.23.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dcnum might be problematic; see the release notes and the changes listed below for details.

Files changed (118)
  1. {dcnum-0.23.2 → dcnum-0.23.3}/CHANGELOG +4 -0
  2. {dcnum-0.23.2 → dcnum-0.23.3}/PKG-INFO +1 -1
  3. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/_version.py +2 -2
  4. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/event_extractor_manager_thread.py +6 -5
  5. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/logic/ctrl.py +4 -3
  6. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/read/hdf5_data.py +3 -1
  7. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/write/writer.py +24 -12
  8. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum.egg-info/PKG-INFO +1 -1
  9. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_write_writer.py +37 -0
  10. {dcnum-0.23.2 → dcnum-0.23.3}/.github/workflows/check.yml +0 -0
  11. {dcnum-0.23.2 → dcnum-0.23.3}/.github/workflows/deploy_pypi.yml +0 -0
  12. {dcnum-0.23.2 → dcnum-0.23.3}/.gitignore +0 -0
  13. {dcnum-0.23.2 → dcnum-0.23.3}/.readthedocs.yml +0 -0
  14. {dcnum-0.23.2 → dcnum-0.23.3}/LICENSE +0 -0
  15. {dcnum-0.23.2 → dcnum-0.23.3}/README.rst +0 -0
  16. {dcnum-0.23.2 → dcnum-0.23.3}/docs/conf.py +0 -0
  17. {dcnum-0.23.2 → dcnum-0.23.3}/docs/extensions/github_changelog.py +0 -0
  18. {dcnum-0.23.2 → dcnum-0.23.3}/docs/index.rst +0 -0
  19. {dcnum-0.23.2 → dcnum-0.23.3}/docs/requirements.txt +0 -0
  20. {dcnum-0.23.2 → dcnum-0.23.3}/pyproject.toml +0 -0
  21. {dcnum-0.23.2 → dcnum-0.23.3}/setup.cfg +0 -0
  22. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/__init__.py +0 -0
  23. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/__init__.py +0 -0
  24. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_background/__init__.py +0 -0
  25. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_background/base.py +0 -0
  26. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_background/bg_copy.py +0 -0
  27. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_background/bg_roll_median.py +0 -0
  28. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_background/bg_sparse_median.py +0 -0
  29. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_brightness/__init__.py +0 -0
  30. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_brightness/bright_all.py +0 -0
  31. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_brightness/common.py +0 -0
  32. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_contour/__init__.py +0 -0
  33. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_contour/contour.py +0 -0
  34. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_contour/moments.py +0 -0
  35. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_contour/volume.py +0 -0
  36. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_texture/__init__.py +0 -0
  37. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_texture/common.py +0 -0
  38. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/feat_texture/tex_all.py +0 -0
  39. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/gate.py +0 -0
  40. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/feat/queue_event_extractor.py +0 -0
  41. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/logic/__init__.py +0 -0
  42. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/logic/job.py +0 -0
  43. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/logic/json_encoder.py +0 -0
  44. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/meta/__init__.py +0 -0
  45. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/meta/paths.py +0 -0
  46. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/meta/ppid.py +0 -0
  47. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/read/__init__.py +0 -0
  48. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/read/cache.py +0 -0
  49. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/read/const.py +0 -0
  50. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/read/mapped.py +0 -0
  51. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/__init__.py +0 -0
  52. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/segm_thresh.py +0 -0
  53. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/segm_torch/__init__.py +0 -0
  54. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/segm_torch/segm_torch_base.py +0 -0
  55. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/segm_torch/segm_torch_mpo.py +0 -0
  56. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/segm_torch/segm_torch_sto.py +0 -0
  57. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/segm_torch/torch_model.py +0 -0
  58. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/segm_torch/torch_postproc.py +0 -0
  59. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/segm_torch/torch_preproc.py +0 -0
  60. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/segmenter.py +0 -0
  61. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/segmenter_manager_thread.py +0 -0
  62. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/segmenter_mpo.py +0 -0
  63. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/segm/segmenter_sto.py +0 -0
  64. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/write/__init__.py +0 -0
  65. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/write/deque_writer_thread.py +0 -0
  66. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum/write/queue_collector_thread.py +0 -0
  67. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum.egg-info/SOURCES.txt +0 -0
  68. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum.egg-info/dependency_links.txt +0 -0
  69. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum.egg-info/requires.txt +0 -0
  70. {dcnum-0.23.2 → dcnum-0.23.3}/src/dcnum.egg-info/top_level.txt +0 -0
  71. {dcnum-0.23.2 → dcnum-0.23.3}/tests/conftest.py +0 -0
  72. {dcnum-0.23.2 → dcnum-0.23.3}/tests/data/fmt-hdf5_cytoshot_extended-moments-features.zip +0 -0
  73. {dcnum-0.23.2 → dcnum-0.23.3}/tests/data/fmt-hdf5_cytoshot_full-features_2023.zip +0 -0
  74. {dcnum-0.23.2 → dcnum-0.23.3}/tests/data/fmt-hdf5_cytoshot_full-features_2024.zip +0 -0
  75. {dcnum-0.23.2 → dcnum-0.23.3}/tests/data/fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip +0 -0
  76. {dcnum-0.23.2 → dcnum-0.23.3}/tests/data/fmt-hdf5_shapein_empty.zip +0 -0
  77. {dcnum-0.23.2 → dcnum-0.23.3}/tests/data/fmt-hdf5_shapein_raw-with-variable-length-logs.zip +0 -0
  78. {dcnum-0.23.2 → dcnum-0.23.3}/tests/data/segm-torch-model_unet-dcnum-test_g1_910c2.zip +0 -0
  79. {dcnum-0.23.2 → dcnum-0.23.3}/tests/data/segm-torch-test-data_unet-dcnum-test_g1_910c2.zip +0 -0
  80. {dcnum-0.23.2 → dcnum-0.23.3}/tests/helper_methods.py +0 -0
  81. {dcnum-0.23.2 → dcnum-0.23.3}/tests/requirements.txt +0 -0
  82. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_feat_background_base.py +0 -0
  83. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_feat_background_bg_copy.py +0 -0
  84. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_feat_background_bg_roll_median.py +0 -0
  85. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_feat_background_bg_sparsemed.py +0 -0
  86. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_feat_brightness.py +0 -0
  87. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_feat_event_extractor_manager.py +0 -0
  88. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_feat_gate.py +0 -0
  89. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_feat_haralick.py +0 -0
  90. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_feat_moments_based.py +0 -0
  91. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_feat_moments_based_extended.py +0 -0
  92. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_feat_volume.py +0 -0
  93. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_init.py +0 -0
  94. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_logic_job.py +0 -0
  95. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_logic_join.py +0 -0
  96. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_logic_json.py +0 -0
  97. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_logic_pipeline.py +0 -0
  98. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_meta_paths.py +0 -0
  99. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_meta_ppid_base.py +0 -0
  100. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_meta_ppid_bg.py +0 -0
  101. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_meta_ppid_data.py +0 -0
  102. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_meta_ppid_feat.py +0 -0
  103. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_meta_ppid_gate.py +0 -0
  104. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_meta_ppid_segm.py +0 -0
  105. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_read_basin.py +0 -0
  106. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_read_concat_hdf5.py +0 -0
  107. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_read_hdf5.py +0 -0
  108. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_read_hdf5_basins.py +0 -0
  109. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_read_hdf5_index_mapping.py +0 -0
  110. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_segm_base.py +0 -0
  111. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_segm_mpo.py +0 -0
  112. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_segm_no_mask_proc.py +0 -0
  113. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_segm_sto.py +0 -0
  114. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_segm_thresh.py +0 -0
  115. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_segm_torch.py +0 -0
  116. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_segm_torch_preproc.py +0 -0
  117. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_write_deque_writer_thread.py +0 -0
  118. {dcnum-0.23.2 → dcnum-0.23.3}/tests/test_write_queue_collector_thread.py +0 -0
@@ -1,3 +1,7 @@
1
+ 0.23.3
2
+ - fix: ignore non-file-type-like basins
3
+ - fix: workaround for slow reading from HDF5 (don't use index arrays)
4
+ - fix: avoid excessive stalling when writer is slow
1
5
  0.23.2
2
6
  - enh: add DCNumPipelineJob.validate method
3
7
  - enh: list Python libraries used in job log
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcnum
3
- Version: 0.23.2
3
+ Version: 0.23.3
4
4
  Summary: numerics toolbox for imaging deformability cytometry
5
5
  Author: Maximilian Schlögel, Paul Müller, Raghava Alajangi
6
6
  Maintainer-email: Paul Müller <dev@craban.de>
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.23.2'
16
- __version_tuple__ = version_tuple = (0, 23, 2)
15
+ __version__ = version = '0.23.3'
16
+ __version_tuple__ = version_tuple = (0, 23, 3)
@@ -96,12 +96,13 @@ class EventExtractorManagerThread(threading.Thread):
96
96
  # If the writer_dq starts filling up, then this could lead to
97
97
  # an oom-kill signal. Stall for the writer to prevent this.
98
98
  if (ldq := len(self.writer_dq)) > 1000:
99
- time.sleep(1)
100
- ldq2 = len(self.writer_dq)
101
- stall_time = max(0., (ldq2 - 200) / ((ldq - ldq2) or 1))
102
- time.sleep(stall_time)
99
+ stalled_sec = 0.
100
+ for ii in range(60):
101
+ if len(self.writer_dq) > 200:
102
+ time.sleep(.5)
103
+ stalled_sec += .5
103
104
  self.logger.warning(
104
- f"Stalled {stall_time + 1:.1f}s for slow writer "
105
+ f"Stalled {stalled_sec:.1f}s due to slow writer "
105
106
  f"({ldq} chunks queued)")
106
107
 
107
108
  unavailable_slots = 0
@@ -34,6 +34,7 @@ from ..write import (
34
34
  from .job import DCNumPipelineJob
35
35
  from .json_encoder import ExtendedJSONEncoder
36
36
 
37
+
37
38
  # Force using "spawn" method for multiprocessing, because we are using
38
39
  # queues and threads and would end up with race conditions otherwise.
39
40
  mp_spawn = mp.get_context("spawn")
@@ -562,8 +563,8 @@ class DCNumJobRunner(threading.Thread):
562
563
  # 3. image features from the input file
563
564
  [self.draw.h5, ["image", "image_bg", "bg_off"], "optional"],
564
565
  ]
565
- with h5py.File(self.path_temp_out, "a") as hout:
566
- hw = HDF5Writer(hout)
566
+ with HDF5Writer(self.path_temp_out) as hw:
567
+ hout = hw.h5
567
568
  # First, we have to determine the basin mapping from input to
568
569
  # output. This information is stored by the QueueCollectorThread
569
570
  # in the "basinmap0" feature, ready to be used by us.
@@ -576,7 +577,7 @@ class DCNumJobRunner(threading.Thread):
576
577
  # mapping of the input file was set to slice(1, 100), then the
577
578
  # first image would not be there, and we would have
578
579
  # [1, 1, 1, ...].
579
- idx_um = hout["events/index_unmapped"]
580
+ idx_um = hout["events/index_unmapped"][:]
580
581
 
581
582
  # If we want to convert this to an actual basinmap feature,
582
583
  # then we have to convert those indices to indices that map
@@ -205,7 +205,9 @@ class HDF5Data:
205
205
  bn_data = "\n".join(
206
206
  [s.decode() for s in h5["basins"][bnkey][:].tolist()])
207
207
  bn_dict = json.loads(bn_data)
208
- basins.append(bn_dict)
208
+ if bn_dict["type"] == "file":
209
+ # we only support file-based basins
210
+ basins.append(bn_dict)
209
211
  self.basins = sorted(basins, key=lambda x: x["name"])
210
212
 
211
213
  if state["pixel_size"] is not None:
@@ -48,7 +48,13 @@ class HDF5Writer:
48
48
  self.h5 = obj
49
49
  self.h5_owned = False
50
50
  else:
51
- self.h5 = h5py.File(obj, mode=mode, libver="latest")
51
+ self.h5 = h5py.File(obj,
52
+ mode=mode,
53
+ libver="latest",
54
+ # Set chunk cache size to 3 MiB for each
55
+ # dataset to allow partial writes.
56
+ rdcc_nbytes=3145728,
57
+ )
52
58
  self.h5_owned = True
53
59
  self.events = self.h5.require_group("events")
54
60
  ds_kwds = set_default_filter_kwargs(ds_kwds)
@@ -323,8 +329,6 @@ def copy_features(h5_src: h5py.File,
323
329
  """
324
330
  ei = h5_src["events"]
325
331
  eo = h5_dst.require_group("events")
326
- # This is the size of the output dataset
327
- size = h5_dst.attrs["experiment:event count"]
328
332
  hw = HDF5Writer(h5_dst)
329
333
  for feat in features:
330
334
  if feat in eo:
@@ -341,20 +345,28 @@ def copy_features(h5_src: h5py.File,
341
345
  dst_name=feat.encode(),
342
346
  )
343
347
  else:
344
- # Perform mapping and store the features in chunks to keep
345
- # memory usage down.
348
+ # We have to perform mapping.
349
+ # Since h5py is very slow at indexing with arrays,
350
+ # we instead read the data in chunks from the input file,
351
+ # and perform the mapping afterward using the numpy arrays.
346
352
  dsi = ei[feat]
347
353
  chunk_size = hw.get_best_nd_chunks(dsi[0].shape, dsi.dtype)[0]
354
+ size_in = dsi.shape[0]
348
355
  start = 0
349
- while start < size:
350
- chunk_idx = mapping[start:start + chunk_size]
351
- # h5py only supports indexing in increasing order
352
- chunk_unique, order = np.unique(chunk_idx, return_inverse=True)
353
- data_unique = dsi[chunk_unique]
354
- data = data_unique[order]
356
+ while start < size_in:
357
+ # Get a big chunk of data
358
+ big_chunk = 10 * chunk_size
359
+ stop = start + big_chunk
360
+ data_in = dsi[start:stop]
361
+ # Determine the indices that we need from that chunk.
362
+ mapping_idx = (start <= mapping) * (mapping < stop)
363
+ mapping_chunk = mapping[mapping_idx] - start
364
+ data = data_in[mapping_chunk]
365
+ # Note that HDF5 does its own caching, properly handling
366
+ # partial chunk writes.
355
367
  hw.store_feature_chunk(feat, data)
356
368
  # increment start
357
- start += chunk_size
369
+ start = stop
358
370
 
359
371
 
360
372
  def copy_metadata(h5_src: h5py.File,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcnum
3
- Version: 0.23.2
3
+ Version: 0.23.3
4
4
  Summary: numerics toolbox for imaging deformability cytometry
5
5
  Author: Maximilian Schlögel, Paul Müller, Raghava Alajangi
6
6
  Maintainer-email: Paul Müller <dev@craban.de>
@@ -84,6 +84,43 @@ def test_copy_features_error_type_group():
84
84
  )
85
85
 
86
86
 
87
+ @pytest.mark.parametrize("samples", [15, 89, 500, 714, 965, 1482])
88
+ def test_copy_features_large_dataset(samples):
89
+ """
90
+ Make sure mapping works properly for datasets that are larger
91
+ than the regular chunk size.
92
+ """
93
+ path_orig = retrieve_data(
94
+ "fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip")
95
+
96
+ # create large input file
97
+ path_large = path_orig.with_name("large.rtdc")
98
+ with write.HDF5Writer(path_large) as hw, read.HDF5Data(path_orig) as hd:
99
+ write.copy_metadata(h5_src=hd.h5, h5_dst=hw.h5)
100
+ image = hd.h5["events/image"][:]
101
+ iterations = (1000 // image.shape[0]) + 1
102
+ for ii in range(iterations):
103
+ hw.store_feature_chunk("image", image)
104
+ size = hw.h5["events/image"].shape[0]
105
+ hw.h5.attrs["experiment:event count"] = size
106
+ # 1000 should be big enough for chunk sizes of about 40
107
+ assert size > 1000, "sanity check"
108
+
109
+ # define output mapping
110
+ mapping = np.sort(np.random.randint(low=0, high=size, size=samples))
111
+ assert len(mapping) == samples
112
+
113
+ # now write to the output file
114
+ path_out = path_orig.with_name("output.rtdc")
115
+ with h5py.File(path_large) as hl, h5py.File(path_out, "a") as ho:
116
+ write.copy_features(h5_src=hl, h5_dst=ho,
117
+ features=["image"], mapping=mapping)
118
+
119
+ # make sure this worked
120
+ with h5py.File(path_large) as hl, h5py.File(path_out) as ho:
121
+ assert np.all(hl["events/image"][:][mapping] == ho["events/image"][:])
122
+
123
+
87
124
  def test_copy_metadata_empty_log_variable_length_string():
88
125
  path = retrieve_data(
89
126
  "fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes