dcnum 0.18.0__tar.gz → 0.19.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104)
  1. {dcnum-0.18.0 → dcnum-0.19.1}/CHANGELOG +6 -0
  2. {dcnum-0.18.0/src/dcnum.egg-info → dcnum-0.19.1}/PKG-INFO +1 -1
  3. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/_version.py +2 -2
  4. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_background/bg_roll_median.py +3 -2
  5. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/logic/ctrl.py +12 -19
  6. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/hdf5_data.py +44 -2
  7. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/mapped.py +1 -3
  8. {dcnum-0.18.0 → dcnum-0.19.1/src/dcnum.egg-info}/PKG-INFO +1 -1
  9. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_logic_pipeline.py +230 -24
  10. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_base.py +2 -2
  11. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_data.py +17 -2
  12. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_read_hdf5.py +22 -3
  13. {dcnum-0.18.0 → dcnum-0.19.1}/.github/workflows/check.yml +0 -0
  14. {dcnum-0.18.0 → dcnum-0.19.1}/.github/workflows/deploy_pypi.yml +0 -0
  15. {dcnum-0.18.0 → dcnum-0.19.1}/.gitignore +0 -0
  16. {dcnum-0.18.0 → dcnum-0.19.1}/.readthedocs.yml +0 -0
  17. {dcnum-0.18.0 → dcnum-0.19.1}/LICENSE +0 -0
  18. {dcnum-0.18.0 → dcnum-0.19.1}/README.rst +0 -0
  19. {dcnum-0.18.0 → dcnum-0.19.1}/docs/conf.py +0 -0
  20. {dcnum-0.18.0 → dcnum-0.19.1}/docs/extensions/github_changelog.py +0 -0
  21. {dcnum-0.18.0 → dcnum-0.19.1}/docs/index.rst +0 -0
  22. {dcnum-0.18.0 → dcnum-0.19.1}/docs/requirements.txt +0 -0
  23. {dcnum-0.18.0 → dcnum-0.19.1}/pyproject.toml +0 -0
  24. {dcnum-0.18.0 → dcnum-0.19.1}/setup.cfg +0 -0
  25. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/__init__.py +0 -0
  26. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/__init__.py +0 -0
  27. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/event_extractor_manager_thread.py +0 -0
  28. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_background/__init__.py +0 -0
  29. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_background/base.py +0 -0
  30. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_background/bg_copy.py +0 -0
  31. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_background/bg_sparse_median.py +0 -0
  32. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_brightness/__init__.py +0 -0
  33. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_brightness/bright_all.py +0 -0
  34. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_brightness/common.py +0 -0
  35. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_contour/__init__.py +0 -0
  36. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_contour/contour.py +0 -0
  37. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_contour/moments.py +0 -0
  38. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_contour/volume.py +0 -0
  39. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_texture/__init__.py +0 -0
  40. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_texture/common.py +0 -0
  41. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_texture/tex_all.py +0 -0
  42. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/gate.py +0 -0
  43. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/queue_event_extractor.py +0 -0
  44. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/logic/__init__.py +0 -0
  45. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/logic/job.py +0 -0
  46. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/logic/json_encoder.py +0 -0
  47. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/meta/__init__.py +0 -0
  48. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/meta/paths.py +0 -0
  49. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/meta/ppid.py +0 -0
  50. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/__init__.py +0 -0
  51. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/cache.py +0 -0
  52. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/const.py +0 -0
  53. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/segm/__init__.py +0 -0
  54. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/segm/segm_thresh.py +0 -0
  55. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/segm/segmenter.py +0 -0
  56. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/segm/segmenter_cpu.py +0 -0
  57. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/segm/segmenter_gpu.py +0 -0
  58. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/segm/segmenter_manager_thread.py +0 -0
  59. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/write/__init__.py +0 -0
  60. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/write/deque_writer_thread.py +0 -0
  61. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/write/queue_collector_thread.py +0 -0
  62. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/write/writer.py +0 -0
  63. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum.egg-info/SOURCES.txt +0 -0
  64. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum.egg-info/dependency_links.txt +0 -0
  65. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum.egg-info/requires.txt +0 -0
  66. {dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum.egg-info/top_level.txt +0 -0
  67. {dcnum-0.18.0 → dcnum-0.19.1}/tests/conftest.py +0 -0
  68. {dcnum-0.18.0 → dcnum-0.19.1}/tests/data/fmt-hdf5_cytoshot_extended-moments-features.zip +0 -0
  69. {dcnum-0.18.0 → dcnum-0.19.1}/tests/data/fmt-hdf5_cytoshot_full-features_2023.zip +0 -0
  70. {dcnum-0.18.0 → dcnum-0.19.1}/tests/data/fmt-hdf5_cytoshot_full-features_2024.zip +0 -0
  71. {dcnum-0.18.0 → dcnum-0.19.1}/tests/data/fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip +0 -0
  72. {dcnum-0.18.0 → dcnum-0.19.1}/tests/data/fmt-hdf5_shapein_empty.zip +0 -0
  73. {dcnum-0.18.0 → dcnum-0.19.1}/tests/data/fmt-hdf5_shapein_raw-with-variable-length-logs.zip +0 -0
  74. {dcnum-0.18.0 → dcnum-0.19.1}/tests/helper_methods.py +0 -0
  75. {dcnum-0.18.0 → dcnum-0.19.1}/tests/requirements.txt +0 -0
  76. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_background_base.py +0 -0
  77. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_background_bg_copy.py +0 -0
  78. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_background_bg_roll_median.py +0 -0
  79. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_background_bg_sparsemed.py +0 -0
  80. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_brightness.py +0 -0
  81. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_event_extractor_manager.py +0 -0
  82. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_gate.py +0 -0
  83. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_haralick.py +0 -0
  84. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_moments_based.py +0 -0
  85. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_moments_based_extended.py +0 -0
  86. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_feat_volume.py +0 -0
  87. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_init.py +0 -0
  88. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_logic_job.py +0 -0
  89. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_logic_join.py +0 -0
  90. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_logic_json.py +0 -0
  91. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_paths.py +0 -0
  92. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_bg.py +0 -0
  93. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_feat.py +0 -0
  94. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_gate.py +0 -0
  95. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_segm.py +0 -0
  96. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_read_basin.py +0 -0
  97. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_read_concat_hdf5.py +0 -0
  98. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_read_hdf5_index_mapping.py +0 -0
  99. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_segm_base.py +0 -0
  100. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_segm_no_mask_proc.py +0 -0
  101. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_segm_thresh.py +0 -0
  102. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_write_deque_writer_thread.py +0 -0
  103. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_write_queue_collector_thread.py +0 -0
  104. {dcnum-0.18.0 → dcnum-0.19.1}/tests/test_write_writer.py +0 -0
{dcnum-0.18.0 → dcnum-0.19.1}/CHANGELOG

@@ -1,3 +1,9 @@
+ 0.19.1
+ - enh: support steps when specifying data slices in `index_mapping`
+ 0.19.0
+ - enh: elevate `HDF5Data`s `index_mapping` to pipeline identifier status
+   (this changes the pipeline identifier)
+ - enh: improve sanity checks for `BackgroundRollMed`
  0.18.0
  - BREAKING CHANGE: mask postprocessing did a morphological opening instead
    of a morphological closing, failing to remove spurious noise
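The first entry above is the substance of the 0.19.1 patch release: `index_mapping` now also accepts slices with a step. A minimal sketch of what this enables, assuming an `.rtdc` input file that provides a `deform` feature (file name and feature are illustrative only):

    from dcnum import read

    # map only every second event between indices 1 and 10 into the pipeline
    with read.HDF5Data("input.rtdc", index_mapping=slice(1, 10, 2)) as hd:
        print(len(hd["deform"]))  # 5 mapped events (indices 1, 3, 5, 7, 9)
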
{dcnum-0.18.0/src/dcnum.egg-info → dcnum-0.19.1}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dcnum
- Version: 0.18.0
+ Version: 0.19.1
  Summary: numerics toolbox for imaging deformability cytometry
  Author: Maximilian Schlögel, Paul Müller
  Maintainer-email: Paul Müller <dev@craban.de>
{dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/_version.py

@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.18.0'
- __version_tuple__ = version_tuple = (0, 18, 0)
+ __version__ = version = '0.19.1'
+ __version_tuple__ = version_tuple = (0, 19, 1)
{dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/feat/feat_background/bg_roll_median.py

@@ -119,7 +119,7 @@ class BackgroundRollMed(Background):
  """Check user-defined properties of this class

  This method primarily exists so that the CLI knows which
- keyword arguements can be passed to this class.
+ keyword arguments can be passed to this class.

  Parameters
  ----------
@@ -132,7 +132,8 @@ class BackgroundRollMed(Background):
  `kernel_size` will not increase computation speed. Larger
  values lead to a higher memory consumption.
  """
- assert kernel_size > 0
+ assert kernel_size > 0, "kernel size must be positive number"
+ assert kernel_size % 2 == 0, "kernel size must be even number"
  assert batch_size > kernel_size

  def get_slices_for_batch(self, batch_index=0):
{dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/logic/ctrl.py

@@ -1,11 +1,9 @@
  import collections
  import datetime
- import hashlib
  import json
  import logging
  from logging.handlers import QueueListener
  import multiprocessing as mp
- import numbers
  import os
  import pathlib
  import platform
@@ -16,7 +14,6 @@ import traceback
  import uuid

  import h5py
- import numpy as np

  from ..feat.feat_background.base import get_available_background_methods
  from ..feat.queue_event_extractor import QueueEventExtractor
@@ -313,7 +310,17 @@ class DCNumJobRunner(threading.Thread):
  # Whether pipeline hash is invalid.
  ppid.compute_pipeline_hash(**datdict) != dathash
  # Whether the input file is the original output of the pipeline.
- or len(self.draw) != evyield)
+ or len(self.draw) != evyield
+ # If index mapping is defined, then we always redo the pipeline.
+ # If the pipeline hashes are identical and index mapping is not
+ # None, then both pipelines were done with index mapping.
+ # But applying the same pipeline with index mapping in series
+ # will lead to a different result in the second run (e.g. 1st
+ # pipeline run: take every 2nd event; 2nd pipeline run: take
+ # every second event -> results in every 4th event in output of
+ # second pipeline run).
+ or self.draw.index_mapping is not None
+ )
  # Do we have to recompute the background data? In addition to the
  # hash sanity check above, check the generation, input data,
  # and background pipeline identifiers.
@@ -387,21 +394,7 @@ class DCNumJobRunner(threading.Thread):
  hw.h5.attrs["pipeline:dcnum yield"] = self.event_count
  # index mapping information
  im = self.job.kwargs["data_kwargs"].get("index_mapping", None)
- if im is None:
- dim = "0"
- elif isinstance(im, numbers.Number):
- dim = f"{im}"
- elif isinstance(im, slice):
- dim = (f"{im.start if im.start is not None else 'n'}"
- + f"-{im.stop if im.stop is not None else 'n'}"
- + f"-{im.step if im.step is not None else 'n'}"
- )
- elif isinstance(im, (list, np.ndarray)):
- idhash = hashlib.md5(
- np.array(im, dtype=np.uint32).tobytes()).hexdigest()
- dim = f"h-{idhash[:8]}"
- else:
- dim = "unknown"
+ dim = HDF5Data.get_ppid_index_mapping(im)
  hw.h5.attrs["pipeline:dcnum mapping"] = dim
  # regular metadata
  hw.h5.attrs["experiment:event count"] = self.event_count
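The comment block added above explains why a non-None `index_mapping` always forces a fresh pipeline run: re-applying the same mapping to an already-mapped output compounds the selection. A short NumPy illustration of that compounding, independent of dcnum:

    import numpy as np

    events = np.arange(12)
    first_run = events[::2]      # 1st run: take every 2nd event -> [0, 2, 4, 6, 8, 10]
    second_run = first_run[::2]  # 2nd run, same mapping          -> [0, 4, 8]
    # chaining the identical mapping ends up selecting every 4th original event
    assert np.array_equal(second_run, events[::4])
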
{dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/hdf5_data.py

@@ -1,7 +1,9 @@
  from __future__ import annotations

+ import hashlib
  import io
  import json
+ import numbers
  import pathlib
  import tempfile
  from typing import Dict, BinaryIO, List
@@ -293,7 +295,9 @@ class HDF5Data:
  self.h5.close()

  def get_ppid(self):
- return self.get_ppid_from_ppkw({"pixel_size": self.pixel_size})
+ return self.get_ppid_from_ppkw(
+ {"pixel_size": self.pixel_size,
+ "index_mapping": self.index_mapping})

  @classmethod
  def get_ppid_code(cls):
@@ -304,10 +308,34 @@ class HDF5Data:
  # Data does not really fit into the PPID scheme we use for the rest
  # of the pipeline. This implementation here is custom.
  code = cls.get_ppid_code()
+ # pixel size
  ppid_ps = f"{kwargs['pixel_size']:.8f}".rstrip("0")
- kwid = "^".join([f"p={ppid_ps}"])
+ # index mapping
+ ppid_im = cls.get_ppid_index_mapping(kwargs.get("index_mapping", None))
+ kwid = "^".join([f"p={ppid_ps}", f"i={ppid_im}"])
  return ":".join([code, kwid])

+ @staticmethod
+ def get_ppid_index_mapping(index_mapping):
+ """Return the pipeline identifier part for index mapping"""
+ im = index_mapping
+ if im is None:
+ dim = "0"
+ elif isinstance(im, numbers.Integral):
+ dim = f"{im}"
+ elif isinstance(im, slice):
+ dim = (f"{im.start if im.start is not None else 'n'}"
+ + f"-{im.stop if im.stop is not None else 'n'}"
+ + f"-{im.step if im.step is not None else 'n'}"
+ )
+ elif isinstance(im, (list, np.ndarray)):
+ idhash = hashlib.md5(
+ np.array(im, dtype=np.uint32).tobytes()).hexdigest()
+ dim = f"h-{idhash[:8]}"
+ else:
+ dim = "unknown"
+ return dim
+
  @staticmethod
  def get_ppkw_from_ppid(dat_ppid):
  # Data does not fit in the PPID scheme we use, but we still
@@ -321,6 +349,20 @@ class HDF5Data:
  var, val = item.split("=")
  if var == "p":
  kwargs["pixel_size"] = float(val)
+ elif var == "i":
+ if val.startswith("h-") or val == "unknown":
+ raise ValueError(f"Cannot invert index mapping {val}")
+ elif val == "0":
+ kwargs["index_mapping"] = None
+ elif val.count("-"):
+ start, stop, step = val.split("-")
+ kwargs["index_mapping"] = slice(
+ None if start == "n" else int(start),
+ None if stop == "n" else int(stop),
+ None if step == "n" else int(step)
+ )
+ else:
+ kwargs["index_mapping"] = int(val)
  else:
  raise ValueError(f"Invalid parameter '{var}'!")
  return kwargs
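Together, `get_ppid_index_mapping` and the extended `get_ppkw_from_ppid` shown above form a round trip between `index_mapping` values and the new `i=` field of the data pipeline identifier. A minimal sketch based on the code above (import path as used in the package's test suite; hashed list mappings are the one case that cannot be inverted and raise `ValueError`):

    from dcnum.read import HDF5Data

    assert HDF5Data.get_ppid_index_mapping(None) == "0"
    assert HDF5Data.get_ppid_index_mapping(slice(10, 20, 2)) == "10-20-2"

    # decoding restores both keyword arguments from the identifier string
    kwargs = HDF5Data.get_ppkw_from_ppid("hdf:p=0.34^i=10-20-2")
    assert kwargs["pixel_size"] == 0.34
    assert kwargs["index_mapping"] == slice(10, 20, 2)
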
{dcnum-0.18.0 → dcnum-0.19.1}/src/dcnum/read/mapped.py

@@ -54,12 +54,10 @@ def _get_mapping_indices_cached(
  return np.arange(index_mapping)
  elif isinstance(index_mapping, tuple):
  im_slice = slice(*index_mapping)
- if im_slice.step is not None:
- raise NotImplementedError("Slices with step not implemented yet")
  if im_slice.stop is None or im_slice.start is None:
  raise NotImplementedError(
  "Slices must have start and stop defined")
- return np.arange(im_slice.start, im_slice.stop)
+ return np.arange(im_slice.start, im_slice.stop, im_slice.step)
  elif isinstance(index_mapping, list):
  return np.array(index_mapping, dtype=np.uint32)
  else:
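With the step restriction removed, a stepped slice now expands to the expected index array. A small sketch using `read.get_mapping_indices`, the public helper the old test suite called; accepting a bare slice and mirroring `np.arange` is assumed here:

    import numpy as np
    from dcnum import read

    indices = read.get_mapping_indices(slice(1, 10, 2))
    assert np.array_equal(indices, np.arange(1, 10, 2))  # [1, 3, 5, 7, 9]
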
{dcnum-0.18.0 → dcnum-0.19.1/src/dcnum.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dcnum
- Version: 0.18.0
+ Version: 0.19.1
  Summary: numerics toolbox for imaging deformability cytometry
  Author: Maximilian Schlögel, Paul Müller
  Maintainer-email: Paul Müller <dev@craban.de>
{dcnum-0.18.0 → dcnum-0.19.1}/tests/test_logic_pipeline.py

@@ -59,17 +59,12 @@ def test_chained_pipeline():
  == "sparsemed:k=250^s=1^t=0^f=0.8^o=1"


- @pytest.mark.parametrize("index_mapping,size,mapping_out", [
- (None, 395, "0"),
- (5, 11, "5"),
- (slice(3, 5, None), 6, "3-5-n"),
- ([3, 5, 6, 7], 7, "h-6e582938"),
- ])
- def test_duplicate_pipeline(index_mapping, size, mapping_out):
+ def test_duplicate_pipeline():
  """Test running the same pipeline twice

- When the pipeline is run on a file with the same pipeline
- identifier, data are just copied over. Nothing much fancy else.
+ When the pipeline is run on a file that has been run with the same
+ pipeline identifier, then we do not run the pipeline. Instead, we
+ copy the data from the first file.
  """
  path_orig = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
  path = path_orig.with_name("input.rtdc")
@@ -79,13 +74,12 @@ def test_duplicate_pipeline(index_mapping, size, mapping_out):
  job = logic.DCNumPipelineJob(
  path_in=path,
  path_out=path2,
- data_kwargs={"index_mapping": index_mapping},
  background_code="copy",
  segmenter_code="thresh",
  segmenter_kwargs={"thresh": -6,
  "kwargs_mask": {"closing_disk": 0}},
  debug=True)
- assert job.kwargs["data_kwargs"]["index_mapping"] == index_mapping
+ assert job.kwargs["data_kwargs"].get("index_mapping") is None

  # perform the initial pipeline
  with logic.DCNumJobRunner(job=job) as runner:
@@ -102,16 +96,12 @@ def test_duplicate_pipeline(index_mapping, size, mapping_out):

  # get the first image for reference
  with h5py.File(path) as h5:
- if index_mapping is None:
- idx0 = 0
- else:
- idx0 = read.get_mapping_indices(index_mapping)[0]
- im0 = h5["/events/image"][idx0]
+ im0 = h5["/events/image"][0]

  # remove all logs just to be sure nothing interferes
  with h5py.File(path2, "a") as h5:
- assert h5.attrs["pipeline:dcnum mapping"] == mapping_out
- assert len(h5["events/deform"]) == size
+ assert h5.attrs["pipeline:dcnum mapping"] == "0"
+ assert len(h5["events/deform"]) == 395
  del h5["logs"]

  # now when we do everything again, not a thing should be done
@@ -140,11 +130,227 @@ def test_duplicate_pipeline(index_mapping, size, mapping_out):
  assert "deform" in h5["events"]
  assert "image" in h5["events"]
  assert "image_bg" in h5["events"]
- assert len(h5["events/deform"]) == size
+ assert len(h5["events/deform"]) == 395
  assert h5.attrs["pipeline:dcnum mapping"] == "0"
  assert np.all(h5["events/image"][0] == im0)


+ def test_duplicate_pipeline_redo_index_mapping():
+ """Test running the same pipeline twice
+
+ When the pipeline is run on a file that has been run with the same
+ pipeline identifier, then we do not run the pipeline. Instead, we
+ copy the data from the first file.
+
+ However, if something is odd, such as index mapping defined in the
+ pipeline then redo the computations.
+ This is the purpose of this test.
+ """
+ path_orig = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
+ path = path_orig.with_name("input.rtdc")
+ path2 = path.with_name("path_intermediate.rtdc")
+ with read.concatenated_hdf5_data(5 * [path_orig], path_out=path):
+ pass
+ job = logic.DCNumPipelineJob(
+ path_in=path,
+ path_out=path2,
+ data_kwargs={"index_mapping": 10},
+ background_code="copy",
+ segmenter_code="thresh",
+ segmenter_kwargs={"thresh": -6,
+ "kwargs_mask": {"closing_disk": 0}},
+ debug=True)
+ assert job.kwargs["data_kwargs"].get("index_mapping") == 10
+
+ # perform the initial pipeline
+ with logic.DCNumJobRunner(job=job) as runner:
+ runner.run()
+ # Sanity checks for initial job
+ with read.HDF5Data(job["path_out"]) as hd:
+ # Check the logs
+ logdat = " ".join(get_log(hd, time.strftime("dcnum-log-%Y")))
+ assert "Starting background computation" in logdat
+ assert "Finished background computation" in logdat
+ assert "Starting segmentation and feature extraction" in logdat
+ assert "Flushing data to disk" in logdat
+ assert "Finished segmentation and feature extraction" in logdat
+
+ with h5py.File(path2, "a") as h5:
+ # sanity checks
+ assert h5.attrs["pipeline:dcnum mapping"] == "10"
+ assert len(h5["events/deform"]) == 24
+ assert h5.attrs["pipeline:dcnum yield"] == 24
+ # remove all logs just to be sure nothing interferes
+ del h5["logs"]
+ # Modify the yield, triggering a new pipeline run
+ h5.attrs["pipeline:dcnum yield"] = 111111
+
+ # now when we do everything again, not a thing should be done
+ job2 = logic.DCNumPipelineJob(
+ path_in=path2,
+ path_out=path2.with_name("final_out.rtdc"),
+ no_basins_in_output=True,
+ data_kwargs={"index_mapping": 10},
+ background_code="copy",
+ segmenter_code="thresh",
+ segmenter_kwargs={"thresh": -6,
+ "kwargs_mask": {"closing_disk": 0}},
+ debug=True)
+ with logic.DCNumJobRunner(job=job2) as runner2:
+ runner2.run()
+ # Real check for second run (not the `not`s [sic]!)
+ with read.HDF5Data(job2["path_out"]) as hd:
+ # Check the logs
+ logdat = " ".join(get_log(hd, time.strftime("dcnum-log-%Y")))
+ # Background computation is not repeated
+ assert "Starting background computation" not in logdat
+ assert "Finished background computation" not in logdat
+ # Segmentation is repeated
+ assert "Starting segmentation and feature extraction" in logdat
+ assert "Flushing data to disk" in logdat
+ assert "Finished segmentation and feature extraction" in logdat
+
+ with h5py.File(job2["path_out"]) as h5:
+ assert "deform" in h5["events"]
+ assert "image" in h5["events"]
+ assert "image_bg" in h5["events"]
+ # We have not 24 here, because the index mapping enumerates events,
+ # not frames.
+ assert len(h5["events/deform"]) == 11
+ assert h5.attrs["pipeline:dcnum mapping"] == "10"
+ assert h5.attrs["pipeline:dcnum yield"] == 11
+
+
+ def test_duplicate_pipeline_redo_yield():
+ """Test running the same pipeline twice
+
+ When the pipeline is run on a file that has been run with the same
+ pipeline identifier, then we do not run the pipeline. Instead, we
+ copy the data from the first file.
+
+ However, if something is odd, such as the yield of the pipeline not
+ matching the data in the output file, then redo the computations.
+ This is the purpose of this test.
+ """
+ path_orig = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
+ path = path_orig.with_name("input.rtdc")
+ path2 = path.with_name("path_intermediate.rtdc")
+ with read.concatenated_hdf5_data(5 * [path_orig], path_out=path):
+ pass
+ job = logic.DCNumPipelineJob(
+ path_in=path,
+ path_out=path2,
+ background_code="copy",
+ segmenter_code="thresh",
+ segmenter_kwargs={"thresh": -6,
+ "kwargs_mask": {"closing_disk": 0}},
+ debug=True)
+ assert job.kwargs["data_kwargs"].get("index_mapping") is None
+
+ # perform the initial pipeline
+ with logic.DCNumJobRunner(job=job) as runner:
+ runner.run()
+ # Sanity checks for initial job
+ with read.HDF5Data(job["path_out"]) as hd:
+ # Check the logs
+ logdat = " ".join(get_log(hd, time.strftime("dcnum-log-%Y")))
+ assert "Starting background computation" in logdat
+ assert "Finished background computation" in logdat
+ assert "Starting segmentation and feature extraction" in logdat
+ assert "Flushing data to disk" in logdat
+ assert "Finished segmentation and feature extraction" in logdat
+
+ with h5py.File(path2, "a") as h5:
+ # sanity checks
+ assert h5.attrs["pipeline:dcnum mapping"] == "0"
+ assert len(h5["events/deform"]) == 395
+ assert h5.attrs["pipeline:dcnum yield"] == 395
+ # remove all logs just to be sure nothing interferes
+ del h5["logs"]
+ # Modify the yield, triggering a new pipeline run
+ h5.attrs["pipeline:dcnum yield"] = 111111
+
+ # now when we do everything again, not a thing should be done
+ job2 = logic.DCNumPipelineJob(
+ path_in=path2,
+ path_out=path2.with_name("final_out.rtdc"),
+ no_basins_in_output=True,
+ background_code="copy",
+ segmenter_code="thresh",
+ segmenter_kwargs={"thresh": -6,
+ "kwargs_mask": {"closing_disk": 0}},
+ debug=True)
+ with logic.DCNumJobRunner(job=job2) as runner2:
+ runner2.run()
+ # Real check for second run (not the `not`s [sic]!)
+ with read.HDF5Data(job2["path_out"]) as hd:
+ # Check the logs
+ logdat = " ".join(get_log(hd, time.strftime("dcnum-log-%Y")))
+ # Background computation is not repeated
+ assert "Starting background computation" not in logdat
+ assert "Finished background computation" not in logdat
+ # Segmentation is repeated
+ assert "Starting segmentation and feature extraction" in logdat
+ assert "Flushing data to disk" in logdat
+ assert "Finished segmentation and feature extraction" in logdat
+
+ with h5py.File(job2["path_out"]) as h5:
+ assert "deform" in h5["events"]
+ assert "image" in h5["events"]
+ assert "image_bg" in h5["events"]
+ assert len(h5["events/deform"]) == 395
+ assert h5.attrs["pipeline:dcnum mapping"] == "0"
+ assert h5.attrs["pipeline:dcnum yield"] == 395
+
+
+ @pytest.mark.parametrize("index_mapping,size,mapping_out", [
+ (5, 11, "5"),
+ (slice(3, 5, None), 6, "3-5-n"),
+ ([3, 5, 6, 7], 7, "h-6e582938"),
+ ])
+ def test_index_mapping_pipeline(index_mapping, size, mapping_out):
+ """Test running the same pipeline twice
+
+ When the pipeline is run on a file with the same pipeline
+ identifier, data are just copied over. Nothing much fancy else.
+ """
+ path_orig = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
+ path = path_orig.with_name("input.rtdc")
+ path2 = path.with_name("path_intermediate.rtdc")
+ with read.concatenated_hdf5_data(5 * [path_orig], path_out=path):
+ pass
+ job = logic.DCNumPipelineJob(
+ path_in=path,
+ path_out=path2,
+ data_kwargs={"index_mapping": index_mapping},
+ background_code="copy",
+ segmenter_code="thresh",
+ segmenter_kwargs={"thresh": -6,
+ "kwargs_mask": {"closing_disk": 0}},
+ debug=True)
+ assert job.kwargs["data_kwargs"]["index_mapping"] == index_mapping
+
+ # perform the initial pipeline
+ with logic.DCNumJobRunner(job=job) as runner:
+ runner.run()
+ # Sanity checks for initial job
+ with read.HDF5Data(job["path_out"]) as hd:
+ # Check the logs
+ logdat = " ".join(get_log(hd, time.strftime("dcnum-log-%Y")))
+ assert "Starting background computation" in logdat
+ assert "Finished background computation" in logdat
+ assert "Starting segmentation and feature extraction" in logdat
+ assert "Flushing data to disk" in logdat
+ assert "Finished segmentation and feature extraction" in logdat
+
+ with h5py.File(job["path_out"]) as h5:
+ assert "deform" in h5["events"]
+ assert "image" in h5["events"]
+ assert "image_bg" in h5["events"]
+ assert len(h5["events/deform"]) == size
+ assert h5.attrs["pipeline:dcnum mapping"] == mapping_out
+
+
  def test_duplicate_transfer_basin_data():
  """task_transfer_basin_data should not copy basin data from input"""
  path_orig = retrieve_data("fmt-hdf5_cytoshot_full-features_2023.zip")
@@ -335,7 +541,7 @@ def test_simple_pipeline(debug):

  # this is the default pipeline
  gen_id = ppid.DCNUM_PPID_GENERATION
- dat_id = "hdf:p=0.2645"
+ dat_id = "hdf:p=0.2645^i=0"
  bg_id = "sparsemed:k=200^s=1^t=0^f=0.8^o=1"
  seg_id = "thresh:t=-6:cle=1^f=1^clo=0"
  feat_id = "legacy:b=1^h=1^v=1"
@@ -402,7 +608,7 @@ def test_simple_pipeline_no_offset_correction(debug):

  # this is the default pipeline
  gen_id = ppid.DCNUM_PPID_GENERATION
- dat_id = "hdf:p=0.2645"
+ dat_id = "hdf:p=0.2645^i=0"
  bg_id = "sparsemed:k=200^s=1^t=0^f=0.8^o=0"
  seg_id = "thresh:t=-6:cle=1^f=1^clo=0"
  feat_id = "legacy:b=1^h=1^v=1"
@@ -474,7 +680,7 @@ def test_simple_pipeline_in_thread():
  @pytest.mark.parametrize("attr,oldval,newbg", [
  # Changes that trigger computation of new background
  ["pipeline:dcnum generation", "1", True],
- ["pipeline:dcnum data", "hdf:p=0.2656", True],
+ ["pipeline:dcnum data", "hdf:p=0.2656^i=0", True],
  ["pipeline:dcnum background", "sparsemed:k=100^s=1^t=0^f=0.8^o=1", True],
  # Changes that don't trigger background computation
  ["pipeline:dcnum segmenter", "thresh:t=-1:cle=1^f=1^clo=2", False],
@@ -505,7 +711,7 @@ def test_recomputation_of_background_metadata_changed(attr, oldval, newbg):

  # Set the default values
  h5.attrs["pipeline:dcnum generation"] = ppid.DCNUM_PPID_GENERATION
- h5.attrs["pipeline:dcnum data"] = "hdf:p=0.2645"
+ h5.attrs["pipeline:dcnum data"] = "hdf:p=0.2645^i=0"
  h5.attrs["pipeline:dcnum background"] = \
  "sparsemed:k=200^s=1^t=0^f=0.8^o=1"
  h5.attrs["pipeline:dcnum segmenter"] = "thresh:t=-6:cle=1^f=1^clo=2"
@@ -553,7 +759,7 @@ def test_task_background():

  # this is the default pipeline
  gen_id = ppid.DCNUM_PPID_GENERATION
- dat_id = "hdf:p=0.2645"
+ dat_id = "hdf:p=0.2645^i=0"
  bg_id = "sparsemed:k=200^s=1^t=0^f=0.8^o=1"
  seg_id = "thresh:t=-6:cle=1^f=1^clo=2"
  feat_id = "legacy:b=1^h=1^v=1"
{dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_base.py

@@ -32,13 +32,13 @@ class ExampleClass:
  def test_compute_pipeline_hash():
  pp_hash = ppid.compute_pipeline_hash(
  gen_id="7",
- dat_id="hdf:p=0.34",
+ dat_id="hdf:p=0.34^i=0",
  bg_id="sparsemed:k=200^s=1^t=0^f=0.8^o=1",
  seg_id="thresh:t=-3:cle=1^f=1^clo=2",
  feat_id="legacy:b=1^h=0^v=1",
  gate_id="norm:o=0^s=11",
  )
- assert pp_hash == "2e56aa93fcb264381c90a8fd181b3fbc"
+ assert pp_hash == "4f3a850410b9801393ab5738afe69e9a"


  @pytest.mark.parametrize("in_list,out_list", [
{dcnum-0.18.0 → dcnum-0.19.1}/tests/test_meta_ppid_data.py

@@ -1,5 +1,7 @@
  from dcnum.read import HDF5Data

+ import pytest
+

  def test_ppid_decoding_dat_check_kwargs():
  dat_ppid = "hdf:p=0.2658"
@@ -7,17 +9,30 @@ def test_ppid_decoding_dat_check_kwargs():
  assert kwargs["pixel_size"] == 0.2658


+ @pytest.mark.parametrize("imppid,value", [
+ ["0", None],
+ ["10", 10],
+ ["10-20-n", slice(10, 20)],
+ ["10-20-2", slice(10, 20, 2)],
+ ["n-n-2", slice(None, None, 2)],
+ ])
+ def test_ppid_decoding_dat_check_kwargs_index_mapping(imppid, value):
+ dat_ppid = f"hdf:p=0.2658^i={imppid}"
+ kwargs = HDF5Data.get_ppkw_from_ppid(dat_ppid)
+ assert kwargs["index_mapping"] == value
+
+
  def test_ppid_encoding_dat_check_kwargs():
  kwargs = {"pixel_size": 0.34}
  ppid = HDF5Data.get_ppid_from_ppkw(kwargs)
- assert ppid == "hdf:p=0.34"
+ assert ppid == "hdf:p=0.34^i=0"


  def test_ppid_encoding_dat_check_kwargs_acc():
  # accuracy for pixel_size is 8 digits after the decimal point
  kwargs = {"pixel_size": 0.3400000036}
  ppid = HDF5Data.get_ppid_from_ppkw(kwargs)
- assert ppid == "hdf:p=0.34"
+ assert ppid == "hdf:p=0.34^i=0"


  def test_ppid_required_method_definitions():
{dcnum-0.18.0 → dcnum-0.19.1}/tests/test_read_hdf5.py

@@ -49,10 +49,10 @@ def test_get_ppid():
  "fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip")

  with read.HDF5Data(path) as hd:
- assert hd.get_ppid() == "hdf:p=0.2645"
+ assert hd.get_ppid() == "hdf:p=0.2645^i=0"

  with read.HDF5Data(path, pixel_size=0.49) as hd:
- assert hd.get_ppid() == "hdf:p=0.49"
+ assert hd.get_ppid() == "hdf:p=0.49^i=0"


  def test_get_ppkw_from_ppid_error_bad_code():
@@ -64,7 +64,7 @@ def test_get_ppkw_from_ppid_error_bad_code():
  def test_get_ppkw_from_ppid_error_bad_parameter():
  with pytest.raises(ValueError,
  match="Invalid parameter 'k'"):
- read.HDF5Data.get_ppkw_from_ppid("hdf:k=0.44")
+ read.HDF5Data.get_ppkw_from_ppid("hdf:k=0.44^i=0")


  def test_get_ppkw_from_ppid_pixel_size():
@@ -158,6 +158,25 @@ def test_image_cache_iter_chunks(size, chunks, tmp_path):
  assert list(hic.iter_chunks()) == list(range(chunks))


+ @pytest.mark.parametrize("index_mapping,result_data", [
+ [2, [0, 1]],
+ [slice(1, 10, 2), [1, 3, 5, 7, 9]],
+ [slice(1, 11, 3), [1, 4, 7, 10]],
+ [slice(10, 11), [10]],
+ [slice(1, 3, None), [1, 2]],
+ ])
+ def test_index_mapping(index_mapping, result_data):
+ path = retrieve_data(
+ "fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip")
+ with h5py.File(path, "a") as h5:
+ size = len(h5["events/image"])
+ assert size == 11
+ h5["events/temp"] = np.arange(size, dtype=np.float64)
+
+ with read.HDF5Data(path, index_mapping=index_mapping) as hd:
+ assert np.allclose(hd["temp"], result_data)
+
+
  def test_keyerror_when_image_is_none(tmp_path):
  path = tmp_path / "test.hdf5"
  with h5py.File(path, "w") as hw: