dcnum-0.17.0-py3-none-any.whl → dcnum-0.23.2-py3-none-any.whl
This diff shows the changes between the two publicly released package versions as they appear in their public registry.
- dcnum/_version.py +2 -2
- dcnum/feat/__init__.py +1 -1
- dcnum/feat/event_extractor_manager_thread.py +34 -25
- dcnum/feat/feat_background/base.py +22 -26
- dcnum/feat/feat_background/bg_copy.py +18 -12
- dcnum/feat/feat_background/bg_roll_median.py +20 -10
- dcnum/feat/feat_background/bg_sparse_median.py +55 -7
- dcnum/feat/feat_brightness/bright_all.py +41 -6
- dcnum/feat/feat_contour/__init__.py +4 -0
- dcnum/feat/{feat_moments/mt_legacy.py → feat_contour/moments.py} +32 -8
- dcnum/feat/feat_contour/volume.py +174 -0
- dcnum/feat/feat_texture/tex_all.py +28 -1
- dcnum/feat/gate.py +2 -2
- dcnum/feat/queue_event_extractor.py +30 -9
- dcnum/logic/ctrl.py +222 -48
- dcnum/logic/job.py +85 -2
- dcnum/logic/json_encoder.py +2 -0
- dcnum/meta/ppid.py +17 -3
- dcnum/read/__init__.py +1 -0
- dcnum/read/cache.py +100 -78
- dcnum/read/const.py +6 -4
- dcnum/read/hdf5_data.py +146 -23
- dcnum/read/mapped.py +87 -0
- dcnum/segm/__init__.py +6 -3
- dcnum/segm/segm_thresh.py +6 -18
- dcnum/segm/segm_torch/__init__.py +23 -0
- dcnum/segm/segm_torch/segm_torch_base.py +125 -0
- dcnum/segm/segm_torch/segm_torch_mpo.py +71 -0
- dcnum/segm/segm_torch/segm_torch_sto.py +88 -0
- dcnum/segm/segm_torch/torch_model.py +95 -0
- dcnum/segm/segm_torch/torch_postproc.py +93 -0
- dcnum/segm/segm_torch/torch_preproc.py +114 -0
- dcnum/segm/segmenter.py +181 -80
- dcnum/segm/segmenter_manager_thread.py +38 -30
- dcnum/segm/{segmenter_cpu.py → segmenter_mpo.py} +116 -44
- dcnum/segm/segmenter_sto.py +110 -0
- dcnum/write/__init__.py +2 -1
- dcnum/write/deque_writer_thread.py +9 -1
- dcnum/write/queue_collector_thread.py +8 -14
- dcnum/write/writer.py +128 -5
- {dcnum-0.17.0.dist-info → dcnum-0.23.2.dist-info}/METADATA +4 -2
- dcnum-0.23.2.dist-info/RECORD +55 -0
- {dcnum-0.17.0.dist-info → dcnum-0.23.2.dist-info}/WHEEL +1 -1
- dcnum/feat/feat_moments/__init__.py +0 -4
- dcnum/segm/segmenter_gpu.py +0 -64
- dcnum-0.17.0.dist-info/RECORD +0 -46
- /dcnum/feat/{feat_moments/ct_opencv.py → feat_contour/contour.py} +0 -0
- {dcnum-0.17.0.dist-info → dcnum-0.23.2.dist-info}/LICENSE +0 -0
- {dcnum-0.17.0.dist-info → dcnum-0.23.2.dist-info}/top_level.txt +0 -0
dcnum/logic/ctrl.py
CHANGED
@@ -1,5 +1,7 @@
 import collections
 import datetime
+import hashlib
+import importlib
 import json
 import logging
 from logging.handlers import QueueListener
@@ -14,6 +16,7 @@ import traceback
 import uuid
 
 import h5py
+import numpy as np
 
 from ..feat.feat_background.base import get_available_background_methods
 from ..feat.queue_event_extractor import QueueEventExtractor
@@ -21,10 +24,10 @@ from ..feat import gate
 from ..feat import EventExtractorManagerThread
 from ..segm import SegmenterManagerThread, get_available_segmenters
 from ..meta import ppid
-from ..read import HDF5Data
-from .._version import version_tuple
+from ..read import HDF5Data, get_mapping_indices
+from .._version import version, version_tuple
 from ..write import (
-    DequeWriterThread, HDF5Writer, QueueCollectorThread,
+    DequeWriterThread, HDF5Writer, QueueCollectorThread, copy_features,
     copy_metadata, create_with_basins, set_default_filter_kwargs
 )
 
@@ -43,6 +46,7 @@ valid_states = [
     "setup",
     "background",
     "segmentation",
+    "plumbing",
     "cleanup",
     "done",
     "error",
@@ -79,16 +83,16 @@ class DCNumJobRunner(threading.Thread):
         # current job state
         self._state = "init"
         # overall progress [0, 1]
-        self._progress_bg = None
-        self._progress_ex = None
+        self._progress_bg = None  # background
+        self._progress_ex = None  # segmentation
+        self._progress_bn = None  # creating basins
         # segmentation frame rate
         self._segm_rate = 0
 
         # Set up logging
         # General logger for this job
         self.main_logger = logging.getLogger("dcnum")
-        self.main_logger.setLevel(
-            logging.DEBUG if job["debug"] else logging.INFO)
+        self.main_logger.setLevel(job["log_level"])
         # Log file output in target directory
         self.path_log = job["path_out"].with_suffix(".log")
         self.path_log.parent.mkdir(exist_ok=True, parents=True)
@@ -237,8 +241,12 @@
         # how much fractional time each processing step takes.
         bgw = 4  # fraction of background
         exw = 27  # fraction of segmentation and feature extraction
+        if self.job["basin_strategy"] == "drain":
+            drw = 15  # because data need to be copied
+        else:
+            drw = 1  # just creating the basins in output file
         clw = 1  # fraction of cleanup operations
-        tot = bgw + exw + clw
+        tot = bgw + exw + drw + clw
         progress = 0
         st = self.state
 
@@ -247,15 +255,22 @@
             # background already computed
             progress += bgw / tot
         elif self._progress_bg is not None:
-            # This is the image count of the input dataset
-            progress +=
+            # This is the image count of the input dataset.
+            progress += self._progress_bg.value * bgw / tot
 
         # segmentation
         if valid_states.index(st) > valid_states.index("segmentation"):
             # segmentation already done
             progress += exw / tot
         elif self._progress_ex is not None:
-            progress += exw / tot
+            progress += self._progress_ex * exw / tot
+
+        # draining basins
+        if valid_states.index(st) > valid_states.index("plumbing"):
+            # plumbing already done
+            progress += drw / tot
+        if self._progress_bn is not None:
+            progress += self._progress_bn * drw / tot
 
         if self.state == "done":
             progress = 1
@@ -310,12 +325,23 @@
             # Whether pipeline hash is invalid.
             ppid.compute_pipeline_hash(**datdict) != dathash
             # Whether the input file is the original output of the pipeline.
-            or len(self.draw) != evyield
+            or len(self.draw) != evyield
+            # If index mapping is defined, then we always redo the pipeline.
+            # If the pipeline hashes are identical and index mapping is not
+            # None, then both pipelines were done with index mapping.
+            # But applying the same pipeline with index mapping in series
+            # will lead to a different result in the second run (e.g. 1st
+            # pipeline run: take every 2nd event; 2nd pipeline run: take
+            # every second event -> results in every 4th event in output of
+            # second pipeline run).
+            or self.draw.index_mapping is not None
+        )
         # Do we have to recompute the background data? In addition to the
         # hash sanity check above, check the generation, input data,
         # and background pipeline identifiers.
         redo_bg = (
-
+            "image_bg" not in self.draw
+            or (datdict["gen_id"] != self.ppdict["gen_id"])
            or (datdict["dat_id"] != self.ppdict["dat_id"])
            or (datdict["bg_id"] != self.ppdict["bg_id"]))
 
@@ -361,16 +387,20 @@
         # Note any new actions that work on `self.path_temp_in` are not
         # reflected in `self.path_temp_out`.
         self.path_temp_in.rename(self.path_temp_out)
-
-
-
-
-
-
-
-
-
-
+            # Since no segmentation was done, the output file now does not
+            # contain any events. This is not really what we wanted, but we
+            # can still store all features in the output file if required.
+            if self.job["basin_strategy"] == "drain":
+                orig_feats = []
+                for feat in self.draw.h5["events"].keys():
+                    if isinstance(self.draw.h5["events"][feat], h5py.Dataset):
+                        # copy_features does not support Groups
+                        orig_feats.append(feat)
+                with h5py.File(self.path_temp_out, "a") as h5_dst:
+                    copy_features(h5_src=self.draw.h5,
+                                  h5_dst=h5_dst,
+                                  features=orig_feats,
+                                  mapping=None)
 
         with HDF5Writer(self.path_temp_out) as hw:
             # pipeline metadata
@@ -382,6 +412,10 @@
             hw.h5.attrs["pipeline:dcnum gate"] = self.ppdict["gate_id"]
             hw.h5.attrs["pipeline:dcnum hash"] = self.pphash
             hw.h5.attrs["pipeline:dcnum yield"] = self.event_count
+            # index mapping information
+            im = self.job.kwargs["data_kwargs"].get("index_mapping", None)
+            dim = HDF5Data.get_ppid_index_mapping(im)
+            hw.h5.attrs["pipeline:dcnum mapping"] = dim
             # regular metadata
             hw.h5.attrs["experiment:event count"] = self.event_count
             hw.h5.attrs["imaging:pixel size"] = self.draw.pixel_size
@@ -397,6 +431,16 @@
                     "build": ", ".join(platform.python_build()),
                     "implementation":
                         platform.python_implementation(),
+                    "libraries": get_library_versions_dict([
+                        "cv2",
+                        "h5py",
+                        "mahotas",
+                        "numba",
+                        "numpy",
+                        "scipy",
+                        "skimage",
+                        "torch",
+                    ]),
                     "version": platform.python_version(),
                 },
                 "system": {
@@ -419,7 +463,8 @@
             with h5py.File(self.job["path_in"]) as h5_src:
                 copy_metadata(h5_src=h5_src,
                               h5_dst=hw.h5,
-                              #
+                              # Don't copy basins, we would have to index-map
+                              # them first.
                               copy_basins=False)
             if redo_seg:
                 # Store the correct measurement identifier. This is used to
@@ -429,13 +474,27 @@
                 # This is the identifier appendix that we use to identify this
                 # dataset. Note that we only override the run identifier when
                 # segmentation did actually take place.
-                mid_ap = "dcn-
-                # This is the current measurement identifier
-                mid_cur = hw.h5.attrs.get("experiment:run identifier"
+                mid_ap = f"dcn-{self.pphash[:7]}"
+                # This is the current measurement identifier
+                mid_cur = hw.h5.attrs.get("experiment:run identifier")
+                if not mid_cur:
+                    # Compute a measurement identifier from the metadata
+                    m_time = hw.h5.attrs.get("experiment:time", "none")
+                    m_date = hw.h5.attrs.get("experiment:date", "none")
+                    m_sid = hw.h5.attrs.get("setup:identifier", "none")
+                    hasher = hashlib.md5(
+                        f"{m_time}_{m_date}_{m_sid}".encode("utf-8"))
+                    mid_cur = str(uuid.UUID(hex=hasher.hexdigest()))
                 # The new measurement identifier is a combination of both.
                 mid_new = f"{mid_cur}_{mid_ap}" if mid_cur else mid_ap
                 hw.h5.attrs["experiment:run identifier"] = mid_new
 
+        # Handle basin data according to the user's request
+        self.state = "plumbing"
+        self.task_enforce_basin_strategy()
+
+        self.state = "cleanup"
+
         trun = datetime.timedelta(seconds=round(time.monotonic() - time_start))
         self.logger.info(f"Run duration: {str(trun)}")
         self.logger.info(time.strftime("Run stop: %Y-%m-%d-%H.%M.%S",
@@ -477,6 +536,115 @@
         bic.process()
         self.logger.info("Finished background computation")
 
+    def task_enforce_basin_strategy(self):
+        """Transfer basin data from input files to output if requested
+
+        The user specified the "basin_strategy" keyword argument in
+        `self.job`. If this is set to "drain", then copy all basin
+        information from the input file to the output file. If it
+        is set to "tap", then only create basins in the output file.
+        """
+        self._progress_bn = 0
+        t0 = time.perf_counter()
+        # We need to make sure that the features are correctly attributed
+        # from the input files. E.g. if the input file already has
+        # background images, but we recompute the background images, then
+        # we have to use the data from the recomputed background file.
+        # We achieve this by keeping a specific order and only copying those
+        # features that we don't already have in the output file.
+        feats_raw = [
+            # 1. background data from the temporary input image
+            #    (this must come before draw [sic!])
+            [self.dtin.h5, ["image_bg", "bg_off"], "critical"],
+            # 2. frame-based scalar features from the raw input file
+            #    (e.g. "temp" or "frame")
+            [self.draw.h5, self.draw.features_scalar_frame, "optional"],
+            # 3. image features from the input file
+            [self.draw.h5, ["image", "image_bg", "bg_off"], "optional"],
+        ]
+        with h5py.File(self.path_temp_out, "a") as hout:
+            hw = HDF5Writer(hout)
+            # First, we have to determine the basin mapping from input to
+            # output. This information is stored by the QueueCollectorThread
+            # in the "basinmap0" feature, ready to be used by us.
+            if "index_unmapped" in hout["events"]:
+                # The unmapped indices enumerate the events in the output file
+                # with indices from the mapped input file. E.g. if for the
+                # first image in the input file, two events are found and for
+                # the second image in the input file, three events are found,
+                # then this would contain [0, 0, 1, 1, 1, ...]. If the index
+                # mapping of the input file was set to slice(1, 100), then the
+                # first image would not be there, and we would have
+                # [1, 1, 1, ...].
+                idx_um = hout["events/index_unmapped"]
+
+                # If we want to convert this to an actual basinmap feature,
+                # then we have to convert those indices to indices that map
+                # to the original input HDF5 file.
+                raw_im = self.draw.index_mapping
+                if raw_im is None:
+                    self.logger.info("Input file mapped with basinmap0")
+                    # Create a hard link to save time and space
+                    hout["events/basinmap0"] = hout["events/index_unmapped"]
+                    basinmap = idx_um
+                else:
+                    basinmap = get_mapping_indices(raw_im)[idx_um]
+                    # Store the mapped basin data in the output file.
+                    hw.store_feature_chunk("basinmap0", basinmap)
+                # We don't need them anymore.
+                del hout["events/index_unmapped"]
+
+                # Note that `size_raw != (len(self.draw))` [sic!]. The former
+                # is the size of the raw dataset and the latter is its mapped
+                # size!
+                size_raw = self.draw.h5.attrs["experiment:event count"]
+                if (len(basinmap) == size_raw
+                        and np.all(basinmap == np.arange(size_raw))):
+                    # This means that the images in the input overlap perfectly
+                    # with the images in the output, i.e. a "copy" segmenter
+                    # was used or something is very reproducible.
+                    # We set basinmap to None to be more efficient.
+                    basinmap = None
+
+            else:
+                # The input is identical to the output, because we are using
+                # the same pipeline identifier.
+                basinmap = None
+
+            for hin, feats, importance in feats_raw:
+                # Only consider features that are available in the input
+                # and that are not already in the output.
+                feats = [f for f in feats
+                         if (f in hin["events"] and f not in hout["events"])]
+                if not feats:
+                    continue
+                elif (self.job["basin_strategy"] == "drain"
+                        or importance == "critical"):
+                    # DRAIN: Copy all features over to the output file.
+                    self.logger.debug(f"Transferring {feats} to output file")
+                    copy_features(h5_src=hin,
+                                  h5_dst=hout,
+                                  features=feats,
+                                  mapping=basinmap)
+                else:
+                    # TAP: Create basins for the "optional" features in the
+                    # output file. Note that the "critical" features never
+                    # reach this case.
+                    self.logger.debug(f"Creating basin for {feats}")
+                    # Relative and absolute paths.
+                    pin = pathlib.Path(hin.filename).resolve()
+                    pout = pathlib.Path(hout.filename).resolve().parent
+                    paths = [pin, os.path.relpath(pin, pout)]
+                    hw.store_basin(name="dcnum basin",
+                                   features=feats,
+                                   mapping=basinmap,
+                                   paths=paths,
+                                   description=f"Created with dcnum {version}",
+                                   )
+                self._progress_bn += 1 / len(feats_raw)
+        t_tot = time.perf_counter() - t0
+        self.logger.info(f"Enforcing basin strategy time: {t_tot:.1f}s")
+
     def task_segment_extract(self):
         self.logger.info("Starting segmentation and feature extraction")
         # Start writer thread
@@ -501,9 +669,9 @@
             num_slots = 1
             num_extractors = 1
             num_segmenters = 1
-        elif seg_cls.hardware_processor == "cpu":  #
+        elif seg_cls.hardware_processor == "cpu":  # MPO segmenter
             # We could in principle set the number of slots to one and
-            #
+            # have both number of extractors and number of segmenters set
             # to the total number of CPUs. However, we would need more RAM
             # (for caching the image data) and we also have more overhead.
             # Having two slots shared between all workers is more efficient.
@@ -511,24 +679,32 @@
             # Split segmentation and feature extraction workers evenly.
             num_extractors = self.job["num_procs"] // 2
             num_segmenters = self.job["num_procs"] - num_extractors
+            # leave one CPU for the writer and the remaining Threads
+            num_segmenters -= 1
         else:  # GPU segmenter
             num_slots = 3
             num_extractors = self.job["num_procs"]
+            # leave one CPU for the writer and the remaining Threads
+            num_extractors -= 1
             num_segmenters = 1
         num_extractors = max(1, num_extractors)
         num_segmenters = max(1, num_segmenters)
         self.job.kwargs["segmenter_kwargs"]["num_workers"] = num_segmenters
+        self.job.kwargs["segmenter_kwargs"]["debug"] = self.job["debug"]
+        slot_chunks = mp_spawn.Array("i", num_slots, lock=False)
+        slot_states = mp_spawn.Array("u", num_slots, lock=False)
 
-
-
+        self.logger.debug(f"Number of slots: {num_slots}")
+        self.logger.debug(f"Number of segmenters: {num_segmenters}")
+        self.logger.debug(f"Number of extractors: {num_extractors}")
 
-        # Initialize thread
+        # Initialize segmenter manager thread
         thr_segm = SegmenterManagerThread(
             segmenter=seg_cls(**self.job["segmenter_kwargs"]),
             image_data=imdat,
+            bg_off=self.dtin["bg_off"] if "bg_off" in self.dtin else None,
             slot_states=slot_states,
             slot_chunks=slot_chunks,
-            debug=self.job["debug"],
         )
         thr_segm.start()
 
@@ -538,7 +714,7 @@
             gate=gate.Gate(self.dtin, **self.job["gate_kwargs"]),
             num_extractors=num_extractors,
             log_queue=self.log_queue,
-            log_level=
+            log_level=self.logger.level,
         )
         fe_kwargs["extract_kwargs"] = self.job["feature_kwargs"]
 
@@ -614,20 +790,18 @@
 
         self.logger.info("Finished segmentation and feature extraction")
 
-
-
-
-
-
-
-
-
-
-
-
-
-                    dst_name=feat.encode(),
-                )
+
+def get_library_versions_dict(library_name_list):
+    version_dict = {}
+    for library_name in library_name_list:
+        try:
+            lib = importlib.import_module(library_name)
+        except BaseException:
+            version = None
+        else:
+            version = lib.__version__
+        version_dict[library_name] = version
+    return version_dict
 
 
 def join_thread_helper(thr, timeout, retries, logger, name):
dcnum/logic/job.py
CHANGED
@@ -1,9 +1,11 @@
 import collections
 import copy
 import inspect
+import logging
 import multiprocessing as mp
 import pathlib
-from typing import Dict
+from typing import Dict, Literal
+import warnings
 
 from ..feat import QueueEventExtractor
 from ..feat.feat_background.base import get_available_background_methods
@@ -27,10 +29,66 @@ class DCNumPipelineJob:
                  feature_kwargs: Dict = None,
                  gate_code: str = "norm",
                  gate_kwargs: Dict = None,
-
+                 basin_strategy: Literal["drain", "tap"] = "drain",
+                 no_basins_in_output: bool = None,
                  num_procs: int = None,
+                 log_level: int = logging.INFO,
                  debug: bool = False,
                  ):
+        """Pipeline job recipe
+
+        Parameters
+        ----------
+        path_in: pathlib.Path | str
+            input data path
+        path_out: pathlib.Path | str
+            output data path
+        data_code: str
+            code of input data reader to use
+        data_kwargs: dict
+            keyword arguments for data reader
+        background_code: str
+            code of background data computer to use
+        background_kwargs: dict
+            keyword arguments for background data computer
+        segmenter_code: str
+            code of segmenter to use
+        segmenter_kwargs: dict
+            keyword arguments for segmenter
+        feature_code: str
+            code of feature extractor
+        feature_kwargs: dict
+            keyword arguments for feature extractor
+        gate_code: str
+            code for gating/event filtering class
+        gate_kwargs: dict
+            keyword arguments for gating/event filtering class
+        basin_strategy: str
+            strategy on how to handle event data; In principle, not all
+            events have to be stored in the output file if basins are
+            defined, linking back to the original file.
+            - You can "drain" all basins which means that the output file
+              will contain all features, but will also be very big.
+            - You can "tap" the basins, including the input file, which means
+              that the output file will be comparatively small.
+        no_basins_in_output: bool
+            Deprecated
+        num_procs: int
+            Number of processes to use
+        log_level: int
+            Logging level to use.
+        debug: bool
+            Whether to set logging level to "DEBUG" and
+            use threads instead of processes
+        """
+        if no_basins_in_output is not None:
+            warnings.warn("The `no_basins_in_output` keyword argument is "
+                          "deprecated. Please use `basin_strategy` instead.")
+            if no_basins_in_output:
+                basin_strategy = "drain"
+            else:
+                basin_strategy = "tap"
+
         #: initialize keyword arguments for this job
         self.kwargs = {}
         spec = inspect.getfullargspec(DCNumPipelineJob.__init__)
@@ -51,6 +109,9 @@ class DCNumPipelineJob:
         if path_out is None:
             pin = pathlib.Path(path_in)
             path_out = pin.with_name(pin.stem + "_dcn.rtdc")
+        # Set logging level to DEBUG in debugging mode
+        if self.kwargs["debug"]:
+            self.kwargs["log_level"] = logging.DEBUG
         self.kwargs["path_out"] = pathlib.Path(path_out)
         # Set default mask kwargs for segmenter
         self.kwargs["segmenter_kwargs"].setdefault("kwargs_mask", {})
@@ -121,3 +182,25 @@
         if len(ret) == 1:
             ret = ret[0]
         return ret
+
+    def validate(self):
+        """Make sure the pipeline will run given the job kwargs
+
+        Returns
+        -------
+        True:
+            for testing convenience
+
+        Raises
+        ------
+        dcnum.segm.SegmenterNotApplicableError:
+            the segmenter is incompatible with the input path
+        """
+        # Check segmenter applicability
+        seg_cls = get_available_segmenters()[self.kwargs["segmenter_code"]]
+        with HDF5Data(self.kwargs["path_in"]) as hd:
+            seg_cls.validate_applicability(
+                segmenter_kwargs=self.kwargs["segmenter_kwargs"],
+                logs=hd.logs,
+                meta=hd.meta)
+        return True
dcnum/logic/json_encoder.py
CHANGED
@@ -13,5 +13,7 @@ class ExtendedJSONEncoder(json.JSONEncoder):
             return int(obj)
         elif isinstance(obj, np.bool_):
             return bool(obj)
+        elif isinstance(obj, slice):
+            return "PYTHON-SLICE", (obj.start, obj.stop, obj.step)
         # Let the base class default method raise the TypeError
         return json.JSONEncoder.default(self, obj)
dcnum/meta/ppid.py
CHANGED
@@ -10,7 +10,7 @@ import warnings
 
 #: Increment this string if there are breaking changes that make
 #: previous pipelines unreproducible.
-DCNUM_PPID_GENERATION = "
+DCNUM_PPID_GENERATION = "10"
 
 
 class ClassWithPPIDCapabilities(Protocol):
@@ -59,7 +59,9 @@ def convert_to_dtype(value, dtype):
 
 
 def get_class_method_info(class_obj: ClassWithPPIDCapabilities,
-                          static_kw_methods: List = None
+                          static_kw_methods: List = None,
+                          static_kw_defaults: Dict = None,
+                          ):
     """Return dictionary of class info with static keyword methods docs
 
     Parameters
@@ -69,7 +71,16 @@ def get_class_method_info(class_obj: ClassWithPPIDCapabilities,
     static_kw_methods: list of callable
         The methods to inspect; all kwargs-only keyword arguments
         are extracted.
+    static_kw_defaults: dict
+        If a key in this dictionary matches an item in `static_kw_methods`,
+        then these are the default values returned in the "defaults"
+        dictionary. This is used in cases where a base class does
+        implement some annotations, but the subclass does not actually
+        use them, because e.g. they are taken from a property such as is
+        the case for the mask postprocessing of segmenter classes.
     """
+    if static_kw_defaults is None:
+        static_kw_defaults = {}
     doc = class_obj.__doc__ or class_obj.__init__.__doc__
     info = {
         "code": class_obj.get_ppid_code(),
@@ -82,7 +93,10 @@ def get_class_method_info(class_obj: ClassWithPPIDCapabilities,
     for mm in static_kw_methods:
         meth = getattr(class_obj, mm)
         spec = inspect.getfullargspec(meth)
-
+        if mm_defaults := static_kw_defaults.get(mm):
+            defau[mm] = mm_defaults
+        else:
+            defau[mm] = spec.kwonlydefaults or {}
         annot[mm] = spec.annotations
     info["defaults"] = defau
     info["annotations"] = annot
dcnum/read/__init__.py
CHANGED