dclab 0.67.0__cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic. Click here for more details.

Files changed (142)
  1. dclab/__init__.py +41 -0
  2. dclab/_version.py +34 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +182 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cpython-314-darwin.so +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cpython-314-darwin.so +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cpython-314-darwin.so +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cpython-314-darwin.so +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +260 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde/__init__.py +1 -0
  73. dclab/kde/base.py +459 -0
  74. dclab/kde/contours.py +222 -0
  75. dclab/kde/methods.py +313 -0
  76. dclab/kde_contours.py +10 -0
  77. dclab/kde_methods.py +11 -0
  78. dclab/lme4/__init__.py +5 -0
  79. dclab/lme4/lme4_template.R +94 -0
  80. dclab/lme4/rsetup.py +204 -0
  81. dclab/lme4/wrapr.py +386 -0
  82. dclab/polygon_filter.py +398 -0
  83. dclab/rtdc_dataset/__init__.py +15 -0
  84. dclab/rtdc_dataset/check.py +902 -0
  85. dclab/rtdc_dataset/config.py +533 -0
  86. dclab/rtdc_dataset/copier.py +353 -0
  87. dclab/rtdc_dataset/core.py +896 -0
  88. dclab/rtdc_dataset/export.py +867 -0
  89. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  91. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  92. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  93. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  94. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  95. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  96. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  97. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  98. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  99. dclab/rtdc_dataset/feat_basin.py +762 -0
  100. dclab/rtdc_dataset/feat_temp.py +102 -0
  101. dclab/rtdc_dataset/filter.py +263 -0
  102. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  103. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  104. dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
  105. dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
  106. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  107. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  108. dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
  109. dclab/rtdc_dataset/fmt_dict.py +103 -0
  110. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  111. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  112. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  113. dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
  114. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  115. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  116. dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  118. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  119. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  120. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  121. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  122. dclab/rtdc_dataset/fmt_http.py +102 -0
  123. dclab/rtdc_dataset/fmt_s3.py +354 -0
  124. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  125. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  126. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  127. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  128. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  129. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  130. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  131. dclab/rtdc_dataset/load.py +77 -0
  132. dclab/rtdc_dataset/meta_table.py +25 -0
  133. dclab/rtdc_dataset/writer.py +1019 -0
  134. dclab/statistics.py +226 -0
  135. dclab/util.py +176 -0
  136. dclab/warn.py +15 -0
  137. dclab-0.67.0.dist-info/METADATA +153 -0
  138. dclab-0.67.0.dist-info/RECORD +142 -0
  139. dclab-0.67.0.dist-info/WHEEL +6 -0
  140. dclab-0.67.0.dist-info/entry_points.txt +8 -0
  141. dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
  142. dclab-0.67.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,223 @@
1
+ """Create .rtdc files with scalar-only features"""
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import pathlib
6
+ from typing import List
7
+ import warnings
8
+
9
+ import h5py
10
+ import hdf5plugin
11
+
12
+ from ..rtdc_dataset import (
13
+ fmt_hdf5, new_dataset, rtdc_copy, RTDCWriter, RTDCBase
14
+ )
15
+ from .. import util
16
+ from .._version import version
17
+
18
+ from . import common
19
+
20
+
21
def condense(
        path_in: str | pathlib.Path = None,
        path_out: str | pathlib.Path = None,
        ancillaries: bool = None,
        store_ancillary_features: bool = True,
        store_basin_features: bool = True,
        check_suffix: bool = True,
        ret_path: bool = False
        ):
    """Create a new dataset with all available scalar-only features

    In addition to the innate scalar features, fast-to-compute
    ancillary features and basin features (`features_loaded`) are
    written to the output file.

    Parameters
    ----------
    path_in: str or pathlib.Path
        file to compress
    path_out: str or pathlib
        output file path
    ancillaries: bool
        DEPRECATED, use `store_ancillary_features` instead
    store_ancillary_features: bool
        compute and store ancillary features in the output file
    store_basin_features: bool
        copy basin features from the input path to the output file;
        note that the basin information (including any internal
        basin dataset) is always copied over to the new dataset
    check_suffix: bool
        check suffixes for input and output files
    ret_path: bool
        whether to return the output path

    Returns
    -------
    path_out: pathlib.Path (optional)
        output path (with possibly corrected suffix)
    """
    if ancillaries is not None:
        warnings.warn("Please use `store_ancillary_features` instead of "
                      "`ancillaries`", DeprecationWarning)
        store_ancillary_features = ancillaries

    # Fall back to command-line arguments when paths were not given.
    if path_out is None or path_in is None:
        args = condense_parser().parse_args()
        path_in = args.input
        path_out = args.output
        store_ancillary_features = not args.no_ancillaries
        store_basin_features = not args.no_basins

    suffixes = [".rtdc", ".tdms"]
    if not check_suffix:
        # accept whatever suffix the input file happens to have
        suffixes.append(pathlib.Path(path_in).suffix)

    path_in, path_out, path_temp = common.setup_task_paths(
        path_in, path_out, allowed_input_suffixes=suffixes)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # `store_basin_features` is used during initialization (to avoid
        # conflicts with ancillary features) and again in the actual
        # condensing function, which determines the features to write.
        with new_dataset(path_in, enable_basins=store_basin_features) as ds, \
                h5py.File(path_temp, "w") as h5_cond:
            condense_dataset(ds=ds,
                             h5_cond=h5_cond,
                             store_ancillary_features=store_ancillary_features,
                             store_basin_features=store_basin_features,
                             warnings_list=caught)

    # Move the finished temporary file to the requested output path.
    path_temp.rename(path_out)
    if ret_path:
        return path_out
96
+
97
+
98
def condense_dataset(
        ds: RTDCBase,
        h5_cond: h5py.File,
        ancillaries: bool = None,
        store_ancillary_features: bool = True,
        store_basin_features: bool = True,
        warnings_list: List = None):
    """Condense a dataset using low-level HDF5 methods

    For ancillary and basin features, high-level dclab methods are used.

    Parameters
    ----------
    ds: RTDCBase
        input dataset to condense
    h5_cond: h5py.File
        output HDF5 file opened in write mode
    ancillaries: bool
        DEPRECATED, use `store_ancillary_features` instead
    store_ancillary_features: bool
        compute and store ancillary features in the output file
    store_basin_features: bool
        copy basin features from the input dataset to the output file
    warnings_list: list
        recorded warnings (e.g. from `warnings.catch_warnings(record=True)`)
        that are written to the "dclab-condense-warnings" log
    """
    if ancillaries is not None:
        warnings.warn("Please use `store_ancillary_features` instead of "
                      "`ancillaries`", DeprecationWarning)
        store_ancillary_features = ancillaries

    cmp_kw = hdf5plugin.Zstd(clevel=5)
    cmd_dict = {}

    # If we have an input HDF5 file, then we might readily copy most
    # of the features over using rtdc_copy. If we have a .tdms file,
    # then we have to go the long route.
    if isinstance(ds, fmt_hdf5.RTDC_HDF5):
        rtdc_copy(src_h5file=ds.h5file,
                  dst_h5file=h5_cond,
                  features="scalar",
                  include_basins=True,
                  include_logs=True,
                  include_tables=True,
                  meta_prefix="")

    h5_cond.require_group("logs")

    # scalar features
    feats_sc = ds.features_scalar
    # loaded (computationally cheap) scalar features
    feats_sc_loaded = [f for f in ds.features_loaded if f in feats_sc]
    # internal basin features that have already been copied with `rtdc_copy`
    feats_sc_basint = sorted(h5_cond.get("basin_events", {}).keys())
    # features that are excluded, because we already copied them
    feats_exclude = feats_sc_loaded + feats_sc_basint

    cmd_dict["features_original_innate"] = ds.features_innate

    features = set(feats_sc_loaded)
    if store_basin_features:
        feats_sc_basin = [f for f in ds.features_basin if
                          (f in feats_sc and f not in feats_exclude)]
        cmd_dict["features_basin"] = feats_sc_basin
        if feats_sc_basin:
            print(f"Using basin features {feats_sc_basin}")
            features |= set(feats_sc_basin)

    if store_ancillary_features:
        feats_sc_anc = [f for f in ds.features_ancillary if
                        (f in feats_sc and f not in feats_exclude)]
        cmd_dict["features_ancillary"] = feats_sc_anc
        if feats_sc_anc:
            features |= set(feats_sc_anc)
            print(f"Using ancillary features {feats_sc_anc}")

    # command log
    logs = {"dclab-condense": common.get_command_log(
        paths=[ds.path], custom_dict=cmd_dict)}

    # rename old dclab-condense logs
    # The hash is loop-invariant, so compute it once up-front (it is
    # cached anyway, but hoisting makes the intent explicit).
    md5_cfg = util.hashobj(ds.config)
    for l_key in ["dclab-condense", "dclab-condense-warnings"]:
        if l_key in h5_cond["logs"]:
            new_log_name = f"{l_key}_{md5_cfg}"
            if new_log_name not in h5_cond["logs"]:
                # If the user repeatedly condensed one file, then there is
                # no benefit in storing the log under a different name (the
                # metadata did not change). Only write the log if it does
                # not already exist.
                h5_cond["logs"][new_log_name] = h5_cond["logs"][l_key]
            del h5_cond["logs"][l_key]

    with RTDCWriter(h5_cond,
                    mode="append",
                    compression_kwargs=cmp_kw,
                    ) as hw:
        # Write all remaining scalar features to the file
        # (these are *all* scalar features in the case of .tdms data).
        for feat in features:
            if feat not in h5_cond["events"]:
                hw.store_feature(feat=feat, data=ds[feat])

        # collect warnings log
        if warnings_list:
            logs["dclab-condense-warnings"] = \
                common.assemble_warnings(warnings_list)

        # Write logs
        for name in logs:
            hw.store_log(name, logs[name])
196
+
197
+
198
def condense_parser():
    """Build and return the argument parser for ``dclab-condense``."""
    descr = ("Reduce an RT-DC measurement to its scalar-only features "
             "(i.e. without `contour`, `image`, `mask`, or `trace`). "
             "All available ancillary features are computed.")
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('input', metavar="INPUT", type=str,
                        help='Input path (.tdms or .rtdc file)')
    parser.add_argument('output', metavar="OUTPUT", type=str,
                        help='Output path (.rtdc file)')
    parser.add_argument('--no-ancillary-features',
                        dest='no_ancillaries',
                        action='store_true',
                        help=('Do not compute expensive ancillary features '
                              'such as volume'))
    parser.set_defaults(no_ancillaries=False)
    parser.add_argument('--no-basin-features',
                        dest='no_basins',
                        action='store_true',
                        help=('Do not store basin-based feature data from '
                              'the input file in the output file'))
    parser.set_defaults(no_basins=False)
    parser.add_argument('--version', action='version',
                        version=f'dclab-condense {version}')
    return parser
dclab/cli/task_join.py ADDED
@@ -0,0 +1,229 @@
1
+ """Concatenate .rtdc files"""
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import pathlib
6
+ import time
7
+ from typing import Dict, List
8
+ import warnings
9
+
10
+ import hdf5plugin
11
+ import numpy as np
12
+
13
+ from ..rtdc_dataset import new_dataset, RTDCWriter
14
+ from .. import definitions as dfn
15
+ from .._version import version
16
+
17
+ from . import common
18
+
19
+
20
class FeatureSetNotIdenticalJoinWarning(UserWarning):
    """Emitted during join when input files do not share a feature set."""
22
+
23
+
24
def join(
        paths_in: List[str | pathlib.Path] = None,
        path_out: str | pathlib.Path = None,
        metadata: Dict = None,
        ret_path: bool = False,
        ):
    """Join multiple RT-DC measurements into a single .rtdc file

    Parameters
    ----------
    paths_in: list of paths
        input paths to join
    path_out: str or pathlib.Path
        output path
    metadata: dict
        optional metadata dictionary (configuration dict) to store
        in the output file
    ret_path: bool
        whether to return the output path

    Returns
    -------
    path_out: pathlib.Path (optional)
        output path (with corrected path suffix if applicable)

    Notes
    -----
    The first input file defines the metadata written to the output
    file. Only features that are present in all input files are written
    to the output file.
    """
    cmp_kw = hdf5plugin.Zstd(clevel=5)
    if metadata is None:
        metadata = {"experiment": {"run index": 1}}
    if path_out is None or paths_in is None:
        parser = join_parser()
        args = parser.parse_args()
        paths_in = args.input
        path_out = args.output

    if len(paths_in) < 2:
        raise ValueError("At least two input files must be specified!")

    paths_in, path_out, path_temp = common.setup_task_paths(
        paths_in, path_out, allowed_input_suffixes=[".rtdc", ".tdms"])

    # Order input files by date
    key_paths = []
    for pp in paths_in:
        with new_dataset(pp) as dsa:
            # sorting key: date, time, and run index of the experiment
            key = "_".join([dsa.config["experiment"]["date"],
                            dsa.config["experiment"]["time"],
                            str(dsa.config["experiment"]["run index"])
                            ])
            key_paths.append((key, pp))
    sorted_paths = [p[1] for p in sorted(key_paths, key=lambda x: x[0])]

    logs = {"dclab-join": common.get_command_log(paths=sorted_paths)}

    # Determine temporal offsets
    t_offsets = np.zeros(len(sorted_paths), dtype=np.float64)
    for ii, pp in enumerate(sorted_paths):
        with new_dataset(pp) as dsb:
            etime = dsb.config["experiment"]["time"]
            st = time.strptime(dsb.config["experiment"]["date"]
                               + etime[:8],
                               "%Y-%m-%d%H:%M:%S")
            t_offsets[ii] = time.mktime(st)
            if len(etime) > 8:
                # floating point time stored as well (HH:MM:SS.SS)
                t_offsets[ii] += float(etime[8:])
    t_offsets -= t_offsets[0]

    # Determine features to export (based on first file)
    with warnings.catch_warnings(record=True) as w:
        # Catch all FeatureSetNotIdenticalJoinWarnings
        warnings.simplefilter("ignore")
        warnings.simplefilter("always",
                              category=FeatureSetNotIdenticalJoinWarning)
        features = None
        for pp in sorted_paths:
            with new_dataset(pp) as dsc:
                # features present
                if features is None:
                    # The initial features are the innate features of the
                    # first file (sorted by time). If we didn't use the innate
                    # features, then the resulting file might become large
                    # (e.g. if we included ancillary features).
                    features = sorted(dsc.features_innate)
                else:
                    # Remove features from the feature list, if it is not in
                    # this dataset, or cannot be computed on-the-fly.
                    # BUGFIX: iterate over a snapshot of the list; removing
                    # an element while iterating the same list skips the
                    # element that follows each removed feature, which could
                    # leave missing features in the export.
                    for feat in list(features):
                        if feat not in dsc.features:
                            features.remove(feat)
                            warnings.warn(
                                f"Excluding feature '{feat}', because "
                                + f"it is not present in '{pp}'!",
                                FeatureSetNotIdenticalJoinWarning)
                    # Warn the user if this dataset has an innate feature that
                    # is being ignored, because it is not an innate feature of
                    # the first dataset.
                    for feat in dsc.features_innate:
                        if feat not in features:
                            warnings.warn(
                                f"Ignoring feature '{feat}' in '{pp}', "
                                + "because it is not present in the "
                                + "other files being joined!",
                                FeatureSetNotIdenticalJoinWarning)
    if w:
        logs["dclab-join-feature-warnings"] = common.assemble_warnings(w)

    # Create initial output file
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        with new_dataset(sorted_paths[0]) as ds0:
            ds0.export.hdf5(path=path_temp,
                            features=features,
                            filtered=False,
                            override=True,
                            logs=True,
                            tables=True,
                            basins=False,
                            meta_prefix="src-#1_",
                            compression_kwargs=cmp_kw)
            # store configuration
            cfg0 = ds0.config.tostring(
                sections=dfn.CFG_METADATA).split("\n")
    if w:
        logs["dclab-join-warnings-#1"] = common.assemble_warnings(w)

    with RTDCWriter(path_temp, compression_kwargs=cmp_kw) as hw:
        # store configuration of first dataset
        hw.store_log(name="src-#1_cfg", lines=cfg0)
        ii = 1
        # Append data from other files
        for pi, ti in zip(sorted_paths[1:], t_offsets[1:]):
            ii += 1  # we start with the second dataset
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter("always")
                meta_key = f"src-#{ii}"
                meta_prefix = meta_key + "_"
                with new_dataset(pi) as dsi:
                    for feat in features:
                        if feat == "time":
                            # handle time offset
                            fdata = dsi["time"] + ti
                        elif feat == "frame":
                            # handle frame offset
                            fr = dsi.config["imaging"]["frame rate"]
                            fdata = (np.array(dsi["frame"], dtype=np.uint64)
                                     + np.uint64(round(ti * fr)))
                        elif feat == "index_online":
                            if "events/index_online" in hw.h5file:
                                # index_online is usually larger than index
                                ido0 = hw.h5file["events/index_online"][-1] + 1
                            else:
                                ido0 = 0
                            fdata = dsi["index_online"] + ido0
                        else:
                            fdata = dsi[feat]
                        hw.store_feature(feat=feat, data=fdata)
                    # store logs
                    for log in dsi.logs:
                        hw.store_log(name=meta_prefix + log,
                                     lines=dsi.logs[log])
                    # store tables
                    for tab in dsi.tables:
                        hw.store_table(name=meta_prefix + tab,
                                       cmp_array=dsi.tables[tab])
                    # store configuration
                    cfg = dsi.config.tostring(
                        sections=dfn.CFG_METADATA).split("\n")
                    hw.store_log(name=f"{meta_key}_cfg",
                                 lines=cfg)
            if w:
                hw.store_log(name=f"dclab-join-warnings-#{ii}",
                             lines=common.assemble_warnings(w))

        # Write logs and missing meta data
        for name in logs:
            hw.store_log(name, logs[name])
        hw.store_metadata(metadata)

    # Finally, rename temp to out
    path_temp.rename(path_out)
    if ret_path:
        return path_out
213
+
214
+
215
def join_parser():
    """Build and return the argument parser for ``dclab-join``."""
    descr = ("Join two or more RT-DC measurements. This will produce "
             "one larger .rtdc file. The meta data of the dataset "
             "that was recorded earliest will be used in the output "
             "file. Please only join datasets that were recorded "
             "in the same measurement run.")
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('input', metavar="INPUT", nargs="*", type=str,
                        help='Input paths (.tdms or .rtdc files)')
    required_named = parser.add_argument_group('required named arguments')
    required_named.add_argument('-o', '--output', metavar="OUTPUT", type=str,
                                help='Output path (.rtdc file)',
                                required=True)
    parser.add_argument('--version', action='version',
                        version=f'dclab-join {version}')
    return parser
@@ -0,0 +1,98 @@
1
+ """Repack (similar to h5repack) .rtdc files"""
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import pathlib
6
+
7
+ import h5py
8
+
9
+ from ..rtdc_dataset import rtdc_copy
10
+ from .._version import version
11
+
12
+ from . import common
13
+
14
+
15
+ def repack(
16
+ path_in: str | pathlib.Path = None,
17
+ path_out: str | pathlib.Path = None,
18
+ strip_basins: bool = False,
19
+ strip_logs: bool = False,
20
+ check_suffix: bool = True,
21
+ ret_path: bool = False,
22
+ ):
23
+ """Repack/recreate an .rtdc file, optionally stripping the logs
24
+
25
+ Parameters
26
+ ----------
27
+ path_in: str or pathlib.Path
28
+ file to compress
29
+ path_out: str or pathlib
30
+ output file path
31
+ strip_basins: bool
32
+ do not write basin information to the output file
33
+ strip_logs: bool
34
+ do not write logs to the output file
35
+ check_suffix: bool
36
+ check suffixes for input and output files
37
+ ret_path: bool
38
+ whether to return the output path
39
+
40
+ Returns
41
+ -------
42
+ path_out: pathlib.Path
43
+ output path (with possibly corrected suffix)
44
+ """
45
+ if path_in is None and path_out is None:
46
+ parser = repack_parser()
47
+ args = parser.parse_args()
48
+ path_in = args.input
49
+ path_out = args.output
50
+ strip_basins = args.strip_basins
51
+ strip_logs = args.strip_logs
52
+
53
+ allowed_input_suffixes = [".rtdc"]
54
+ if not check_suffix:
55
+ allowed_input_suffixes.append(pathlib.Path(path_in).suffix)
56
+
57
+ path_in, path_out, path_temp = common.setup_task_paths(
58
+ path_in, path_out, allowed_input_suffixes=allowed_input_suffixes)
59
+
60
+ with h5py.File(path_in) as h5, h5py.File(path_temp, "w") as hc:
61
+ rtdc_copy(src_h5file=h5,
62
+ dst_h5file=hc,
63
+ features="all",
64
+ include_basins=not strip_basins,
65
+ include_logs=not strip_logs,
66
+ include_tables=True,
67
+ meta_prefix="")
68
+
69
+ # Finally, rename temp to out
70
+ path_temp.rename(path_out)
71
+ if ret_path:
72
+ return path_out
73
+
74
+
75
+ def repack_parser():
76
+ descr = "Repack an .rtdc file. The difference to dclab-compress " \
77
+ + "is that no logs are added. Other logs can optionally be " \
78
+ + "stripped away. Repacking also gets rid of old clutter " \
79
+ + "data (e.g. previous metadata stored in the HDF5 file)."
80
+ parser = argparse.ArgumentParser(description=descr)
81
+ parser.add_argument('input', metavar="INPUT", type=str,
82
+ help='Input path (.rtdc file)')
83
+ parser.add_argument('output', metavar="OUTPUT", type=str,
84
+ help='Output path (.rtdc file)')
85
+ parser.add_argument('--strip-basins',
86
+ dest='strip_basins',
87
+ action='store_true',
88
+ help='Do not copy any basin information to the '
89
+ 'output file.')
90
+ parser.set_defaults(strip_basins=False)
91
+ parser.add_argument('--strip-logs',
92
+ dest='strip_logs',
93
+ action='store_true',
94
+ help='Do not copy any logs to the output file.')
95
+ parser.set_defaults(strip_logs=False)
96
+ parser.add_argument('--version', action='version',
97
+ version=f'dclab-repack {version}')
98
+ return parser