dclab 0.67.0__cp314-cp314-macosx_11_0_arm64.whl

This diff shows the content of a publicly available package version released to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.

Potentially problematic release: this version of dclab might be problematic.

Files changed (142)
  1. dclab/__init__.py +41 -0
  2. dclab/_version.py +34 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +182 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cpython-314-darwin.so +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cpython-314-darwin.so +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cpython-314-darwin.so +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cpython-314-darwin.so +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +260 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde/__init__.py +1 -0
  73. dclab/kde/base.py +459 -0
  74. dclab/kde/contours.py +222 -0
  75. dclab/kde/methods.py +313 -0
  76. dclab/kde_contours.py +10 -0
  77. dclab/kde_methods.py +11 -0
  78. dclab/lme4/__init__.py +5 -0
  79. dclab/lme4/lme4_template.R +94 -0
  80. dclab/lme4/rsetup.py +204 -0
  81. dclab/lme4/wrapr.py +386 -0
  82. dclab/polygon_filter.py +398 -0
  83. dclab/rtdc_dataset/__init__.py +15 -0
  84. dclab/rtdc_dataset/check.py +902 -0
  85. dclab/rtdc_dataset/config.py +533 -0
  86. dclab/rtdc_dataset/copier.py +353 -0
  87. dclab/rtdc_dataset/core.py +896 -0
  88. dclab/rtdc_dataset/export.py +867 -0
  89. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  91. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  92. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  93. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  94. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  95. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  96. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  97. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  98. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  99. dclab/rtdc_dataset/feat_basin.py +762 -0
  100. dclab/rtdc_dataset/feat_temp.py +102 -0
  101. dclab/rtdc_dataset/filter.py +263 -0
  102. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  103. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  104. dclab/rtdc_dataset/fmt_dcor/api.py +173 -0
  105. dclab/rtdc_dataset/fmt_dcor/base.py +299 -0
  106. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  107. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  108. dclab/rtdc_dataset/fmt_dcor/tables.py +66 -0
  109. dclab/rtdc_dataset/fmt_dict.py +103 -0
  110. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  111. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  112. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  113. dclab/rtdc_dataset/fmt_hdf5/events.py +276 -0
  114. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  115. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  116. dclab/rtdc_dataset/fmt_hdf5/tables.py +60 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  118. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  119. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  120. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  121. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  122. dclab/rtdc_dataset/fmt_http.py +102 -0
  123. dclab/rtdc_dataset/fmt_s3.py +354 -0
  124. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  125. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  126. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  127. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  128. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  129. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  130. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  131. dclab/rtdc_dataset/load.py +77 -0
  132. dclab/rtdc_dataset/meta_table.py +25 -0
  133. dclab/rtdc_dataset/writer.py +1019 -0
  134. dclab/statistics.py +226 -0
  135. dclab/util.py +176 -0
  136. dclab/warn.py +15 -0
  137. dclab-0.67.0.dist-info/METADATA +153 -0
  138. dclab-0.67.0.dist-info/RECORD +142 -0
  139. dclab-0.67.0.dist-info/WHEEL +6 -0
  140. dclab-0.67.0.dist-info/entry_points.txt +8 -0
  141. dclab-0.67.0.dist-info/licenses/LICENSE +283 -0
  142. dclab-0.67.0.dist-info/top_level.txt +1 -0
dclab/__init__.py ADDED
@@ -0,0 +1,41 @@
+ """Core tools for the analysis of deformability cytometry datasets
+
+ Copyright (C) 2015 Paul Müller
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ """
+ # flake8: noqa: F401
+ from . import definitions as dfn
+ from . import features
+ from . import isoelastics
+ from . import lme4
+ from .polygon_filter import PolygonFilter
+ from . import rtdc_dataset
+ from .rtdc_dataset import new_dataset, IntegrityChecker, RTDCWriter
+ from .rtdc_dataset.feat_temp import (
+     register_temporary_feature, set_temporary_feature)
+ from .rtdc_dataset.feat_anc_ml import (
+     MachineLearningFeature, load_modc, load_ml_feature, save_modc)
+ from .rtdc_dataset.feat_anc_plugin.plugin_feature import (
+     PlugInFeature, load_plugin_feature)
+ from . import statistics
+ from . import util
+
+ from ._version import __version__, __version_tuple__
+
+
+ # Lazy-load deprecated kde modules
+ kde_contours = util.LazyLoader("dclab.kde_contours")
+ kde_methods = util.LazyLoader("dclab.kde_methods")
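
The names re-exported here form dclab's public entry points. A minimal usage sketch based only on those names; the file name "example.rtdc" is a placeholder, not part of this package:

import dclab

ds = dclab.new_dataset("example.rtdc")   # open an .rtdc file (placeholder path)
print(dclab.__version__)                 # version string from dclab._version
print(len(ds), ds.features)              # event count and available features
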
dclab/_version.py ADDED
@@ -0,0 +1,34 @@
+ # file generated by setuptools-scm
+ # don't change, don't track in version control
+
+ __all__ = [
+     "__version__",
+     "__version_tuple__",
+     "version",
+     "version_tuple",
+     "__commit_id__",
+     "commit_id",
+ ]
+
+ TYPE_CHECKING = False
+ if TYPE_CHECKING:
+     from typing import Tuple
+     from typing import Union
+
+     VERSION_TUPLE = Tuple[Union[int, str], ...]
+     COMMIT_ID = Union[str, None]
+ else:
+     VERSION_TUPLE = object
+     COMMIT_ID = object
+
+ version: str
+ __version__: str
+ __version_tuple__: VERSION_TUPLE
+ version_tuple: VERSION_TUPLE
+ commit_id: COMMIT_ID
+ __commit_id__: COMMIT_ID
+
+ __version__ = version = '0.67.0'
+ __version_tuple__ = version_tuple = (0, 67, 0)
+
+ __commit_id__ = commit_id = 'g52c420117'
dclab/cached.py ADDED
@@ -0,0 +1,97 @@
+ """Cache for fast "recomputation"
+ """
+ import functools
+ import gc
+ import hashlib
+
+ import numpy as np
+
+
+ MAX_SIZE = 100
+
+
+ class Cache:
+     _cache = {}
+     _keys = []
+
+     def __init__(self, func):
+         """
+         A cache that can be used to decorate methods that accept
+         numpy ndarrays as arguments.
+
+         - cache is based on dictionary
+         - md5 hashes of method arguments are the dictionary keys
+         - applicable decorator for all methods in a module
+         - applicable to methods with the same name in different
+           source files
+         - set cache size with `cached.MAX_SIZE`
+         - only one global cache is generated, there are no instances
+           of `Cache`
+
+         Notes
+         -----
+         If you are using other decorators with this decorator, please
+         make sure to apply the `Cache` first (first line before method
+         definition). This wrapper uses name, doc, and filename of the
+         method to identify it. If another wrapper does not implement
+         a unique `__doc__` and is applied to multiple methods, then
+         `Cached` might return values of the wrong method.
+         """
+         self.func = func
+         functools.update_wrapper(self, func)
+
+     def __call__(self, *args, **kwargs):
+         self.ahash = hashlib.md5()
+
+         # hash arguments
+         for arg in args:
+             self._update_hash(arg)
+
+         # hash keyword arguments
+         kwds = list(kwargs.keys())
+         kwds.sort()
+         for k in kwds:
+             self._update_hash(k)
+             self._update_hash(kwargs[k])
+
+         # make sure we are caching for the correct method
+         self._update_hash(self.func.__name__)
+         self._update_hash(self.func.__doc__)
+         self._update_hash(self.func.__code__.co_filename)
+
+         ref = self.ahash.hexdigest()
+
+         if ref in Cache._cache:
+             return Cache._cache[ref]
+         else:
+             data = self.func(*args, **kwargs)
+             Cache._cache[ref] = data
+             Cache._keys.append(ref)
+             if len(Cache._keys) > MAX_SIZE:
+                 delref = Cache._keys.pop(0)
+                 Cache._cache.pop(delref)
+             return data
+
+     def _update_hash(self, arg):
+         """Takes an argument and updates the hash.
+         The argument can be an np.array, string, or list
+         of things that are convertable to strings.
+         """
+         if isinstance(arg, np.ndarray):
+             self.ahash.update(arg.view(np.uint8))
+         elif isinstance(arg, list):
+             [self._update_hash(a) for a in arg]
+         else:
+             self.ahash.update(str(arg).encode('utf-8'))
+
+     @staticmethod
+     def clear_cache():
+         """Remove all cached objects"""
+         del Cache._keys
+         for k in list(Cache._cache.keys()):
+             it = Cache._cache.pop(k)
+             del it
+         del Cache._cache
+         Cache._keys = []
+         Cache._cache = {}
+         gc.collect()
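
As the docstring above describes, `Cache` keys results on an md5 digest of the arguments plus the wrapped function's name, docstring, and source file. A minimal sketch of decorating a function with it; `compute_deformation` and the formula are illustrative only, not part of dclab:

import numpy as np
from dclab.cached import Cache

@Cache
def compute_deformation(area, perimeter):
    # hypothetical expensive computation; the result is cached by argument hash
    return 1 - 2 * np.sqrt(np.pi * area) / perimeter

a = np.linspace(10, 100, 1000)
p = np.linspace(20, 200, 1000)
d1 = compute_deformation(a, p)   # computed and stored in the global cache
d2 = compute_deformation(a, p)   # identical arguments -> returned from cache
assert d1 is d2
Cache.clear_cache()              # drop all cached results
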
dclab/cli/__init__.py ADDED
@@ -0,0 +1,10 @@
+ # flake8: noqa: F401
+ """command line interface"""
+ from .common import get_command_log, get_job_info
+ from .task_compress import compress, compress_parser
+ from .task_condense import condense, condense_dataset, condense_parser
+ from .task_join import join, join_parser
+ from .task_repack import repack, repack_parser
+ from .task_split import split, split_parser
+ from .task_tdms2rtdc import tdms2rtdc, tdms2rtdc_parser
+ from .task_verify_dataset import verify_dataset, verify_dataset_parser
dclab/cli/common.py ADDED
@@ -0,0 +1,237 @@
+ import hashlib
+ import json
+ import numbers
+ import pathlib
+ import platform
+ import time
+ import warnings
+
+ import h5py
+ import numpy as np
+
+ try:
+     import imageio
+ except ModuleNotFoundError:
+     imageio = None
+
+ try:
+     import nptdms
+ except ModuleNotFoundError:
+     nptdms = None
+
+ from ..rtdc_dataset import fmt_tdms
+ from .. import util
+ from .._version import version
+
+
+ class ExtendedJSONEncoder(json.JSONEncoder):
+     def default(self, obj):
+         if isinstance(obj, pathlib.Path):
+             return str(obj)
+         elif isinstance(obj, numbers.Integral):
+             return int(obj)
+         elif isinstance(obj, np.bool_):
+             return bool(obj)
+         # Let the base class default method raise the TypeError
+         return json.JSONEncoder.default(self, obj)
+
+
+ def assemble_warnings(w):
+     """pretty-format all warnings for logs"""
+     wlog = []
+     for wi in w:
+         wlog.append(f"{wi.category.__name__}")
+         wlog.append(f" in {wi.category.__module__} line {wi.lineno}:")
+         # make sure line is not longer than 100 characters
+         words = str(wi.message).split(" ")
+         wline = " "
+         for ii in range(len(words)):
+             wline += " " + words[ii]
+             if ii == len(words) - 1:
+                 # end
+                 wlog.append(wline)
+             elif len(wline + words[ii+1]) + 1 >= 100:
+                 # new line
+                 wlog.append(wline)
+                 wline = " "
+             # nothing to do here
+     return wlog
+
+
+ def get_command_log(paths, custom_dict=None):
+     """Return a json dump of system parameters
+
+     Parameters
+     ----------
+     paths: list of pathlib.Path or str
+         paths of related measurement files; up to 5MB of each of
+         them is md5-hashed and included in the "files" key
+     custom_dict: dict
+         additional user-defined entries; must contain simple
+         Python objects (json.dumps must still work)
+     """
+     if custom_dict is None:
+         custom_dict = {}
+     data = get_job_info()
+     data["files"] = []
+     for ii, pp in enumerate(paths):
+         if pathlib.Path(pp).exists():
+             fdict = {"name": pathlib.Path(pp).name,
+                      # Hash only 5 MB of the input file
+                      "md5-5M": util.hashfile(pp,
+                                              count=80,
+                                              constructor=hashlib.md5),
+                      "index": ii+1
+                      }
+         else:
+             fdict = {"name": f"{pp}",
+                      "index": ii + 1
+                      }
+         data["files"].append(fdict)
+     final_data = {}
+     final_data.update(custom_dict)
+     final_data.update(data)
+     dump = json.dumps(final_data,
+                       sort_keys=True,
+                       indent=2,
+                       cls=ExtendedJSONEncoder).split("\n")
+     return dump
+
+
+ def get_job_info():
+     """Return dictionary with current job information
+
+     Returns
+     -------
+     info: dict of dicts
+         Job information including details about time, system,
+         python version, and libraries used.
+     """
+     data = {
+         "utc": {
+             "date": time.strftime("%Y-%m-%d", time.gmtime()),
+             "time": time.strftime("%H:%M:%S", time.gmtime()),
+         },
+         "system": {
+             "info": platform.platform(),
+             "machine": platform.machine(),
+             "name": platform.system(),
+             "release": platform.release(),
+             "version": platform.version(),
+         },
+         "python": {
+             "build": ", ".join(platform.python_build()),
+             "implementation": platform.python_implementation(),
+             "version": platform.python_version(),
+         },
+         "libraries": {
+             "dclab": version,
+             "h5py": h5py.__version__,
+             "numpy": np.__version__,
+         }
+     }
+     if imageio is not None:
+         data["libraries"]["imageio"] = imageio.__version__
+     if nptdms is not None:
+         data["libraries"]["nptdms"] = nptdms.__version__
+     return data
+
+
+ def print_info(string):
+     print(f"\033[1m{string}\033[0m")
+
+
+ def print_alert(string):
+     print_info(f"\033[33m{string}")
+
+
+ def print_violation(string):
+     print_info(f"\033[31m{string}")
+
+
+ def setup_task_paths(paths_in, paths_out, allowed_input_suffixes):
+     """Setup directories for a CLI task
+
+     Parameters
+     ----------
+     paths_in: list of str or lsit of pathlib.Path or str or pathlib.Path
+         Input paths
+     paths_out: list of str or list of pathlib.Path or str or pathlib.Path
+         Output paths
+     allowed_input_suffixes: list
+         List of allowed input suffixes (e.g. [".rtdc"])
+
+     Returns
+     -------
+     paths_in: list of pathlib.Path or pathlib.Path
+         Input paths
+     paths_out: list of pathlib.Path or pathlib.Path
+         Output paths
+     paths_temp: list of pathlib.Path or pathlib.Path
+         Temporary paths (working path)
+     """
+     if isinstance(paths_in, list):
+         list_in = True
+     else:
+         paths_in = [paths_in]
+         list_in = False
+
+     if isinstance(paths_out, list):
+         list_out = True
+     else:
+         paths_out = [paths_out]
+         list_out = False
+
+     paths_in = [pathlib.Path(pi) for pi in paths_in]
+     for pi in paths_in:
+         if pi.suffix not in allowed_input_suffixes:
+             raise ValueError(f"Unsupported file type: '{pi.suffix}'")
+
+     paths_out = [pathlib.Path(po) for po in paths_out]
+     for ii, po in enumerate(paths_out):
+         if po.suffix != ".rtdc":
+             paths_out[ii] = po.with_name(po.name + ".rtdc")
+     [po.unlink() for po in paths_out if po.exists()]
+
+     paths_temp = [po.with_suffix(".rtdc~") for po in paths_out]
+     [pt.unlink() for pt in paths_temp if pt.exists()]
+
+     # convert lists back to paths
+     if not list_in:
+         paths_in = paths_in[0]
+
+     if not list_out:
+         paths_out = paths_out[0]
+         paths_temp = paths_temp[0]
+
+     return paths_in, paths_out, paths_temp
+
+
+ def skip_empty_image_events(ds, initial=True, final=True):
+     """Set a manual filter to skip inital or final empty image events"""
+     if initial:
+         if (("image" in ds and ds.format == "tdms"
+              and ds.config["fmt_tdms"]["video frame offset"])
+                 or ("contour" in ds and np.all(ds["contour"][0] == 0))
+                 or ("image" in ds and np.all(ds["image"][0] == 0))):
+             ds.filter.manual[0] = False
+             ds.apply_filter()
+     if final:
+         # This is not easy to test, because we need a corrupt
+         # frame.
+         if "image" in ds:
+             idfin = len(ds) - 1
+             if ds.format == "tdms":
+                 with warnings.catch_warnings(record=True) as wfin:
+                     warnings.simplefilter(
+                         "always",
+                         fmt_tdms.event_image.CorruptFrameWarning)
+                     _ = ds["image"][idfin]  # provoke a warning
+                     for ww in wfin:
+                         if ww.category == fmt_tdms.event_image.CorruptFrameWarning:
+                             ds.filter.manual[idfin] = False
+                             ds.apply_filter()
+                             break
+             elif np.all(ds["image"][idfin] == 0):
+                 ds.filter.manual[idfin] = False
+                 ds.apply_filter()
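
The helpers above are shared by all CLI tasks: `get_command_log` combines `get_job_info` with per-file md5 hashes so each task can embed a provenance log in its output. A small sketch of calling them directly; "measurement.rtdc" is a placeholder path (non-existing paths are logged by name only, per the code above):

from dclab.cli.common import get_command_log, get_job_info

info = get_job_info()
print(info["libraries"]["dclab"])            # e.g. "0.67.0"

# get_command_log returns the JSON dump already split into lines,
# suitable for storing as an HDF5 log with RTDCWriter.store_log()
log_lines = get_command_log(paths=["measurement.rtdc"])
print("\n".join(log_lines[:5]))
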
dclab/cli/task_compress.py ADDED
@@ -0,0 +1,126 @@
+ """Compress .rtdc files"""
+ from __future__ import annotations
+
+ import argparse
+ import pathlib
+ import warnings
+
+ import hdf5plugin
+ import h5py
+
+ from ..rtdc_dataset import rtdc_copy, RTDCWriter
+ from .. import util
+ from .._version import version
+
+ from . import common
+
+
+ def compress(
+         path_in: str | pathlib.Path = None,
+         path_out: str | pathlib.Path = None,
+         force: bool = False,
+         check_suffix: bool = True,
+         ret_path: bool = False,
+         ):
+     """Create a new dataset with all features compressed lossless
+
+     Parameters
+     ----------
+     path_in: str or pathlib.Path
+         file to compress
+     path_out: str or pathlib
+         output file path
+     force: bool
+         DEPRECATED
+     check_suffix: bool
+         check suffixes for input and output files
+     ret_path: bool
+         whether to return the output path
+
+     Returns
+     -------
+     path_out: pathlib.Path (optional)
+         output path (with possibly corrected suffix)
+     """
+     cmp_kw = hdf5plugin.Zstd(clevel=5)
+     if path_out is None or path_in is None:
+         parser = compress_parser()
+         args = parser.parse_args()
+         path_in = args.input
+         path_out = args.output
+         force = args.force
+
+     allowed_input_suffixes = [".rtdc"]
+     if not check_suffix:
+         allowed_input_suffixes.append(pathlib.Path(path_in).suffix)
+
+     path_in, path_out, path_temp = common.setup_task_paths(
+         path_in, path_out, allowed_input_suffixes=allowed_input_suffixes)
+
+     if force:
+         warnings.warn(
+             "The `force` keyword argument is deprecated since dclab 0.49.0, "
+             "because compressed HDF5 Datasets are now copied and there "
+             "is no reason to avoid or use force anymore.",
+             DeprecationWarning)
+
+     # command log
+     logs = {"dclab-compress": common.get_command_log(paths=[path_in])}
+
+     with warnings.catch_warnings(record=True) as w:
+         warnings.simplefilter("always")
+         with h5py.File(path_in) as h5, h5py.File(path_temp, "w") as hc:
+             rtdc_copy(src_h5file=h5,
+                       dst_h5file=hc,
+                       features="all",
+                       include_basins=True,
+                       include_logs=True,
+                       include_tables=True,
+                       meta_prefix="",
+                       )
+
+             hc.require_group("logs")
+             # rename old dclab-compress logs
+             for lkey in ["dclab-compress", "dclab-compress-warnings"]:
+                 if lkey in hc["logs"]:
+                     # This is cached, so no worry calling it multiple times.
+                     md55m = util.hashfile(path_in, count=80)
+                     # rename
+                     hc["logs"][f"{lkey}_{md55m}"] = hc["logs"][lkey]
+                     del hc["logs"][lkey]
+
+     # warnings log
+     if w:
+         logs["dclab-compress-warnings"] = common.assemble_warnings(w)
+
+     # Write log file
+     with RTDCWriter(path_temp,
+                     compression_kwargs=cmp_kw,
+                     mode="append") as hw:
+         for name in logs:
+             hw.store_log(name, logs[name])
+
+     # Finally, rename temp to out
+     path_temp.rename(path_out)
+
+     if ret_path:
+         return path_out
+
+
+ def compress_parser():
+     descr = "Create a compressed version of an .rtdc file. This can be " \
+             + "used for saving disk space (loss-less compression). The " \
+             + "data generated during an experiment is usually not compressed."
+     parser = argparse.ArgumentParser(description=descr)
+     parser.add_argument('input', metavar="INPUT", type=str,
+                         help='Input path (.rtdc file)')
+     parser.add_argument('output', metavar="OUTPUT", type=str,
+                         help='Output path (.rtdc file)')
+     parser.add_argument('--force',
+                         dest='force',
+                         action='store_true',
+                         help='DEPRECATED')
+     parser.set_defaults(force=False)
+     parser.add_argument('--version', action='version',
+                         version=f'dclab-compress {version}')
+     return parser
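
This module serves both as the implementation behind the dclab-compress console script (its --version action prints "dclab-compress") and as an importable task via dclab.cli. A minimal sketch with placeholder file names:

# command line (argument parsing via compress_parser shown above)
#   dclab-compress input.rtdc output.rtdc

# programmatic use of the same task
from dclab.cli import compress

out = compress(path_in="input.rtdc",     # placeholder paths
               path_out="output.rtdc",
               ret_path=True)            # return the (suffix-corrected) output path
print(out)
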