dclab 0.62.11__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dclab might be problematic. Click here for more details.

Files changed (137) hide show
  1. dclab/__init__.py +23 -0
  2. dclab/_version.py +16 -0
  3. dclab/cached.py +97 -0
  4. dclab/cli/__init__.py +10 -0
  5. dclab/cli/common.py +237 -0
  6. dclab/cli/task_compress.py +126 -0
  7. dclab/cli/task_condense.py +223 -0
  8. dclab/cli/task_join.py +229 -0
  9. dclab/cli/task_repack.py +98 -0
  10. dclab/cli/task_split.py +154 -0
  11. dclab/cli/task_tdms2rtdc.py +186 -0
  12. dclab/cli/task_verify_dataset.py +75 -0
  13. dclab/definitions/__init__.py +79 -0
  14. dclab/definitions/feat_const.py +202 -0
  15. dclab/definitions/feat_logic.py +183 -0
  16. dclab/definitions/meta_const.py +252 -0
  17. dclab/definitions/meta_logic.py +111 -0
  18. dclab/definitions/meta_parse.py +94 -0
  19. dclab/downsampling.cp313-win_amd64.pyd +0 -0
  20. dclab/downsampling.pyx +230 -0
  21. dclab/external/__init__.py +4 -0
  22. dclab/external/packaging/LICENSE +3 -0
  23. dclab/external/packaging/LICENSE.APACHE +177 -0
  24. dclab/external/packaging/LICENSE.BSD +23 -0
  25. dclab/external/packaging/__init__.py +6 -0
  26. dclab/external/packaging/_structures.py +61 -0
  27. dclab/external/packaging/version.py +505 -0
  28. dclab/external/skimage/LICENSE +28 -0
  29. dclab/external/skimage/__init__.py +2 -0
  30. dclab/external/skimage/_find_contours.py +216 -0
  31. dclab/external/skimage/_find_contours_cy.cp313-win_amd64.pyd +0 -0
  32. dclab/external/skimage/_find_contours_cy.pyx +188 -0
  33. dclab/external/skimage/_pnpoly.cp313-win_amd64.pyd +0 -0
  34. dclab/external/skimage/_pnpoly.pyx +99 -0
  35. dclab/external/skimage/_shared/__init__.py +1 -0
  36. dclab/external/skimage/_shared/geometry.cp313-win_amd64.pyd +0 -0
  37. dclab/external/skimage/_shared/geometry.pxd +6 -0
  38. dclab/external/skimage/_shared/geometry.pyx +55 -0
  39. dclab/external/skimage/measure.py +7 -0
  40. dclab/external/skimage/pnpoly.py +53 -0
  41. dclab/external/statsmodels/LICENSE +35 -0
  42. dclab/external/statsmodels/__init__.py +6 -0
  43. dclab/external/statsmodels/nonparametric/__init__.py +1 -0
  44. dclab/external/statsmodels/nonparametric/_kernel_base.py +203 -0
  45. dclab/external/statsmodels/nonparametric/kernel_density.py +165 -0
  46. dclab/external/statsmodels/nonparametric/kernels.py +36 -0
  47. dclab/features/__init__.py +9 -0
  48. dclab/features/bright.py +81 -0
  49. dclab/features/bright_bc.py +93 -0
  50. dclab/features/bright_perc.py +63 -0
  51. dclab/features/contour.py +161 -0
  52. dclab/features/emodulus/__init__.py +339 -0
  53. dclab/features/emodulus/load.py +252 -0
  54. dclab/features/emodulus/lut_HE-2D-FEM-22.txt +16432 -0
  55. dclab/features/emodulus/lut_HE-3D-FEM-22.txt +1276 -0
  56. dclab/features/emodulus/lut_LE-2D-FEM-19.txt +13082 -0
  57. dclab/features/emodulus/pxcorr.py +135 -0
  58. dclab/features/emodulus/scale_linear.py +247 -0
  59. dclab/features/emodulus/viscosity.py +256 -0
  60. dclab/features/fl_crosstalk.py +95 -0
  61. dclab/features/inert_ratio.py +377 -0
  62. dclab/features/volume.py +242 -0
  63. dclab/http_utils.py +322 -0
  64. dclab/isoelastics/__init__.py +468 -0
  65. dclab/isoelastics/iso_HE-2D-FEM-22-area_um-deform.txt +2440 -0
  66. dclab/isoelastics/iso_HE-2D-FEM-22-volume-deform.txt +2635 -0
  67. dclab/isoelastics/iso_HE-3D-FEM-22-area_um-deform.txt +1930 -0
  68. dclab/isoelastics/iso_HE-3D-FEM-22-volume-deform.txt +2221 -0
  69. dclab/isoelastics/iso_LE-2D-FEM-19-area_um-deform.txt +2151 -0
  70. dclab/isoelastics/iso_LE-2D-FEM-19-volume-deform.txt +2250 -0
  71. dclab/isoelastics/iso_LE-2D-ana-18-area_um-deform.txt +1266 -0
  72. dclab/kde_contours.py +222 -0
  73. dclab/kde_methods.py +303 -0
  74. dclab/lme4/__init__.py +5 -0
  75. dclab/lme4/lme4_template.R +94 -0
  76. dclab/lme4/rsetup.py +204 -0
  77. dclab/lme4/wrapr.py +386 -0
  78. dclab/polygon_filter.py +398 -0
  79. dclab/rtdc_dataset/__init__.py +15 -0
  80. dclab/rtdc_dataset/check.py +902 -0
  81. dclab/rtdc_dataset/config.py +533 -0
  82. dclab/rtdc_dataset/copier.py +353 -0
  83. dclab/rtdc_dataset/core.py +1001 -0
  84. dclab/rtdc_dataset/export.py +737 -0
  85. dclab/rtdc_dataset/feat_anc_core/__init__.py +24 -0
  86. dclab/rtdc_dataset/feat_anc_core/af_basic.py +75 -0
  87. dclab/rtdc_dataset/feat_anc_core/af_emodulus.py +160 -0
  88. dclab/rtdc_dataset/feat_anc_core/af_fl_max_ctc.py +133 -0
  89. dclab/rtdc_dataset/feat_anc_core/af_image_contour.py +113 -0
  90. dclab/rtdc_dataset/feat_anc_core/af_ml_class.py +102 -0
  91. dclab/rtdc_dataset/feat_anc_core/ancillary_feature.py +320 -0
  92. dclab/rtdc_dataset/feat_anc_ml/__init__.py +32 -0
  93. dclab/rtdc_dataset/feat_anc_plugin/__init__.py +3 -0
  94. dclab/rtdc_dataset/feat_anc_plugin/plugin_feature.py +329 -0
  95. dclab/rtdc_dataset/feat_basin.py +550 -0
  96. dclab/rtdc_dataset/feat_temp.py +102 -0
  97. dclab/rtdc_dataset/filter.py +263 -0
  98. dclab/rtdc_dataset/fmt_dcor/__init__.py +7 -0
  99. dclab/rtdc_dataset/fmt_dcor/access_token.py +52 -0
  100. dclab/rtdc_dataset/fmt_dcor/api.py +111 -0
  101. dclab/rtdc_dataset/fmt_dcor/base.py +200 -0
  102. dclab/rtdc_dataset/fmt_dcor/basin.py +73 -0
  103. dclab/rtdc_dataset/fmt_dcor/logs.py +26 -0
  104. dclab/rtdc_dataset/fmt_dcor/tables.py +42 -0
  105. dclab/rtdc_dataset/fmt_dict.py +103 -0
  106. dclab/rtdc_dataset/fmt_hdf5/__init__.py +6 -0
  107. dclab/rtdc_dataset/fmt_hdf5/base.py +192 -0
  108. dclab/rtdc_dataset/fmt_hdf5/basin.py +30 -0
  109. dclab/rtdc_dataset/fmt_hdf5/events.py +257 -0
  110. dclab/rtdc_dataset/fmt_hdf5/feat_defect.py +164 -0
  111. dclab/rtdc_dataset/fmt_hdf5/logs.py +33 -0
  112. dclab/rtdc_dataset/fmt_hdf5/tables.py +30 -0
  113. dclab/rtdc_dataset/fmt_hierarchy/__init__.py +11 -0
  114. dclab/rtdc_dataset/fmt_hierarchy/base.py +278 -0
  115. dclab/rtdc_dataset/fmt_hierarchy/events.py +146 -0
  116. dclab/rtdc_dataset/fmt_hierarchy/hfilter.py +140 -0
  117. dclab/rtdc_dataset/fmt_hierarchy/mapper.py +134 -0
  118. dclab/rtdc_dataset/fmt_http.py +102 -0
  119. dclab/rtdc_dataset/fmt_s3.py +320 -0
  120. dclab/rtdc_dataset/fmt_tdms/__init__.py +476 -0
  121. dclab/rtdc_dataset/fmt_tdms/event_contour.py +264 -0
  122. dclab/rtdc_dataset/fmt_tdms/event_image.py +220 -0
  123. dclab/rtdc_dataset/fmt_tdms/event_mask.py +62 -0
  124. dclab/rtdc_dataset/fmt_tdms/event_trace.py +146 -0
  125. dclab/rtdc_dataset/fmt_tdms/exc.py +37 -0
  126. dclab/rtdc_dataset/fmt_tdms/naming.py +151 -0
  127. dclab/rtdc_dataset/load.py +72 -0
  128. dclab/rtdc_dataset/writer.py +985 -0
  129. dclab/statistics.py +203 -0
  130. dclab/util.py +156 -0
  131. dclab/warn.py +15 -0
  132. dclab-0.62.11.dist-info/LICENSE +343 -0
  133. dclab-0.62.11.dist-info/METADATA +146 -0
  134. dclab-0.62.11.dist-info/RECORD +137 -0
  135. dclab-0.62.11.dist-info/WHEEL +5 -0
  136. dclab-0.62.11.dist-info/entry_points.txt +8 -0
  137. dclab-0.62.11.dist-info/top_level.txt +1 -0
dclab/statistics.py ADDED
@@ -0,0 +1,203 @@
1
+ """Statistics computation for RT-DC dataset instances"""
2
+
3
+ import numpy as np
4
+ import traceback as tb
5
+ import warnings
6
+
7
+ from . import definitions as dfn
8
+
9
+
10
class BadMethodWarning(UserWarning):
    """Warning category used when a statistical method fails

    `Statistics.__call__` issues this warning (and returns `nan`)
    when the wrapped method raises an exception.
    """
12
+
13
+
14
class Statistics(object):
    """A named statistical method that can be evaluated on a dataset

    Every instance registers itself under its `name` in the class-level
    dictionary `Statistics.available_methods`.
    """
    # Registry of all instances created so far, keyed by method name
    available_methods = {}

    def __init__(self, name, method, req_feature=False):
        """A helper class for computing statistics

        All statistical methods are registered in the dictionary
        `Statistics.available_methods`.

        Parameters
        ----------
        name: str
            Name under which the method is registered; an existing
            entry with the same name is replaced.
        method: callable
            Called with a single argument: the filtered feature data if
            `req_feature` is True, otherwise the dataset itself.
        req_feature: bool
            Whether the method operates on the data of one feature
            (the `feature` keyword is then required when calling).
        """
        self.method = method
        self.name = name
        self.req_feature = req_feature
        Statistics.available_methods[name] = self

    def __call__(self, **kwargs):
        """Evaluate the method and return its result

        Parameters
        ----------
        ds:
            The dataset (mandatory keyword argument).
        feature: str
            The feature name (mandatory if `self.req_feature` is set).

        Returns
        -------
        result:
            Whatever `self.method` returns; `np.nan` if there is no data
            or if the method raised an exception (a `BadMethodWarning`
            is issued in the latter case).

        Raises
        ------
        ValueError
            If a mandatory keyword argument is missing.
        """
        data = self._get_data(kwargs)
        if len(data) == 0:
            return np.nan
        try:
            return self.method(data)
        except Exception:
            # Only ordinary errors are downgraded to a warning. Catching
            # `BaseException` here would also swallow `KeyboardInterrupt`
            # and `SystemExit`, making long computations uninterruptible.
            exc = tb.format_exc().replace("\n", "\n | ")
            warnings.warn("Failed to compute {} for {}: {}".format(
                self.name, kwargs["ds"].title, exc),
                BadMethodWarning)
            return np.nan

    def _get_data(self, kwargs):
        """Convenience wrapper to get statistics data

        Returns the filtered feature data if `self.req_feature` is set,
        otherwise the dataset `kwargs["ds"]` itself.
        """
        if "ds" not in kwargs:
            raise ValueError("Keyword argument 'ds' missing.")

        ds = kwargs["ds"]

        if not self.req_feature:
            return ds
        if "feature" not in kwargs:
            raise ValueError("Keyword argument 'feature' missing.")
        return self.get_feature(ds, kwargs["feature"])

    def get_feature(self, ds, feat):
        """Return filtered feature data

        The features are filtered according to the user-defined filters,
        using the information in `ds.filter.all`. In addition, all
        `nan` and `inf` values are purged.

        Parameters
        ----------
        ds: dclab.rtdc_dataset.RTDCBase
            The dataset containing the feature
        feat: str
            The name of the feature; must be a scalar feature
        """
        if ds.config["filtering"]["enable filters"]:
            x = ds[feat][ds.filter.all]
        else:
            x = ds[feat]
        # `isfinite` is False exactly for nan, +inf and -inf
        return x[np.isfinite(x)]
78
+
79
+
80
def flow_rate(ds):
    """Return the flow rate of an RT-DC dataset

    The value is read from the "flow rate" key of the "setup"
    configuration section; `np.nan` is returned if that key is absent.
    """
    setup = ds.config["setup"]
    return setup["flow rate"] if "flow rate" in setup else np.nan
87
+
88
+
89
def get_statistics(ds, methods=None, features=None):
    """Compute statistics for an RT-DC dataset

    Parameters
    ----------
    ds: dclab.rtdc_dataset.RTDCBase
        The dataset for which to compute the statistics.
    methods: list of str or None
        The methods with which to compute the statistics.
        The list of available methods is given with
        `dclab.statistics.Statistics.available_methods.keys()`
        If set to `None`, statistics for all methods are computed
        (methods that do not require a feature come first).
    features: list of str or None
        Feature name identifiers are defined by
        `dclab.definitions.feature_exists`. The names are converted
        to lower-case. If set to `None`, statistics for all scalar
        features available (`ds.features_scalar`) are computed.

    Returns
    -------
    header: list of str
        The header (feature + method names) of the computed statistics.
    values: list of float
        The computed statistics; `nan` for features not in `ds`.
    """
    registry = Statistics.available_methods

    if methods is None:
        names = list(registry.keys())
        # dataset-level methods first, feature-level methods afterwards
        ds_level = [nm for nm in names if not registry[nm].req_feature]
        feat_level = [nm for nm in names if registry[nm].req_feature]
        methods = ds_level + feat_level

    if features is None:
        features = ds.features_scalar
    else:
        features = [name.lower() for name in features]

    header = []
    values = []

    # Pass 1: methods that operate on the dataset as a whole
    for mt in methods:
        stat = registry[mt]
        if stat.req_feature:
            continue
        values.append(stat(ds=ds))
        header.append(mt)

    # Pass 2: feature-based methods. The feature loop is the outer loop so
    # that all statistics of one feature end up next to each other, which
    # yields a more human-friendly ordering than grouping by method.
    for ft in features:
        for mt in methods:
            stat = registry[mt]
            if not stat.req_feature:
                continue
            if ft in ds:
                values.append(stat(ds=ds, feature=ft))
            else:
                values.append(np.nan)
            label = dfn.get_feature_label(ft, rtdc_ds=ds)
            header.append(" ".join([mt, label]))

    return header, values
152
+
153
+
154
def mode(data):
    """Compute an intelligent value for the mode

    The most common value in experimental data is not very useful if
    there are a lot of digits after the decimal point. This method
    approaches this issue by rounding to a bin size that is determined
    by the Freedman–Diaconis rule.

    Parameters
    ----------
    data: 1d ndarray
        The data for which the mode should be computed.

    Returns
    -------
    mode: float
        The mode computed with the Freedman-Diaconis rule;
        `nan` if the resulting bin size is zero.
    """
    num_events = data.shape[0]
    # Freedman–Diaconis: bin width = 2 * IQR / n^(1/3)
    q25 = np.percentile(data, 25)
    q75 = np.percentile(data, 75)
    bin_size = 2 * (q75 - q25) / num_events**(1/3)

    if bin_size == 0:
        return np.nan

    # Snap every value to its bin and shift by half a bin width
    # (the original intent is to report the bin center).
    # NOTE(review): `np.round` already maps to the nearest multiple of
    # `bin_size`; confirm whether the additional `bin_size/2` is wanted.
    binned = np.round(data/bin_size)*bin_size + bin_size/2
    candidates, counts = np.unique(binned, return_counts=True)
    # `argmax` returns the first maximum, i.e. the smallest value wins ties
    return candidates[np.argmax(counts)]
189
+
190
+
191
# Populate `Statistics.available_methods` (instantiation registers a method).
# Feature-based methods: called with the filtered data of one feature
Statistics("Mean", np.average, req_feature=True)
Statistics("Median", np.median, req_feature=True)
Statistics("Mode", mode, req_feature=True)
Statistics("SD", np.std, req_feature=True)
# Dataset-based methods: called with the RTDCBase instance itself
Statistics("Events", lambda ds: np.sum(ds.filter.all))
Statistics("%-gated", lambda ds: np.average(ds.filter.all)*100)
Statistics("Flow rate", lambda ds: flow_rate(ds))
dclab/util.py ADDED
@@ -0,0 +1,156 @@
1
+ """Utility methods"""
2
+ import functools
3
+ import hashlib
4
+ import numbers
5
+ import pathlib
6
+ import warnings
7
+
8
+ import h5py
9
+ import numpy as np
10
+ from .rtdc_dataset.config import Configuration, ConfigurationDict
11
+
12
+
13
# Value for the `copy` argument of numpy functions meaning "copy only if
# needed": numpy>=2 spells this `None`, older versions spell it `False`.
_numpy_ge_2 = np.lib.NumpyVersion(np.__version__) >= "2.0.0"
copy_if_needed = None if _numpy_ge_2 else False
17
+
18
+
19
class file_monitoring_lru_cache:
    """Decorator for caching data extracted from files

    The function that is decorated with `file_monitoring_lru_cache`
    must accept `path` as its first argument. Caching is
    done with an `lru_cache`. In addition to the full path
    and the other arguments to the decorated function, the
    size and the modification time of `path` are used as a
    key for the decorator.
    If the path does not exist, no caching is done.

    The other arguments must be hashable (a requirement of
    `functools.lru_cache`). The decorated function receives the
    resolved `pathlib.Path` when the path exists, and the unmodified
    `path` argument otherwise.

    Use case: Extract and cache metadata from a file on disk
    that may change.
    """
    def __init__(self, maxsize=100):
        """
        Parameters
        ----------
        maxsize: int
            Passed on to `functools.lru_cache`.
        """
        self.lru_cache = functools.lru_cache(maxsize=maxsize)
        self.cached_wrapper = None

    def __call__(self, func):
        """Return the caching wrapper for `func`

        The returned wrapper exposes `cache_clear` and `cache_info`
        of the underlying `lru_cache`.
        """
        @self.lru_cache
        def cached_wrapper(path, path_stats, *args, **kwargs):
            # `path_stats` is not used by `func`; it is only part of the
            # cache key so that a modified file yields a cache miss.
            assert path_stats, "We need stat for validating the cache"
            return func(path, *args, **kwargs)

        @functools.wraps(func)
        def wrapper(path, *args, **kwargs):
            full_path = pathlib.Path(path).resolve()
            if full_path.exists():
                path_stat = full_path.stat()
                path_stats = (path_stat.st_mtime_ns, path_stat.st_size)
                # `path` and `path_stats` must be passed positionally:
                # as keywords, the first item of `args` would be bound
                # to `path` as well and Python would raise a TypeError
                # ("got multiple values for argument 'path'").
                return cached_wrapper(full_path, path_stats,
                                      *args, **kwargs)
            else:
                # `func` will most-likely raise an exception
                return func(path, *args, **kwargs)

        wrapper.cache_clear = cached_wrapper.cache_clear
        wrapper.cache_info = cached_wrapper.cache_info

        return wrapper
61
+
62
+
63
@file_monitoring_lru_cache(maxsize=100)
def hashfile(fname, blocksize=65536, count=0, constructor=hashlib.md5,
             hasher_class=None):
    """Compute hex-hash of a file (md5 by default)

    Parameters
    ----------
    fname: str or pathlib.Path
        path to the file
    blocksize: int
        block size in bytes read from the file
        (set to `0` to hash the entire file)
    count: int
        number of blocks read from the file
        (set to `0` to read blocks until the end of the file)
    hasher_class: callable
        deprecated, use `constructor` instead
    constructor: callable
        hash algorithm constructor

    Returns
    -------
    hexdigest: str
        hexadecimal digest of the data read
    """
    if hasher_class is not None:
        warnings.warn("The `hasher_class` argument is deprecated, please use "
                      "`constructor` instead.")
        constructor = hasher_class

    # `read(0)` returns no data at all, which would silently produce the
    # hash of an empty byte string. Map a non-positive (or missing) block
    # size to `-1`, which makes `read` return everything up to EOF.
    read_size = blocksize if blocksize and blocksize > 0 else -1

    hasher = constructor()
    blocks_read = 0
    with pathlib.Path(fname).open('rb') as fd:
        while True:
            buf = fd.read(read_size)
            if not buf:
                # end of file
                break
            hasher.update(buf)
            blocks_read += 1
            if count and blocks_read == count:
                # requested number of blocks reached; do not read further
                break
    return hasher.hexdigest()
100
+
101
+
102
def hashobj(obj):
    """Compute md5 hex-hash of a Python object

    The object is converted to bytes with `obj2bytes` before hashing.
    """
    hasher = hashlib.md5(obj2bytes(obj))
    return hasher.hexdigest()
105
+
106
+
107
def obj2bytes(obj):
    """Bytes representation of an object for hashing

    Note that there is no guarantee that the bytes representation
    returned is reproducible across sessions. This is currently the
    case when an :class:`.RTDCBase` instance is passed. There is no
    opinion on whether/how this should be changed.

    Raises
    ------
    ValueError
        If there is no conversion rule for the type of `obj`.
    """
    # The order of the checks below matters (first match wins).
    if isinstance(obj, str):
        return obj.encode("utf-8")
    if isinstance(obj, pathlib.Path):
        return obj2bytes(str(obj))
    if isinstance(obj, (bool, numbers.Number)):
        return str(obj).encode("utf-8")
    if obj is None:
        return b"none"
    if isinstance(obj, np.ndarray):
        return obj.tobytes()
    if isinstance(obj, tuple):
        return obj2bytes(list(obj))
    if isinstance(obj, list):
        # items are concatenated without any separator
        return b"".join(obj2bytes(item) for item in obj)
    if isinstance(obj, dict):
        # sort the items so that the insertion order does not matter
        return obj2bytes(sorted(obj.items()))
    if hasattr(obj, "identifier"):
        # For RTDCBase, this identifier is not reproducible in-between
        # sessions. We might want to change this to something that is
        # reproducible in the future (if the need arises).
        return obj2bytes(obj.identifier)
    if isinstance(obj, h5py.Dataset):
        # path within the HDF5 file, followed by the name of that file
        h5_name = obj.name
        h5_filename = obj.file.filename
        parts = [h5_name, h5_filename]
        h5_path = pathlib.Path(h5_filename)
        if h5_path.exists():
            # when the file was changed
            parts.append(h5_path.stat().st_mtime)
            # size of the file
            parts.append(h5_path.stat().st_size)
        return obj2bytes(parts)
    if hasattr(obj, "__array__"):  # must come after h5py.Dataset
        return obj2bytes(obj.__array__())
    if isinstance(obj, Configuration):
        return obj2bytes(obj.tostring())
    if isinstance(obj, ConfigurationDict):
        return obj2bytes(dict(obj))
    raise ValueError("No rule to convert object '{}' to string.".
                     format(obj.__class__))
dclab/warn.py ADDED
@@ -0,0 +1,15 @@
1
+
2
+
3
class PipelineWarning(UserWarning):
    """Super-class for warnings relevant to data analysis

    Some warnings in dclab are important to the user, because
    they suggest that the user may not be using the correct
    model (e.g. for Young's modulus computation) in their
    analysis pipeline. All of these warnings should subclass
    PipelineWarning, so that higher-level software such as
    Shape-Out can identify them and present them correctly
    to the user.
    """