doppy 0.5.9__cp310-abi3-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
doppy/raw/halo_hpl.py ADDED
@@ -0,0 +1,480 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ import logging
5
+ import re
6
+ from dataclasses import dataclass
7
+ from datetime import datetime, timedelta, timezone
8
+ from io import BufferedIOBase
9
+ from os.path import commonprefix
10
+ from pathlib import Path
11
+ from typing import Any, Sequence, cast
12
+
13
+ import numpy as np
14
+ import numpy.typing as npt
15
+ from numpy import datetime64, timedelta64
16
+
17
+ import doppy
18
+ from doppy import exceptions
19
+ from doppy.raw.utils import bytes_from_src
20
+ from doppy.utils import merge_all_equal
21
+
22
+
23
@dataclass
class HaloHpl:
    """Parsed contents of a Halo ``.hpl`` file: header plus measurement arrays.

    Indexing an instance selects along the time axis only; ``header`` and
    ``radial_distance`` are carried over to the result unchanged.
    """

    header: HaloHplHeader
    time: npt.NDArray[datetime64]  # dim: (time, )
    radial_distance: npt.NDArray[np.float64]  # dim: (radial_distance, )
    azimuth: npt.NDArray[np.float64]  # dim: (time, )
    elevation: npt.NDArray[np.float64]  # dim: (time, )
    pitch: npt.NDArray[np.float64] | None  # dim: (time, )
    roll: npt.NDArray[np.float64] | None  # dim: (time, )
    radial_velocity: npt.NDArray[np.float64]  # dim: (time, radial_distance)
    intensity: npt.NDArray[np.float64]  # dim: (time, radial_distance)
    beta: npt.NDArray[np.float64]  # dim: (time, radial_distance)
    spectral_width: npt.NDArray[np.float64] | None  # dim: (time, radial_distance)

    @classmethod
    def from_srcs(
        cls, data: Sequence[str | bytes | Path | BufferedIOBase]
    ) -> list[HaloHpl]:
        """Parse several sources, skipping those that fail with RawParsingError.

        Each skipped source is reported through ``logging.warning``; any other
        exception propagates to the caller.
        """
        parsed = doppy.rs.raw.halo_hpl.from_bytes_srcs(
            [bytes_from_src(src) for src in data]
        )
        result = []
        for raw_tuple in parsed:
            try:
                converted = _raw_tuple2halo_hpl(raw_tuple)
            except exceptions.RawParsingError as err:
                logging.warning("Skipping %s: %s", raw_tuple[0].get("filename"), err)
            else:
                result.append(converted)
        return result

    @classmethod
    def from_src(cls, data: str | Path | bytes | BufferedIOBase) -> HaloHpl:
        """Parse a single source.

        Raises
        ------
        exceptions.RawParsingError
            When parsing or conversion raises ``RuntimeError``.
        """
        payload = bytes_from_src(data)
        try:
            raw_tuple = doppy.rs.raw.halo_hpl.from_bytes_src(payload)
            return _raw_tuple2halo_hpl(raw_tuple)
        except RuntimeError as err:
            raise exceptions.RawParsingError(err) from err

    def __getitem__(
        self,
        index: int
        | slice
        | list[int]
        | npt.NDArray[np.int64]
        | npt.NDArray[np.bool_]
        | tuple[slice, slice],
    ) -> HaloHpl:
        """Return a new instance with ``index`` applied along the time axis.

        Raises
        ------
        TypeError
            For any index that is not an int, slice, list or ndarray. Tuples
            are rejected here even though the annotation lists them.
        """
        if not isinstance(index, (int, slice, list, np.ndarray)):
            raise TypeError

        def pick(values: npt.NDArray[np.float64] | None) -> Any:
            # Optional arrays stay None when the instrument did not provide them.
            return None if values is None else values[index]

        return HaloHpl(
            header=self.header,
            time=self.time[index],
            radial_distance=self.radial_distance,
            azimuth=self.azimuth[index],
            elevation=self.elevation[index],
            radial_velocity=self.radial_velocity[index],
            intensity=self.intensity[index],
            beta=self.beta[index],
            pitch=pick(self.pitch),
            roll=pick(self.roll),
            spectral_width=pick(self.spectral_width),
        )

    @classmethod
    def merge(cls, raws: Sequence[HaloHpl]) -> HaloHpl:
        """Concatenate several instances along the time axis.

        ``radial_distance`` is taken from the first element; the optional
        arrays must be either present in every element or absent in every
        element.
        """

        def stacked(name: str) -> Any:
            return np.concatenate(tuple(getattr(r, name) for r in raws))

        def stacked_optional(name: str) -> npt.NDArray[np.float64] | None:
            return _merge_float_arrays_or_nones(tuple(getattr(r, name) for r in raws))

        return cls(
            header=_merge_headers([r.header for r in raws]),
            time=stacked("time"),
            radial_distance=raws[0].radial_distance,
            azimuth=stacked("azimuth"),
            elevation=stacked("elevation"),
            radial_velocity=stacked("radial_velocity"),
            intensity=stacked("intensity"),
            beta=stacked("beta"),
            pitch=stacked_optional("pitch"),
            roll=stacked_optional("roll"),
            spectral_width=stacked_optional("spectral_width"),
        )

    @functools.cached_property
    def azimuth_angles(self) -> set[int]:
        """Distinct azimuth values, rounded to whole degrees and wrapped modulo 360."""
        return {int(angle) % 360 for angle in np.round(self.azimuth)}

    @functools.cached_property
    def elevation_angles(self) -> set[int]:
        """Distinct elevation values rounded to whole degrees."""
        return {int(angle) for angle in np.round(self.elevation)}

    @functools.cached_property
    def time_diffs(self) -> set[int]:
        """Distinct differences between consecutive timestamps, in whole seconds."""
        whole_seconds = self.time.astype("datetime64[s]").astype("int")
        return set(np.diff(whole_seconds))

    @functools.cached_property
    def median_time_diff(self) -> float:
        """Median difference between consecutive timestamps in seconds, to 2 decimals."""
        seconds = 1e-6 * self.time.astype("datetime64[us]").astype("float")
        med = np.round(np.median(np.diff(seconds)), 2)
        if not isinstance(med, float):
            raise TypeError
        return med

    def sorted_by_time(self) -> HaloHpl:
        """Return a copy whose time-dependent arrays are ordered by ``time``."""
        return self[np.argsort(self.time)]

    def non_strictly_increasing_timesteps_removed(self) -> HaloHpl:
        """Drop every profile whose time is not later than the latest kept time."""
        if len(self.time) == 0:
            return self
        keep = np.ones_like(self.time, dtype=np.bool_)
        newest = self.time[0]
        for position, stamp in enumerate(self.time[1:], start=1):
            if stamp <= newest:
                keep[position] = False
            else:
                newest = stamp
        return self[keep]

    def nans_removed(self) -> HaloHpl:
        """Drop every profile that has at least one NaN in ``intensity``."""
        has_nan = np.isnan(self.intensity).any(axis=1)
        return self[np.array(~has_nan, dtype=np.bool_)]
147
+
148
+
149
+ @dataclass(slots=True)
150
+ class HaloHplHeader:
151
+ filename: str
152
+ gate_points: int
153
+ nrays: int | None
154
+ nwaypoints: int | None
155
+ ngates: int
156
+ pulses_per_ray: int
157
+ range_gate_length: float
158
+ resolution: float
159
+ scan_type: str
160
+ focus_range: int
161
+ start_time: datetime64
162
+ system_id: str
163
+ instrument_spectral_width: float | None
164
+
165
+ def mergeable_hash(self) -> int:
166
+ return hash(
167
+ (
168
+ self.gate_points,
169
+ self.nrays,
170
+ self.nwaypoints,
171
+ self.ngates,
172
+ self.pulses_per_ray,
173
+ round(self.range_gate_length, 1),
174
+ round(self.resolution, 1),
175
+ self.scan_type,
176
+ self.focus_range,
177
+ self.system_id,
178
+ round(x, 1)
179
+ if isinstance((x := self.instrument_spectral_width), float)
180
+ else None,
181
+ )
182
+ )
183
+
184
+ @classmethod
185
+ def from_dict(cls, data: dict[bytes, bytes]) -> HaloHplHeader:
186
+ return cls(
187
+ filename=data[b"Filename"].decode(),
188
+ gate_points=int(data[b"Gate length (pts)"]),
189
+ nrays=(
190
+ int(data[b"No. of rays in file"])
191
+ if b"No. of rays in file" in data
192
+ else None
193
+ ),
194
+ nwaypoints=(
195
+ int(data[b"No. of waypoints in file"])
196
+ if b"No. of waypoints in file" in data
197
+ else None
198
+ ),
199
+ ngates=int(data[b"Number of gates"]),
200
+ pulses_per_ray=int(data[b"Pulses/ray"]),
201
+ range_gate_length=float(data[b"Range gate length (m)"]),
202
+ resolution=float(data[b"Resolution (m/s)"]),
203
+ scan_type=data[b"Scan type"].decode(),
204
+ focus_range=int(data[b"Focus range"]),
205
+ start_time=_parser_start_time(data[b"Start time"]),
206
+ system_id=data[b"System ID"].decode(),
207
+ instrument_spectral_width=(
208
+ float(data[b"instrument_spectral_width"])
209
+ if b"instrument_spectral_width" in data
210
+ else None
211
+ ),
212
+ )
213
+
214
+
215
def _merge_headers(headers: list[HaloHplHeader]) -> HaloHplHeader:
    """Combine several headers into one.

    The merged filename is the common prefix of all filenames and the merged
    start time is the earliest one. Every other field is passed through
    ``merge_all_equal`` (presumably rejecting headers whose values differ;
    confirm against ``doppy.utils``).
    """
    filename = commonprefix([h.filename for h in headers])
    start_time = np.min([h.start_time for h in headers])
    shared_fields = (
        "gate_points",
        "nrays",
        "nwaypoints",
        "ngates",
        "pulses_per_ray",
        "range_gate_length",
        "resolution",
        "scan_type",
        "focus_range",
        "system_id",
        "instrument_spectral_width",
    )
    shared = {
        name: merge_all_equal(name, [getattr(h, name) for h in headers])
        for name in shared_fields
    }
    return HaloHplHeader(filename=filename, start_time=start_time, **shared)
236
+
237
+
238
+ def _merge_float_arrays_or_nones(
239
+ arrs: tuple[npt.NDArray[np.float64] | None, ...],
240
+ ) -> npt.NDArray[np.float64] | None:
241
+ isnone = tuple(x is None for x in arrs)
242
+ if all(isnone):
243
+ return None
244
+ if any(isnone):
245
+ raise ValueError
246
+ arrs = cast(tuple[npt.NDArray[np.float64], ...], arrs)
247
+ return np.concatenate(arrs, axis=0)
248
+
249
+
250
def _raw_tuple2halo_hpl(
    raw_tuple: tuple[dict[str, Any], dict[str, npt.NDArray[np.float64] | None]],
) -> HaloHpl:
    """Convert one ``(header_dict, data_dict)`` pair from the parser into a HaloHpl.

    Parameters
    ----------
    raw_tuple: header values keyed by field name, and flat data arrays keyed
        by variable name

    Raises
    ------
    TypeError
        When one of the mandatory data arrays is ``None``.
    exceptions.RawParsingError
        When the range-gate indices do not run 0..ngates-1 in every profile.
    """
    header_dict, data_dict = raw_tuple

    def optional_int(key: str) -> int | None:
        value = header_dict[key]
        return int(value) if value is not None else None

    def optional_float(key: str) -> float | None:
        value = header_dict[key]
        return float(value) if value is not None else None

    header = HaloHplHeader(
        filename=str(header_dict["filename"]),
        gate_points=int(header_dict["gate_points"]),
        nrays=optional_int("nrays"),
        nwaypoints=optional_int("nwaypoints"),
        ngates=int(header_dict["ngates"]),
        pulses_per_ray=int(header_dict["pulses_per_ray"]),
        range_gate_length=float(header_dict["range_gate_length"]),
        resolution=float(header_dict["resolution"]),
        scan_type=str(header_dict["scan_type"]),
        focus_range=int(header_dict["focus_range"]),
        # The timestamp is interpreted as UTC and stored as a naive datetime64.
        start_time=datetime64(
            datetime.fromtimestamp(header_dict["start_time"], timezone.utc).replace(
                tzinfo=None
            )
        ),
        system_id=str(header_dict["system_id"]),
        instrument_spectral_width=optional_float("instrument_spectral_width"),
    )
    expected_range = np.arange(header.ngates, dtype=np.float64)

    required = (
        "range",
        "time",
        "radial_distance",
        "azimuth",
        "elevation",
        "radial_velocity",
        "intensity",
        "beta",
    )
    missing = [key for key in required if data_dict[key] is None]
    if missing:
        raise TypeError(f"Missing mandatory data arrays: {', '.join(missing)}")

    def per_gate(key: str) -> npt.NDArray[np.float64]:
        # Per-gate variables arrive flat; fold them to (time, gate).
        return cast(npt.NDArray[np.float64], data_dict[key]).reshape(
            -1, header.ngates
        )

    range_ = per_gate("range")
    radial_distance = cast(npt.NDArray[np.float64], data_dict["radial_distance"])
    azimuth = cast(npt.NDArray[np.float64], data_dict["azimuth"])
    elevation = cast(npt.NDArray[np.float64], data_dict["elevation"])
    radial_velocity = per_gate("radial_velocity")
    intensity = per_gate("intensity")
    beta = per_gate("beta")
    if not np.isclose(range_, expected_range).all():
        raise exceptions.RawParsingError(
            "Incoherent range gates: "
            "Number of gates changes in the middle of the file"
        )
    spectral_width = data_dict["spectral_width"]
    return HaloHpl(
        header=header,
        time=_convert_time(
            header.start_time, cast(npt.NDArray[np.float64], data_dict["time"])
        ),
        radial_distance=radial_distance,
        azimuth=azimuth,
        elevation=elevation,
        pitch=data_dict["pitch"],
        roll=data_dict["roll"],
        radial_velocity=radial_velocity,
        intensity=intensity,
        beta=beta,
        spectral_width=spectral_width.reshape(-1, header.ngates)
        if spectral_width is not None
        else None,
    )
327
+
328
+
329
+ def _convert_time(
330
+ start_time: datetime64, decimal_time: npt.NDArray[np.float64]
331
+ ) -> npt.NDArray[datetime64]:
332
+ """
333
+ Parameters
334
+ ----------
335
+ start_time: unix-time
336
+ decimal_time: hours since beginning of the day of start_time
337
+ """
338
+ HOURS_TO_MICROSECONDS = 3600000000.0
339
+ start_of_day = start_time.astype("datetime64[D]").astype("datetime64[us]")
340
+ delta_hours = (decimal_time * HOURS_TO_MICROSECONDS).astype("timedelta64[us]")
341
+ return np.array(start_of_day + delta_hours, dtype=datetime64)
342
+
343
+
344
+ def _parser_start_time(s: bytes) -> datetime64:
345
+ return datetime64(datetime.strptime(s.decode(), "%Y%m%d %H:%M:%S.%f"))
346
+
347
+
348
def _from_src(data: BufferedIOBase) -> HaloHpl:
    """Parse an ``.hpl`` stream in pure Python.

    The header is everything up to and including the ``****`` divider line
    (plus the newlines that follow it), which must occur within the first
    1000 bytes; the rest of the stream is the measurement data.

    Raises
    ------
    exceptions.RawParsingError
        When no divider is found in the first 1000 bytes.
    """
    preview = data.read(1000)
    divider = re.search(b"\\*\\*\\*\\*.*\n+", preview, re.MULTILINE)
    if divider is None:
        raise exceptions.RawParsingError("Cannot find header divider '****'")
    header_length = divider.end()
    # Rewind so the header is read from the start of the stream.
    data.seek(0)
    header = _read_header(data.read(header_length))
    return _read_data(data.read(), header)
360
+
361
+
362
def _read_header(data: bytes) -> HaloHplHeader:
    """Parse the header section of an ``.hpl`` file.

    Lines of the form ``key:<TAB>value`` become header entries. Any other line
    must be either the instrument spectral width line or one of the known
    descriptive rows.

    Raises
    ------
    ValueError
        When a line without a single ``:<TAB>`` separator is not recognised.
    """
    known_rows = [
        b"Altitude of measurement (center of gate) = (range gate + 0.5) * Gate length",
        b"Range of measurement (center of gate) = (range gate + 0.5) * Gate length",
        b"Data line 1: Decimal time (hours) Azimuth (degrees) Elevation (degrees) "
        b"Pitch (degrees) Roll (degrees)",
        b"Data line 1: Decimal time (hours) Azimuth (degrees) Elevation (degrees)",
        b"f9.6,1x,f6.2,1x,f6.2",
        b"Data line 2: Range Gate Doppler (m/s) Intensity (SNR + 1) Beta (m-1 sr-1)",
        b"Data line 2: Range Gate Doppler (m/s) Intensity (SNR + 1) Beta (m-1 sr-1) "
        b"Spectral Width",
        b"i3,1x,f6.4,1x,f8.6,1x,e12.6 - repeat for no. gates",
        b"i3,1x,f6.4,1x,f8.6,1x,e12.6,1x,f6.4 - repeat for no. gates",
        b"****",
    ]
    entries = {}
    for row in data.strip().split(b"\r\n"):
        parts = row.split(b":\t")
        if len(parts) == 2:
            name, value = parts
            entries[name] = value
            continue
        # Exactly one element is expected here; more separators fail to unpack.
        (text,) = parts
        width_match = re.match(rb"\*\*\*\* Instrument spectral width = (.*)", text)
        if width_match:
            entries[b"instrument_spectral_width"] = width_match.group(1)
        elif text not in known_rows:
            raise ValueError(f"Unexpected row '{text!r}'")
    return HaloHplHeader.from_dict(entries)
391
+
392
+
393
def _read_data(data: bytes, header: HaloHplHeader) -> HaloHpl:
    """Parse the data section of an ``.hpl`` file.

    The section is a sequence of profiles, each made of one per-ray line
    followed by ``header.ngates`` per-gate lines. Incomplete leading and
    trailing profiles are discarded before parsing.

    Raises
    ------
    exceptions.RawParsingError
        When ``data`` is empty or the per-ray lines have differing lengths.
    ValueError
        When the gate indices of a profile are not 0, 1, 2, ...
    """
    if not data:
        raise exceptions.RawParsingError("No data found")
    # Some files contain null characters between profiles
    lines = data.strip().replace(b"\x00", b"").split(b"\r\n")
    profile_length = header.ngates + 1

    # Skip leading lines until the next line is the first gate (index 0).
    first = 0
    while first + 1 < len(lines) and lines[first + 1].strip().split()[0] != b"0":
        first += 1
    del lines[:first]

    # Walk back from the end while neighbouring lines differ in column count.
    last = len(lines) - 1
    while (
        last >= 1
        and header.ngates > 1
        and len(lines[last].strip().split()) != len(lines[last - 1].strip().split())
    ):
        last -= 1
    del lines[last + 1 :]

    # Drop whatever does not add up to a whole profile.
    leftover = len(lines) % profile_length
    if leftover > 0:
        del lines[-leftover:]

    ray_rows = [[float(tok) for tok in ln.split()] for ln in lines[::profile_length]]
    try:
        ray_values = np.array(ray_rows)
    except ValueError as err:
        if "inhomogeneous" not in str(err):
            raise
        raise exceptions.RawParsingError(
            "Inhomogeneous raw data. "
            "Probable reason: Number of gates changes in middle of the file"
        ) from err

    # With the per-ray lines removed, only per-gate lines remain.
    del lines[::profile_length]
    gate_values = np.array([[float(tok) for tok in ln.split()] for ln in lines])

    decimal_time = ray_values[:, 0]
    time = header.start_time.astype("datetime64[D]") + np.array(
        [_decimal_time2timedelta(hours) for hours in decimal_time]
    )
    azimuth = ray_values[:, 1]
    elevation = ray_values[:, 2]
    n_ray_columns = ray_values.shape[1]
    pitch = ray_values[:, 3] if n_ray_columns > 3 else None
    roll = ray_values[:, 4] if n_ray_columns > 4 else None

    cube = gate_values.reshape(len(decimal_time), header.ngates, -1)

    gate = cube[:, :, 0]
    gate_expected = np.arange(len(gate[0])).astype("float64")
    if not all(np.allclose(gate_expected, gate_row) for gate_row in gate):
        raise ValueError("all gate indices should be equal")
    radial_distance = (gate_expected + 0.5) * header.range_gate_length

    spectral_width = cube[:, :, 4] if cube.shape[2] > 4 else None

    return HaloHpl(
        header,
        time,
        radial_distance,
        azimuth,
        elevation,
        pitch,
        roll,
        cube[:, :, 1],
        cube[:, :, 2],
        cube[:, :, 3],
        spectral_width,
    )
477
+
478
+
479
+ def _decimal_time2timedelta(h: float) -> timedelta64:
480
+ return timedelta64(timedelta(hours=h))
@@ -0,0 +1,135 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import re
5
+ from dataclasses import dataclass
6
+ from datetime import datetime
7
+ from io import BufferedIOBase
8
+ from pathlib import Path
9
+ from typing import Iterable
10
+
11
+ import numpy as np
12
+ import numpy.typing as npt
13
+ from numpy import datetime64
14
+
15
+
16
@dataclass
class HaloSysParams:
    """Time series of Halo system parameters; every array shares the time axis."""

    time: npt.NDArray[datetime64]  # dim: (time, )
    internal_temperature: npt.NDArray[np.float64]  # dim: (time, ), unit: degree Celsius
    internal_relative_humidity: npt.NDArray[np.float64]  # dim: (time, )
    supply_voltage: npt.NDArray[np.float64]  # dim: (time, )
    acquisition_card_temperature: npt.NDArray[np.float64]  # dim: (time, )
    platform_pitch_angle: npt.NDArray[np.float64]  # dim: (time, ), unit: degrees
    platform_roll_angle: npt.NDArray[np.float64]  # dim: (time, ), unit: degrees

    @classmethod
    def from_src(cls, data: str | Path | bytes | BufferedIOBase) -> HaloSysParams:
        """Parse system parameters from a path, raw bytes or a binary stream.

        Raises
        ------
        TypeError
            When ``data`` is none of the supported types.
        """
        if isinstance(data, (str, Path)):
            with Path(data).open("rb") as f:
                return _from_src(f)
        if isinstance(data, bytes):
            return _from_src(io.BytesIO(data))
        if isinstance(data, BufferedIOBase):
            return _from_src(data)
        raise TypeError("Unsupported data type")

    @classmethod
    def merge(cls, raws: Iterable[HaloSysParams]) -> HaloSysParams:
        """Concatenate several instances in the order given.

        ``raws`` may be any iterable, including a one-shot generator.
        """
        # Materialise once: every field below needs its own pass over the
        # items, and a generator would be exhausted after the first pass.
        items = tuple(raws)

        def stacked(name: str) -> npt.NDArray[np.float64]:
            return np.concatenate(tuple(getattr(item, name) for item in items))

        return cls(
            stacked("time"),
            stacked("internal_temperature"),
            stacked("internal_relative_humidity"),
            stacked("supply_voltage"),
            stacked("acquisition_card_temperature"),
            stacked("platform_pitch_angle"),
            stacked("platform_roll_angle"),
        )

    def __getitem__(
        self,
        index: int | slice | list[int] | npt.NDArray[np.int64] | npt.NDArray[np.bool_],
    ) -> HaloSysParams:
        """Return a new instance with ``index`` applied to every array.

        Raises
        ------
        TypeError
            For any index that is not an int, slice, list or ndarray.
        """
        if not isinstance(index, (int, slice, list, np.ndarray)):
            raise TypeError(f"Unsupported index type: {type(index).__name__}")
        return HaloSysParams(
            self.time[index],
            self.internal_temperature[index],
            self.internal_relative_humidity[index],
            self.supply_voltage[index],
            self.acquisition_card_temperature[index],
            self.platform_pitch_angle[index],
            self.platform_roll_angle[index],
        )

    def sorted_by_time(self) -> HaloSysParams:
        """Return a copy ordered by ``time``."""
        return self[np.argsort(self.time)]

    def non_strictly_increasing_timesteps_removed(self) -> HaloSysParams:
        """Drop every sample whose time is not later than all preceding times.

        Each timestamp is compared against the running maximum rather than
        only its direct predecessor, so the remaining times are strictly
        increasing. An empty instance is returned unchanged.
        """
        if len(self.time) == 0:
            return self
        ticks = self.time.astype("int64")
        running_max = np.maximum.accumulate(ticks)
        keep = np.ones(len(ticks), dtype=np.bool_)
        # A sample survives only if it exceeds everything seen before it.
        keep[1:] = ticks[1:] > running_max[:-1]
        return self[keep]
77
+
78
+
79
+ def _correct_concatenated_rows(rows: list[bytes]) -> list[bytes]:
80
+ concat_pattern = re.compile(rb".*(\t[-+0-9]*\.[-+0-9]*\.[-+0-9]*\t).*")
81
+
82
+ matches = [concat_pattern.fullmatch(row) for row in rows]
83
+
84
+ if not any(matches):
85
+ return rows
86
+ elif not all(matches):
87
+ raise ValueError("Cannot correct the concatenated rows")
88
+
89
+ zero_column_pattern = re.compile(rb".*\t0\t.*")
90
+ if not all(zero_column_pattern.fullmatch(row) for row in rows):
91
+ raise ValueError(r"Concatenated rows are expected to have \t0\t pattern")
92
+ rows = [row.replace(b"\t0\t", b"\t") for row in rows]
93
+
94
+ new_rows = []
95
+ pattern = re.compile(rb"(.*\t[-+]?[0-9]+\.[0-9]+)([-+][0-9]+\.[0-9]+\t.*)")
96
+ pattern_nan = re.compile(rb"(.*\t)[-+]?[0-9]+\.[0-9]+\.[0-9]+(\t.*)")
97
+ for row in rows:
98
+ m = pattern.fullmatch(row)
99
+ if m:
100
+ new_rows.append(m.group(1) + b"\t" + m.group(2))
101
+ elif m_nan := pattern_nan.fullmatch(row):
102
+ new_rows.append(m_nan.group(1) + b"nan\tnan" + m_nan.group(2))
103
+ else:
104
+ raise ValueError("Cannot separate concatenated floats")
105
+ return new_rows
106
+
107
+
108
def _from_src(data: BufferedIOBase) -> HaloSysParams:
    """Parse a tab-separated system parameters stream.

    Decimal commas are converted to points and null bytes are dropped before
    the rows are split. Each row must have seven columns: a timestamp
    followed by six numeric values. Timestamps are tried first in 12-hour
    month/day/year form and then in 24-hour day/month/year form.

    Raises
    ------
    ValueError
        When the rows do not have seven columns or a timestamp matches
        neither format.
    """
    payload = data.read().strip().replace(b",", b".").replace(b"\x00", b"")
    rows = _correct_concatenated_rows(payload.strip().split(b"\r\n"))
    table = np.array([row.strip().split(b"\t") for row in rows])
    if table.shape[1] != 7:
        raise ValueError("Unexpected data format")

    def parse_stamp(stamp: bytes, fmt: str) -> np.datetime64:
        return datetime64(datetime.strptime(stamp.decode("utf-8"), fmt), "s")

    stamps = table[:, 0]
    times = np.full_like(stamps, np.datetime64("NaT"), dtype="datetime64[s]")
    for position, stamp in enumerate(stamps):
        try:
            times[position] = parse_stamp(stamp, "%m/%d/%Y %I:%M:%S %p")
        except ValueError:
            times[position] = parse_stamp(stamp, "%d/%m/%Y %H:%M:%S")
    values = np.vectorize(float)(table[:, 1:])
    return HaloSysParams(times, *values.T)
doppy/raw/utils.py ADDED
@@ -0,0 +1,14 @@
1
+ from io import BufferedIOBase
2
+ from pathlib import Path
3
+
4
+
5
def bytes_from_src(src: str | bytes | Path | BufferedIOBase) -> bytes:
    """Return the raw bytes behind ``src``.

    A ``str`` or ``Path`` is treated as a file path and read in binary mode,
    ``bytes`` are returned unchanged, and a buffered binary stream is read
    from its current position to the end.

    Raises
    ------
    TypeError
        When ``src`` is none of the supported types.
    """
    if isinstance(src, bytes):
        return src
    if isinstance(src, BufferedIOBase):
        return src.read()
    if isinstance(src, (str, Path)):
        return Path(src).read_bytes()
    raise TypeError(f"Unexpected type {type(src)} for src")