libxrk 0.7.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
libxrk/aim_xrk.pyx ADDED
@@ -0,0 +1,982 @@
1
+
2
+ # Copyright 2024, Scott Smith. MIT License (see LICENSE).
3
+
4
+ from array import array
5
+ import concurrent.futures
6
+ import ctypes
7
+ from dataclasses import dataclass, field
8
+ import math
9
+ import mmap
10
+ import numpy as np
11
+ import os
12
+ from pprint import pprint # pylint: disable=unused-import
13
+ import struct
14
+ import sys
15
+ import time
16
+ import traceback # pylint: disable=unused-import
17
+ from typing import Dict, List, Optional
18
+ import zlib
19
+
20
+ import cython
21
+ from cython.operator cimport dereference
22
+ from libcpp.vector cimport vector
23
+
24
+ import pyarrow as pa
25
+
26
+ from . import gps
27
+ from .gps import fix_gps_timing_gaps
28
+ from . import base
29
+
30
+ # 1,2,5,10,20,25,50 Hz
31
+ # units
32
+ # dec ptr
33
+
34
# dataclass(slots=True) is only available on Python 3.10+.  Compare the full
# version tuple (not just .minor, which would misfire on any future major
# version) to decide whether to pass slots=True to @dataclass below.
dc_slots = {'slots': True} if sys.version_info >= (3, 10) else {}
35
+
36
@dataclass(**dc_slots)
class Group:
    # A set of channels whose samples arrive together in one (G ...) record.
    index: int
    channels: List[int]
    # Raw concatenated record bytes collected during the scan.
    samples: array = field(default_factory=lambda: array('I'), repr=False)
    # used during building:
    timecodes: Optional[array] = field(default=None, repr=False)
43
+
44
@dataclass(**dc_slots)
class GroupRef:
    # Back-reference from a Channel to its Group, plus the byte offset of
    # that channel's value inside each group record.
    group: Group
    offset: int
48
+
49
@dataclass(**dc_slots)
class Channel:
    # One logged channel and, after decoding, its sample data.
    index: int = -1
    short_name: str = ""
    long_name: str = ""
    size: int = 0  # bytes per sample in the raw stream
    units: str = ""
    dec_pts: int = 0  # suggested display decimal places
    interpolate: bool = False
    # Raw CHS payload with the index/name fields zeroed out (see the CHS
    # branch of _decode_sequence); byte [20] selects the decoder.
    unknown: bytes = b""
    group: Optional[GroupRef] = None
    timecodes: object = field(default=None, repr=False)
    sampledata: object = field(default=None, repr=False)
62
+
63
@dataclass(**dc_slots)
class Message:
    # One decoded <h ...> framed message.
    # NOTE(review): _decode_sequence passes the integer token from _tokdec
    # here despite the bytes annotation, and `content` may be replaced by a
    # parsed object (str/dict/Channel/Group) depending on the token.
    token: bytes
    num: int  # the frame header's version byte
    content: bytes
68
+
69
@dataclass(**dc_slots)
class DataStream:
    # Fully decoded result of _decode_sequence.
    channels: Dict[str, Channel]
    # NOTE(review): keyed by the integer token from _tokdec, not str.
    messages: Dict[str, List[Message]]
    laps: pa.Table
    time_offset: int
    gnfi_timecodes: Optional[object] = None
76
+
77
@dataclass(**dc_slots)
class Decoder:
    # How to turn a channel's raw bytes into sample values.
    stype: str  # struct/memoryview type code of the raw samples
    interpolate: bool = False
    fixup: object = None  # optional callable applied to the decoded array
82
+
83
+ def _nullterm_string(s):
84
+ zero = s.find(0)
85
+ if zero >= 0: s = s[:zero]
86
+ return s.decode('ascii')
87
+
88
# Decoders keyed by channel long_name, taking precedence over the generic
# _decoders table (see process_channel in _decode_sequence).  The gear
# channels carry the gear number in bits 16-18, with bit 19 set meaning
# "no gear" (mapped to 0).
_manual_decoders = {
    'Calculated_Gear': Decoder('Q', fixup=lambda a: array('I', [0 if int(x) & 0x80000 else
                                                                (int(x) >> 16) & 7 for x in a])),
    'PreCalcGear': Decoder('Q', fixup=lambda a: array('I', [0 if int(x) & 0x80000 else
                                                            (int(x) >> 16) & 7 for x in a])),
}
94
+
95
+ _gear_table = np.arange(65536, dtype=np.uint16)
96
+ _gear_table[ord('N')] = 0
97
+ _gear_table[ord('1')] = 1
98
+ _gear_table[ord('2')] = 2
99
+ _gear_table[ord('3')] = 3
100
+ _gear_table[ord('4')] = 4
101
+ _gear_table[ord('5')] = 5
102
+ _gear_table[ord('6')] = 6
103
+
104
# Generic decoders keyed by the channel type byte (Channel.unknown[20]).
# Types 1 and 20 store IEEE half-precision floats; their fixup widens the
# raw uint16 buffer to float32 for downstream numpy code.
_decoders = {
    0: Decoder('i'), # Master Clock on M4GT4?
    1: Decoder('H', interpolate=True,
               fixup=lambda a: np.ndarray(buffer=a, shape=(len(a),),
                                          dtype=np.float16).astype(np.float32).data),
    3: Decoder('i'), # Master Clock on ScottE46?
    4: Decoder('h'),
    6: Decoder('f', interpolate=True),
    11: Decoder('h'),
    12: Decoder('i'), # Predictive Time?
    13: Decoder('B'), # status field?
    15: Decoder('H', fixup=lambda a: _gear_table[a]), # ?? NdscSwitch on M4GT4. Also actual size is 8 bytes
    20: Decoder('H', interpolate=True,
                fixup=lambda a: np.ndarray(buffer=a, shape=(len(a),),
                                           dtype=np.float16).astype(np.float32).data),
    24: Decoder('i'), # Best Run Diff?
}
121
+
122
# Logger model ID to name mapping
# These values are from the idn message in XRK files
# (see the 'idn'/'SRC' branches of _decode_sequence and _get_metadata).
_logger_models = {
    649: "MXP 1.3",
    793: "MXm",
}
128
+
129
# Map the unit byte of a CHS payload (payload[12] & 127) to a
# (unit string, display decimal places) pair.
_unit_map = {
    1: ('%', 2),
    3: ('G', 2),
    4: ('deg', 1),
    5: ('deg/s', 1),
    6: ('', 0), # number
    9: ('Hz', 0),
    11: ('', 0), # number
    12: ('mm', 0),
    14: ('bar', 2),
    15: ('rpm', 0),
    16: ('km/h', 0),
    17: ('C', 1),
    18: ('ms', 0),
    19: ('Nm', 0),
    20: ('km/h', 0),
    21: ('V', 1), # mv?
    22: ('l', 1),
    24: ('l/s', 0), # ? rs3 displayed 1l/h
    26: ('time?', 0),
    27: ('A', 0),
    30: ('lambda', 2),
    31: ('gear', 0),
    33: ('%', 2),
    43: ('kg', 3),
}
155
+
156
+ def _ndarray_from_mv(mv):
157
+ mv = memoryview(mv) # force it
158
+ return np.ndarray(buffer=mv, shape=(len(mv),), dtype=np.dtype(mv.format))
159
+
160
+ def _sliding_ndarray(buf, typ):
161
+ return np.ndarray(buffer=buf, dtype=typ,
162
+ shape=(len(buf) - array(typ).itemsize + 1,), strides=(1,))
163
+
164
+ def _tokdec(s):
165
+ if s: return ord(s[0]) + 256 * _tokdec(s[1:])
166
+ return 0
167
+
168
+ def _tokenc(i):
169
+ s = ''
170
+ while i:
171
+ s += chr(i & 255)
172
+ i >>= 8
173
+ return s
174
+
175
# Per-channel/group accumulator used while scanning the byte stream:
#   last_timecode - newest timecode accepted (records with older timecodes
#                   are dropped, see the `>` checks in _decode_sequence)
#   add_helper    - bytes to advance `pos` for one record of this entry
#   Mms           - milliseconds per sample for M (multi-sample) records
#   data          - raw record bytes collected so far
#   timecodes     - expanded per-sample timecodes (M records only)
accum = cython.struct(
    last_timecode=cython.int,
    add_helper=cython.ushort,
    Mms=cython.ushort,
    data=vector[cython.uchar],
    timecodes=vector[cython.int])

cdef packed struct smsg_hdr: # covers G, S, and M messages
    cython.ushort op
    cython.int timecode
    cython.ushort index
    cython.ushort count # for M messages only
    # data field(s) follow(s), size depends on type/group

cdef packed struct cmsg_hdr: # covers c messages
    cython.ushort op
    cython.uchar unk1 # always 0?
    cython.ushort channel # bottom 3 bits always 4?
    cython.uchar unk3 # always 0x84?
    cython.uchar unk4 # always 6?
    cython.int timecode
    # data field follows, size depends on type

# Header of a '<h' framed message: token, payload length, version, '>'.
cdef packed struct hmsg_hdr:
    cython.ushort op
    cython.uint tok
    cython.int hlen
    cython.uchar ver
    cython.uchar cl

# Footer of a '<h' framed message: repeats the token and carries a byte sum.
cdef packed struct hmsg_ftr:
    cython.uchar op
    cython.uint tok
    cython.ushort bytesum
    cython.uchar cl

# Overlay of all message header layouts, so one pointer cast handles them all.
cdef union msg_hdr:
    smsg_hdr s
    cmsg_hdr c
    hmsg_hdr h

ctypedef const cython.uchar* byte_ptr
ctypedef vector[accum] vaccum

# std::accumulate is used to checksum message payload bytes.
cdef extern from '<numeric>' namespace 'std' nogil:
    T accumulate[InputIt, T](InputIt first, InputIt last, T init)
221
+
222
cdef _resize_vaccum(vaccum & v, size_t idx):
    # Grow *v* (taken by C++ reference) so that index *idx* is valid,
    # initializing every newly created accumulator.
    if idx >= v.size():
        old_len = v.size()
        v.resize(idx + 1)
        for i in range(old_len, v.size()):
            v[i].last_timecode = -1  # "no sample accepted yet"
            v[i].add_helper = 1
            v[i].Mms = 0
230
+
231
cdef _Mms_lookup(int k):
    # Map the rate byte (unknown[64] & 127) to milliseconds per sample.
    # Not sure how to represent 500 Hz.
    return {8: 5,    # 200 Hz
            16: 10,  # 100 Hz
            32: 20,  # 50 Hz
            64: 40,  # 25 Hz
            80: 50,  # 20 Hz
            # I guess 10Hz, 5Hz, 2Hz, and 1Hz don't use M messages
            }.get(k, 0)
240
+
241
@cython.wraparound(False)
def _decode_sequence(s, progress=None):
    """Scan the raw XRK byte stream *s* and decode every message in it.

    Returns a DataStream with the decoded channels, raw messages keyed by
    integer token, the lap table, and the session time offset.  *progress*,
    if given, is called as progress(pos, total) periodically while scanning.

    Corrupt bytes are skipped one at a time (see the except handler at the
    bottom of the scan loop), so a damaged file yields as much data as
    possible instead of failing outright.
    """
    cdef const cython.uchar[::1] sv = s
    groups = []
    channels = []
    messages = {}
    tok_GPS: cython.uint = _tokdec('GPS')
    tok_GPS1: cython.uint = _tokdec('GPS1')
    tok_GNFI: cython.uint = _tokdec('GNFI')
    progress_interval: cython.Py_ssize_t = 8_000_000
    next_progress: cython.Py_ssize_t = progress_interval
    pos: cython.Py_ssize_t = 0
    oldpos: cython.Py_ssize_t = pos
    badbytes: cython.Py_ssize_t = 0
    badpos: cython.Py_ssize_t = 0
    # Precomputed 16-bit opcodes: '(' + second letter identifies the frame.
    ord_op: cython.int = ord('(')
    ord_cp: cython.int = ord(')')
    ord_op_G : cython.int = ord_op + 256 * ord('G')
    ord_op_S : cython.int = ord_op + 256 * ord('S')
    ord_op_M : cython.int = ord_op + 256 * ord('M')
    ord_op_c : cython.int = ord_op + 256 * ord('c')
    ord_lt: cython.int = ord('<')
    ord_lt_h : cython.int = ord_lt + 256 * ord('h')
    ord_gt: cython.int = ord('>')
    len_s: cython.Py_ssize_t = len(s)
    cdef vaccum[4] gc_data # [0]: G messages (groups) [1]: S messages (samples?) [2]: c messages (channels from expansion) [3]: M messages
    time_offset = None
    last_time = None
    t1 = time.perf_counter()
    cdef vaccum * data_cat
    cdef accum * data_p
    gpsmsg: vector[cython.uchar]
    gnfimsg: vector[cython.uchar]
    # Debug switches gating the print statements below.
    show_all: cython.int = 0
    show_bad: cython.int = 0
    while pos < len_s:
        try:
            while True:
                oldpos = pos
                if pos + 10 >= len_s: # smallest message is 3 (frame) + 4 (tc) + 2 (idx) + 1 (data)
                    raise IndexError
                msg = <msg_hdr *>&sv[pos]
                typ: cython.int = msg.s.op
                # True exactly when typ is ord_op_G or ord_op_S.
                if abs(typ - (ord_op_G + ord_op_S) // 2) == (ord_op_S - ord_op_G) // 2:
                    data_cat = &gc_data[typ == ord_op_S]
                    data_p = &dereference(data_cat)[msg.s.index]
                    if data_p >= &dereference(data_cat.end()):
                        raise IndexError
                    pos += data_p.add_helper
                    last = &sv[pos-1]
                    if last[0] != ord_cp:
                        raise ValueError("%s at %x" % (chr(s[pos-1]), pos-1))
                    if show_all:
                        print('tc=%d %s idx=%d' % (msg.s.timecode, chr(msg.s.op >> 8), msg.s.index))
                    # Only accept records newer than what we already have.
                    if msg.s.timecode > data_p.last_timecode:
                        data_p.last_timecode = msg.s.timecode
                        data_p.data.insert(data_p.data.end(),
                                           <const cython.uchar *>&msg.s.timecode, last)
                elif typ == ord_op_M:
                    data_p = &gc_data[3][msg.s.index]
                    if data_p >= &dereference(gc_data[3].end()):
                        raise IndexError
                    if data_p.Mms == 0:
                        raise ValueError('No ms understood for channel %s' %
                                         channels[msg.s.index].long_name)
                    pos += data_p.add_helper * msg.s.count + 10
                    if sv[pos] != ord_cp:
                        raise ValueError("%s at %x" % (chr(s[pos]), pos))
                    if show_all:
                        print('tc=%d M idx=%d cnt=%d ms=%d' %
                              (msg.s.timecode, msg.s.index, msg.s.count, data_p.Mms))
                    if msg.s.timecode > data_p.last_timecode:
                        data_p.last_timecode = msg.s.timecode + (msg.s.count-1) * data_p.Mms
                        # Expand one M record into `count` evenly spaced samples.
                        m_tc : cython.int
                        for m_tc in range(msg.s.count):
                            data_p.timecodes.push_back(msg.s.timecode + m_tc * data_p.Mms)
                        data_p.data.insert(data_p.data.end(),
                                           &sv[oldpos+10], &sv[pos])
                    pos += 1
                elif typ == ord_op_c:
                    assert msg.c.unk1 == 0, '%x' % msg.c.unk1
                    assert (msg.c.channel & 7) == 4, '%x' % (msg.c.channel & 7)
                    assert msg.c.unk3 == 0x84, '%x' % msg.c.unk3
                    assert msg.c.unk4 == 6, '%x' % msg.c.unk4
                    data_cat = &gc_data[2]
                    data_p = &dereference(data_cat)[msg.c.channel >> 3]
                    if data_p >= &dereference(data_cat.end()):
                        raise IndexError
                    pos += data_p.add_helper
                    last = &sv[pos-1]
                    if last[0] != ord_cp:
                        raise ValueError("%s at %x" % (chr(s[pos-1]), pos-1))
                    if show_all:
                        print('tc=%d c idx=%d' % (msg.c.timecode, msg.c.channel >> 3))
                    if msg.c.timecode > data_p.last_timecode:
                        data_p.last_timecode = msg.c.timecode
                        data_p.data.insert(data_p.data.end(),
                                           <const cython.uchar *>&msg.c.timecode, last)
                elif typ == ord_lt_h:
                    if pos > next_progress:
                        next_progress += progress_interval
                        if progress:
                            progress(pos, len(s))
                    tok: cython.uint = msg.h.tok
                    hlen: cython.Py_ssize_t = msg.h.hlen
                    if hlen >= len_s:
                        raise IndexError
                    ver = msg.h.ver
                    assert msg.h.cl == ord_gt, "%s at %x" % (chr(msg.h.cl), pos+11)
                    pos += 12

                    # get some "free" range checking here before we go walking data[]
                    assert sv[pos+hlen] == ord_lt, "%s at %x" % (s[pos+hlen], pos+hlen)

                    bytesum: cython.ushort = accumulate[byte_ptr, cython.int](
                        &sv[pos], &sv[pos+hlen], 0)
                    pos += hlen

                    msgf = <hmsg_ftr *>&sv[pos]

                    # Footer must echo the token and match the payload checksum.
                    assert msgf.tok == tok, "%x vs %x at %x" % (msgf.tok, tok, pos+1)
                    assert msgf.bytesum == bytesum, '%x vs %x at %x' % (msgf.bytesum, bytesum, pos+5)
                    assert msgf.cl == ord_gt, "%s at %x" % (chr(msgf.cl), pos+7)
                    pos += 8

                    if (tok >> 24) == 32:
                        tok -= 32 << 24 # rstrip(' ')

                    if tok == tok_GPS or tok == tok_GPS1:
                        # fast path common case
                        gpsmsg.insert(gpsmsg.end(), &sv[oldpos+12], &sv[pos-8])
                    elif tok == tok_GNFI:
                        # fast path for GNFI messages (logger internal clock)
                        gnfimsg.insert(gnfimsg.end(), &sv[oldpos+12], &sv[pos-8])
                    else:
                        data = s[oldpos + 12 : pos - 8]
                        if tok == _tokdec('CNF'):
                            # Configuration block: a nested message sequence
                            # that declares the channels and groups.
                            data = _decode_sequence(data).messages
                            #channels = {} # Replays don't necessarily contain all the original channels
                            for m in data[_tokdec('CHS')]:
                                channels += [None] * (m.content.index - len(channels) + 1)
                                if not channels[m.content.index]:
                                    channels[m.content.index] = m.content
                                    _resize_vaccum(gc_data[1], m.content.index)
                                    gc_data[1][m.content.index].add_helper = m.content.size + 9
                                    _resize_vaccum(gc_data[2], m.content.index)
                                    gc_data[2][m.content.index].add_helper = m.content.size + 12
                                    _resize_vaccum(gc_data[3], m.content.index)
                                    gc_data[3][m.content.index].add_helper = m.content.size
                                    gc_data[3][m.content.index].Mms = _Mms_lookup(
                                        m.content.unknown[64] & 127)
                                else:
                                    assert channels[m.content.index].short_name == m.content.short_name, "%s vs %s" % (channels[m.content.index].short_name, m.content.short_name)
                                    assert channels[m.content.index].long_name == m.content.long_name
                            for m in data.get(_tokdec('GRP'), []):
                                groups += [None] * (m.content.index - len(groups) + 1)
                                groups[m.content.index] = m.content
                                # Group records start with a 6-byte prefix
                                # before the packed channel values.
                                idx = 6
                                for ch in m.content.channels:
                                    channels[ch].group = GroupRef(m.content, idx)
                                    idx += channels[ch].size
                                if show_all:
                                    print('GROUP', m.content.index,
                                          [(ch, channels[ch].long_name, channels[ch].size)
                                           for ch in m.content.channels])

                                _resize_vaccum(gc_data[0], m.content.index)
                                gc_data[0][m.content.index].add_helper = 9 + sum(
                                    channels[ch].size for ch in m.content.channels)
                        elif tok == _tokdec('GRP'):
                            data = memoryview(data).cast('H')
                            assert data[1] == len(data[2:])
                            data = Group(index = data[0], channels = data[2:])
                        elif tok == _tokdec('CDE'):
                            data = ['%02x' % x for x in data]
                        elif tok == _tokdec('CHS'):
                            dcopy = bytearray(data) # copy
                            data = Channel()
                            (data.index,
                             data.short_name,
                             data.long_name,
                             data.size) = struct.unpack('<H22x8s24s16xB39x', dcopy)
                            try:
                                data.units, data.dec_pts = _unit_map[dcopy[12] & 127]
                            except KeyError:
                                print('Unknown units[%d] for %s' %
                                      (dcopy[12] & 127, data.long_name))
                                data.units = ''
                                data.dec_pts = 0

                            # [12] maybe type (lower bits) combined with scale or ??
                            # [13] decoder of some type?
                            # [20] possibly how to decode bytes
                            # [64] data rate. 32=50Hz, 64=25Hz, 80=20Hz, 160=10Hz. What about 5Hz, 2Hz, 1Hz?
                            # [84] decoder of some type?
                            dcopy[0:2] = [0] * 2 # reset index
                            dcopy[24:32] = [0] * 8 # short name
                            dcopy[32:56] = [0] * 24 # long name
                            data.unknown = bytes(dcopy)
                            data.short_name = _nullterm_string(data.short_name)
                            data.long_name = _nullterm_string(data.long_name)
                            data.timecodes = array('i')
                            data.sampledata = bytearray()
                        elif tok == _tokdec('LAP'):
                            # cache first time offset for use later
                            duration, end_time = struct.unpack('4xI8xI', data)
                            if time_offset is None:
                                time_offset = end_time - duration
                            last_time = end_time
                        elif tok in (_tokdec('RCR'), _tokdec('VEH'), _tokdec('CMP'), _tokdec('VTY'), _tokdec('NDV'), _tokdec('TMD'), _tokdec('TMT'),
                                     _tokdec('DBUN'), _tokdec('DBUT'), _tokdec('DVER'), _tokdec('MANL'), _tokdec('MODL'), _tokdec('MANI'),
                                     _tokdec('MODI'), _tokdec('HWNF'), _tokdec('PDLT'), _tokdec('NTE')):
                            data = _nullterm_string(data)
                        elif tok == _tokdec('idn'):
                            # idn message: 56-byte payload with logger info
                            # Offset +0: model ID (16-bit LE)
                            # Offset +6: logger ID (32-bit LE)
                            if len(data) >= 10:
                                model_id = struct.unpack('<H', data[0:2])[0]
                                logger_id = struct.unpack('<I', data[6:10])[0]
                                data = {'model_id': model_id, 'logger_id': logger_id}
                        elif tok == _tokdec('SRC'):
                            # SRC message contains embedded idn data
                            # Format: 3-byte token + 1-byte version + 2-byte length + payload
                            if len(data) >= 62 and data[:3] == b'idn':
                                # Parse the embedded idn payload (skip 6-byte header)
                                idn_payload = data[6:62]
                                model_id = struct.unpack('<H', idn_payload[0:2])[0]
                                logger_id = struct.unpack('<I', idn_payload[6:10])[0]
                                # Store as idn message type for metadata extraction
                                idn_msg = Message(_tokdec('idn'), 1, {'model_id': model_id, 'logger_id': logger_id})
                                if _tokdec('idn') not in messages:
                                    messages[_tokdec('idn')] = []
                                messages[_tokdec('idn')].append(idn_msg)
                        elif tok == _tokdec('ENF'):
                            data = _decode_sequence(data).messages
                        elif tok == _tokdec('TRK'):
                            data = {'name': _nullterm_string(data[:32]),
                                    'sf_lat': memoryview(data).cast('i')[9] / 1e7,
                                    'sf_long': memoryview(data).cast('i')[10] / 1e7}
                        elif tok == _tokdec('ODO'):
                            # not sure how to map fuel.
                            # Fuel Used channel claims 8.56l used (2046.0-2037.4)
                            # Fuel Used odo says 70689.
                            data = {_nullterm_string(data[i:i+16]):
                                    {'time': memoryview(data[i+16:i+24]).cast('I')[0], # seconds
                                     'dist': memoryview(data[i+16:i+24]).cast('I')[1]} # meters
                                    for i in range(0, len(data), 64)
                                    # not sure how to parse fuel, doesn't match any expected units
                                    if not _nullterm_string(data[i:i+16]).startswith('Fuel')}

                        try:
                            messages[tok].append(Message(tok, ver, data))
                        except KeyError:
                            messages[tok] = [Message(tok, ver, data)]
                else:
                    assert False, "%02x%02x at %x" % (s[pos], s[pos+1], pos)
        except Exception as _err: # pylint: disable=broad-exception-caught
            # Any decode failure: count the byte at oldpos as bad and resume
            # scanning one byte later.
            if oldpos != badpos + badbytes and badbytes:
                if show_bad:
                    print('Bad bytes(%d at %x):' % (badbytes, badpos),
                          ', '.join('%02x' % c for c in s[badpos:badpos + badbytes])
                          )
                badbytes = 0
            if not badbytes:
                if show_bad:
                    sys.stdout.flush()
                    traceback.print_exc()
                badpos = oldpos # pylint: disable=unused-variable
            if oldpos < len_s:
                badbytes += 1
                pos = oldpos + 1
    t2 = time.perf_counter()
    if badbytes:
        if show_bad:
            print('Bad bytes(%d at %x):' % (badbytes, badpos),
                  ', '.join('%02x' % c for c in s[badpos:badpos + badbytes])
                  )
        badbytes = 0
    assert pos == len(s)
    # quick scan through all the groups/channels for the first used timecode
    if channels:
        # int(min(time_offset, time_offset,
        time_offset = int(min(
            ([time_offset] if time_offset is not None else [])
            #XXX*[s2mv[l[0]] for l in g_indices if l.size()],
            #XXX*[s2mv[l[0]] for l in ch_indices if l.size()],
            + [c.timecodes[0] for c in channels if c and len(c.timecodes)],
            default=0))
        last_time = int(max(
            ([last_time] if last_time is not None else [])
            #XXX*[s2mv[l[l.size()-1]] for l in g_indices if l.size()],
            #XXX*[s2mv[l[l.size()-1]] for l in ch_indices if l.size()],
            + [c.timecodes[len(c.timecodes)-1] for c in channels if c and len(c.timecodes)],
            default=0))

    def process_group(g):
        # Turn a group's accumulated raw bytes into samples + timecodes,
        # then decode each member channel.
        g.samples = np.array([], dtype=np.int32)
        g.timecodes = g.samples.data
        if g.index < gc_data[0].size():
            data_p = &gc_data[0][g.index]
            if data_p.data.size():
                g.samples = np.asarray(<cython.uchar[:data_p.data.size()]> &data_p.data[0])
                rows = len(g.samples) // (data_p.add_helper - 3)
                g.timecodes = np.ndarray(buffer=g.samples, dtype=np.int32,
                                         shape=(rows,),
                                         strides=(data_p.add_helper-3,)) - time_offset
        for ch in g.channels:
            process_channel(channels[ch])

    def process_channel(c):
        # Decode one channel's raw bytes using its Decoder; data may come
        # from its group, from S/c records, or from M records.
        if c.long_name in _manual_decoders:
            d = _manual_decoders[c.long_name]
        elif c.unknown[20] in _decoders:
            d = _decoders[c.unknown[20]]
        else:
            return

        c.interpolate = d.interpolate
        if c.group:
            grp = c.group.group
            c.timecodes = grp.timecodes
            c.sampledata = np.ndarray(buffer=grp.samples[c.group.offset:], dtype=d.stype,
                                      shape=grp.timecodes.shape,
                                      strides=(gc_data[0][grp.index].add_helper-3,)).copy()
        else:
            # check for S messages
            view_offset = 6
            stride_offset = 3
            data_p = &gc_data[1][c.index]
            if not data_p.data.size():
                # No? maybe c messages
                view_offset = 4
                stride_offset = 8
                data_p = &gc_data[2][c.index]
            if data_p.data.size():
                assert len(c.timecodes) == 0, "Can't have both S/c and M records for channel %s (index=%d, %d vs %d)" % (c.long_name, c.index, len(c.timecodes), data_p.data.size())

                # TREAD LIGHTLY - raw pointers here
                view = np.asarray(<cython.uchar[:data_p.data.size()]> &data_p.data[0])
                rows = len(view) // (data_p.add_helper - stride_offset)

                tc = np.ndarray(buffer=view, dtype=np.int32,
                                shape=(rows,), strides=(data_p.add_helper-stride_offset,)).copy()
                samp = np.ndarray(buffer=view[view_offset:], dtype=d.stype,
                                  shape=(rows,), strides=(data_p.add_helper-stride_offset,)).copy()
            else:
                data_p = &gc_data[3][c.index] # M messages
                if data_p.timecodes.size():
                    tc = np.asarray(<cython.int[:data_p.timecodes.size()]>
                                    &data_p.timecodes[0]).copy()
                    samp = np.ndarray(buffer=np.asarray(<cython.uchar[:data_p.data.size()]>
                                                        &data_p.data[0]),
                                      dtype=d.stype, shape=tc.shape).copy()
                else:
                    tc = _ndarray_from_mv(c.timecodes)
                    samp = _ndarray_from_mv(memoryview(c.sampledata).cast(d.stype))
            c.timecodes = (tc - time_offset).data
            c.sampledata = samp.data

        if d.fixup:
            c.sampledata = memoryview(d.fixup(c.sampledata))
        if c.units == 'V': # most are really encoded as mV, but one or two aren't....
            c.sampledata = np.divide(c.sampledata, 1000).data

    laps = None
    gnfi_timecodes = None
    if not channels:
        t4 = time.perf_counter()
        pass # nothing to do
    elif progress:
        # Overlap the GPS/lap work with group/channel decoding.
        with concurrent.futures.ThreadPoolExecutor(max_workers=min(2, os.cpu_count())) as worker:
            bg_work = worker.submit(_bg_gps_laps, <cython.uchar[:gpsmsg.size()]> &gpsmsg[0],
                                    <cython.uchar[:gnfimsg.size()]> &gnfimsg[0] if gnfimsg.size() else None,
                                    messages, time_offset, last_time)
            group_work = worker.map(process_group, [x for x in groups if x])
            channel_work = worker.map(process_channel,
                                      [x for x in channels if x and not x.group])
            gps_ch, laps, gnfi_timecodes = bg_work.result()
            t4 = time.perf_counter()
            for i in group_work:
                pass
            for i in channel_work:
                pass
            channels.extend(gps_ch)
    else:
        for g in groups:
            if g: process_group(g)
        for c in channels:
            if c and not c.group: process_channel(c)
        t4 = time.perf_counter()
        gps_ch, laps, gnfi_timecodes = _bg_gps_laps(
            <cython.uchar[:gpsmsg.size()]> &gpsmsg[0],
            <cython.uchar[:gnfimsg.size()]> &gnfimsg[0] if gnfimsg.size() else None,
            messages, time_offset, last_time)
        channels.extend(gps_ch)

    t3 = time.perf_counter()
    if t3-t1 > 0.1:
        print('division: scan=%f, gps=%f, group/ch=%f more' % (t2-t1, t4-t2, t3-t4))

    return DataStream(
        channels={ch.long_name: ch for ch in channels
                  if ch and len(ch.sampledata)
                  and ch.long_name not in ('StrtRec', 'Master Clk')},
        messages=messages,
        laps=laps,
        time_offset=time_offset,
        gnfi_timecodes=gnfi_timecodes)
649
+
650
def _get_metadata(msg_by_type):
    """Build a flat {label: value} metadata dict from decoded messages.

    Always takes the most recent (last) message of each type.
    """
    meta = {}

    simple_fields = (
        (_tokdec('RCR'), 'Driver'),
        (_tokdec('VEH'), 'Vehicle'),
        (_tokdec('TMD'), 'Log Date'),
        (_tokdec('TMT'), 'Log Time'),
        (_tokdec('VTY'), 'Session'),
        (_tokdec('CMP'), 'Series'),
        (_tokdec('NTE'), 'Long Comment'),
    )
    for token, label in simple_fields:
        if token in msg_by_type:
            meta[label] = msg_by_type[token][-1].content

    track_tok = _tokdec('TRK')
    if track_tok in msg_by_type:
        # ignore the start/finish line?
        meta['Venue'] = msg_by_type[track_tok][-1].content['name']

    odo_tok = _tokdec('ODO')
    if odo_tok in msg_by_type:
        for odo_name, stats in msg_by_type[odo_tok][-1].content.items():
            seconds = stats['time']
            meta['Odo/%s Distance (km)' % odo_name] = stats['dist'] / 1000
            meta['Odo/%s Time' % odo_name] = '%d:%02d:%02d' % (seconds // 3600,
                                                               seconds // 60 % 60,
                                                               seconds % 60)

    # Logger info from idn message
    idn_tok = _tokdec('idn')
    if idn_tok in msg_by_type:
        idn_data = msg_by_type[idn_tok][-1].content
        if isinstance(idn_data, dict):
            meta['Logger ID'] = idn_data['logger_id']
            meta['Logger Model ID'] = idn_data['model_id']
            meta['Logger Model'] = _logger_models.get(idn_data['model_id'])

    # Device name from NDV message
    ndv_tok = _tokdec('NDV')
    if ndv_tok in msg_by_type:
        meta['Device Name'] = msg_by_type[ndv_tok][-1].content

    return meta
682
+
683
def _bg_gps_laps(gpsmsg, gnfimsg, msg_by_type, time_offset, last_time):
    """Decode GPS channels, GNFI timecodes, and the lap table.

    Runs either inline or on a worker thread (see _decode_sequence).
    Returns (gps_channels, laps_table, gnfi_timecodes).
    """
    gps_channels = _decode_gps(gpsmsg, time_offset)
    gnfi_timecodes = _decode_gnfi(gnfimsg, time_offset)
    by_name = {ch.long_name: ch for ch in gps_channels}
    laps = _get_laps(by_name.get('GPS Latitude'), by_name.get('GPS Longitude'),
                     msg_by_type, time_offset, last_time)
    return gps_channels, laps, gnfi_timecodes
693
+
694
def _decode_gps(gpsmsg, time_offset):
    """Decode concatenated 56-byte GPS records into four synthetic channels.

    Each record holds a timecode plus ECEF position (cm) and velocity (cm/s);
    returns GPS Speed / Latitude / Longitude / Altitude Channels sharing one
    timecode buffer, or [] when there is no GPS data.
    """
    if not gpsmsg: return []
    alldata = memoryview(gpsmsg)
    assert len(alldata) % 56 == 0
    # Every 14th int32 (= every 56 bytes) is the record's timecode.
    timecodes = np.asarray(alldata[0:].cast('i')[::56//4])
    # certain old MXP firmware (and maybe others) would periodically
    # butcher the upper 16-bits of the timecode field. If necessary,
    # reconstruct it using only the bottom 16-bits and assuming time
    # never skips ahead too far.
    if np.any(timecodes[1:] < timecodes[:-1]):
        timecodes = (timecodes & 65535) + (timecodes[0] - (timecodes[0] & 65535))
        timecodes += 65536 * np.cumsum(np.concatenate(([0], timecodes[1:] < timecodes[:-1])))
    #itow_ms = alldata[4:].cast('I')[::56//4]
    #weekN = alldata[12:].cast('H')[::56//2]
    ecefX_cm = alldata[16:].cast('i')[::56//4]
    ecefY_cm = alldata[20:].cast('i')[::56//4]
    ecefZ_cm = alldata[24:].cast('i')[::56//4]
    #posacc_cm = alldata[28:].cast('i')[::56//4]
    ecefdX_cms = alldata[32:].cast('i')[::56//4]
    ecefdY_cms = alldata[36:].cast('i')[::56//4]
    ecefdZ_cms = alldata[40:].cast('i')[::56//4]
    #velacc_cms = alldata[44:].cast('i')[::56//4]
    #nsat = alldata[51::56]

    timecodes = memoryview(timecodes - time_offset)

    # Convert ECEF meters to geodetic lat/long/alt.
    gpsconv = gps.ecef2lla(np.divide(ecefX_cm, 100),
                           np.divide(ecefY_cm, 100),
                           np.divide(ecefZ_cm, 100))

    return [Channel(
        long_name='GPS Speed',
        units='m/s',
        dec_pts=1,
        interpolate=True,
        timecodes=timecodes,
        # Speed is the magnitude of the ECEF velocity vector, in m/s.
        sampledata=memoryview(np.sqrt(np.square(ecefdX_cms) +
                                      np.square(ecefdY_cms) +
                                      np.square(ecefdZ_cms)) / 100.)),
            Channel(long_name='GPS Latitude', units='deg', dec_pts=4, interpolate=True,
                    timecodes=timecodes, sampledata=memoryview(gpsconv.lat)),
            Channel(long_name='GPS Longitude', units='deg', dec_pts=4, interpolate=True,
                    timecodes=timecodes, sampledata=memoryview(gpsconv.long)),
            Channel(long_name='GPS Altitude', units='m', dec_pts=1, interpolate=True,
                    timecodes=timecodes, sampledata=memoryview(gpsconv.alt))]
739
+
740
+ def _decode_gnfi(gnfimsg, time_offset):
741
+ """Parse GNFI messages and return timecodes array.
742
+
743
+ GNFI messages run on the logger's internal clock, not the GPS timecode stream.
744
+ This provides a ground truth reference for detecting GPS timing bugs.
745
+
746
+ GNFI message structure (32 bytes each):
747
+ - Bytes 0-3: Logger timecode (int32)
748
+ - Bytes 4-31: Other data (not used for timing)
749
+
750
+ Args:
751
+ gnfimsg: Raw GNFI message bytes
752
+ time_offset: Time offset to subtract from timecodes
753
+
754
+ Returns:
755
+ numpy array of GNFI timecodes, or None if no GNFI data
756
+ """
757
+ if not gnfimsg:
758
+ return None
759
+ alldata = memoryview(gnfimsg)
760
+ if len(alldata) % 32 != 0:
761
+ return None
762
+ timecodes = np.asarray(alldata[0:].cast('i')[::32//4]) - time_offset
763
+ return timecodes
764
+
765
+
766
def _get_laps(lat_ch, lon_ch, msg_by_type, time_offset, last_time):
    """Build the lap table, preferring GPS-derived laps over LAP messages.

    With GPS channels available, lap boundaries are found by crossing the
    track's start/finish line; otherwise the logger's LAP messages are used,
    inferring a single missing lap when the lap numbers skip by two.
    Returns a PyArrow table with int32 'num' and int64 'start_time'/'end_time'
    columns (times are in the session's offset-adjusted timebase).
    """
    lap_nums = []
    start_times = []
    end_times = []

    if lat_ch and lon_ch:
        # If we have GPS, do gps lap insert.

        track = msg_by_type[_tokdec('TRK')][-1].content
        XYZ = np.column_stack(gps.lla2ecef(np.array(lat_ch.sampledata),
                                           np.array(lon_ch.sampledata), 0))
        lap_markers = gps.find_laps(XYZ,
                                    np.array(lat_ch.timecodes),
                                    (track['sf_lat'], track['sf_long']))

        # Use GPS channel's last timecode as session end (already adjusted)
        # This avoids relying on last_time which may be 0 when no LAP messages exist
        session_end = int(lat_ch.timecodes[-1]) if len(lat_ch.timecodes) else (last_time - time_offset if last_time else 0)
        lap_markers = [0] + lap_markers + [session_end]

        # Consecutive markers delimit consecutive laps.
        for lap, (start_time, end_time) in enumerate(zip(lap_markers[:-1], lap_markers[1:])):
            lap_nums.append(lap)
            start_times.append(start_time)
            end_times.append(end_time)
    else:
        # otherwise, use the lap data provided.
        if _tokdec('LAP') in msg_by_type:
            for m in msg_by_type[_tokdec('LAP')]:
                # 2nd byte is segment #, see M4GT4
                segment, lap, duration, end_time = struct.unpack('xBHIxxxxxxxxI', m.content)
                end_time -= time_offset
                if segment:
                    continue
                elif not lap_nums:
                    pass
                elif lap_nums[-1] == lap:
                    # duplicate report for the same lap
                    continue
                elif lap_nums[-1] + 1 == lap:
                    pass
                elif lap_nums[-1] + 2 == lap:
                    # emit inferred lap
                    lap_nums.append(lap - 1)
                    start_times.append(end_times[-1])
                    end_times.append(end_time - duration)
                else:
                    assert False, 'Lap gap from %d to %d' % (lap_nums[-1], lap)
                lap_nums.append(lap)
                start_times.append(end_time - duration)
                end_times.append(end_time)

    # Create PyArrow table
    return pa.table({
        'num': pa.array(lap_nums, type=pa.int32()),
        'start_time': pa.array(start_times, type=pa.int64()),
        'end_time': pa.array(end_times, type=pa.int64())
    })
822
+
823
+
824
def _channel_to_table(ch):
    """Convert a Channel object to a PyArrow table with metadata.

    The returned table has an int64 'timecodes' column plus one data column
    named after the channel; units/dec_pts/interpolate ride along as field
    metadata on the data column.
    """
    # Create metadata dict for the channel data field (without name, as it's the column name)
    # NOTE(review): units are suppressed when ch.size == 1 — presumably these
    # 1-byte channels carry status codes rather than physical values; confirm.
    metadata = {
        b'units': (ch.units if ch.size != 1 else '').encode('utf-8'),
        b'dec_pts': str(ch.dec_pts).encode('utf-8'),
        b'interpolate': str(ch.interpolate).encode('utf-8')
    }

    # Determine the appropriate type for values based on the data
    if isinstance(ch.sampledata, memoryview):
        values_array = np.array(ch.sampledata)
    else:
        values_array = ch.sampledata

    # Create the schema with metadata on the channel data field
    # Use the actual channel name as the column name
    channel_field = pa.field(ch.long_name, pa.from_numpy_dtype(values_array.dtype), metadata=metadata)
    schema = pa.schema([
        pa.field('timecodes', pa.int64()),
        channel_field
    ])

    # Create the table with the channel name as the column name
    return pa.table({
        'timecodes': pa.array(ch.timecodes, type=pa.int64()),
        ch.long_name: pa.array(values_array)
    }, schema=schema)
852
+
853
+
854
+ def _decompress_if_zlib(data):
855
+ """Decompress zlib-compressed data if detected, otherwise return as-is.
856
+
857
+ XRZ files are XRK files compressed with zlib. They start with zlib magic
858
+ bytes (0x78 followed by 0x01, 0x9C, or 0xDA).
859
+ """
860
+ if len(data) < 2:
861
+ return data
862
+
863
+ # Check for zlib magic bytes
864
+ first_byte = data[0] if isinstance(data[0], int) else ord(data[0])
865
+ second_byte = data[1] if isinstance(data[1], int) else ord(data[1])
866
+
867
+ if first_byte == 0x78 and second_byte in (0x01, 0x9C, 0xDA):
868
+ deco = zlib.decompressobj()
869
+ try:
870
+ return deco.decompress(bytes(data))
871
+ except zlib.error:
872
+ # Truncated stream - recover partial data
873
+ return deco.flush()
874
+
875
+ return data
876
+
877
+
878
class _open_xrk:
    """Context manager that opens an XRK/XRZ file, using mmap if available, falling back to read().

    This handles environments like JupyterLite where mmap may not be supported.
    Also accepts bytes or file-like objects directly.
    XRZ files (zlib-compressed XRK) are automatically decompressed.
    """
    def __init__(self, source):
        # source: filesystem path, bytes/bytearray/memoryview, or file-like
        self._source = source
        self._file = None   # open file object when source is a path
        self._mmap = None   # mmap over the file when mmap is usable
        self._data = None   # in-memory (possibly decompressed) contents

    def __enter__(self):
        """Return a readable buffer (mmap or bytes) of the uncompressed contents."""
        # Handle bytes input directly
        if isinstance(self._source, (bytes, bytearray)):
            self._data = _decompress_if_zlib(self._source)
            return self._data

        # Handle memoryview - convert to bytes for consistent handling
        if isinstance(self._source, memoryview):
            self._data = _decompress_if_zlib(bytes(self._source))
            return self._data

        # Handle file-like objects (BytesIO, etc.)
        if hasattr(self._source, 'read'):
            self._source.seek(0)
            self._data = _decompress_if_zlib(self._source.read())
            return self._data

        # Handle file path - try mmap first, fall back to read()
        self._file = open(self._source, 'rb')
        try:
            try:
                self._mmap = mmap.mmap(self._file.fileno(), 0, access=mmap.ACCESS_READ)
                # Check if zlib compressed - if so, decompress and use bytes instead of mmap
                if len(self._mmap) >= 2 and self._mmap[0] == 0x78 and self._mmap[1] in (0x01, 0x9C, 0xDA):
                    deco = zlib.decompressobj()
                    try:
                        self._data = deco.decompress(self._mmap[:])
                    except zlib.error:
                        # Truncated stream - recover partial data
                        self._data = deco.flush()
                    self._mmap.close()
                    self._mmap = None
                    return self._data
                return self._mmap
            except (OSError, ValueError):
                # mmap failed (e.g., JupyterLite/IDBFS) - fall back to read()
                self._file.seek(0)
                self._data = _decompress_if_zlib(self._file.read())
                return self._data
        except BaseException:
            # __exit__ never runs when __enter__ raises, so release the
            # mmap/file here to avoid leaking the handle (e.g. MemoryError
            # during decompression, or a read() failure in the fallback).
            if self._mmap is not None:
                self._mmap.close()
                self._mmap = None
            self._file.close()
            self._file = None
            raise

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close in acquisition-reverse order; never suppress exceptions.
        if self._mmap is not None:
            self._mmap.close()
        if self._file is not None:
            self._file.close()
        return False
936
+
937
+
938
def aim_xrk(fname, progress=None):
    """Load an AIM XRK or XRZ file.

    Args:
        fname: Path to the XRK/XRZ file, or bytes/BytesIO containing file data
        progress: Optional progress callback

    Returns:
        LogFile object with channels, laps, and metadata
    """
    with _open_xrk(fname) as m:
        data = _decode_sequence(m, progress)

    # A real path becomes the log's file name; in-memory sources do not.
    if isinstance(fname, (bytes, bytearray, memoryview)) or hasattr(fname, 'read'):
        source_name = "<bytes>"
    else:
        source_name = fname

    channel_tables = {ch.long_name: _channel_to_table(ch)
                      for ch in data.channels.values()}
    log = base.LogFile(channel_tables,
                       data.laps,
                       _get_metadata(data.messages),
                       source_name)

    # Fix GPS timing gaps (spurious timestamp jumps in some AIM loggers)
    # Pass GNFI timecodes for more robust detection (if available)
    fix_gps_timing_gaps(log, gnfi_timecodes=data.gnfi_timecodes)

    return log
962
+
963
+
964
def aim_track_dbg(fname):
    """Debug function to extract track data from an AIM XRK file."""
    with _open_xrk(fname) as m:
        decoded = _decode_sequence(m, None)
    # Map human-readable token names to their raw messages
    return {_tokenc(token): message for token, message in decoded.messages.items()}
969
+
970
+ #def _help_decode_channels(self, chmap):
971
+ # pprint(chmap)
972
+ # for i in range(len(self.data.channels[0].unknown)):
973
+ # d = sorted([(v.unknown[i], chmap.get(v.long_name, ''), v.long_name)
974
+ # for v in self.data.channels
975
+ # if len(v.unknown) > i])
976
+ # if len(set([x[0] for x in d])) == 1:
977
+ # continue
978
+ # pprint((i, d))
979
+ # d = sorted([(len(v.sampledata), chmap.get(v.long_name, ''), v.long_name)
980
+ # for v in self.data.channels])
981
+ # if len(set([x[0] for x in d])) != 1:
982
+ # pprint(('len', d))