libxrk 0.6.0__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: libxrk
3
- Version: 0.6.0
3
+ Version: 0.8.0
4
4
  Summary: Library for reading AIM XRK files from AIM automotive data loggers
5
5
  License-File: LICENSE
6
6
  Author: Christopher Dewan
@@ -20,7 +20,8 @@ Classifier: Topic :: Scientific/Engineering
20
20
  Provides-Extra: dev
21
21
  Provides-Extra: test
22
22
  Requires-Dist: cython (>=3.0.0)
23
- Requires-Dist: numpy (>=1.26.0) ; python_version < "3.14"
23
+ Requires-Dist: numpy (>=1.26.0) ; python_version < "3.13"
24
+ Requires-Dist: numpy (>=2.1.0) ; python_version == "3.13"
24
25
  Requires-Dist: numpy (>=2.4.0) ; python_version >= "3.14"
25
26
  Requires-Dist: parameterized (>=0.9.0) ; extra == "dev"
26
27
  Requires-Dist: parameterized (>=0.9.0) ; extra == "test"
@@ -105,6 +106,7 @@ for i in range(log.laps.num_rows):
105
106
 
106
107
  # Access metadata
107
108
  print(log.metadata)
109
+ # Includes: Driver, Vehicle, Venue, Log Date/Time, Logger ID, Logger Model, Device Name, etc.
108
110
  ```
109
111
 
110
112
  ### Filtering and Resampling
@@ -69,6 +69,7 @@ for i in range(log.laps.num_rows):
69
69
 
70
70
  # Access metadata
71
71
  print(log.metadata)
72
+ # Includes: Driver, Vehicle, Venue, Log Date/Time, Logger ID, Logger Model, Device Name, etc.
72
73
  ```
73
74
 
74
75
  ### Filtering and Resampling
@@ -1,6 +1,7 @@
1
1
  """Build script for compiling Cython extensions."""
2
2
 
3
3
  import os
4
+ import platform
4
5
  import shutil
5
6
  from pathlib import Path
6
7
  from setuptools import Extension
@@ -34,13 +35,19 @@ def build(setup_kwargs):
34
35
  """
35
36
  This function is mandatory in order to build the extensions.
36
37
  """
38
+ # MSVC (Windows) defaults to C++14, GCC/Clang need explicit flag
39
+ if platform.system() == "Windows":
40
+ extra_compile_args = []
41
+ else:
42
+ extra_compile_args = ["-std=c++11"]
43
+
37
44
  extensions = [
38
45
  Extension(
39
46
  "libxrk.aim_xrk",
40
47
  sources=["src/libxrk/aim_xrk.pyx"],
41
48
  include_dirs=[np.get_include()],
42
49
  language="c++",
43
- extra_compile_args=["-std=c++11"],
50
+ extra_compile_args=extra_compile_args,
44
51
  )
45
52
  ]
46
53
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "libxrk"
3
- version = "0.6.0"
3
+ version = "0.8.0"
4
4
  description = "Library for reading AIM XRK files from AIM automotive data loggers"
5
5
  authors = [
6
6
  {name = "Christopher Dewan",email = "chris.dewan@m3rlin.net"}
@@ -21,7 +21,8 @@ classifiers = [
21
21
  "Topic :: Scientific/Engineering",
22
22
  ]
23
23
  dependencies = [
24
- "numpy>=1.26.0; python_version < '3.14'",
24
+ "numpy>=1.26.0; python_version < '3.13'",
25
+ "numpy>=2.1.0; python_version >= '3.13' and python_version < '3.14'",
25
26
  "numpy>=2.4.0; python_version >= '3.14'",
26
27
  "cython>=3.0.0",
27
28
  "pyarrow>=18.1.0; python_version < '3.14'",
@@ -48,6 +49,7 @@ test = [
48
49
  packages = [{include = "libxrk", from = "src"}]
49
50
  include = [
50
51
  { path = "src/libxrk/**/*.so", format = "wheel" },
52
+ { path = "src/libxrk/**/*.pyd", format = "wheel" },
51
53
  { path = "src/libxrk/**/*.pyx", format = "sdist" },
52
54
  { path = "src/libxrk/**/*.pyi", format = ["wheel", "sdist"] },
53
55
  { path = "src/libxrk/py.typed", format = ["wheel", "sdist"] },
@@ -61,7 +63,8 @@ exclude = [
61
63
  [tool.poetry.dependencies]
62
64
  python = "^3.10"
63
65
  numpy = [
64
- {version = ">=1.26.0", python = "<3.14"},
66
+ {version = ">=1.26.0", python = "<3.13"},
67
+ {version = ">=2.1.0", python = ">=3.13,<3.14"},
65
68
  {version = ">=2.4.0", python = ">=3.14"},
66
69
  ]
67
70
  cython = "^3.0.0"
@@ -78,6 +81,7 @@ mypy = "^1.18.2"
78
81
  poethepoet = "^0.24.0"
79
82
  pyodide-build = {version = "^0.27.3", python = ">=3.12"}
80
83
  parameterized = "^0.9.0"
84
+ py-spy = "^0.4.1"
81
85
 
82
86
  [tool.black]
83
87
  line-length = 100
@@ -110,11 +114,12 @@ warn_unused_ignores = true
110
114
  warn_no_return = true
111
115
  strict_optional = true
112
116
 
113
- # Exclude build artifacts and Cython-generated files
117
+ # Exclude build artifacts, Cython files, and DLL comparison test utilities
114
118
  exclude = [
115
119
  "^build/",
116
120
  "^dist/",
117
121
  "\\.pyx$",
122
+ "^tests/reference_dll/",
118
123
  ]
119
124
 
120
125
  [[tool.mypy.overrides]]
@@ -137,6 +142,10 @@ ignore_missing_imports = true
137
142
  module = "parameterized.*"
138
143
  ignore_missing_imports = true
139
144
 
145
+ [[tool.mypy.overrides]]
146
+ module = "aim_dll_wrapper"
147
+ ignore_missing_imports = true
148
+
140
149
  [tool.poe.tasks]
141
150
  format = "black ."
142
151
  lint = "black --check ."
@@ -174,7 +183,7 @@ deps = ["emsdk-setup"]
174
183
 
175
184
  [tool.cibuildwheel]
176
185
  build = "cp310-* cp311-* cp312-* cp313-* cp314-*"
177
- skip = "*-musllinux_* *-win*"
186
+ skip = "*-musllinux_*"
178
187
  build-verbosity = 1
179
188
 
180
189
  [tool.cibuildwheel.linux]
@@ -185,6 +194,11 @@ before-build = "pip install numpy cython"
185
194
  archs = ["x86_64", "arm64"]
186
195
  before-build = "pip install numpy cython"
187
196
 
197
+ [tool.cibuildwheel.windows]
198
+ # ARM64 excluded until pyarrow publishes Windows ARM64 wheels
199
+ archs = ["AMD64"]
200
+ before-build = "pip install numpy cython"
201
+
188
202
  [tool.poetry.build]
189
203
  script = "cython_build.py"
190
204
  generate-setup-file = false
@@ -194,7 +208,8 @@ requires = [
194
208
  "poetry-core>=2.0.0,<3.0.0",
195
209
  "setuptools>=68.0.0",
196
210
  "cython>=3.0.0",
197
- "numpy>=1.26.0; python_version < '3.14'",
211
+ "numpy>=1.26.0; python_version < '3.13'",
212
+ "numpy>=2.1.0; python_version >= '3.13' and python_version < '3.14'",
198
213
  "numpy>=2.4.0; python_version >= '3.14'",
199
214
  ]
200
215
  build-backend = "poetry.core.masonry.api"
@@ -11,14 +11,44 @@ log = aim_xrk('path/to/file.xrk') # or .xrz, bytes, BytesIO
11
11
  df = log.get_channels_as_table().to_pandas()
12
12
  ```
13
13
 
14
+ ## aim_xrk Function
15
+
16
+ ```python
17
+ aim_xrk(fname, progress=None) -> LogFile
18
+ ```
19
+
20
+ **Parameters:**
21
+ - `fname`: Path to XRK/XRZ file, or bytes/BytesIO containing file data
22
+ - `progress`: Optional callback `(current: int, total: int) -> None` for progress updates
23
+
14
24
  ## LogFile Structure
15
25
 
16
26
  ```python
17
27
  log.channels # Dict[str, pa.Table] - channel name -> PyArrow table
18
28
  log.laps # pa.Table - columns: num, start_time, end_time (ms)
19
- log.metadata # Dict[str, str] - session info
29
+ log.metadata # Dict[str, Any] - session info
20
30
  ```
21
31
 
32
+ ## Metadata Fields
33
+
34
+ Standard metadata fields extracted from XRK files:
35
+
36
+ | Key | Type | Description |
37
+ |-----|------|-------------|
38
+ | Driver | str | Driver name |
39
+ | Vehicle | str | Vehicle name |
40
+ | Venue | str | Track/venue name |
41
+ | Log Date | str | Date of log (DD/MM/YYYY) |
42
+ | Log Time | str | Time of log (HH:MM:SS) |
43
+ | Series | str | Series/competition name |
44
+ | Session | str | Session type |
45
+ | Long Comment | str | User notes |
46
+ | Logger ID | int | Unique logger serial number |
47
+ | Logger Model ID | int | Numeric model code |
48
+ | Logger Model | str or None | Human-readable model name (e.g., "MXP 1.3", "MXm") |
49
+ | Device Name | str | User-configured device name |
50
+ | Odo/* | various | Odometer readings |
51
+
22
52
  ## Channel Tables
23
53
 
24
54
  Each channel table has:
@@ -72,6 +72,7 @@ class DataStream:
72
72
  messages: Dict[str, List[Message]]
73
73
  laps: pa.Table
74
74
  time_offset: int
75
+ gnfi_timecodes: Optional[object] = None
75
76
 
76
77
  @dataclass(**dc_slots)
77
78
  class Decoder:
@@ -118,9 +119,16 @@ _decoders = {
118
119
  24: Decoder('i'), # Best Run Diff?
119
120
  }
120
121
 
122
+ # Logger model ID to name mapping
123
+ # These values are from the idn message in XRK files
124
+ _logger_models = {
125
+ 649: "MXP 1.3",
126
+ 793: "MXm",
127
+ }
128
+
121
129
  _unit_map = {
122
130
  1: ('%', 2),
123
- 3: ('G', 2),
131
+ 3: ('g', 2),
124
132
  4: ('deg', 1),
125
133
  5: ('deg/s', 1),
126
134
  6: ('', 0), # number
@@ -238,6 +246,7 @@ def _decode_sequence(s, progress=None):
238
246
  messages = {}
239
247
  tok_GPS: cython.uint = _tokdec('GPS')
240
248
  tok_GPS1: cython.uint = _tokdec('GPS1')
249
+ tok_GNFI: cython.uint = _tokdec('GNFI')
241
250
  progress_interval: cython.Py_ssize_t = 8_000_000
242
251
  next_progress: cython.Py_ssize_t = progress_interval
243
252
  pos: cython.Py_ssize_t = 0
@@ -261,6 +270,7 @@ def _decode_sequence(s, progress=None):
261
270
  cdef vaccum * data_cat
262
271
  cdef accum * data_p
263
272
  gpsmsg: vector[cython.uchar]
273
+ gnfimsg: vector[cython.uchar]
264
274
  show_all: cython.int = 0
265
275
  show_bad: cython.int = 0
266
276
  while pos < len_s:
@@ -359,6 +369,9 @@ def _decode_sequence(s, progress=None):
359
369
  if tok == tok_GPS or tok == tok_GPS1:
360
370
  # fast path common case
361
371
  gpsmsg.insert(gpsmsg.end(), &sv[oldpos+12], &sv[pos-8])
372
+ elif tok == tok_GNFI:
373
+ # fast path for GNFI messages (logger internal clock)
374
+ gnfimsg.insert(gnfimsg.end(), &sv[oldpos+12], &sv[pos-8])
362
375
  else:
363
376
  data = s[oldpos + 12 : pos - 8]
364
377
  if tok == _tokdec('CNF'):
@@ -438,6 +451,27 @@ def _decode_sequence(s, progress=None):
438
451
  _tokdec('DBUN'), _tokdec('DBUT'), _tokdec('DVER'), _tokdec('MANL'), _tokdec('MODL'), _tokdec('MANI'),
439
452
  _tokdec('MODI'), _tokdec('HWNF'), _tokdec('PDLT'), _tokdec('NTE')):
440
453
  data = _nullterm_string(data)
454
+ elif tok == _tokdec('idn'):
455
+ # idn message: 56-byte payload with logger info
456
+ # Offset +0: model ID (16-bit LE)
457
+ # Offset +6: logger ID (32-bit LE)
458
+ if len(data) >= 10:
459
+ model_id = struct.unpack('<H', data[0:2])[0]
460
+ logger_id = struct.unpack('<I', data[6:10])[0]
461
+ data = {'model_id': model_id, 'logger_id': logger_id}
462
+ elif tok == _tokdec('SRC'):
463
+ # SRC message contains embedded idn data
464
+ # Format: 3-byte token + 1-byte version + 2-byte length + payload
465
+ if len(data) >= 62 and data[:3] == b'idn':
466
+ # Parse the embedded idn payload (skip 6-byte header)
467
+ idn_payload = data[6:62]
468
+ model_id = struct.unpack('<H', idn_payload[0:2])[0]
469
+ logger_id = struct.unpack('<I', idn_payload[6:10])[0]
470
+ # Store as idn message type for metadata extraction
471
+ idn_msg = Message(_tokdec('idn'), 1, {'model_id': model_id, 'logger_id': logger_id})
472
+ if _tokdec('idn') not in messages:
473
+ messages[_tokdec('idn')] = []
474
+ messages[_tokdec('idn')].append(idn_msg)
441
475
  elif tok == _tokdec('ENF'):
442
476
  data = _decode_sequence(data).messages
443
477
  elif tok == _tokdec('TRK'):
@@ -569,17 +603,19 @@ def _decode_sequence(s, progress=None):
569
603
  c.sampledata = np.divide(c.sampledata, 1000).data
570
604
 
571
605
  laps = None
606
+ gnfi_timecodes = None
572
607
  if not channels:
573
608
  t4 = time.perf_counter()
574
609
  pass # nothing to do
575
610
  elif progress:
576
611
  with concurrent.futures.ThreadPoolExecutor(max_workers=min(2, os.cpu_count())) as worker:
577
612
  bg_work = worker.submit(_bg_gps_laps, <cython.uchar[:gpsmsg.size()]> &gpsmsg[0],
613
+ <cython.uchar[:gnfimsg.size()]> &gnfimsg[0] if gnfimsg.size() else None,
578
614
  messages, time_offset, last_time)
579
615
  group_work = worker.map(process_group, [x for x in groups if x])
580
616
  channel_work = worker.map(process_channel,
581
617
  [x for x in channels if x and not x.group])
582
- gps_ch, laps = bg_work.result()
618
+ gps_ch, laps, gnfi_timecodes = bg_work.result()
583
619
  t4 = time.perf_counter()
584
620
  for i in group_work:
585
621
  pass
@@ -592,8 +628,10 @@ def _decode_sequence(s, progress=None):
592
628
  for c in channels:
593
629
  if c and not c.group: process_channel(c)
594
630
  t4 = time.perf_counter()
595
- gps_ch, laps = _bg_gps_laps(<cython.uchar[:gpsmsg.size()]> &gpsmsg[0],
596
- messages, time_offset, last_time)
631
+ gps_ch, laps, gnfi_timecodes = _bg_gps_laps(
632
+ <cython.uchar[:gpsmsg.size()]> &gpsmsg[0],
633
+ <cython.uchar[:gnfimsg.size()]> &gnfimsg[0] if gnfimsg.size() else None,
634
+ messages, time_offset, last_time)
597
635
  channels.extend(gps_ch)
598
636
 
599
637
  t3 = time.perf_counter()
@@ -606,7 +644,8 @@ def _decode_sequence(s, progress=None):
606
644
  and ch.long_name not in ('StrtRec', 'Master Clk')},
607
645
  messages=messages,
608
646
  laps=laps,
609
- time_offset=time_offset)
647
+ time_offset=time_offset,
648
+ gnfi_timecodes=gnfi_timecodes)
610
649
 
611
650
  def _get_metadata(msg_by_type):
612
651
  ret = {}
@@ -629,17 +668,28 @@ def _get_metadata(msg_by_type):
629
668
  ret['Odo/%s Time' % name] = '%d:%02d:%02d' % (stats['time'] // 3600,
630
669
  stats['time'] // 60 % 60,
631
670
  stats['time'] % 60)
671
+ # Logger info from idn message
672
+ if _tokdec('idn') in msg_by_type:
673
+ idn_data = msg_by_type[_tokdec('idn')][-1].content
674
+ if isinstance(idn_data, dict):
675
+ ret['Logger ID'] = idn_data['logger_id']
676
+ ret['Logger Model ID'] = idn_data['model_id']
677
+ ret['Logger Model'] = _logger_models.get(idn_data['model_id'])
678
+ # Device name from NDV message
679
+ if _tokdec('NDV') in msg_by_type:
680
+ ret['Device Name'] = msg_by_type[_tokdec('NDV')][-1].content
632
681
  return ret
633
682
 
634
- def _bg_gps_laps(gpsmsg, msg_by_type, time_offset, last_time):
683
+ def _bg_gps_laps(gpsmsg, gnfimsg, msg_by_type, time_offset, last_time):
635
684
  channels = _decode_gps(gpsmsg, time_offset)
685
+ gnfi_timecodes = _decode_gnfi(gnfimsg, time_offset)
636
686
  lat_ch = None
637
687
  lon_ch = None
638
688
  for ch in channels:
639
689
  if ch.long_name == 'GPS Latitude': lat_ch = ch
640
690
  if ch.long_name == 'GPS Longitude': lon_ch = ch
641
691
  laps = _get_laps(lat_ch, lon_ch, msg_by_type, time_offset, last_time)
642
- return channels, laps
692
+ return channels, laps, gnfi_timecodes
643
693
 
644
694
  def _decode_gps(gpsmsg, time_offset):
645
695
  if not gpsmsg: return []
@@ -687,14 +737,63 @@ def _decode_gps(gpsmsg, time_offset):
687
737
  Channel(long_name='GPS Altitude', units='m', dec_pts=1, interpolate=True,
688
738
  timecodes=timecodes, sampledata=memoryview(gpsconv.alt))]
689
739
 
740
+ def _decode_gnfi(gnfimsg, time_offset):
741
+ """Parse GNFI messages and return timecodes array.
742
+
743
+ GNFI messages run on the logger's internal clock, not the GPS timecode stream.
744
+ This provides a ground truth reference for detecting GPS timing bugs.
745
+
746
+ GNFI message structure (32 bytes each):
747
+ - Bytes 0-3: Logger timecode (int32)
748
+ - Bytes 4-31: Other data (not used for timing)
749
+
750
+ Args:
751
+ gnfimsg: Raw GNFI message bytes
752
+ time_offset: Time offset to subtract from timecodes
753
+
754
+ Returns:
755
+ numpy array of GNFI timecodes, or None if no GNFI data
756
+ """
757
+ if not gnfimsg:
758
+ return None
759
+ alldata = memoryview(gnfimsg)
760
+ if len(alldata) % 32 != 0:
761
+ return None
762
+ timecodes = np.asarray(alldata[0:].cast('i')[::32//4]) - time_offset
763
+ return timecodes
764
+
765
+
690
766
  def _get_laps(lat_ch, lon_ch, msg_by_type, time_offset, last_time):
691
767
  lap_nums = []
692
768
  start_times = []
693
769
  end_times = []
694
-
695
- if lat_ch and lon_ch:
696
- # If we have GPS, do gps lap insert.
697
770
 
771
+ # Prefer LAP messages when available (matches official DLL behavior)
772
+ if _tokdec('LAP') in msg_by_type:
773
+ for m in msg_by_type[_tokdec('LAP')]:
774
+ # 2nd byte is segment #, see M4GT4
775
+ segment, lap, duration, end_time = struct.unpack('xBHIxxxxxxxxI', m.content)
776
+ end_time -= time_offset
777
+ if segment:
778
+ continue
779
+ elif not lap_nums:
780
+ pass
781
+ elif lap_nums[-1] == lap:
782
+ continue
783
+ elif lap_nums[-1] + 1 == lap:
784
+ pass
785
+ elif lap_nums[-1] + 2 == lap:
786
+ # emit inferred lap
787
+ lap_nums.append(lap - 1)
788
+ start_times.append(end_times[-1])
789
+ end_times.append(end_time - duration)
790
+ else:
791
+ assert False, 'Lap gap from %d to %d' % (lap_nums[-1], lap)
792
+ lap_nums.append(lap)
793
+ start_times.append(end_time - duration)
794
+ end_times.append(end_time)
795
+ elif lat_ch and lon_ch:
796
+ # Fall back to GPS-based lap detection only when no LAP messages exist
698
797
  track = msg_by_type[_tokdec('TRK')][-1].content
699
798
  XYZ = np.column_stack(gps.lla2ecef(np.array(lat_ch.sampledata),
700
799
  np.array(lon_ch.sampledata), 0))
@@ -702,38 +801,24 @@ def _get_laps(lat_ch, lon_ch, msg_by_type, time_offset, last_time):
702
801
  np.array(lat_ch.timecodes),
703
802
  (track['sf_lat'], track['sf_long']))
704
803
 
705
- lap_markers = [0] + lap_markers + [last_time - time_offset]
804
+ # Use GPS channel's last timecode as session end (already adjusted)
805
+ # This avoids relying on last_time which may be 0 when no LAP messages exist
806
+ session_end = int(lat_ch.timecodes[-1]) if len(lat_ch.timecodes) else (last_time - time_offset if last_time else 0)
706
807
 
707
- for lap, (start_time, end_time) in enumerate(zip(lap_markers[:-1], lap_markers[1:])):
708
- lap_nums.append(lap)
709
- start_times.append(start_time)
710
- end_times.append(end_time)
711
- else:
712
- # otherwise, use the lap data provided.
713
- if _tokdec('LAP') in msg_by_type:
714
- for m in msg_by_type[_tokdec('LAP')]:
715
- # 2nd byte is segment #, see M4GT4
716
- segment, lap, duration, end_time = struct.unpack('xBHIxxxxxxxxI', m.content)
717
- end_time -= time_offset
718
- if segment:
719
- continue
720
- elif not lap_nums:
721
- pass
722
- elif lap_nums[-1] == lap:
723
- continue
724
- elif lap_nums[-1] + 1 == lap:
725
- pass
726
- elif lap_nums[-1] + 2 == lap:
727
- # emit inferred lap
728
- lap_nums.append(lap - 1)
729
- start_times.append(end_times[-1])
730
- end_times.append(end_time - duration)
731
- else:
732
- assert False, 'Lap gap from %d to %d' % (lap_nums[-1], lap)
808
+ # Only add session boundaries if we have detected lap crossings
809
+ # This creates laps from each crossing to the next
810
+ if lap_markers:
811
+ lap_markers = [0] + lap_markers + [session_end]
812
+ for lap, (start_time, end_time) in enumerate(zip(lap_markers[:-1], lap_markers[1:])):
733
813
  lap_nums.append(lap)
734
- start_times.append(end_time - duration)
814
+ start_times.append(start_time)
735
815
  end_times.append(end_time)
736
816
 
817
+ # Normalize lap numbers to 0-based indexing (matches DLL behavior)
818
+ if lap_nums:
819
+ min_lap = min(lap_nums)
820
+ lap_nums = [n - min_lap for n in lap_nums]
821
+
737
822
  # Create PyArrow table
738
823
  return pa.table({
739
824
  'num': pa.array(lap_nums, type=pa.int32()),
@@ -786,8 +871,13 @@ def _decompress_if_zlib(data):
786
871
  second_byte = data[1] if isinstance(data[1], int) else ord(data[1])
787
872
 
788
873
  if first_byte == 0x78 and second_byte in (0x01, 0x9C, 0xDA):
789
- return zlib.decompress(bytes(data))
790
-
874
+ deco = zlib.decompressobj()
875
+ try:
876
+ return deco.decompress(bytes(data))
877
+ except zlib.error:
878
+ # Truncated stream - recover partial data
879
+ return deco.flush()
880
+
791
881
  return data
792
882
 
793
883
 
@@ -827,7 +917,12 @@ class _open_xrk:
827
917
  self._mmap = mmap.mmap(self._file.fileno(), 0, access=mmap.ACCESS_READ)
828
918
  # Check if zlib compressed - if so, decompress and use bytes instead of mmap
829
919
  if len(self._mmap) >= 2 and self._mmap[0] == 0x78 and self._mmap[1] in (0x01, 0x9C, 0xDA):
830
- self._data = zlib.decompress(self._mmap[:])
920
+ deco = zlib.decompressobj()
921
+ try:
922
+ self._data = deco.decompress(self._mmap[:])
923
+ except zlib.error:
924
+ # Truncated stream - recover partial data
925
+ self._data = deco.flush()
831
926
  self._mmap.close()
832
927
  self._mmap = None
833
928
  return self._data
@@ -866,7 +961,8 @@ def aim_xrk(fname, progress=None):
866
961
  fname if not isinstance(fname, (bytes, bytearray, memoryview)) and not hasattr(fname, 'read') else "<bytes>")
867
962
 
868
963
  # Fix GPS timing gaps (spurious timestamp jumps in some AIM loggers)
869
- fix_gps_timing_gaps(log)
964
+ # Pass GNFI timecodes for more robust detection (if available)
965
+ fix_gps_timing_gaps(log, gnfi_timecodes=data.gnfi_timecodes)
870
966
 
871
967
  return log
872
968
 
@@ -2,8 +2,6 @@
2
2
 
3
3
  from collections.abc import Sequence
4
4
  from dataclasses import dataclass
5
- import heapq
6
- from itertools import groupby
7
5
  import sys
8
6
  import pyarrow as pa
9
7
  import pyarrow.compute as pc
@@ -59,15 +57,12 @@ class LogFile:
59
57
  # Return an empty table with just timecodes column if no channels
60
58
  return pa.table({"timecodes": pa.array([], type=pa.int64())})
61
59
 
62
- # Compute union of all channel timecodes using k-way merge (O(N) vs O(N log N) for sort)
63
- # Each channel's timecodes are already sorted, so we merge and deduplicate in one pass
64
- timecode_iterators = [
65
- channel_table.column("timecodes").to_pylist()
66
- for channel_table in self.channels.values()
60
+ # Compute union of all channel timecodes using numpy concatenate + unique
61
+ # This is faster than k-way merge with heapq due to optimized C implementation
62
+ timecode_arrays = [
63
+ channel_table.column("timecodes").to_numpy() for channel_table in self.channels.values()
67
64
  ]
68
- merged = heapq.merge(*timecode_iterators)
69
- unique_timecodes = [k for k, _ in groupby(merged)]
70
- union_timecodes = pa.array(unique_timecodes, type=pa.int64())
65
+ union_timecodes = pa.array(np.unique(np.concatenate(timecode_arrays)), type=pa.int64())
71
66
 
72
67
  # Resample all channels to the union timecodes
73
68
  resampled = self.resample_to_timecodes(union_timecodes)
@@ -414,13 +414,88 @@ if __name__ == "__main__":
414
414
  perf_test()
415
415
 
416
416
 
417
- def fix_gps_timing_gaps(log: "LogFile", expected_dt_ms: float = 40.0) -> "LogFile":
418
- """Detect and correct large timing gaps in GPS channels and lap boundaries.
417
+ def detect_gps_timing_offset_from_gnfi(
418
+ gps_timecodes: np.ndarray,
419
+ gnfi_timecodes: np.ndarray,
420
+ expected_dt_ms: float = 40.0,
421
+ ) -> list[tuple[int, int]]:
422
+ """Detect GPS timing offset using GNFI as reference clock.
423
+
424
+ GNFI messages run on the logger's internal clock (NOT the buggy GPS timecode
425
+ stream). They are continuous with no gaps and end at the true session end time.
426
+ By comparing the GPS end time to GNFI end time, we can detect if the GPS
427
+ firmware bug added ~65533ms to GPS timecodes.
428
+
429
+ Args:
430
+ gps_timecodes: GPS channel timecodes array
431
+ gnfi_timecodes: GNFI timecodes array (logger internal clock)
432
+ expected_dt_ms: Expected time delta between GPS samples (default 40ms = 25Hz)
433
+
434
+ Returns:
435
+ List of (gap_time, correction) tuples, or empty list if no bug detected
436
+ """
437
+ if gnfi_timecodes is None or len(gnfi_timecodes) < 2:
438
+ return []
439
+
440
+ if len(gps_timecodes) < 2:
441
+ return []
442
+
443
+ OVERFLOW_BUG_MS = 65533
444
+ TOLERANCE = 5000 # Allow 5 second tolerance for end-time comparison
445
+
446
+ gps_end = int(gps_timecodes[-1])
447
+ gnfi_end = int(gnfi_timecodes[-1])
448
+ offset = gps_end - gnfi_end
449
+
450
+ # Check if GPS extends ~65533ms beyond GNFI (the bug signature)
451
+ if not (OVERFLOW_BUG_MS - TOLERANCE <= offset <= OVERFLOW_BUG_MS + TOLERANCE):
452
+ return []
453
+
454
+ # Bug detected! Find the gap where it likely occurred
455
+ # Look for the largest gap in GPS timecodes
456
+ dt = np.diff(gps_timecodes)
457
+ gap_threshold = expected_dt_ms * 10 # 400ms default
458
+
459
+ gap_indices = np.where(dt > gap_threshold)[0]
460
+ if len(gap_indices) == 0:
461
+ return []
462
+
463
+ # Find the largest gap
464
+ largest_gap_idx = gap_indices[np.argmax(dt[gap_indices])]
465
+ gap_time = int(gps_timecodes[largest_gap_idx])
466
+ gap_size = dt[largest_gap_idx]
467
+
468
+ # For direct gaps (60000-70000ms), use gap_size - expected_dt as correction
469
+ # For hidden bugs (detected via GNFI), use OVERFLOW_BUG_MS as correction
470
+ # because the gap_size might be smaller due to signal loss masking the bug
471
+ if OVERFLOW_BUG_MS - TOLERANCE <= gap_size <= OVERFLOW_BUG_MS + TOLERANCE:
472
+ # Direct gap - correction is the excess time
473
+ correction = gap_size - expected_dt_ms
474
+ else:
475
+ # Hidden bug - correction is the full overflow amount
476
+ correction = OVERFLOW_BUG_MS
477
+ return [(gap_time, int(correction))]
478
+
479
+
480
+ def fix_gps_timing_gaps(
481
+ log: "LogFile",
482
+ expected_dt_ms: float = 40.0,
483
+ gnfi_timecodes: np.ndarray | None = None,
484
+ ) -> "LogFile":
485
+ """Detect and correct 16-bit overflow timing gaps in GPS channels and lap boundaries.
419
486
 
420
487
  Some AIM data loggers produce GPS data with spurious timestamp jumps
421
- (e.g., 65533ms gaps that should be ~40ms). This is likely caused by a
422
- 16-bit overflow bug in the logger firmware. This function detects such
423
- gaps and corrects the timecodes by removing the excess time.
488
+ (e.g., 65533ms gaps that should be ~40ms). This is caused by a 16-bit
489
+ overflow bug in the logger firmware where the upper 16 bits of the
490
+ timecode are corrupted, resulting in a gap of approximately 65533ms
491
+ (0xFFED, or 2^16 - 3).
492
+
493
+ This function detects the firmware bug in three ways (in order of preference):
494
+ 1. GNFI-based detection: If GNFI timecodes are available, compare GPS end time
495
+ to GNFI end time (GNFI runs on logger's internal clock, provides ground truth)
496
+ 2. Direct detection: gaps between 60000ms and 70000ms
497
+ 3. Indirect detection: GPS ends ~65533ms after other channels, indicating
498
+ the bug occurred during a GPS signal loss (hidden within a smaller gap)
424
499
 
425
500
  The fix is applied in-place to the LogFile's channels dict and laps table.
426
501
 
@@ -431,12 +506,20 @@ def fix_gps_timing_gaps(log: "LogFile", expected_dt_ms: float = 40.0) -> "LogFil
431
506
  expected_dt_ms : float, default=40.0
432
507
  Expected time delta between GPS samples in milliseconds.
433
508
  Default is 40ms (25 Hz GPS).
509
+ gnfi_timecodes : np.ndarray or None, default=None
510
+ Optional GNFI timecodes from logger's internal clock. If provided,
511
+ used for more robust detection of the GPS timing bug.
434
512
 
435
513
  Returns
436
514
  -------
437
515
  LogFile
438
516
  The same LogFile object with corrected GPS timecodes and lap boundaries.
439
517
  """
518
+ # The firmware bug causes a gap of approximately 65533ms (0xFFED).
519
+ OVERFLOW_BUG_MS = 65533
520
+ OVERFLOW_GAP_MIN = 60000 # 60 seconds minimum
521
+ OVERFLOW_GAP_MAX = 70000 # 70 seconds maximum
522
+
440
523
  # Find the first GPS channel that exists
441
524
  gps_channel_name = None
442
525
  for name in GPS_CHANNEL_NAMES:
@@ -464,13 +547,53 @@ def fix_gps_timing_gaps(log: "LogFile", expected_dt_ms: float = 40.0) -> "LogFil
464
547
  if len(gap_indices) == 0:
465
548
  return log
466
549
 
467
- # Build list of (gap_time, correction) pairs
550
+ # Build list of (gap_time, correction) pairs - only for firmware bug gaps
468
551
  gap_corrections = []
469
- for gap_idx in gap_indices:
470
- gap_time = gps_time[gap_idx]
471
- gap_size = dt[gap_idx]
472
- correction = gap_size - expected_dt_ms
473
- gap_corrections.append((gap_time, correction))
552
+
553
+ # Method 1: GNFI-based detection (most reliable, if available)
554
+ if gnfi_timecodes is not None:
555
+ gap_corrections = detect_gps_timing_offset_from_gnfi(
556
+ gps_time, gnfi_timecodes, expected_dt_ms
557
+ )
558
+
559
+ # Method 2: Direct detection - gaps between 60000ms and 70000ms
560
+ if len(gap_corrections) == 0:
561
+ for gap_idx in gap_indices:
562
+ gap_time = gps_time[gap_idx]
563
+ gap_size = dt[gap_idx]
564
+
565
+ # Only fix gaps that match the firmware bug signature (around 65533ms)
566
+ if not (OVERFLOW_GAP_MIN <= gap_size <= OVERFLOW_GAP_MAX):
567
+ continue # Skip - this is a legitimate gap, not the firmware bug
568
+
569
+ correction = gap_size - expected_dt_ms
570
+ gap_corrections.append((gap_time, correction))
571
+
572
+ # Method 3: Indirect detection - GPS extends ~65533ms beyond other channels
573
+ # This happens when the bug occurs during GPS signal loss
574
+ if len(gap_corrections) == 0 and len(gap_indices) > 0:
575
+ # Find end time of non-GPS channels
576
+ non_gps_end_times = []
577
+ for ch_name, ch_table in log.channels.items():
578
+ if ch_name not in GPS_CHANNEL_NAMES:
579
+ ch_time = ch_table.column("timecodes").to_numpy()
580
+ if len(ch_time) > 0:
581
+ non_gps_end_times.append(ch_time[-1])
582
+
583
+ if non_gps_end_times:
584
+ max_non_gps_end = max(non_gps_end_times)
585
+ gps_end = gps_time[-1]
586
+ end_offset = gps_end - max_non_gps_end
587
+
588
+ # If GPS extends ~65533ms beyond other channels, the bug is hidden
589
+ if OVERFLOW_GAP_MIN <= end_offset <= OVERFLOW_GAP_MAX:
590
+ # Find the gap where the bug likely occurred (largest gap)
591
+ largest_gap_idx = gap_indices[np.argmax(dt[gap_indices])]
592
+ gap_time = gps_time[largest_gap_idx]
593
+
594
+ # Apply correction of ~65533ms (the overflow amount)
595
+ correction = OVERFLOW_BUG_MS
596
+ gap_corrections.append((gap_time, correction))
474
597
 
475
598
  # Fix GPS channel timecodes
476
599
  gps_time_fixed = gps_time.astype(np.float64)
File without changes
File without changes
File without changes
File without changes