PyPI - libxrk - Versions diffs - 0.6.0__tar.gz → 0.8.0__tar.gz - Mend

libxrk 0.6.0__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

{libxrk-0.6.0 → libxrk-0.8.0}/PKG-INFO +4 -2
{libxrk-0.6.0 → libxrk-0.8.0}/README.md +1 -0
{libxrk-0.6.0 → libxrk-0.8.0}/cython_build.py +8 -1
{libxrk-0.6.0 → libxrk-0.8.0}/pyproject.toml +21 -6
{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/CLAUDE.md +31 -1
{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/aim_xrk.pyx +138 -42
{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/base.py +5 -10
{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/gps.py +134 -11
{libxrk-0.6.0 → libxrk-0.8.0}/LICENSE +0 -0
{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/__init__.py +0 -0
{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/aim_xrk.pyi +0 -0
{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/py.typed +0 -0

{libxrk-0.6.0 → libxrk-0.8.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: libxrk
-Version: 0.6.0
+Version: 0.8.0
 Summary: Library for reading AIM XRK files from AIM automotive data loggers
 License-File: LICENSE
 Author: Christopher Dewan
@@ -20,7 +20,8 @@ Classifier: Topic :: Scientific/Engineering
 Provides-Extra: dev
 Provides-Extra: test
 Requires-Dist: cython (>=3.0.0)
-Requires-Dist: numpy (>=1.26.0) ; python_version < "3.14"
+Requires-Dist: numpy (>=1.26.0) ; python_version < "3.13"
+Requires-Dist: numpy (>=2.1.0) ; python_version == "3.13"
 Requires-Dist: numpy (>=2.4.0) ; python_version >= "3.14"
 Requires-Dist: parameterized (>=0.9.0) ; extra == "dev"
 Requires-Dist: parameterized (>=0.9.0) ; extra == "test"
@@ -105,6 +106,7 @@ for i in range(log.laps.num_rows):
 # Access metadata
 print(log.metadata)
+# Includes: Driver, Vehicle, Venue, Log Date/Time, Logger ID, Logger Model, Device Name, etc.
 ```
 ### Filtering and Resampling

{libxrk-0.6.0 → libxrk-0.8.0}/README.md RENAMED Viewed

@@ -69,6 +69,7 @@ for i in range(log.laps.num_rows):
 # Access metadata
 print(log.metadata)
+# Includes: Driver, Vehicle, Venue, Log Date/Time, Logger ID, Logger Model, Device Name, etc.
 ```
 ### Filtering and Resampling

{libxrk-0.6.0 → libxrk-0.8.0}/cython_build.py RENAMED Viewed

@@ -1,6 +1,7 @@
 """Build script for compiling Cython extensions."""
 import os
+import platform
 import shutil
 from pathlib import Path
 from setuptools import Extension
@@ -34,13 +35,19 @@ def build(setup_kwargs):
     """
     This function is mandatory in order to build the extensions.
     """
+    # MSVC (Windows) defaults to C++14, GCC/Clang need explicit flag
+    if platform.system() == "Windows":
+        extra_compile_args = []
+    else:
+        extra_compile_args = ["-std=c++11"]
     extensions = [
         Extension(
             "libxrk.aim_xrk",
             sources=["src/libxrk/aim_xrk.pyx"],
             include_dirs=[np.get_include()],
             language="c++",
-            extra_compile_args=["-std=c++11"],
+            extra_compile_args=extra_compile_args,
         )
     ]

{libxrk-0.6.0 → libxrk-0.8.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "libxrk"
-version = "0.6.0"
+version = "0.8.0"
 description = "Library for reading AIM XRK files from AIM automotive data loggers"
 authors = [
     {name = "Christopher Dewan",email = "chris.dewan@m3rlin.net"}
@@ -21,7 +21,8 @@ classifiers = [
     "Topic :: Scientific/Engineering",
 ]
 dependencies = [
-    "numpy>=1.26.0; python_version < '3.14'",
+    "numpy>=1.26.0; python_version < '3.13'",
+    "numpy>=2.1.0; python_version >= '3.13' and python_version < '3.14'",
     "numpy>=2.4.0; python_version >= '3.14'",
     "cython>=3.0.0",
     "pyarrow>=18.1.0; python_version < '3.14'",
@@ -48,6 +49,7 @@ test = [
 packages = [{include = "libxrk", from = "src"}]
 include = [
     { path = "src/libxrk/**/*.so", format = "wheel" },
+    { path = "src/libxrk/**/*.pyd", format = "wheel" },
     { path = "src/libxrk/**/*.pyx", format = "sdist" },
     { path = "src/libxrk/**/*.pyi", format = ["wheel", "sdist"] },
     { path = "src/libxrk/py.typed", format = ["wheel", "sdist"] },
@@ -61,7 +63,8 @@ exclude = [
 [tool.poetry.dependencies]
 python = "^3.10"
 numpy = [
-    {version = ">=1.26.0", python = "<3.14"},
+    {version = ">=1.26.0", python = "<3.13"},
+    {version = ">=2.1.0", python = ">=3.13,<3.14"},
     {version = ">=2.4.0", python = ">=3.14"},
 ]
 cython = "^3.0.0"
@@ -78,6 +81,7 @@ mypy = "^1.18.2"
 poethepoet = "^0.24.0"
 pyodide-build = {version = "^0.27.3", python = ">=3.12"}
 parameterized = "^0.9.0"
+py-spy = "^0.4.1"
 [tool.black]
 line-length = 100
@@ -110,11 +114,12 @@ warn_unused_ignores = true
 warn_no_return = true
 strict_optional = true
-# Exclude build artifacts and Cython-generated files
+# Exclude build artifacts, Cython files, and DLL comparison test utilities
 exclude = [
     "^build/",
     "^dist/",
     "\\.pyx$",
+    "^tests/reference_dll/",
 ]
 [[tool.mypy.overrides]]
@@ -137,6 +142,10 @@ ignore_missing_imports = true
 module = "parameterized.*"
 ignore_missing_imports = true
+[[tool.mypy.overrides]]
+module = "aim_dll_wrapper"
+ignore_missing_imports = true
 [tool.poe.tasks]
 format = "black ."
 lint = "black --check ."
@@ -174,7 +183,7 @@ deps = ["emsdk-setup"]
 [tool.cibuildwheel]
 build = "cp310-* cp311-* cp312-* cp313-* cp314-*"
-skip = "*-musllinux_* *-win*"
+skip = "*-musllinux_*"
 build-verbosity = 1
 [tool.cibuildwheel.linux]
@@ -185,6 +194,11 @@ before-build = "pip install numpy cython"
 archs = ["x86_64", "arm64"]
 before-build = "pip install numpy cython"
+[tool.cibuildwheel.windows]
+# ARM64 excluded until pyarrow publishes Windows ARM64 wheels
+archs = ["AMD64"]
+before-build = "pip install numpy cython"
 [tool.poetry.build]
 script = "cython_build.py"
 generate-setup-file = false
@@ -194,7 +208,8 @@ requires = [
     "poetry-core>=2.0.0,<3.0.0",
     "setuptools>=68.0.0",
     "cython>=3.0.0",
-    "numpy>=1.26.0; python_version < '3.14'",
+    "numpy>=1.26.0; python_version < '3.13'",
+    "numpy>=2.1.0; python_version >= '3.13' and python_version < '3.14'",
     "numpy>=2.4.0; python_version >= '3.14'",
 ]
 build-backend = "poetry.core.masonry.api"

{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/CLAUDE.md RENAMED Viewed

@@ -11,14 +11,44 @@ log = aim_xrk('path/to/file.xrk')  # or .xrz, bytes, BytesIO
 df = log.get_channels_as_table().to_pandas()
 ```
+## aim_xrk Function
+```python
+aim_xrk(fname, progress=None) -> LogFile
+```
+**Parameters:**
+- `fname`: Path to XRK/XRZ file, or bytes/BytesIO containing file data
+- `progress`: Optional callback `(current: int, total: int) -> None` for progress updates
 ## LogFile Structure
 ```python
 log.channels   # Dict[str, pa.Table] - channel name -> PyArrow table
 log.laps       # pa.Table - columns: num, start_time, end_time (ms)
-log.metadata   # Dict[str, str] - session info
+log.metadata   # Dict[str, Any] - session info
 ```
+## Metadata Fields
+Standard metadata fields extracted from XRK files:
+| Key | Type | Description |
+|-----|------|-------------|
+| Driver | str | Driver name |
+| Vehicle | str | Vehicle name |
+| Venue | str | Track/venue name |
+| Log Date | str | Date of log (DD/MM/YYYY) |
+| Log Time | str | Time of log (HH:MM:SS) |
+| Series | str | Series/competition name |
+| Session | str | Session type |
+| Long Comment | str | User notes |
+| Logger ID | int | Unique logger serial number |
+| Logger Model ID | int | Numeric model code |
+| Logger Model | str or None | Human-readable model name (e.g., "MXP 1.3", "MXm") |
+| Device Name | str | User-configured device name |
+| Odo/* | various | Odometer readings |
 ## Channel Tables
 Each channel table has:

{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/aim_xrk.pyx RENAMED Viewed

@@ -72,6 +72,7 @@ class DataStream:
     messages: Dict[str, List[Message]]
     laps: pa.Table
     time_offset: int
+    gnfi_timecodes: Optional[object] = None
 @dataclass(**dc_slots)
 class Decoder:
@@ -118,9 +119,16 @@ _decoders = {
     24: Decoder('i'), # Best Run Diff?
 }
+# Logger model ID to name mapping
+# These values are from the idn message in XRK files
+_logger_models = {
+    649: "MXP 1.3",
+    793: "MXm",
+}
 _unit_map = {
     1:  ('%', 2),
-    3:  ('G', 2),
+    3:  ('g', 2),
     4:  ('deg', 1),
     5:  ('deg/s', 1),
     6:  ('', 0), # number
@@ -238,6 +246,7 @@ def _decode_sequence(s, progress=None):
     messages = {}
     tok_GPS: cython.uint = _tokdec('GPS')
     tok_GPS1: cython.uint = _tokdec('GPS1')
+    tok_GNFI: cython.uint = _tokdec('GNFI')
     progress_interval: cython.Py_ssize_t = 8_000_000
     next_progress: cython.Py_ssize_t = progress_interval
     pos: cython.Py_ssize_t = 0
@@ -261,6 +270,7 @@ def _decode_sequence(s, progress=None):
     cdef vaccum * data_cat
     cdef accum * data_p
     gpsmsg: vector[cython.uchar]
+    gnfimsg: vector[cython.uchar]
     show_all: cython.int = 0
     show_bad: cython.int = 0
     while pos < len_s:
@@ -359,6 +369,9 @@ def _decode_sequence(s, progress=None):
                     if tok == tok_GPS or tok == tok_GPS1:
                         # fast path common case
                         gpsmsg.insert(gpsmsg.end(), &sv[oldpos+12], &sv[pos-8])
+                    elif tok == tok_GNFI:
+                        # fast path for GNFI messages (logger internal clock)
+                        gnfimsg.insert(gnfimsg.end(), &sv[oldpos+12], &sv[pos-8])
                     else:
                         data = s[oldpos + 12 : pos - 8]
                         if tok == _tokdec('CNF'):
@@ -438,6 +451,27 @@ def _decode_sequence(s, progress=None):
                                      _tokdec('DBUN'), _tokdec('DBUT'), _tokdec('DVER'), _tokdec('MANL'), _tokdec('MODL'), _tokdec('MANI'),
                                      _tokdec('MODI'), _tokdec('HWNF'), _tokdec('PDLT'), _tokdec('NTE')):
                             data = _nullterm_string(data)
+                        elif tok == _tokdec('idn'):
+                            # idn message: 56-byte payload with logger info
+                            # Offset +0: model ID (16-bit LE)
+                            # Offset +6: logger ID (32-bit LE)
+                            if len(data) >= 10:
+                                model_id = struct.unpack('<H', data[0:2])[0]
+                                logger_id = struct.unpack('<I', data[6:10])[0]
+                                data = {'model_id': model_id, 'logger_id': logger_id}
+                        elif tok == _tokdec('SRC'):
+                            # SRC message contains embedded idn data
+                            # Format: 3-byte token + 1-byte version + 2-byte length + payload
+                            if len(data) >= 62 and data[:3] == b'idn':
+                                # Parse the embedded idn payload (skip 6-byte header)
+                                idn_payload = data[6:62]
+                                model_id = struct.unpack('<H', idn_payload[0:2])[0]
+                                logger_id = struct.unpack('<I', idn_payload[6:10])[0]
+                                # Store as idn message type for metadata extraction
+                                idn_msg = Message(_tokdec('idn'), 1, {'model_id': model_id, 'logger_id': logger_id})
+                                if _tokdec('idn') not in messages:
+                                    messages[_tokdec('idn')] = []
+                                messages[_tokdec('idn')].append(idn_msg)
                         elif tok == _tokdec('ENF'):
                             data = _decode_sequence(data).messages
                         elif tok == _tokdec('TRK'):
@@ -569,17 +603,19 @@ def _decode_sequence(s, progress=None):
             c.sampledata = np.divide(c.sampledata, 1000).data
     laps = None
+    gnfi_timecodes = None
     if not channels:
         t4 = time.perf_counter()
         pass # nothing to do
     elif progress:
         with concurrent.futures.ThreadPoolExecutor(max_workers=min(2, os.cpu_count())) as worker:
             bg_work = worker.submit(_bg_gps_laps, <cython.uchar[:gpsmsg.size()]> &gpsmsg[0],
+                                    <cython.uchar[:gnfimsg.size()]> &gnfimsg[0] if gnfimsg.size() else None,
                                     messages, time_offset, last_time)
             group_work = worker.map(process_group, [x for x in groups if x])
             channel_work = worker.map(process_channel,
                                       [x for x in channels if x and not x.group])
-            gps_ch, laps = bg_work.result()
+            gps_ch, laps, gnfi_timecodes = bg_work.result()
             t4 = time.perf_counter()
             for i in group_work:
                 pass
@@ -592,8 +628,10 @@ def _decode_sequence(s, progress=None):
         for c in channels:
             if c and not c.group: process_channel(c)
         t4 = time.perf_counter()
-        gps_ch, laps = _bg_gps_laps(<cython.uchar[:gpsmsg.size()]> &gpsmsg[0],
-                                    messages, time_offset, last_time)
+        gps_ch, laps, gnfi_timecodes = _bg_gps_laps(
+            <cython.uchar[:gpsmsg.size()]> &gpsmsg[0],
+            <cython.uchar[:gnfimsg.size()]> &gnfimsg[0] if gnfimsg.size() else None,
+            messages, time_offset, last_time)
         channels.extend(gps_ch)
     t3 = time.perf_counter()
@@ -606,7 +644,8 @@ def _decode_sequence(s, progress=None):
                   and ch.long_name not in ('StrtRec', 'Master Clk')},
         messages=messages,
         laps=laps,
-        time_offset=time_offset)
+        time_offset=time_offset,
+        gnfi_timecodes=gnfi_timecodes)
 def _get_metadata(msg_by_type):
     ret = {}
@@ -629,17 +668,28 @@ def _get_metadata(msg_by_type):
             ret['Odo/%s Time' % name] = '%d:%02d:%02d' % (stats['time'] // 3600,
                                                           stats['time'] // 60 % 60,
                                                           stats['time'] % 60)
+    # Logger info from idn message
+    if _tokdec('idn') in msg_by_type:
+        idn_data = msg_by_type[_tokdec('idn')][-1].content
+        if isinstance(idn_data, dict):
+            ret['Logger ID'] = idn_data['logger_id']
+            ret['Logger Model ID'] = idn_data['model_id']
+            ret['Logger Model'] = _logger_models.get(idn_data['model_id'])
+    # Device name from NDV message
+    if _tokdec('NDV') in msg_by_type:
+        ret['Device Name'] = msg_by_type[_tokdec('NDV')][-1].content
     return ret
-def _bg_gps_laps(gpsmsg, msg_by_type, time_offset, last_time):
+def _bg_gps_laps(gpsmsg, gnfimsg, msg_by_type, time_offset, last_time):
     channels = _decode_gps(gpsmsg, time_offset)
+    gnfi_timecodes = _decode_gnfi(gnfimsg, time_offset)
     lat_ch = None
     lon_ch = None
     for ch in channels:
         if ch.long_name == 'GPS Latitude': lat_ch = ch
         if ch.long_name == 'GPS Longitude': lon_ch = ch
     laps = _get_laps(lat_ch, lon_ch, msg_by_type, time_offset, last_time)
-    return channels, laps
+    return channels, laps, gnfi_timecodes
 def _decode_gps(gpsmsg, time_offset):
     if not gpsmsg: return []
@@ -687,14 +737,63 @@ def _decode_gps(gpsmsg, time_offset):
             Channel(long_name='GPS Altitude', units='m', dec_pts=1, interpolate=True,
                     timecodes=timecodes, sampledata=memoryview(gpsconv.alt))]
+def _decode_gnfi(gnfimsg, time_offset):
+    """Parse GNFI messages and return timecodes array.
+    GNFI messages run on the logger's internal clock, not the GPS timecode stream.
+    This provides a ground truth reference for detecting GPS timing bugs.
+    GNFI message structure (32 bytes each):
+    - Bytes 0-3: Logger timecode (int32)
+    - Bytes 4-31: Other data (not used for timing)
+    Args:
+        gnfimsg: Raw GNFI message bytes
+        time_offset: Time offset to subtract from timecodes
+    Returns:
+        numpy array of GNFI timecodes, or None if no GNFI data
+    """
+    if not gnfimsg:
+        return None
+    alldata = memoryview(gnfimsg)
+    if len(alldata) % 32 != 0:
+        return None
+    timecodes = np.asarray(alldata[0:].cast('i')[::32//4]) - time_offset
+    return timecodes
 def _get_laps(lat_ch, lon_ch, msg_by_type, time_offset, last_time):
     lap_nums = []
     start_times = []
     end_times = []
-    if lat_ch and lon_ch:
-        # If we have GPS, do gps lap insert.
+    # Prefer LAP messages when available (matches official DLL behavior)
+    if _tokdec('LAP') in msg_by_type:
+        for m in msg_by_type[_tokdec('LAP')]:
+            # 2nd byte is segment #, see M4GT4
+            segment, lap, duration, end_time = struct.unpack('xBHIxxxxxxxxI', m.content)
+            end_time -= time_offset
+            if segment:
+                continue
+            elif not lap_nums:
+                pass
+            elif lap_nums[-1] == lap:
+                continue
+            elif lap_nums[-1] + 1 == lap:
+                pass
+            elif lap_nums[-1] + 2 == lap:
+                # emit inferred lap
+                lap_nums.append(lap - 1)
+                start_times.append(end_times[-1])
+                end_times.append(end_time - duration)
+            else:
+                assert False, 'Lap gap from %d to %d' % (lap_nums[-1], lap)
+            lap_nums.append(lap)
+            start_times.append(end_time - duration)
+            end_times.append(end_time)
+    elif lat_ch and lon_ch:
+        # Fall back to GPS-based lap detection only when no LAP messages exist
         track = msg_by_type[_tokdec('TRK')][-1].content
         XYZ = np.column_stack(gps.lla2ecef(np.array(lat_ch.sampledata),
                                            np.array(lon_ch.sampledata), 0))
@@ -702,38 +801,24 @@ def _get_laps(lat_ch, lon_ch, msg_by_type, time_offset, last_time):
                                     np.array(lat_ch.timecodes),
                                     (track['sf_lat'], track['sf_long']))
-        lap_markers = [0] + lap_markers + [last_time - time_offset]
+        # Use GPS channel's last timecode as session end (already adjusted)
+        # This avoids relying on last_time which may be 0 when no LAP messages exist
+        session_end = int(lat_ch.timecodes[-1]) if len(lat_ch.timecodes) else (last_time - time_offset if last_time else 0)
-        for lap, (start_time, end_time) in enumerate(zip(lap_markers[:-1], lap_markers[1:])):
-            lap_nums.append(lap)
-            start_times.append(start_time)
-            end_times.append(end_time)
-    else:
-        # otherwise, use the lap data provided.
-        if _tokdec('LAP') in msg_by_type:
-            for m in msg_by_type[_tokdec('LAP')]:
-                # 2nd byte is segment #, see M4GT4
-                segment, lap, duration, end_time = struct.unpack('xBHIxxxxxxxxI', m.content)
-                end_time -= time_offset
-                if segment:
-                    continue
-                elif not lap_nums:
-                    pass
-                elif lap_nums[-1] == lap:
-                    continue
-                elif lap_nums[-1] + 1 == lap:
-                    pass
-                elif lap_nums[-1] + 2 == lap:
-                    # emit inferred lap
-                    lap_nums.append(lap - 1)
-                    start_times.append(end_times[-1])
-                    end_times.append(end_time - duration)
-                else:
-                    assert False, 'Lap gap from %d to %d' % (lap_nums[-1], lap)
+        # Only add session boundaries if we have detected lap crossings
+        # This creates laps from each crossing to the next
+        if lap_markers:
+            lap_markers = [0] + lap_markers + [session_end]
+            for lap, (start_time, end_time) in enumerate(zip(lap_markers[:-1], lap_markers[1:])):
                 lap_nums.append(lap)
-                start_times.append(end_time - duration)
+                start_times.append(start_time)
                 end_times.append(end_time)
+    # Normalize lap numbers to 0-based indexing (matches DLL behavior)
+    if lap_nums:
+        min_lap = min(lap_nums)
+        lap_nums = [n - min_lap for n in lap_nums]
     # Create PyArrow table
     return pa.table({
         'num': pa.array(lap_nums, type=pa.int32()),
@@ -786,8 +871,13 @@ def _decompress_if_zlib(data):
     second_byte = data[1] if isinstance(data[1], int) else ord(data[1])
     if first_byte == 0x78 and second_byte in (0x01, 0x9C, 0xDA):
-        return zlib.decompress(bytes(data))
+        deco = zlib.decompressobj()
+        try:
+            return deco.decompress(bytes(data))
+        except zlib.error:
+            # Truncated stream - recover partial data
+            return deco.flush()
     return data
@@ -827,7 +917,12 @@ class _open_xrk:
             self._mmap = mmap.mmap(self._file.fileno(), 0, access=mmap.ACCESS_READ)
             # Check if zlib compressed - if so, decompress and use bytes instead of mmap
             if len(self._mmap) >= 2 and self._mmap[0] == 0x78 and self._mmap[1] in (0x01, 0x9C, 0xDA):
-                self._data = zlib.decompress(self._mmap[:])
+                deco = zlib.decompressobj()
+                try:
+                    self._data = deco.decompress(self._mmap[:])
+                except zlib.error:
+                    # Truncated stream - recover partial data
+                    self._data = deco.flush()
                 self._mmap.close()
                 self._mmap = None
                 return self._data
@@ -866,7 +961,8 @@ def aim_xrk(fname, progress=None):
         fname if not isinstance(fname, (bytes, bytearray, memoryview)) and not hasattr(fname, 'read') else "<bytes>")
     # Fix GPS timing gaps (spurious timestamp jumps in some AIM loggers)
-    fix_gps_timing_gaps(log)
+    # Pass GNFI timecodes for more robust detection (if available)
+    fix_gps_timing_gaps(log, gnfi_timecodes=data.gnfi_timecodes)
     return log

{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/base.py RENAMED Viewed

@@ -2,8 +2,6 @@
 from collections.abc import Sequence
 from dataclasses import dataclass
-import heapq
-from itertools import groupby
 import sys
 import pyarrow as pa
 import pyarrow.compute as pc
@@ -59,15 +57,12 @@ class LogFile:
             # Return an empty table with just timecodes column if no channels
             return pa.table({"timecodes": pa.array([], type=pa.int64())})
-        # Compute union of all channel timecodes using k-way merge (O(N) vs O(N log N) for sort)
-        # Each channel's timecodes are already sorted, so we merge and deduplicate in one pass
-        timecode_iterators = [
-            channel_table.column("timecodes").to_pylist()
-            for channel_table in self.channels.values()
+        # Compute union of all channel timecodes using numpy concatenate + unique
+        # This is faster than k-way merge with heapq due to optimized C implementation
+        timecode_arrays = [
+            channel_table.column("timecodes").to_numpy() for channel_table in self.channels.values()
         ]
-        merged = heapq.merge(*timecode_iterators)
-        unique_timecodes = [k for k, _ in groupby(merged)]
-        union_timecodes = pa.array(unique_timecodes, type=pa.int64())
+        union_timecodes = pa.array(np.unique(np.concatenate(timecode_arrays)), type=pa.int64())
         # Resample all channels to the union timecodes
         resampled = self.resample_to_timecodes(union_timecodes)

{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/gps.py RENAMED Viewed

@@ -414,13 +414,88 @@ if __name__ == "__main__":
     perf_test()
-def fix_gps_timing_gaps(log: "LogFile", expected_dt_ms: float = 40.0) -> "LogFile":
-    """Detect and correct large timing gaps in GPS channels and lap boundaries.
+def detect_gps_timing_offset_from_gnfi(
+    gps_timecodes: np.ndarray,
+    gnfi_timecodes: np.ndarray,
+    expected_dt_ms: float = 40.0,
+) -> list[tuple[int, int]]:
+    """Detect GPS timing offset using GNFI as reference clock.
+    GNFI messages run on the logger's internal clock (NOT the buggy GPS timecode
+    stream). They are continuous with no gaps and end at the true session end time.
+    By comparing the GPS end time to GNFI end time, we can detect if the GPS
+    firmware bug added ~65533ms to GPS timecodes.
+    Args:
+        gps_timecodes: GPS channel timecodes array
+        gnfi_timecodes: GNFI timecodes array (logger internal clock)
+        expected_dt_ms: Expected time delta between GPS samples (default 40ms = 25Hz)
+    Returns:
+        List of (gap_time, correction) tuples, or empty list if no bug detected
+    """
+    if gnfi_timecodes is None or len(gnfi_timecodes) < 2:
+        return []
+    if len(gps_timecodes) < 2:
+        return []
+    OVERFLOW_BUG_MS = 65533
+    TOLERANCE = 5000  # Allow 5 second tolerance for end-time comparison
+    gps_end = int(gps_timecodes[-1])
+    gnfi_end = int(gnfi_timecodes[-1])
+    offset = gps_end - gnfi_end
+    # Check if GPS extends ~65533ms beyond GNFI (the bug signature)
+    if not (OVERFLOW_BUG_MS - TOLERANCE <= offset <= OVERFLOW_BUG_MS + TOLERANCE):
+        return []
+    # Bug detected! Find the gap where it likely occurred
+    # Look for the largest gap in GPS timecodes
+    dt = np.diff(gps_timecodes)
+    gap_threshold = expected_dt_ms * 10  # 400ms default
+    gap_indices = np.where(dt > gap_threshold)[0]
+    if len(gap_indices) == 0:
+        return []
+    # Find the largest gap
+    largest_gap_idx = gap_indices[np.argmax(dt[gap_indices])]
+    gap_time = int(gps_timecodes[largest_gap_idx])
+    gap_size = dt[largest_gap_idx]
+    # For direct gaps (within TOLERANCE of OVERFLOW_BUG_MS, i.e. 60533-70533ms), use gap_size - expected_dt_ms as correction
+    # For hidden bugs (detected via GNFI), use OVERFLOW_BUG_MS as correction
+    # because the gap_size might be smaller due to signal loss masking the bug
+    if OVERFLOW_BUG_MS - TOLERANCE <= gap_size <= OVERFLOW_BUG_MS + TOLERANCE:
+        # Direct gap - correction is the excess time
+        correction = gap_size - expected_dt_ms
+    else:
+        # Hidden bug - correction is the full overflow amount
+        correction = OVERFLOW_BUG_MS
+    return [(gap_time, int(correction))]
+def fix_gps_timing_gaps(
+    log: "LogFile",
+    expected_dt_ms: float = 40.0,
+    gnfi_timecodes: np.ndarray | None = None,
+) -> "LogFile":
+    """Detect and correct 16-bit overflow timing gaps in GPS channels and lap boundaries.
     Some AIM data loggers produce GPS data with spurious timestamp jumps
-    (e.g., 65533ms gaps that should be ~40ms). This is likely caused by a
-    16-bit overflow bug in the logger firmware. This function detects such
-    gaps and corrects the timecodes by removing the excess time.
+    (e.g., 65533ms gaps that should be ~40ms). This is caused by a 16-bit
+    overflow bug in the logger firmware where the upper 16 bits of the
+    timecode are corrupted, resulting in a gap of approximately 65533ms
+    (0xFFFD, or 2^16 - 3).
+    This function detects the firmware bug in three ways (in order of preference):
+    1. GNFI-based detection: If GNFI timecodes are available, compare GPS end time
+       to GNFI end time (GNFI runs on logger's internal clock, provides ground truth)
+    2. Direct detection: gaps between 60000ms and 70000ms
+    3. Indirect detection: GPS ends ~65533ms after other channels, indicating
+       the bug occurred during a GPS signal loss (hidden within a smaller gap)
     The fix is applied in-place to the LogFile's channels dict and laps table.
@@ -431,12 +506,20 @@ def fix_gps_timing_gaps(log: "LogFile", expected_dt_ms: float = 40.0) -> "LogFil
     expected_dt_ms : float, default=40.0
         Expected time delta between GPS samples in milliseconds.
         Default is 40ms (25 Hz GPS).
+    gnfi_timecodes : np.ndarray or None, default=None
+        Optional GNFI timecodes from logger's internal clock. If provided,
+        used for more robust detection of the GPS timing bug.
     Returns
     -------
     LogFile
         The same LogFile object with corrected GPS timecodes and lap boundaries.
     """
+    # The firmware bug causes a gap of approximately 65533ms (0xFFFD).
+    OVERFLOW_BUG_MS = 65533
+    OVERFLOW_GAP_MIN = 60000  # 60 seconds minimum
+    OVERFLOW_GAP_MAX = 70000  # 70 seconds maximum
     # Find the first GPS channel that exists
     gps_channel_name = None
     for name in GPS_CHANNEL_NAMES:
@@ -464,13 +547,53 @@ def fix_gps_timing_gaps(log: "LogFile", expected_dt_ms: float = 40.0) -> "LogFil
     if len(gap_indices) == 0:
         return log
-    # Build list of (gap_time, correction) pairs
+    # Build list of (gap_time, correction) pairs - only for firmware bug gaps
     gap_corrections = []
-    for gap_idx in gap_indices:
-        gap_time = gps_time[gap_idx]
-        gap_size = dt[gap_idx]
-        correction = gap_size - expected_dt_ms
-        gap_corrections.append((gap_time, correction))
+    # Method 1: GNFI-based detection (most reliable, if available)
+    if gnfi_timecodes is not None:
+        gap_corrections = detect_gps_timing_offset_from_gnfi(
+            gps_time, gnfi_timecodes, expected_dt_ms
+        )
+    # Method 2: Direct detection - gaps between 60000ms and 70000ms
+    if len(gap_corrections) == 0:
+        for gap_idx in gap_indices:
+            gap_time = gps_time[gap_idx]
+            gap_size = dt[gap_idx]
+            # Only fix gaps that match the firmware bug signature (around 65533ms)
+            if not (OVERFLOW_GAP_MIN <= gap_size <= OVERFLOW_GAP_MAX):
+                continue  # Skip - this is a legitimate gap, not the firmware bug
+            correction = gap_size - expected_dt_ms
+            gap_corrections.append((gap_time, correction))
+    # Method 3: Indirect detection - GPS extends ~65533ms beyond other channels
+    # This happens when the bug occurs during GPS signal loss
+    if len(gap_corrections) == 0 and len(gap_indices) > 0:
+        # Find end time of non-GPS channels
+        non_gps_end_times = []
+        for ch_name, ch_table in log.channels.items():
+            if ch_name not in GPS_CHANNEL_NAMES:
+                ch_time = ch_table.column("timecodes").to_numpy()
+                if len(ch_time) > 0:
+                    non_gps_end_times.append(ch_time[-1])
+        if non_gps_end_times:
+            max_non_gps_end = max(non_gps_end_times)
+            gps_end = gps_time[-1]
+            end_offset = gps_end - max_non_gps_end
+            # If GPS extends ~65533ms beyond other channels, the bug is hidden
+            if OVERFLOW_GAP_MIN <= end_offset <= OVERFLOW_GAP_MAX:
+                # Find the gap where the bug likely occurred (largest gap)
+                largest_gap_idx = gap_indices[np.argmax(dt[gap_indices])]
+                gap_time = gps_time[largest_gap_idx]
+                # Apply correction of ~65533ms (the overflow amount)
+                correction = OVERFLOW_BUG_MS
+                gap_corrections.append((gap_time, correction))
     # Fix GPS channel timecodes
     gps_time_fixed = gps_time.astype(np.float64)

{libxrk-0.6.0 → libxrk-0.8.0}/LICENSE RENAMED Viewed

File without changes

{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/__init__.py RENAMED Viewed

File without changes

{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/aim_xrk.pyi RENAMED Viewed

File without changes

{libxrk-0.6.0 → libxrk-0.8.0}/src/libxrk/py.typed RENAMED Viewed

File without changes

libxrk 0.6.0__tar.gz → 0.8.0__tar.gz

libxrk 0.6.0__tar.gz → 0.8.0__tar.gz