pymcap-cli 0.6.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/PKG-INFO +1 -1
  2. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/pyproject.toml +1 -1
  3. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cli.py +2 -0
  4. pymcap_cli-0.7.0/src/pymcap_cli/cmd/diff_cmd.py +689 -0
  5. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/info_cmd.py +8 -2
  6. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/recover_cmd.py +6 -8
  7. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/core/processors.py +2 -0
  8. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/encoding/video_pyav.py +24 -0
  9. pymcap_cli-0.7.0/src/pymcap_cli/rihs01.py +144 -0
  10. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/README.md +0 -0
  11. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/__init__.py +0 -0
  12. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/__init__.py +0 -0
  13. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/_run_processor.py +0 -0
  14. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/bag2mcap_cmd.py +0 -0
  15. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/cat_cmd.py +0 -0
  16. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/compress_cmd.py +0 -0
  17. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/convert_cmd.py +0 -0
  18. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/diag_cmd.py +0 -0
  19. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/du_cmd.py +0 -0
  20. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/filter_cmd.py +0 -0
  21. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/info_json_cmd.py +0 -0
  22. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/list_cmd.py +0 -0
  23. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/merge_cmd.py +0 -0
  24. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/plot_cmd.py +0 -0
  25. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/process_cmd.py +0 -0
  26. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/rechunk_cmd.py +0 -0
  27. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/records_cmd.py +0 -0
  28. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/recover_inplace_cmd.py +0 -0
  29. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/roscompress_cmd.py +0 -0
  30. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/rosdecompress_cmd.py +0 -0
  31. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/tftree_cmd.py +0 -0
  32. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/topic_chunks_cmd.py +0 -0
  33. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/cmd/video_cmd.py +0 -0
  34. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/core/__init__.py +0 -0
  35. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/core/input_handler.py +0 -0
  36. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/core/mcap_processor.py +0 -0
  37. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/core/mcap_transform.py +0 -0
  38. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/core/msg_resolver.py +0 -0
  39. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/debug_wrapper.py +0 -0
  40. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/display/__init__.py +0 -0
  41. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/display/display_utils.py +0 -0
  42. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/display/osc_utils.py +0 -0
  43. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/display/sparkline.py +0 -0
  44. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/encoding/__init__.py +0 -0
  45. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/encoding/decompress.py +0 -0
  46. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/encoding/encoder_common.py +0 -0
  47. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/encoding/pointcloud.py +0 -0
  48. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/encoding/video_factory.py +0 -0
  49. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/encoding/video_ffmpeg.py +0 -0
  50. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/encoding/video_protocols.py +0 -0
  51. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/http_utils.py +0 -0
  52. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/info_types.py +0 -0
  53. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/py.typed +0 -0
  54. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/rosbag_reader/__init__.py +0 -0
  55. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/rosbag_reader/_reader.py +0 -0
  56. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/rosbag_reader/_types.py +0 -0
  57. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/rosbag_reader/py.typed +0 -0
  58. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/types/__init__.py +0 -0
  59. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/types/info_data.py +0 -0
  60. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/types/info_link.py +0 -0
  61. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/types/info_types.py +0 -0
  62. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/types/types_manual.py +0 -0
  63. {pymcap_cli-0.6.0 → pymcap_cli-0.7.0}/src/pymcap_cli/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pymcap-cli
3
- Version: 0.6.0
3
+ Version: 0.7.0
4
4
  Summary: High-performance Python CLI for MCAP file processing with advanced recovery, filtering, and optimization capabilities
5
5
  Keywords: mcap,cli,robotics,ros,ros2,recovery,filtering,compression
6
6
  Author: Marko Bausch
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pymcap-cli"
3
- version = "0.6.0"
3
+ version = "0.7.0"
4
4
  description = "High-performance Python CLI for MCAP file processing with advanced recovery, filtering, and optimization capabilities"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -10,6 +10,7 @@ from pymcap_cli.cmd import (
10
10
  compress_cmd,
11
11
  convert_cmd,
12
12
  diag_cmd,
13
+ diff_cmd,
13
14
  du_cmd,
14
15
  filter_cmd,
15
16
  info_cmd,
@@ -122,6 +123,7 @@ transform_group = Group("Transform", sort_key=1)
122
123
  app.command(name="cat", group=inspect_group)(cat_cmd.cat)
123
124
  app.command(name="diag", group=inspect_group)(diag_cmd.diag)
124
125
  app.command(name="du", group=inspect_group)(du_cmd.du)
126
+ app.command(name="diff", group=inspect_group)(diff_cmd.diff_cmd)
125
127
  app.command(name="info", group=inspect_group)(info_cmd.info)
126
128
  app.command(name="info-json", group=inspect_group)(info_json_cmd.info_json)
127
129
  list_cmd.list_app.group = (inspect_group,)
@@ -0,0 +1,689 @@
1
+ """Diff command - compare MCAP files using message indexes."""
2
+
3
+ import hashlib
4
+ from collections.abc import Callable
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime, timedelta
7
+ from pathlib import Path
8
+ from typing import Annotated, TypeVar
9
+
10
+ from cyclopts import Parameter
11
+ from rich.console import Console
12
+ from rich.table import Table
13
+ from small_mcap import RebuildInfo, Schema, Statistics, Summary, rebuild_summary
14
+
15
+ from pymcap_cli.core.input_handler import open_input
16
+ from pymcap_cli.rihs01 import compute_rihs01
17
+ from pymcap_cli.utils import bytes_to_human
18
+
19
+ console = Console()
20
+
21
+ _NS_TO_MS = 1_000_000
22
+ _NS_TO_SEC = 1_000_000_000
23
+
24
+
25
def time_str(time_ns: int) -> str:
    """Render a nanosecond timestamp as ``YYYY-MM-DD HH:MM:SS.mmm``.

    A timestamp of exactly 0 is reported as ``"N/A"``. The conversion goes
    through ``datetime.fromtimestamp`` without a timezone argument.
    """
    if time_ns != 0:
        moment = datetime.fromtimestamp(time_ns / _NS_TO_SEC)
        # %f yields six microsecond digits; dropping the last three leaves milliseconds.
        return moment.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
    return "N/A"
30
+
31
+
32
def duration_human(duration_ns: int) -> str:
    """Format a nanosecond duration using the largest fitting unit.

    Below one second the value is shown in ms, below one minute in s,
    below one hour in min, and otherwise in hr (one decimal place).
    """
    seconds = timedelta(milliseconds=duration_ns / _NS_TO_MS).total_seconds()
    if seconds >= 3600:
        return f"{seconds / 3600:.1f} hr"
    if seconds >= 60:
        return f"{seconds / 60:.0f} min"
    if seconds >= 1:
        return f"{seconds:.0f} s"
    return f"{seconds * 1000:.0f} ms"
42
+
43
+
44
+ def _file_label(path: str) -> str:
45
+ return Path(path).name
46
+
47
+
48
@dataclass(frozen=True, slots=True)
class FileSummary:
    """Immutable bundle of what the diff needs to know about one input file."""

    # Path (or URL) exactly as given by the caller.
    path: str
    # Size reported when the input was opened.
    size_bytes: int
    # Rebuilt summary section (schemas, channels, ...).
    summary: Summary
    # Statistics record taken from the summary.
    statistics: Statistics

    @property
    def duration_ns(self) -> int:
        """Difference between the last and first message time in the statistics."""
        stats = self.statistics
        return stats.message_end_time - stats.message_start_time

    @property
    def label(self) -> str:
        """Display label: the final component of ``path``."""
        return Path(self.path).name
62
+
63
+
64
+ @dataclass(slots=True)
65
+ class ChannelDiff:
66
+ topic: str
67
+ counts: dict[str, int] = field(default_factory=dict)
68
+ timestamps: dict[str, set[int]] = field(default_factory=dict)
69
+ common_count: int = 0
70
+
71
+ @property
72
+ def is_identical(self) -> bool:
73
+ return all(len(ts) == 0 for ts in self.timestamps.values())
74
+
75
+ def unique_in(self, label: str) -> int:
76
+ return len(self.timestamps.get(label, set()))
77
+
78
+
79
def _schema_fingerprint(schema: Schema) -> str:
    """Return a short identifier for the content of *schema*.

    For ``ros2msg`` schemas the result of ``compute_rihs01`` is preferred.
    If that computation fails for any reason, or for any other encoding,
    the first 16 hex characters of the SHA-256 of the schema data are used.
    """
    if schema.encoding == "ros2msg":
        try:
            rihs = compute_rihs01(schema.name, schema.data)
        except Exception:  # noqa: BLE001, S110
            # Best effort only: fall through to the generic content hash.
            pass
        else:
            return rihs
    digest = hashlib.sha256(schema.data).hexdigest()
    return digest[:16]
86
+
87
+
88
+ @dataclass(frozen=True, slots=True)
89
+ class SchemaDiff:
90
+ name: str
91
+ per_label: dict[str, str | None] = field(default_factory=dict)
92
+ encodings: dict[str, str | None] = field(default_factory=dict)
93
+
94
+ @property
95
+ def is_identical(self) -> bool:
96
+ present = {v for v in self.per_label.values() if v is not None}
97
+ return len(present) <= 1
98
+
99
+
100
+ @dataclass(frozen=True, slots=True)
101
+ class ChannelSchemaMismatch:
102
+ topic: str
103
+ schema_names: dict[str, str | None] = field(default_factory=dict)
104
+ schema_encodings: dict[str, str | None] = field(default_factory=dict)
105
+ message_encodings: dict[str, str | None] = field(default_factory=dict)
106
+
107
+
108
def _compare_schemas(
    all_summaries: dict[str, FileSummary],
) -> dict[str, SchemaDiff]:
    """Compare schemas by name across all files.

    Returns a mapping from schema name to a ``SchemaDiff`` holding, for each
    file label, the schema fingerprint and encoding (or ``None`` for both
    when that file has no schema of that name). Names are processed in
    sorted order.
    """
    # label -> {schema name -> schema}; a later schema with the same name wins.
    schemas_by_label: dict[str, dict[str, Schema]] = {
        label: {schema.name: schema for schema in fs.summary.schemas.values()}
        for label, fs in all_summaries.items()
    }

    every_name: set[str] = set()
    for named in schemas_by_label.values():
        every_name.update(named)

    result: dict[str, SchemaDiff] = {}
    for name in sorted(every_name):
        fingerprints: dict[str, str | None] = {}
        encodings: dict[str, str | None] = {}
        for label in all_summaries:
            found = schemas_by_label[label].get(name)
            if found is None:
                fingerprints[label] = None
                encodings[label] = None
                continue
            fingerprints[label] = _schema_fingerprint(found)
            encodings[label] = found.encoding
        result[name] = SchemaDiff(name=name, per_label=fingerprints, encodings=encodings)
    return result
132
+
133
+
134
def _check_channel_schema_mismatches(
    all_summaries: dict[str, FileSummary],
) -> tuple[list[ChannelSchemaMismatch], list[str]]:
    """Find topics whose schema fingerprint or message encoding differs between files.

    Returns a pair ``(mismatches, warnings)``. A warning is produced whenever
    a single file carries more than one channel for the same topic; in that
    case the channel visited last determines the recorded schema.
    """
    warnings: list[str] = []
    # topic -> label -> (schema name, schema encoding, fingerprint, message encoding),
    # or None when the channel has no resolvable schema.
    topic_info: dict[str, dict[str, tuple[str, str, str, str] | None]] = {}

    for label, fs in all_summaries.items():
        channels_per_topic: dict[str, int] = {}
        for channel in fs.summary.channels.values():
            already_seen = channels_per_topic.get(channel.topic, 0)
            if already_seen > 0:
                warnings.append(
                    f"{label}: topic {channel.topic!r} has {already_seen + 1} channels, "
                    f"using schema from channel_id={channel.id}"
                )
            channels_per_topic[channel.topic] = already_seen + 1

            # schema_id 0 is treated as "no schema" and never looked up.
            schema = None
            if channel.schema_id != 0:
                schema = fs.summary.schemas.get(channel.schema_id)

            record: tuple[str, str, str, str] | None = None
            if schema is not None:
                record = (
                    schema.name,
                    schema.encoding,
                    _schema_fingerprint(schema),
                    channel.message_encoding,
                )
            topic_info.setdefault(channel.topic, {})[label] = record

    mismatches: list[ChannelSchemaMismatch] = []
    for topic in sorted(topic_info):
        recorded = topic_info[topic]
        schema_names: dict[str, str | None] = {}
        schema_encodings: dict[str, str | None] = {}
        message_encodings: dict[str, str | None] = {}
        fingerprints: set[str] = set()
        msg_encs: set[str] = set()

        for label in all_summaries:
            info = recorded.get(label)
            if info is None:
                schema_names[label] = None
                schema_encodings[label] = None
                message_encodings[label] = None
                continue
            schema_name, schema_encoding, fingerprint, message_encoding = info
            schema_names[label] = schema_name
            schema_encodings[label] = schema_encoding
            fingerprints.add(fingerprint)
            message_encodings[label] = message_encoding
            msg_encs.add(message_encoding)

        # More than one distinct value on either axis means the files disagree.
        if len(fingerprints) > 1 or len(msg_encs) > 1:
            mismatches.append(
                ChannelSchemaMismatch(
                    topic=topic,
                    schema_names=schema_names,
                    schema_encodings=schema_encodings,
                    message_encodings=message_encodings,
                )
            )

    return mismatches, warnings
194
+
195
+
196
def _extract_summary(path: str, info: RebuildInfo, file_size: int) -> FileSummary:
    """Wrap the rebuilt summary of one file into a ``FileSummary``.

    Parameters
    ----------
    path
        Path (or URL) of the file the summary was rebuilt from.
    info
        Result of rebuilding the summary; its ``summary.statistics`` is required.
    file_size
        Size of the input in bytes.

    Raises
    ------
    ValueError
        If the rebuilt summary carries no statistics record.
    """
    summary = info.summary
    stats = summary.statistics
    # Explicit check instead of ``assert``: asserts vanish under ``python -O`` and an
    # AssertionError without a message gives the caller nothing useful to print.
    if stats is None:
        raise ValueError("rebuilt summary contains no statistics record")
    return FileSummary(path=path, size_bytes=file_size, summary=summary, statistics=stats)
201
+
202
+
203
def _collect_message_timestamps(info: RebuildInfo) -> dict[int, set[int]]:
    """Gather the distinct message-index timestamps per channel id.

    Walks every message index of every chunk in ``info.chunk_information``
    and merges their timestamps into one set per channel. Indexes without
    timestamps are ignored; an absent or empty ``chunk_information`` yields
    an empty mapping.
    """
    collected: dict[int, set[int]] = {}
    chunk_information = info.chunk_information
    if chunk_information:
        for index_list in chunk_information.values():
            for message_index in index_list:
                if message_index.timestamps:
                    bucket = collected.setdefault(message_index.channel_id, set())
                    bucket.update(message_index.timestamps)
    return collected
216
+
217
+
218
def _process_file(path: str) -> tuple[FileSummary, dict[int, set[int]]]:
    """Open *path*, rebuild its summary and return ``(file summary, timestamps by channel id)``.

    The summary is rebuilt with CRC validation, channel-size calculation and
    exact sizes all switched off.
    """
    with open_input(path, buffering=0) as (stream, size):
        rebuilt = rebuild_summary(
            stream,
            validate_crc=False,
            calculate_channel_sizes=False,
            exact_sizes=False,
        )
        file_summary = _extract_summary(path, rebuilt, size)
        timestamps_by_channel = _collect_message_timestamps(rebuilt)
        return file_summary, timestamps_by_channel
224
+
225
+
226
def _compare_channels(
    all_timestamps: dict[str, dict[int, set[int]]],
    all_summaries: dict[str, FileSummary],
) -> dict[str, ChannelDiff]:
    """Compare message timestamps topic by topic across all files.

    Channel ids are translated to topics via each file's summary; ids with no
    matching channel record are reported under ``Channel_<id>``. Several
    channels on the same topic are merged. For every topic the result holds
    the per-file timestamp count, the timestamps found in that file only, and
    the number of timestamps shared by all files.
    """
    # label -> topic -> merged timestamp set
    by_label: dict[str, dict[str, set[int]]] = {}
    for label, fs in all_summaries.items():
        channels = fs.summary.channels
        merged: dict[str, set[int]] = {}
        for channel_id, stamps in all_timestamps.get(label, {}).items():
            channel = channels.get(channel_id)
            topic = f"Channel_{channel_id}" if channel is None else channel.topic
            # setdefault creates a fresh set, so the caller's sets are never mutated.
            merged.setdefault(topic, set()).update(stamps)
        by_label[label] = merged

    every_topic: set[str] = set()
    for merged in by_label.values():
        every_topic.update(merged)

    labels = list(all_summaries)
    result: dict[str, ChannelDiff] = {}
    for topic in every_topic:
        per_file = {label: by_label[label].get(topic, set()) for label in labels}
        shared = set.intersection(*per_file.values()) if per_file else set()

        only_here: dict[str, set[int]] = {}
        for label, stamps in per_file.items():
            elsewhere: set[int] = set()
            for other_label, other_stamps in per_file.items():
                if other_label != label:
                    elsewhere |= other_stamps
            only_here[label] = stamps - elsewhere

        result[topic] = ChannelDiff(
            topic=topic,
            counts={label: len(stamps) for label, stamps in per_file.items()},
            timestamps=only_here,
            common_count=len(shared),
        )
    return result
263
+
264
+
265
+ _T = TypeVar("_T")
266
+
267
+
268
def _format_values(
    summaries: list[FileSummary],
    selector: Callable[[FileSummary], _T],
    formatter: Callable[[_T], str] | None = None,
) -> list[str]:
    """Build one Rich-markup cell per file for a single property.

    *selector* extracts the value from each summary and *formatter* (``str``
    when omitted) renders it. All cells are green when every file yields the
    same value and yellow otherwise.
    """
    render = str if formatter is None else formatter
    picked = [selector(fs) for fs in summaries]
    color = "green" if len(set(picked)) == 1 else "yellow"
    return [f"[{color}]{render(value)}[/]" for value in picked]
279
+
280
+
281
def _format_number_diffs(
    summaries: list[FileSummary],
    selector: Callable[[FileSummary], int],
) -> list[str]:
    """Build one Rich-markup cell per file for an integer property.

    When all files agree every cell is green. Otherwise cells are yellow and
    each value that differs from the first file's value is followed by its
    signed difference to that first value.
    """
    numbers = [selector(fs) for fs in summaries]
    reference = numbers[0]

    if len(set(numbers)) == 1:
        return [f"[green]{number:,}[/]" for number in numbers]

    cells: list[str] = []
    for number in numbers:
        delta = number - reference
        if delta == 0:
            cells.append(f"[yellow]{number:,}[/]")
            continue
        sign = "+" if delta > 0 else ""
        cells.append(f"[yellow]{number:,} ({sign}{delta:,})[/]")
    return cells
300
+
301
+
302
def _build_summary_table(summaries: list[FileSummary]) -> Table:
    """Build the "File Comparison" table: one column per file, one row per property."""
    table = Table(title="File Comparison")
    table.add_column("Property", style="bold cyan")
    for fs in summaries:
        table.add_column(fs.label, justify="right")

    def stat(attribute: str) -> Callable[[FileSummary], int]:
        # Selector reading one field from a file's statistics record.
        return lambda fs: getattr(fs.statistics, attribute)

    rows: list[tuple[str, list[str]]] = [
        ("Size", _format_values(summaries, lambda fs: fs.size_bytes, bytes_to_human)),
        ("Messages", _format_number_diffs(summaries, stat("message_count"))),
        ("Duration", _format_values(summaries, lambda fs: fs.duration_ns, duration_human)),
        ("Start Time", _format_values(summaries, stat("message_start_time"), time_str)),
        ("End Time", _format_values(summaries, stat("message_end_time"), time_str)),
        ("Chunks", _format_number_diffs(summaries, stat("chunk_count"))),
        ("Channels", _format_number_diffs(summaries, stat("channel_count"))),
        ("Attachments", _format_number_diffs(summaries, stat("attachment_count"))),
        ("Metadata", _format_number_diffs(summaries, stat("metadata_count"))),
    ]
    for title, cells in rows:
        table.add_row(title, *cells)

    return table
334
+
335
+
336
def _build_channel_diff_table(
    diffs: dict[str, ChannelDiff],
    labels: list[str],
    *,
    skip_identical: bool = False,
) -> Table | None:
    """Build the per-topic message count table, or return ``None`` when *diffs* is empty.

    Columns are the topic, one count per file, and the number of common
    timestamps. When exactly two files are compared, "Added" and "Removed"
    columns are appended (timestamps only in the second / only in the first
    file). With *skip_identical* set, identical topics are left out and
    their number is reported in the table caption.
    """
    if not diffs:
        return None

    table = Table(title="Message Index Diff (by Channel)")
    table.add_column("Topic", style="bold cyan")
    for label in labels:
        table.add_column(label, justify="right")
    table.add_column("Common", justify="right")

    baseline = labels[0]
    pairwise = len(labels) == 2
    if pairwise:
        table.add_column("Added", justify="right")
        table.add_column("Removed", justify="right")

    identical_count = 0
    for entry in sorted(diffs.values(), key=lambda d: d.topic):
        if entry.is_identical:
            identical_count += 1
            if skip_identical:
                continue

        baseline_count = entry.counts.get(baseline, 0)
        uniform = len(set(entry.counts.values())) <= 1

        row: list[str] = []
        for label in labels:
            number = entry.counts.get(label, 0)
            text = "[dim]0[/]" if number <= 0 else f"{number:,}"
            if number != baseline_count:
                # Deviates from the first file's count.
                text = f"[yellow]{text}[/]"
            elif uniform and number > 0:
                text = f"[green]{text}[/]"
            row.append(text)

        shared = entry.common_count
        row.append("[dim]0[/]" if shared <= 0 else f"{shared:,}")

        if pairwise:
            added = entry.unique_in(labels[1])
            removed = entry.unique_in(baseline)
            row.append("[dim]0[/]" if added <= 0 else f"[green]+{added:,}[/]")
            row.append("[dim]0[/]" if removed <= 0 else f"[red]-{removed:,}[/]")

        table.add_row(entry.topic, *row)

    if skip_identical and identical_count > 0:
        table.caption = f"[dim]{identical_count} identical channels hidden[/]"

    return table
394
+
395
+
396
def _format_ts_short(time_ns: int) -> str:
    """Render a nanosecond timestamp as time of day, ``HH:MM:SS.mmm``."""
    moment = datetime.fromtimestamp(time_ns / _NS_TO_SEC)
    with_micros = moment.strftime("%H:%M:%S.%f")
    # Cut the six microsecond digits down to three (milliseconds).
    return with_micros[:-3]
399
+
400
+
401
+ def _split_into_segments(sorted_ts: list[int], gap_multiplier: float = 3.0) -> list[list[int]]:
402
+ if len(sorted_ts) <= 1:
403
+ return [sorted_ts[:]] if sorted_ts else []
404
+
405
+ gaps = [sorted_ts[i + 1] - sorted_ts[i] for i in range(len(sorted_ts) - 1)]
406
+ median_gap = sorted(gaps)[len(gaps) // 2]
407
+ threshold = median_gap * gap_multiplier
408
+
409
+ segments: list[list[int]] = []
410
+ current = [sorted_ts[0]]
411
+ for i, gap in enumerate(gaps):
412
+ if gap > threshold:
413
+ segments.append(current)
414
+ current = [sorted_ts[i + 1]]
415
+ else:
416
+ current.append(sorted_ts[i + 1])
417
+ segments.append(current)
418
+ return segments
419
+
420
+
421
+ def _format_timestamp_ranges(
422
+ timestamps: set[int], max_ranges: int = 3, *, total: int | None = None
423
+ ) -> str:
424
+ if not timestamps:
425
+ return "[dim]-[/]"
426
+
427
+ if total is not None and len(timestamps) == total:
428
+ return f"all ({len(timestamps):,} msgs)"
429
+
430
+ segments = _split_into_segments(sorted(timestamps))
431
+ parts: list[str] = []
432
+
433
+ for seg in segments[:max_ranges]:
434
+ if len(seg) == 1:
435
+ parts.append(_format_ts_short(seg[0]))
436
+ else:
437
+ parts.append(
438
+ f"{_format_ts_short(seg[0])} - {_format_ts_short(seg[-1])} ({len(seg):,} msgs)"
439
+ )
440
+
441
+ total_msgs = len(timestamps)
442
+ remaining = total_msgs - sum(len(s) for s in segments[:max_ranges])
443
+ if remaining > 0:
444
+ remaining_segs = len(segments) - max_ranges
445
+ parts.append(f"[dim]+{remaining:,} msgs in {remaining_segs} more ranges[/]")
446
+
447
+ return ", ".join(parts)
448
+
449
+
450
def _build_sample_diffs_table(
    diffs: dict[str, ChannelDiff],
    labels: list[str],
    max_ranges: int = 3,
) -> Table | None:
    """Build the "Differing Timestamps" table, or return ``None`` when every topic is identical.

    Each non-identical topic gets one row; each "Only in <file>" cell
    summarises the timestamps found in that file alone.
    """
    if all(entry.is_identical for entry in diffs.values()):
        return None

    table = Table(title="Differing Timestamps")
    table.add_column("Topic", style="bold cyan")
    for label in labels:
        table.add_column(f"Only in {label}", justify="right")

    differing = (entry for entry in diffs.values() if not entry.is_identical)
    for entry in sorted(differing, key=lambda d: d.topic):
        row: list[str] = []
        for label in labels:
            row.append(
                _format_timestamp_ranges(
                    entry.timestamps.get(label, set()),
                    max_ranges=max_ranges,
                    total=entry.counts.get(label),
                )
            )
        table.add_row(entry.topic, *row)

    return table
479
+
480
+
481
def _build_schema_diff_table(
    schema_diffs: dict[str, SchemaDiff],
    labels: list[str],
) -> Table | None:
    """Build the "Schema Differences" table, or return ``None`` when no schema differs.

    Only differing schemas get a row. Each cell shows the file's fingerprint
    with the schema encoding on a second line, or "missing" when the file has
    no such schema. The caption reports how many identical schemas were left out.
    """
    changed = [entry for entry in schema_diffs.values() if not entry.is_identical]
    if not changed:
        return None

    table = Table(title="Schema Differences")
    table.add_column("Schema", style="bold cyan")
    for label in labels:
        table.add_column(label, justify="right")

    for entry in changed:
        row: list[str] = []
        for label in labels:
            fingerprint = entry.per_label.get(label)
            if fingerprint is None:
                row.append("[dim]missing[/]")
                continue
            text = f"[yellow]{fingerprint}[/]"
            encoding = entry.encodings.get(label)
            if encoding:
                text += f"\n[dim]{encoding}[/]"
            row.append(text)
        table.add_row(entry.name, *row)

    identical_count = len(schema_diffs) - len(changed)
    if identical_count > 0:
        table.caption = f"[dim]{identical_count} identical schemas hidden[/]"

    return table
512
+
513
+
514
def _build_channel_schema_mismatch_table(
    mismatches: list[ChannelSchemaMismatch],
    labels: list[str],
) -> Table | None:
    """Build the "Channel Mismatches" table, or return ``None`` when there are no mismatches.

    One row per mismatching topic. Each cell shows the schema name used in
    that file, followed on a second line by the schema and message encodings
    when available, or "missing" when no schema name is recorded.
    """
    if not mismatches:
        return None

    table = Table(title="Channel Mismatches")
    table.add_column("Topic", style="bold cyan")
    for label in labels:
        table.add_column(label, justify="right")

    for mismatch in mismatches:
        row: list[str] = []
        for label in labels:
            schema_name = mismatch.schema_names.get(label)
            if schema_name is None:
                row.append("[dim]missing[/]")
                continue

            details: list[str] = []
            schema_encoding = mismatch.schema_encodings.get(label)
            if schema_encoding:
                details.append(f"schema: {schema_encoding}")
            message_encoding = mismatch.message_encodings.get(label)
            if message_encoding:
                details.append(f"encoding: {message_encoding}")

            lines = [f"[red]{schema_name}[/]"]
            if details:
                lines.append(f"[dim]{', '.join(details)}[/]")
            row.append("\n".join(lines))
        table.add_row(mismatch.topic, *row)

    return table
547
+
548
+
549
def _unique_labels(paths: list[str]) -> list[str]:
    """Return one display label per input path, guaranteed unique within the list.

    The bare file name is used when no other input shares it. Inputs whose
    file names collide fall back to the path as given, and a numeric suffix
    is appended if even that is already taken (same path passed twice).
    """
    name_uses: dict[str, int] = {}
    for path in paths:
        name = _file_label(path)
        name_uses[name] = name_uses.get(name, 0) + 1

    labels: list[str] = []
    taken: set[str] = set()
    for path in paths:
        name = _file_label(path)
        base = name if name_uses[name] == 1 else path
        label = base
        attempt = 2
        while label in taken:
            label = f"{base} ({attempt})"
            attempt += 1
        taken.add(label)
        labels.append(label)
    return labels


def diff_cmd(
    files: Annotated[
        list[str],
        Parameter(
            name=["files"],
            help="Paths to MCAP files to compare (local files or HTTP/HTTPS URLs)",
        ),
    ],
    *,
    skip_identical: Annotated[
        bool,
        Parameter(
            name=["--skip-identical"],
            help="Hide channels with identical message timestamps",
        ),
    ] = False,
    max_ranges: Annotated[
        int,
        Parameter(
            name=["--max-ranges"],
            help="Maximum number of timestamp ranges to show per channel",
        ),
    ] = 3,
) -> int:
    """Compare MCAP files using message index timestamps.

    Fast comparison by scanning data sections and extracting message
    timestamps from message indexes. Works even with broken or
    summary-less MCAP files.

    Parameters
    ----------
    files
        Paths to MCAP files to compare (2 or more)
    skip_identical
        Hide channels where all message timestamps match exactly
    max_ranges
        Maximum timestamp ranges to display per channel (default: 3)

    Examples
    --------
    ```
    # Compare two files
    pymcap-cli diff recording1.mcap recording2.mcap

    # Compare three files
    pymcap-cli diff a.mcap b.mcap c.mcap

    # Show only channels with differences
    pymcap-cli diff file1.mcap file2.mcap --skip-identical
    ```
    """
    if len(files) < 2:
        console.print("[red]Error:[/] At least two files must be specified")
        return 1

    # Per-file results are keyed by label, so labels must be unique. Using the bare
    # file name alone would let e.g. "orig/rec.mcap" and "fixed/rec.mcap" overwrite
    # each other and silently corrupt the comparison.
    labels = _unique_labels(files)

    summaries: list[FileSummary] = []
    all_timestamps: dict[str, dict[int, set[int]]] = {}
    all_summaries: dict[str, FileSummary] = {}

    for path, label in zip(files, labels):
        try:
            fs, ts = _process_file(path)
        except Exception as exc:  # noqa: BLE001
            console.print(f"[red]Error reading {path}:[/] {exc}")
            return 1
        summaries.append(fs)
        all_timestamps[label] = ts
        all_summaries[label] = fs

    # The first file is the reference for "added" / "removed" wording.
    first_label = labels[0]

    channel_diffs = _compare_channels(all_timestamps, all_summaries)
    schema_diffs = _compare_schemas(all_summaries)
    channel_schema_mismatches, topic_warnings = _check_channel_schema_mismatches(all_summaries)

    total_common = sum(d.common_count for d in channel_diffs.values())
    total_added = sum(d.unique_in(lbl) for d in channel_diffs.values() for lbl in labels[1:])
    total_removed = sum(d.unique_in(first_label) for d in channel_diffs.values())
    has_diffs = total_added > 0 or total_removed > 0
    has_schema_diffs = any(not d.is_identical for d in schema_diffs.values())
    has_mismatches = bool(channel_schema_mismatches)

    console.print()
    console.print(_build_summary_table(summaries))

    if topic_warnings:
        console.print()
        for w in topic_warnings:
            console.print(f"[yellow]⚠ {w}[/]")

    channel_table = _build_channel_diff_table(channel_diffs, labels, skip_identical=skip_identical)
    if channel_table:
        console.print()
        console.print(channel_table)

    if has_diffs:
        sample_table = _build_sample_diffs_table(channel_diffs, labels, max_ranges=max_ranges)
        if sample_table:
            console.print()
            console.print(sample_table)

    if has_schema_diffs:
        schema_table = _build_schema_diff_table(schema_diffs, labels)
        if schema_table:
            console.print()
            console.print(schema_table)

    if has_mismatches:
        mismatch_table = _build_channel_schema_mismatch_table(channel_schema_mismatches, labels)
        if mismatch_table:
            console.print()
            console.print(mismatch_table)

    console.print()
    all_good = not has_diffs and not has_schema_diffs and not has_mismatches
    if all_good:
        console.print(
            f"[green]✓ All {total_common:,} messages have identical timestamps and schemas[/]"
        )
    else:
        if not has_diffs:
            console.print(f"[green]✓ All {total_common:,} messages have identical timestamps[/]")
        else:
            console.print(f"[green]✓ {total_common:,} messages match[/]")
            if total_added > 0:
                console.print(f"[yellow]⚠ {total_added:,} messages added in other files[/]")
            if total_removed > 0:
                console.print(f"[red]⚠ {total_removed:,} messages removed from {first_label}[/]")
        if has_schema_diffs:
            diff_count = sum(1 for d in schema_diffs.values() if not d.is_identical)
            console.print(f"[red]⚠ {diff_count} schema(s) differ across files[/]")
        if has_mismatches:
            console.print(
                f"[red]⚠ {len(channel_schema_mismatches)} "
                f"channel(s) use different schemas across files[/]"
            )

    return 0
@@ -115,8 +115,14 @@ def _build_file_info_and_summary(data: McapInfoOutput) -> Table:
115
115
  "Duration:",
116
116
  f"[yellow]{duration_ns / 1_000_000:.2f} ms[/] [cyan]({duration_human})[/]",
117
117
  )
118
- info_table.add_row("Start:", f"[cyan]{date_start}[/]")
119
- info_table.add_row("End:", f"[cyan]{date_end}[/]")
118
+ info_table.add_row(
119
+ "Start:",
120
+ f"[cyan]{date_start}[/] [dim]({stats['message_start_time'] / _NS_TO_SEC:.9f})[/]",
121
+ )
122
+ info_table.add_row(
123
+ "End:",
124
+ f"[cyan]{date_end}[/] [dim]({stats['message_end_time'] / _NS_TO_SEC:.9f})[/]",
125
+ )
120
126
  info_table.add_row("Channels:", f"[green]{stats['channel_count']:,}[/]")
121
127
  info_table.add_row("Attachments:", f"[yellow]{stats['attachment_count']:,}[/]")
122
128
  info_table.add_row("Metadata:", f"[cyan]{stats['metadata_count']:,}[/]")
@@ -2,6 +2,7 @@ from typing import Annotated
2
2
 
3
3
  from cyclopts import Group, Parameter
4
4
  from rich.console import Console
5
+ from small_mcap.exceptions import WriterNotStartedError
5
6
 
6
7
  from pymcap_cli.cmd._run_processor import run_processor
7
8
  from pymcap_cli.core.mcap_processor import InputOptions, OutputOptions
@@ -75,15 +76,12 @@ def recover(
75
76
  )
76
77
  console.print("[green]✓ Recovery completed successfully![/green]")
77
78
  console.print(result.stats)
79
+ except WriterNotStartedError:
80
+ console.print("[yellow]Warning: File appears to be empty or severely corrupted[/yellow]")
81
+ console.print("No valid MCAP data found to recover")
78
82
  except RuntimeError as e:
79
- if "Writer not started" in str(e):
80
- console.print(
81
- "[yellow]Warning: File appears to be empty or severely corrupted[/yellow]"
82
- )
83
- console.print("No valid MCAP data found to recover")
84
- else:
85
- console.print(f"[red]Error during recovery: {e}[/red]")
86
- return 1
83
+ console.print(f"[red]Error during recovery: {e}[/red]")
84
+ return 1
87
85
  except Exception as e: # noqa: BLE001
88
86
  console.print(f"[red]Error during recovery: {e}[/red]")
89
87
  return 1
@@ -118,6 +118,8 @@ class TimeFilterProcessor(Processor):
118
118
  """
119
119
 
120
120
  def __init__(self, start_ns: int | None = None, end_ns: int | None = None) -> None:
121
+ if start_ns is not None and end_ns is not None and start_ns >= end_ns:
122
+ raise ValueError(f"start_ns ({start_ns}) must be less than end_ns ({end_ns})")
121
123
  # Pre-compute bounds with defaults for fast path
122
124
  self.start = start_ns if start_ns is not None else 0
123
125
  self.end = end_ns if end_ns is not None else MAX_INT64
@@ -15,6 +15,7 @@ import av
15
15
  import av.error
16
16
  import numpy as np
17
17
  from av import Packet, VideoFrame
18
+ from typing_extensions import Self
18
19
 
19
20
  from pymcap_cli.encoding.encoder_common import (
20
21
  EncoderConfig,
@@ -233,8 +234,20 @@ class VideoEncoder:
233
234
  try:
234
235
  self._context.open()
235
236
  except av.error.FFmpegError as exc:
237
+ del self._context
236
238
  raise VideoEncoderError(f"Failed to open encoder {codec_name}: {exc}") from exc
237
239
 
240
+ def close(self) -> None:
241
+ """Release the native codec context."""
242
+ if hasattr(self, "_context"):
243
+ del self._context
244
+
245
+ def __enter__(self) -> Self:
246
+ return self
247
+
248
+ def __exit__(self, *_: object) -> None:
249
+ self.close()
250
+
238
251
  def encode(self, frame: VideoFrame) -> bytes | None:
239
252
  """Encode a single frame and return compressed video bytes, or None if buffered."""
240
253
  needs_resize = frame.width != self.config.width or frame.height != self.config.height
@@ -338,6 +351,17 @@ class PyAVVideoDecompressor:
338
351
  is_jpeg=False,
339
352
  )
340
353
 
354
+ def close(self) -> None:
355
+ """Release native codec contexts."""
356
+ self._decoder = None
357
+ self._jpeg_encoder = None
358
+
359
+ def __enter__(self) -> Self:
360
+ return self
361
+
362
+ def __exit__(self, *_: object) -> None:
363
+ self.close()
364
+
341
365
  def flush(self) -> list[DecompressedFrame]:
342
366
  if self._decoder is None:
343
367
  return []
@@ -0,0 +1,144 @@
1
+ """RIHS01: ROS Interface Hashing Standard v1
2
+
3
+ - https://roscon.ros.org/2023/talks/ROS_2_Types_On-the-wire_Type_Descriptions_and_Hashing_in_Iron_and_onwards.pdf
4
+ - https://github.com/ros-infrastructure/rep/pull/381
5
+
6
+ """
7
+
8
+ import hashlib
9
+ import json
10
+
11
+ from ros_parser.models import Type as _RosType
12
+ from ros_parser.ros2_msg import parse_schema_to_definitions
13
+
14
+ _RIHS01_PREFIX = "RIHS01_"
15
+ _RIHS01_PRIMITIVE_IDS: dict[str, int] = {
16
+ "int8": 2,
17
+ "uint8": 3,
18
+ "int16": 4,
19
+ "uint16": 5,
20
+ "int32": 6,
21
+ "uint32": 7,
22
+ "int64": 8,
23
+ "uint64": 9,
24
+ "float32": 10,
25
+ "float64": 11,
26
+ "char": 13,
27
+ "wchar": 14,
28
+ "bool": 15,
29
+ "boolean": 15,
30
+ "byte": 16,
31
+ "octet": 16,
32
+ "string": 17,
33
+ "wstring": 18,
34
+ }
35
+
36
+
37
def _rihs01_field_type(t: _RosType) -> dict[str, int | str]:
    """Describe one parsed field type as a RIHS01 field-type mapping.

    The scalar id is chosen first: 1 for a type that carries a package name,
    21/22 for a bounded ``string`` / any other bounded string type, otherwise
    the table entry for ``t.type_name`` (0 when the name is not in the table).
    A container offset is then added for array types: 48 for a fixed-size
    array, 96 for an upper-bounded one and 144 for an unbounded one.

    Args:
        t: Parsed type of a single message field.

    Returns:
        A dict with the keys ``type_id``, ``capacity``, ``string_capacity``
        and ``nested_type_name``, in that order (the order is part of the
        hashed JSON text).
    """
    is_nested = t.package_name is not None
    nested_type_name = f"{t.package_name}/msg/{t.type_name}" if is_nested else ""

    # Scalar id before any array/sequence offset is applied.
    if is_nested:
        base_id = 1  # NESTED_TYPE
    elif t.string_upper_bound is not None:
        base_id = 21 if t.type_name == "string" else 22
    else:
        base_id = _RIHS01_PRIMITIVE_IDS.get(t.type_name, 0)

    # Container offset and element capacity; plain scalars use neither.
    if not t.is_array:
        offset, capacity = 0, 0
    elif t.is_upper_bound:
        offset, capacity = 96, t.array_size or 0
    elif t.array_size is not None:
        offset, capacity = 48, t.array_size
    else:
        offset, capacity = 144, 0

    return {
        "type_id": base_id + offset,
        "capacity": capacity,
        "string_capacity": t.string_upper_bound or 0,
        "nested_type_name": nested_type_name,
    }
66
+
67
+
68
+ def _rihs01_type_name(name: str) -> str:
69
+ parts = name.split("/")
70
+ if len(parts) == 2 and parts[1][0].isupper():
71
+ return f"{parts[0]}/msg/{parts[1]}"
72
+ return name
73
+
74
+
75
def _rihs01_individual_type_desc(name: str, msgdef: object) -> dict[str, object]:
    """Build the hashable description of a single message definition.

    Args:
        name: Key of the definition; normalised via ``_rihs01_type_name``.
        msgdef: Parsed definition exposing a ``fields`` iterable whose items
            have ``name`` and ``type`` attributes.

    Returns:
        A dict with ``type_name`` followed by ``fields``; each field entry
        holds the field's ``name`` and its RIHS01 ``type`` mapping.
    """
    type_name = _rihs01_type_name(name)
    field_entries: list[dict[str, object]] = []
    for member in msgdef.fields:  # type: ignore[union-attr]
        field_entries.append({"name": member.name, "type": _rihs01_field_type(member.type)})
    return {"type_name": type_name, "fields": field_entries}
83
+
84
+
85
+ def _collect_refs(schema_name: str, canonical: dict[str, object]) -> list[str]:
86
+ visited: set[str] = set()
87
+ queue = [schema_name]
88
+ while queue:
89
+ name = queue.pop()
90
+ if name in canonical:
91
+ resolved = name
92
+ else:
93
+ parts = name.split("/")
94
+ resolved = f"{parts[0]}/{parts[2]}" if len(parts) >= 3 and parts[1] == "msg" else name
95
+ if resolved in visited or resolved not in canonical:
96
+ continue
97
+ visited.add(resolved)
98
+ for f in canonical[resolved].fields: # type: ignore[union-attr]
99
+ if f.type.package_name is not None:
100
+ ref = f"{f.type.package_name}/msg/{f.type.type_name}"
101
+ if ref not in visited:
102
+ queue.append(ref)
103
+ visited.discard(schema_name)
104
+ return sorted(visited)
105
+
106
+
107
+ def _find_main_def(schema_name: str, canonical: dict[str, object]) -> tuple[str, object]:
108
+ if schema_name in canonical:
109
+ return schema_name, canonical[schema_name]
110
+ parts = schema_name.split("/")
111
+ if len(parts) >= 3 and parts[1] == "msg":
112
+ alt = f"{parts[0]}/{parts[2]}"
113
+ if alt in canonical:
114
+ return alt, canonical[alt]
115
+ if len(parts) >= 2:
116
+ alt = parts[-1]
117
+ if alt in canonical:
118
+ return alt, canonical[alt]
119
+ raise ValueError(f"Schema {schema_name} not found in definitions")
120
+
121
+
122
def compute_rihs01(schema_name: str, schema_data: bytes) -> str:
    """Compute the RIHS01 hash string of a ros2msg schema (REP-2011).

    The schema is parsed, the main definition and the definitions it
    references are turned into plain dicts, and the SHA-256 of their JSON
    serialisation is returned behind the ``RIHS01_`` prefix.

    Args:
        schema_name: Name of the schema's main type.
        schema_data: Raw schema text as bytes.

    Returns:
        ``"RIHS01_"`` followed by the hex SHA-256 digest.

    Raises:
        ValueError: If the main type cannot be found among the parsed
            definitions.
    """
    parsed = parse_schema_to_definitions(schema_name, schema_data)

    # Keep each definition object once, under the first key it appears with.
    # NOTE(review): presumably the parser registers one object under several
    # alias keys; confirm against ros_parser.
    canonical: dict[str, object] = {}
    seen_ids: set[int] = set()
    for key, msgdef in parsed.items():
        ident = id(msgdef)
        if ident in seen_ids:
            continue
        seen_ids.add(ident)
        canonical[key] = msgdef

    main_name, main_def = _find_main_def(schema_name, canonical)
    ref_names = _collect_refs(main_name, canonical)

    main_desc = _rihs01_individual_type_desc(main_name, main_def)
    ref_descs = []
    for ref in ref_names:
        ref_descs.append(_rihs01_individual_type_desc(ref, canonical[ref]))

    # Key order and separators determine the hashed text; keep them fixed.
    payload = json.dumps(
        {"type_description": main_desc, "referenced_type_descriptions": ref_descs},
        separators=(", ", ": "),
    )
    digest = hashlib.sha256(payload.encode()).hexdigest()
    return f"{_RIHS01_PREFIX}{digest}"
File without changes