oscura-0.8.0-py3-none-any.whl → oscura-0.11.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. oscura/__init__.py +19 -19
  2. oscura/__main__.py +4 -0
  3. oscura/analyzers/__init__.py +2 -0
  4. oscura/analyzers/digital/extraction.py +2 -3
  5. oscura/analyzers/digital/quality.py +1 -1
  6. oscura/analyzers/digital/timing.py +1 -1
  7. oscura/analyzers/ml/signal_classifier.py +6 -0
  8. oscura/analyzers/patterns/__init__.py +66 -0
  9. oscura/analyzers/power/basic.py +3 -3
  10. oscura/analyzers/power/soa.py +1 -1
  11. oscura/analyzers/power/switching.py +3 -3
  12. oscura/analyzers/signal_classification.py +529 -0
  13. oscura/analyzers/signal_integrity/sparams.py +3 -3
  14. oscura/analyzers/statistics/basic.py +10 -7
  15. oscura/analyzers/validation.py +1 -1
  16. oscura/analyzers/waveform/measurements.py +200 -156
  17. oscura/analyzers/waveform/measurements_with_uncertainty.py +91 -35
  18. oscura/analyzers/waveform/spectral.py +182 -84
  19. oscura/api/dsl/commands.py +15 -6
  20. oscura/api/server/templates/base.html +137 -146
  21. oscura/api/server/templates/export.html +84 -110
  22. oscura/api/server/templates/home.html +248 -267
  23. oscura/api/server/templates/protocols.html +44 -48
  24. oscura/api/server/templates/reports.html +27 -35
  25. oscura/api/server/templates/session_detail.html +68 -78
  26. oscura/api/server/templates/sessions.html +62 -72
  27. oscura/api/server/templates/waveforms.html +54 -64
  28. oscura/automotive/__init__.py +1 -1
  29. oscura/automotive/can/session.py +1 -1
  30. oscura/automotive/dbc/generator.py +638 -23
  31. oscura/automotive/dtc/data.json +17 -102
  32. oscura/automotive/flexray/fibex.py +9 -1
  33. oscura/automotive/uds/decoder.py +99 -6
  34. oscura/cli/analyze.py +8 -2
  35. oscura/cli/batch.py +36 -5
  36. oscura/cli/characterize.py +18 -4
  37. oscura/cli/export.py +47 -5
  38. oscura/cli/main.py +2 -0
  39. oscura/cli/onboarding/wizard.py +10 -6
  40. oscura/cli/pipeline.py +585 -0
  41. oscura/cli/visualize.py +6 -4
  42. oscura/convenience.py +400 -32
  43. oscura/core/measurement_result.py +286 -0
  44. oscura/core/progress.py +1 -1
  45. oscura/core/schemas/device_mapping.json +2 -8
  46. oscura/core/schemas/packet_format.json +4 -24
  47. oscura/core/schemas/protocol_definition.json +2 -12
  48. oscura/core/types.py +232 -239
  49. oscura/correlation/multi_protocol.py +1 -1
  50. oscura/export/legacy/__init__.py +11 -0
  51. oscura/export/legacy/wav.py +75 -0
  52. oscura/exporters/__init__.py +19 -0
  53. oscura/exporters/wireshark.py +809 -0
  54. oscura/hardware/acquisition/file.py +5 -19
  55. oscura/hardware/acquisition/saleae.py +10 -10
  56. oscura/hardware/acquisition/socketcan.py +4 -6
  57. oscura/hardware/acquisition/synthetic.py +1 -5
  58. oscura/hardware/acquisition/visa.py +6 -6
  59. oscura/hardware/security/side_channel_detector.py +5 -508
  60. oscura/inference/message_format.py +686 -1
  61. oscura/jupyter/display.py +2 -2
  62. oscura/jupyter/magic.py +3 -3
  63. oscura/loaders/__init__.py +17 -12
  64. oscura/loaders/binary.py +1 -1
  65. oscura/loaders/chipwhisperer.py +1 -2
  66. oscura/loaders/configurable.py +1 -1
  67. oscura/loaders/csv_loader.py +2 -2
  68. oscura/loaders/hdf5_loader.py +1 -1
  69. oscura/loaders/lazy.py +6 -1
  70. oscura/loaders/mmap_loader.py +0 -1
  71. oscura/loaders/numpy_loader.py +8 -7
  72. oscura/loaders/preprocessing.py +3 -5
  73. oscura/loaders/rigol.py +21 -7
  74. oscura/loaders/sigrok.py +2 -5
  75. oscura/loaders/tdms.py +3 -2
  76. oscura/loaders/tektronix.py +38 -32
  77. oscura/loaders/tss.py +20 -27
  78. oscura/loaders/validation.py +17 -10
  79. oscura/loaders/vcd.py +13 -8
  80. oscura/loaders/wav.py +1 -6
  81. oscura/pipeline/__init__.py +76 -0
  82. oscura/pipeline/handlers/__init__.py +165 -0
  83. oscura/pipeline/handlers/analyzers.py +1045 -0
  84. oscura/pipeline/handlers/decoders.py +899 -0
  85. oscura/pipeline/handlers/exporters.py +1103 -0
  86. oscura/pipeline/handlers/filters.py +891 -0
  87. oscura/pipeline/handlers/loaders.py +640 -0
  88. oscura/pipeline/handlers/transforms.py +768 -0
  89. oscura/reporting/formatting/measurements.py +55 -14
  90. oscura/reporting/templates/enhanced/protocol_re.html +504 -503
  91. oscura/sessions/legacy.py +49 -1
  92. oscura/side_channel/__init__.py +38 -57
  93. oscura/utils/builders/signal_builder.py +5 -5
  94. oscura/utils/comparison/compare.py +7 -9
  95. oscura/utils/comparison/golden.py +1 -1
  96. oscura/utils/filtering/convenience.py +2 -2
  97. oscura/utils/math/arithmetic.py +38 -62
  98. oscura/utils/math/interpolation.py +20 -20
  99. oscura/utils/pipeline/__init__.py +4 -17
  100. oscura/utils/progressive.py +1 -4
  101. oscura/utils/triggering/edge.py +1 -1
  102. oscura/utils/triggering/pattern.py +2 -2
  103. oscura/utils/triggering/pulse.py +2 -2
  104. oscura/utils/triggering/window.py +3 -3
  105. oscura/validation/hil_testing.py +11 -11
  106. oscura/visualization/__init__.py +46 -284
  107. oscura/visualization/batch.py +72 -433
  108. oscura/visualization/plot.py +542 -53
  109. oscura/visualization/styles.py +184 -318
  110. oscura/workflows/batch/advanced.py +1 -1
  111. oscura/workflows/batch/aggregate.py +12 -9
  112. oscura/workflows/complete_re.py +251 -23
  113. oscura/workflows/digital.py +27 -4
  114. oscura/workflows/multi_trace.py +136 -17
  115. oscura/workflows/waveform.py +11 -6
  116. oscura-0.11.0.dist-info/METADATA +460 -0
  117. {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/RECORD +120 -145
  118. oscura/side_channel/dpa.py +0 -1025
  119. oscura/utils/optimization/__init__.py +0 -19
  120. oscura/utils/optimization/parallel.py +0 -443
  121. oscura/utils/optimization/search.py +0 -532
  122. oscura/utils/pipeline/base.py +0 -338
  123. oscura/utils/pipeline/composition.py +0 -248
  124. oscura/utils/pipeline/parallel.py +0 -449
  125. oscura/utils/pipeline/pipeline.py +0 -375
  126. oscura/utils/search/__init__.py +0 -16
  127. oscura/utils/search/anomaly.py +0 -424
  128. oscura/utils/search/context.py +0 -294
  129. oscura/utils/search/pattern.py +0 -288
  130. oscura/utils/storage/__init__.py +0 -61
  131. oscura/utils/storage/database.py +0 -1166
  132. oscura/visualization/accessibility.py +0 -526
  133. oscura/visualization/annotations.py +0 -371
  134. oscura/visualization/axis_scaling.py +0 -305
  135. oscura/visualization/colors.py +0 -451
  136. oscura/visualization/digital.py +0 -436
  137. oscura/visualization/eye.py +0 -571
  138. oscura/visualization/histogram.py +0 -281
  139. oscura/visualization/interactive.py +0 -1035
  140. oscura/visualization/jitter.py +0 -1042
  141. oscura/visualization/keyboard.py +0 -394
  142. oscura/visualization/layout.py +0 -400
  143. oscura/visualization/optimization.py +0 -1079
  144. oscura/visualization/palettes.py +0 -446
  145. oscura/visualization/power.py +0 -508
  146. oscura/visualization/power_extended.py +0 -955
  147. oscura/visualization/presets.py +0 -469
  148. oscura/visualization/protocols.py +0 -1246
  149. oscura/visualization/render.py +0 -223
  150. oscura/visualization/rendering.py +0 -444
  151. oscura/visualization/reverse_engineering.py +0 -838
  152. oscura/visualization/signal_integrity.py +0 -989
  153. oscura/visualization/specialized.py +0 -643
  154. oscura/visualization/spectral.py +0 -1226
  155. oscura/visualization/thumbnails.py +0 -340
  156. oscura/visualization/time_axis.py +0 -351
  157. oscura/visualization/waveform.py +0 -454
  158. oscura-0.8.0.dist-info/METADATA +0 -661
  159. {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/WHEEL +0 -0
  160. {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/entry_points.txt +0 -0
  161. {oscura-0.8.0.dist-info → oscura-0.11.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,809 @@
+ """Production-quality Wireshark Lua dissector generation.
+
+ This module generates idiomatic Lua dissectors for Wireshark from inferred protocol
+ message schemas, supporting:
+
+ - Smart field naming from context and patterns
+ - Enum detection and value_string tables
+ - Nested field hierarchies and subtrees
+ - Expert info for validation (checksums, reserved fields)
+ - Clean, documented Lua code following Wireshark best practices
+
+ Requirements addressed: Protocol Export, Wireshark Integration
+
+ Example:
+     >>> from oscura.inference.message_format import infer_format
+     >>> from oscura.exporters.wireshark import generate_dissector
+     >>> messages = [b'\\x01\\x00\\x05Hello', b'\\x02\\x00\\x05World']
+     >>> schema = infer_format(messages)
+     >>> lua_code = generate_dissector(schema, protocol_name="custom")
+     >>> with open("custom.lua", "w") as f:
+     ...     f.write(lua_code)
+
+ References:
+     Wireshark Lua API: https://www.wireshark.org/docs/wsdg_html_chunked/lua_module_Proto.html
+     Wireshark dissector best practices
+ """
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any
+
+ if TYPE_CHECKING:
+     from oscura.inference.message_format import InferredField, MessageSchema
+
+ # Version should come from package metadata
+ _OSCURA_VERSION = "0.9.0"
+
+
+ def generate_dissector(
+     schema: MessageSchema,
+     protocol_name: str = "unknown",
+     protocol_description: str | None = None,
+     author: str = "Oscura Framework",
+     *,
+     include_expert_info: bool = True,
+     detect_enums: bool = True,
+     enum_threshold: int = 10,
+     add_comments: bool = True,
+ ) -> str:
+     """Generate production-quality Wireshark Lua dissector from message schema.
+
+     Creates idiomatic Lua code with:
+     - Smart field naming based on field types and patterns
+     - Enum detection and value_string tables with semantic labels
+     - Expert info for checksums, reserved fields, and validation
+     - Proper indentation and comments
+     - Protocol metadata (author, version, description)
+     - Best practices for Wireshark dissectors
+
+     Args:
+         schema: Inferred message schema from message_format.infer_format()
+         protocol_name: Short protocol name (e.g., "uart", "custom_proto")
+         protocol_description: Human-readable description (default: auto-generated)
+         author: Dissector author name
+         include_expert_info: Add expert info for validation (default: True)
+         detect_enums: Detect enum fields from value sets (default: True)
+         enum_threshold: Max unique values to treat as enum (default: 10)
+         add_comments: Add inline comments for clarity (default: True)
+
+     Returns:
+         Complete Lua dissector code ready for Wireshark
+
+     Example:
+         >>> from oscura.inference.message_format import infer_format
+         >>> messages = [b'\\xAA\\x01\\x00\\x05DATA1', b'\\xAA\\x02\\x00\\x05DATA2']
+         >>> schema = infer_format(messages)
+         >>> lua = generate_dissector(schema, "custom", "Custom Protocol")
+         >>> print(lua[:100])
+         -- Auto-generated Wireshark dissector for Custom Protocol
+         -- Generated by Oscura v0.9.0
+     """
+     if protocol_description is None:
+         protocol_description = f"{protocol_name.upper()} Protocol"
+
+     # Detect enums if requested
+     enum_fields = {}
+     if detect_enums:
+         enum_fields = _detect_enum_fields(schema, enum_threshold)
+
+     # Generate smart field names with enhanced context
+     field_names = _generate_smart_field_names(schema, enum_fields)
+
+     # Build Lua code sections
+     header = _generate_header(protocol_name, protocol_description, author)
+     proto_declaration = _generate_proto_declaration(protocol_name, protocol_description)
+     value_strings = _generate_value_strings(enum_fields, field_names, protocol_name, schema)
+     field_definitions = _generate_field_definitions(
+         schema, field_names, protocol_name, enum_fields, add_comments
+     )
+     dissector_function = _generate_dissector_function(
+         schema,
+         field_names,
+         protocol_name,
+         enum_fields,
+         include_expert_info,
+         add_comments,
+     )
+     registration = _generate_registration(protocol_name)
+
+     # Combine all sections
+     sections = [
+         header,
+         proto_declaration,
+         value_strings,
+         field_definitions,
+         dissector_function,
+         registration,
+     ]
+     return "\n\n".join(s for s in sections if s)
+
+
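For orientation, a minimal sketch of the assembled output, assuming a hypothetical schema whose fields end up named msg_type and payload (the actual names, offsets, and field lines depend entirely on what infer_format() returned; "..." marks elided sections):

    >>> print(generate_dissector(schema, protocol_name="demo"))
    -- Auto-generated Wireshark dissector for DEMO Protocol
    -- Generated by Oscura v0.9.0
    ...
    local demo_proto = Proto("demo", "DEMO Protocol")
    ...
    demo_proto.fields = {f_msg_type, f_payload}
    ...
    function demo_proto.dissector(buffer, pinfo, tree)
    ...
    register_postdissector(demo_proto)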
+ def _generate_header(protocol_name: str, protocol_description: str, author: str) -> str:
+     """Generate Lua file header with metadata and installation instructions."""
+     safe_name = protocol_name.lower().replace("-", "_").replace(" ", "_")
+     return f"""-- Auto-generated Wireshark dissector for {protocol_description}
+ -- Generated by Oscura v{_OSCURA_VERSION}
+ -- Author: {author}
+ --
+ -- This dissector was automatically generated from inferred protocol structure.
+ -- It includes smart field naming, enum detection, and validation checks.
+ --
+ -- Installation:
+ -- 1. Copy this file to your Wireshark plugins directory:
+ -- - Windows: %APPDATA%\\Wireshark\\plugins
+ -- - Linux: ~/.local/lib/wireshark/plugins
+ -- - macOS: ~/.config/wireshark/plugins
+ -- 2. Restart Wireshark or reload Lua plugins (Ctrl+Shift+L)
+ -- 3. Protocol will appear as "{protocol_name.upper()}" in packet list
+ --
+ -- Usage:
+ -- - For UDP traffic: DissectorTable.get("udp.port"):add(PORT_NUM, {safe_name}_proto)
+ -- - For TCP traffic: DissectorTable.get("tcp.port"):add(PORT_NUM, {safe_name}_proto)
+ -- - Currently registered as postdissector (processes all packets)"""
+
+
+ def _generate_proto_declaration(protocol_name: str, protocol_description: str) -> str:
+     """Generate protocol object declaration with safe naming."""
+     safe_name = protocol_name.lower().replace("-", "_").replace(" ", "_")
+     return f"""-- Protocol declaration
+ local {safe_name}_proto = Proto("{safe_name}", "{protocol_description}")"""
+
+
+ def _generate_value_strings(
+     enum_fields: dict[str, dict[int, str]],
+     field_names: dict[int, str],
+     protocol_name: str,
+     schema: MessageSchema,
+ ) -> str:
+     """Generate value_string tables for enum fields with semantic labels."""
+     if not enum_fields:
+         return ""
+
+     safe_name = protocol_name.lower().replace("-", "_").replace(" ", "_")
+     lines = ["-- Value string tables for enum fields"]
+
+     for field_idx_str, value_map in enum_fields.items():
+         field_idx = int(field_idx_str)
+         field_name = field_names[field_idx]
+         vs_name = f"{safe_name}_{field_name}_vals"
+
+         # Build value string entries with better formatting
+         entries = []
+         for value, label in sorted(value_map.items()):
+             # Escape quotes in labels
+             safe_label = label.replace('"', '\\"')
+             entries.append(f' [{value}] = "{safe_label}",')
+
+         lines.append(f"local {vs_name} = {{")
+         lines.extend(entries)
+         lines.append("}")
+
+     return "\n".join(lines)
+
+
+ def _generate_field_definitions(
+     schema: MessageSchema,
+     field_names: dict[int, str],
+     protocol_name: str,
+     enum_fields: dict[str, dict[int, str]],
+     add_comments: bool,
+ ) -> str:
+     """Generate ProtoField definitions with enhanced descriptions."""
+     safe_name = protocol_name.lower().replace("-", "_").replace(" ", "_")
+     lines = ["-- Field definitions"]
+     field_vars = []
+
+     for idx, field in enumerate(schema.fields):
+         field_name = field_names[idx]
+         var_name = f"f_{field_name}"
+         field_vars.append(var_name)
+
+         fqn = f"{safe_name}.{field_name}"  # Fully qualified name
+         label = _generate_field_label(field, field_name)
+         base, field_type = _get_field_type_and_base(field, str(idx) in enum_fields)
+
+         # Add inline comment for field context
+         if add_comments:
+             comment = f" -- {field.field_type} at offset {field.offset}"
+             if field.confidence < 0.8:
+                 comment += f" (confidence: {field.confidence:.2f})"
+         else:
+             comment = ""
+
+         # Add value_string for enums
+         vs_suffix = ""
+         if str(idx) in enum_fields:
+             vs_name = f"{safe_name}_{field_name}_vals"
+             vs_suffix = f", {vs_name}"
+
+         lines.append(
+             f'local {var_name} = ProtoField.{field_type}("{fqn}", "{label}", '
+             f"{base}{vs_suffix}){comment}"
+         )
+
+     # Register fields with protocol
+     lines.append("")
+     lines.append(f"{safe_name}_proto.fields = {{{', '.join(field_vars)}}}")
+
+     return "\n".join(lines)
+
+
+ def _generate_dissector_function(
+     schema: MessageSchema,
+     field_names: dict[int, str],
+     protocol_name: str,
+     enum_fields: dict[str, dict[int, str]],
+     include_expert_info: bool,
+     add_comments: bool,
+ ) -> str:
+     """Generate main dissector function with enhanced validation."""
+     safe_name = protocol_name.lower().replace("-", "_").replace(" ", "_")
+
+     lines = [
+         "-- Dissector function",
+         f"function {safe_name}_proto.dissector(buffer, pinfo, tree)",
+         " local pkt_len = buffer:len()",
+         f" if pkt_len < {schema.total_size} then",
+         " return 0 -- Not enough data for minimum message size",
+         " end",
+         "",
+         " -- Set protocol column",
+         f' pinfo.cols.protocol = "{protocol_name.upper()}"',
+         "",
+     ]
+
+     # Add info column with dynamic content if we have identifiable fields
+     info_parts = _generate_info_column_content(schema, field_names)
+     if info_parts:
+         lines.append(" -- Set info column with message details")
+         lines.append(f' local info = "{protocol_name.upper()}"')
+         for part in info_parts:
+             lines.append(f" {part}")
+         lines.append(" pinfo.cols.info = info")
+         lines.append("")
+
+     lines.extend(
+         [
+             " -- Create protocol tree",
+             f" local subtree = tree:add({safe_name}_proto, buffer(), "
+             f'"{protocol_name.upper()} Message")',
+             "",
+         ]
+     )
+
+     # Add fields to tree with enhanced structure
+     for idx, field in enumerate(schema.fields):
+         field_name = field_names[idx]
+         var_name = f"f_{field_name}"
+         offset = field.offset
+         size = field.size
+
+         # Add section comment for field groups
+         if add_comments and _is_field_group_boundary(idx, schema):
+             group_name = _get_field_group_name(idx, schema)
+             lines.append(f" -- {group_name}")
+
+         # Add field to tree
+         field_tree_var = f"field_{idx}_tree"
+         if add_comments:
+             lines.append(
+                 f" -- {field.field_type.upper()}: {field_name} (offset={offset}, size={size})"
+             )
+
+         # For multi-byte fields, use little-endian by default (most common)
+         if size > 1 and field.field_type not in ["data", "checksum"]:
+             endian_comment = " -- Little-endian" if add_comments else ""
+             lines.append(
+                 f" local {field_tree_var} = subtree:add_le({var_name}, "
+                 f"buffer({offset}, {size})){endian_comment}"
+             )
+         else:
+             lines.append(
+                 f" local {field_tree_var} = subtree:add({var_name}, buffer({offset}, {size}))"
+             )
+
+         # Add expert info for special fields
+         if include_expert_info:
+             expert_lines = _generate_expert_info(field, field_name, offset, size, idx, enum_fields)
+             if expert_lines:
+                 lines.extend(f" {line}" for line in expert_lines)
+
+         lines.append("")
+
+     lines.append(f" return {schema.total_size}")
+     lines.append("end")
+
+     return "\n".join(lines)
+
+
+ def _generate_info_column_content(schema: MessageSchema, field_names: dict[int, str]) -> list[str]:
+     """Generate code to populate info column with message details.
+
+     Args:
+         schema: Message schema
+         field_names: Field name mapping
+
+     Returns:
+         List of Lua code lines to build info string
+     """
+     parts = []
+
+     # Look for counter or sequence number
+     for field in schema.fields:
+         if field.field_type == "counter" and field.size <= 2:
+             parts.append(f'info = info .. " Seq=" .. buffer({field.offset}, {field.size}):uint()')
+             break
+
+     # Look for length field
+     for field in schema.fields:
+         if field.field_type == "length" and field.size <= 2:
+             parts.append(f'info = info .. " Len=" .. buffer({field.offset}, {field.size}):uint()')
+             break
+
+     return parts
+
+
+ def _is_field_group_boundary(idx: int, schema: MessageSchema) -> bool:
+     """Check if field is at a logical group boundary (e.g., header->payload)."""
+     if idx == 0:
+         return True
+     field = schema.fields[idx]
+     prev_field = schema.fields[idx - 1]
+
+     # Header to payload transition
+     if prev_field.offset < schema.header_size <= field.offset:
+         return True
+
+     # Type changes
+     return prev_field.field_type != field.field_type
+
+
+ def _get_field_group_name(idx: int, schema: MessageSchema) -> str:
+     """Get name for field group at given index."""
+     field = schema.fields[idx]
+
+     if field.offset < schema.header_size:
+         return "Header Fields"
+     elif field.field_type == "checksum":
+         return "Checksum/Validation"
+     elif field.field_type == "data":
+         return "Payload Data"
+     else:
+         return f"{field.field_type.title()} Fields"
+
+
+ def _generate_expert_info(
+     field: InferredField,
+     field_name: str,
+     offset: int,
+     size: int,
+     field_idx: int,
+     enum_fields: dict[str, dict[int, str]],
+ ) -> list[str]:
+     """Generate expert info annotations for field validation."""
+     lines = []
+     field_tree_var = f"field_{field_idx}_tree"
+
+     # Checksum validation placeholder with more detailed instructions
+     if field.field_type == "checksum":
+         lines.append("")
+         lines.append("-- TODO: Implement checksum validation")
+         lines.append("-- Steps:")
+         lines.append("-- 1. Determine checksum algorithm (CRC16, CRC32, etc.)")
+         lines.append("-- 2. Calculate checksum over appropriate data range")
+         lines.append("-- 3. Compare with field value")
+         lines.append("-- 4. Uncomment validation code below")
+         lines.append("--")
+         lines.append("-- local calculated_checksum = calculate_checksum(buffer, ...)")
+         lines.append(f"-- local expected_checksum = buffer({offset}, {size}):uint()")
+         lines.append("-- if calculated_checksum ~= expected_checksum then")
+         lines.append(
+             f'-- {field_tree_var}:add_expert_info(PI_CHECKSUM, PI_ERROR, "Invalid checksum")'
+         )
+         lines.append("-- end")
+
+     # Reserved field warnings with value display
+     elif field.field_type == "reserved":
+         lines.append(f"local {field_name}_value = buffer({offset}, {size}):uint()")
+         lines.append(f"if {field_name}_value ~= 0 then")
+         lines.append(
+             f" {field_tree_var}:add_expert_info(PI_PROTOCOL, PI_WARN, "
+             f'"Reserved field has non-zero value: 0x" .. '
+             f'string.format("%0{size * 2}X", {field_name}_value))'
+         )
+         lines.append("end")
+
+     # Enum value validation with detailed error messages
+     elif str(field_idx) in enum_fields:
+         value_map = enum_fields[str(field_idx)]
+         valid_values = sorted(value_map.keys())
+         lines.append(f"local {field_name}_value = buffer({offset}, {size}):uint()")
+
+         # More efficient validation using Lua table lookup
+         lines.append("local valid_values = {")
+         for v in valid_values:
+             lines.append(f" [{v}] = true,")
+         lines.append("}")
+
+         lines.append(f"if not valid_values[{field_name}_value] then")
+         lines.append(
+             f" {field_tree_var}:add_expert_info(PI_MALFORMED, PI_WARN, "
+             f'"Unexpected {field_name} value: 0x" .. '
+             f'string.format("%0{size * 2}X", {field_name}_value) .. '
+             f'" (valid: {", ".join(f"0x{v:X}" for v in valid_values)}")")'
+         )
+         lines.append("end")
+
+     # Length field validation
+     elif field.field_type == "length":
+         lines.append(f"local {field_name}_value = buffer({offset}, {size}):uint()")
+         lines.append(f"if {field_name}_value > buffer:len() then")
+         lines.append(
+             f" {field_tree_var}:add_expert_info(PI_MALFORMED, PI_ERROR, "
+             f'"Length field exceeds packet size")'
+         )
+         lines.append("end")
+
+     return lines
+
+
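As an illustration of the validation emitted here (all values hypothetical): for a 2-byte length field at offset 2, named msg_length and sitting at field index 2, the length branch appends Lua along these lines:

    local msg_length_value = buffer(2, 2):uint()
    if msg_length_value > buffer:len() then
        field_2_tree:add_expert_info(PI_MALFORMED, PI_ERROR, "Length field exceeds packet size")
    end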
+ def _generate_registration(protocol_name: str) -> str:
+     """Generate protocol registration code with usage examples."""
+     safe_name = protocol_name.lower().replace("-", "_").replace(" ", "_")
+     return f"""-- Protocol registration
+ --
+ -- By default, registered as a postdissector (processes all packets).
+ -- To register for specific ports/protocols, use one of:
+ --
+ -- UDP port:
+ -- DissectorTable.get("udp.port"):add(12345, {safe_name}_proto)
+ --
+ -- TCP port:
+ -- DissectorTable.get("tcp.port"):add(12345, {safe_name}_proto)
+ --
+ -- Heuristic dissector (auto-detect based on packet content):
+ -- function {safe_name}_proto.heuristic_checker(buffer, pinfo, tree)
+ -- -- Check for protocol signature/magic bytes
+ -- if buffer:len() < 4 then return false end
+ -- -- Add your detection logic here
+ -- -- if buffer(0, 2):uint() == 0xAABB then
+ -- -- {safe_name}_proto.dissector(buffer, pinfo, tree)
+ -- -- return true
+ -- -- end
+ -- return false
+ -- end
+ -- {safe_name}_proto:register_heuristic("udp", {safe_name}_proto.heuristic_checker)
+
+ register_postdissector({safe_name}_proto)"""
+
+
+ def _generate_smart_field_names(
+     schema: MessageSchema, enum_fields: dict[str, dict[int, str]]
+ ) -> dict[int, str]:
+     """Generate semantic field names based on field types, positions, and patterns.
+
+     Enhanced naming that considers:
+     - Field type and characteristics
+     - Position in message (header vs payload)
+     - Relationships to other fields
+     - Enum detection results
+     - Common protocol patterns
+
+     Args:
+         schema: Message schema with inferred fields
+         enum_fields: Detected enum fields
+
+     Returns:
+         Dictionary mapping field index to smart name
+     """
+     field_names = {}
+     type_counters: dict[str, int] = {}
+
+     for idx, field in enumerate(schema.fields):
+         field_type = field.field_type
+
+         # Special naming for specific field types with enhanced heuristics
+         if field_type == "constant":
+             # Check if this looks like a magic/sync byte or protocol version
+             if idx == 0 and field.offset == 0:
+                 if field.size == 1:
+                     name = "sync_byte"
+                 elif field.size == 2:
+                     name = "magic"
+                 else:
+                     name = "protocol_signature"
+             elif field.offset < schema.header_size and field.size == 1:
+                 # Could be protocol version or flags
+                 if field.entropy < 0.5:  # Very low entropy suggests version
+                     name = "version"
+                 else:
+                     name = "flags"
+             else:
+                 type_counters[field_type] = type_counters.get(field_type, 0) + 1
+                 name = f"constant_{type_counters[field_type]}"
+
+         elif field_type == "counter":
+             type_counters[field_type] = type_counters.get(field_type, 0) + 1
+             if type_counters[field_type] == 1:
+                 # First counter is usually sequence number
+                 name = "sequence_num"
+             elif field.offset < schema.header_size:
+                 name = "msg_counter"
+             else:
+                 name = f"counter_{type_counters[field_type]}"
+
+         elif field_type == "checksum":
+             # Name by size and position
+             if field.offset + field.size == schema.total_size:
+                 # Checksum at end of message
+                 suffix = "trailer"
+             elif field.offset < schema.header_size:
+                 suffix = "header"
+             else:
+                 suffix = "payload"
+
+             if field.size == 1:
+                 name = f"checksum_{suffix}"
+             elif field.size == 2:
+                 name = f"crc16_{suffix}"
+             elif field.size == 4:
+                 name = f"crc32_{suffix}"
+             else:
+                 name = f"checksum_{field.size}b"
+
+         elif field_type == "length":
+             type_counters[field_type] = type_counters.get(field_type, 0) + 1
+             if type_counters[field_type] == 1:
+                 if field.offset < 4:  # Early in message
+                     name = "msg_length"
+                 else:
+                     name = "payload_length"
+             else:
+                 name = f"length_{type_counters[field_type]}"
+
+         elif field_type == "timestamp":
+             type_counters[field_type] = type_counters.get(field_type, 0) + 1
+             if field.size == 4:
+                 name = "timestamp_sec" if type_counters[field_type] == 1 else "timestamp"
+             elif field.size == 8:
+                 name = "timestamp_usec" if type_counters[field_type] == 1 else "timestamp"
+             else:
+                 name = f"timestamp_{type_counters[field_type]}"
+
+         elif field_type == "data":
+             # Distinguish header vs payload data
+             if field.offset < schema.header_size:
+                 type_counters["header_data"] = type_counters.get("header_data", 0) + 1
+                 if type_counters["header_data"] == 1:
+                     name = "header_data"
+                 else:
+                     name = f"header_data_{type_counters['header_data']}"
+             else:
+                 # Check if this is the main payload
+                 remaining_fields = len(schema.fields) - idx - 1
+                 if remaining_fields <= 1:  # Last or second-to-last field
+                     name = "payload"
+                 else:
+                     type_counters["payload"] = type_counters.get("payload", 0) + 1
+                     name = f"payload_{type_counters['payload']}"
+
+         elif field_type == "enum":
+             # Check if this might be a specific enum type
+             if str(idx) in enum_fields:
+                 values = list(enum_fields[str(idx)].keys())
+                 if all(v < 256 for v in values):  # Single byte enum
+                     if field.offset < 4:
+                         name = "msg_type"
+                     else:
+                         name = "status_code"
+                 else:
+                     name = "enum_field"
+             else:
+                 type_counters[field_type] = type_counters.get(field_type, 0) + 1
+                 name = f"enum_{type_counters[field_type]}"
+
+         elif field_type == "reserved":
+             type_counters[field_type] = type_counters.get(field_type, 0) + 1
+             name = f"reserved_{type_counters[field_type]}"
+
+         elif field_type == "float":
+             type_counters[field_type] = type_counters.get(field_type, 0) + 1
+             if field.size == 4:
+                 name = f"float32_{type_counters[field_type]}"
+             elif field.size == 8:
+                 name = f"float64_{type_counters[field_type]}"
+             else:
+                 name = f"float_{type_counters[field_type]}"
+
+         else:  # unknown
+             name = f"field_{idx}"
+
+         field_names[idx] = name
+
+     return field_names
+
+
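To make these heuristics concrete, a hypothetical five-field message (a 1-byte constant at offset 0, a counter, a 2-byte length at offset 2, a data block past the header, and a trailing 2-byte checksum) would come out named roughly:

    {0: "sync_byte", 1: "sequence_num", 2: "msg_length", 3: "payload", 4: "crc16_trailer"}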
+ def _generate_field_label(field: InferredField, field_name: str) -> str:
+     """Generate human-readable field label with enhanced context.
+
+     Args:
+         field: Inferred field object
+         field_name: Generated field name
+
+     Returns:
+         Human-readable label for Wireshark UI
+     """
+     # Start with field name formatted nicely
+     base_label = field_name.replace("_", " ").title()
+
+     # Add size information
+     size_str = f"{field.size} byte" if field.size == 1 else f"{field.size} bytes"
+
+     # Add type hint if name doesn't make it obvious
+     if field.field_type not in field_name.lower():
+         type_hint = f" ({field.field_type})"
+     else:
+         type_hint = ""
+
+     return f"{base_label} [{size_str}]{type_hint}"
+
+
+ def _get_field_type_and_base(field: InferredField, is_enum: bool) -> tuple[str, str]:
+     """Determine Lua ProtoField type and base display format.
+
+     Returns:
+         Tuple of (base_format, field_type)
+         e.g., ("base.HEX", "uint16") or ("base.DEC", "uint8")
+     """
+     # Determine field type
+     if field.size == 1:
+         lua_type = "uint8"
+     elif field.size == 2:
+         lua_type = "uint16"
+     elif field.size == 4:
+         lua_type = "uint32"
+     elif field.size == 8:
+         lua_type = "uint64"
+     else:
+         # Use bytes for larger fields
+         return ("base.NONE", "bytes")
+
+     # Determine base display format
+     if field.field_type in ["checksum", "data"]:
+         base = "base.HEX"
+     elif field.field_type in ["counter", "length", "timestamp"] or is_enum:
+         base = "base.DEC"
+     elif field.field_type == "constant":
+         base = "base.HEX"
+     else:
+         base = "base.DEC"
+
+     return (base, lua_type)
+
+
+ def _detect_enum_fields(schema: MessageSchema, max_unique: int) -> dict[str, dict[int, str]]:
+     """Detect enum fields from limited value sets with enhanced labeling.
+
+     Args:
+         schema: Message schema with inferred fields
+         max_unique: Maximum unique values to treat as enum
+
+     Returns:
+         Dictionary mapping field index (as string) to {value: label} mapping
+     """
+     enum_fields: dict[str, dict[int, str]] = {}
+
+     for idx, field in enumerate(schema.fields):
+         # Only consider fields with sample values
+         if not field.values_seen:
+             continue
+
+         # Skip non-integer fields (tuples)
+         if isinstance(field.values_seen[0], tuple):
+             continue
+
+         # Check if value set is small enough to be enum
+         unique_values = set(field.values_seen)
+         if len(unique_values) <= max_unique and len(unique_values) > 1:
+             # Generate semantic enum labels
+             value_map = _generate_enum_labels(field, list(unique_values), idx, schema)
+             enum_fields[str(idx)] = value_map
+
+     return enum_fields
+
+
+ def _generate_enum_labels(
+     field: InferredField, values: list[Any], field_idx: int, schema: MessageSchema
+ ) -> dict[int, str]:
+     """Generate human-readable labels for enum values with enhanced semantics.
+
+     Args:
+         field: Field object
+         values: List of unique values
+         field_idx: Field index in schema
+         schema: Complete message schema for context
+
+     Returns:
+         Dictionary mapping value to semantic label
+     """
+     labels = {}
+
+     for val in values:
+         if not isinstance(val, int):
+             continue
+
+         # Generate label based on field type and value patterns
+         if field.field_type == "counter":
+             # Counters get simple count labels
+             labels[val] = f"Count {val}"
+
+         elif field.field_type == "constant":
+             # Constants shown in hex
+             labels[val] = f"0x{val:02X}"
+
+         elif field.field_type == "enum":
+             # Check if this looks like a message type (first few fields)
+             if field_idx < 3 and field.size == 1:
+                 # Common message type patterns
+                 msg_types = {
+                     0x00: "REQUEST",
+                     0x01: "RESPONSE",
+                     0x02: "ACK",
+                     0x03: "NACK",
+                     0x04: "ERROR",
+                     0x10: "DATA",
+                     0x20: "CONTROL",
+                     0xFF: "BROADCAST",
+                 }
+                 labels[val] = msg_types.get(val, f"Type 0x{val:02X}")
+
+             # Check if this looks like status codes
+             elif field_idx >= schema.header_size and field.size == 1:
+                 status_codes = {
+                     0x00: "OK",
+                     0x01: "WARNING",
+                     0x02: "ERROR",
+                     0xFF: "INVALID",
+                 }
+                 labels[val] = status_codes.get(val, f"Status {val}")
+             else:
+                 labels[val] = f"Value {val} (0x{val:02X})"
+
+         else:
+             # Generic labels with both decimal and hex
+             if val < 256:
+                 labels[val] = f"Value {val} (0x{val:02X})"
+             else:
+                 labels[val] = f"Value {val} (0x{val:04X})"
+
+     return labels
+
+
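To make the data flow concrete (values hypothetical): a 1-byte enum field at index 1 whose observed values were 0x01, 0x02, and 0x04 would yield an entry such as

    {"1": {1: "RESPONSE", 2: "ACK", 4: "ERROR"}}

which _generate_value_strings then renders as a Lua value_string table keyed by those values.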
+ def export_to_file(
+     schema: MessageSchema,
+     output_path: str,
+     protocol_name: str = "unknown",
+     **kwargs: Any,
+ ) -> None:
+     """Generate dissector and write to file.
+
+     Convenience function to generate Lua dissector and save to disk.
+
+     Args:
+         schema: Message schema from inference
+         output_path: Path to output .lua file
+         protocol_name: Protocol short name
+         **kwargs: Additional arguments passed to generate_dissector()
+
+     Example:
+         >>> from oscura.inference.message_format import infer_format
+         >>> from oscura.exporters.wireshark import export_to_file
+         >>> messages = [b'\\xAA\\x01DATA', b'\\xAA\\x02DATA']
+         >>> schema = infer_format(messages)
+         >>> export_to_file(schema, "custom.lua", "custom")
+     """
+     lua_code = generate_dissector(schema, protocol_name, **kwargs)
+
+     with open(output_path, "w", encoding="utf-8") as f:
+         f.write(lua_code)
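Taken together with the expanded inference in oscura/inference/message_format.py, a typical end-to-end use of the new exporter looks like the following sketch (messages, file name, and port number are illustrative, not part of the package):

    from oscura.inference.message_format import infer_format
    from oscura.exporters.wireshark import export_to_file

    messages = [b"\xAA\x01\x00\x05Hello", b"\xAA\x02\x00\x05World"]
    schema = infer_format(messages)
    export_to_file(schema, "custom.lua", protocol_name="custom")

    # Copy custom.lua into the Wireshark plugins directory listed in the
    # generated header, then optionally swap the default postdissector
    # registration for a port binding, e.g.
    # DissectorTable.get("udp.port"):add(12345, custom_proto)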