oscura 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscura/__init__.py +813 -8
- oscura/__main__.py +392 -0
- oscura/analyzers/__init__.py +37 -0
- oscura/analyzers/digital/__init__.py +177 -0
- oscura/analyzers/digital/bus.py +691 -0
- oscura/analyzers/digital/clock.py +805 -0
- oscura/analyzers/digital/correlation.py +720 -0
- oscura/analyzers/digital/edges.py +632 -0
- oscura/analyzers/digital/extraction.py +413 -0
- oscura/analyzers/digital/quality.py +878 -0
- oscura/analyzers/digital/signal_quality.py +877 -0
- oscura/analyzers/digital/thresholds.py +708 -0
- oscura/analyzers/digital/timing.py +1104 -0
- oscura/analyzers/eye/__init__.py +46 -0
- oscura/analyzers/eye/diagram.py +434 -0
- oscura/analyzers/eye/metrics.py +555 -0
- oscura/analyzers/jitter/__init__.py +83 -0
- oscura/analyzers/jitter/ber.py +333 -0
- oscura/analyzers/jitter/decomposition.py +759 -0
- oscura/analyzers/jitter/measurements.py +413 -0
- oscura/analyzers/jitter/spectrum.py +220 -0
- oscura/analyzers/measurements.py +40 -0
- oscura/analyzers/packet/__init__.py +171 -0
- oscura/analyzers/packet/daq.py +1077 -0
- oscura/analyzers/packet/metrics.py +437 -0
- oscura/analyzers/packet/parser.py +327 -0
- oscura/analyzers/packet/payload.py +2156 -0
- oscura/analyzers/packet/payload_analysis.py +1312 -0
- oscura/analyzers/packet/payload_extraction.py +236 -0
- oscura/analyzers/packet/payload_patterns.py +670 -0
- oscura/analyzers/packet/stream.py +359 -0
- oscura/analyzers/patterns/__init__.py +266 -0
- oscura/analyzers/patterns/clustering.py +1036 -0
- oscura/analyzers/patterns/discovery.py +539 -0
- oscura/analyzers/patterns/learning.py +797 -0
- oscura/analyzers/patterns/matching.py +1091 -0
- oscura/analyzers/patterns/periodic.py +650 -0
- oscura/analyzers/patterns/sequences.py +767 -0
- oscura/analyzers/power/__init__.py +116 -0
- oscura/analyzers/power/ac_power.py +391 -0
- oscura/analyzers/power/basic.py +383 -0
- oscura/analyzers/power/conduction.py +314 -0
- oscura/analyzers/power/efficiency.py +297 -0
- oscura/analyzers/power/ripple.py +356 -0
- oscura/analyzers/power/soa.py +372 -0
- oscura/analyzers/power/switching.py +479 -0
- oscura/analyzers/protocol/__init__.py +150 -0
- oscura/analyzers/protocols/__init__.py +150 -0
- oscura/analyzers/protocols/base.py +500 -0
- oscura/analyzers/protocols/can.py +620 -0
- oscura/analyzers/protocols/can_fd.py +448 -0
- oscura/analyzers/protocols/flexray.py +405 -0
- oscura/analyzers/protocols/hdlc.py +399 -0
- oscura/analyzers/protocols/i2c.py +368 -0
- oscura/analyzers/protocols/i2s.py +296 -0
- oscura/analyzers/protocols/jtag.py +393 -0
- oscura/analyzers/protocols/lin.py +445 -0
- oscura/analyzers/protocols/manchester.py +333 -0
- oscura/analyzers/protocols/onewire.py +501 -0
- oscura/analyzers/protocols/spi.py +334 -0
- oscura/analyzers/protocols/swd.py +325 -0
- oscura/analyzers/protocols/uart.py +393 -0
- oscura/analyzers/protocols/usb.py +495 -0
- oscura/analyzers/signal_integrity/__init__.py +63 -0
- oscura/analyzers/signal_integrity/embedding.py +294 -0
- oscura/analyzers/signal_integrity/equalization.py +370 -0
- oscura/analyzers/signal_integrity/sparams.py +484 -0
- oscura/analyzers/spectral/__init__.py +53 -0
- oscura/analyzers/spectral/chunked.py +273 -0
- oscura/analyzers/spectral/chunked_fft.py +571 -0
- oscura/analyzers/spectral/chunked_wavelet.py +391 -0
- oscura/analyzers/spectral/fft.py +92 -0
- oscura/analyzers/statistical/__init__.py +250 -0
- oscura/analyzers/statistical/checksum.py +923 -0
- oscura/analyzers/statistical/chunked_corr.py +228 -0
- oscura/analyzers/statistical/classification.py +778 -0
- oscura/analyzers/statistical/entropy.py +1113 -0
- oscura/analyzers/statistical/ngrams.py +614 -0
- oscura/analyzers/statistics/__init__.py +119 -0
- oscura/analyzers/statistics/advanced.py +885 -0
- oscura/analyzers/statistics/basic.py +263 -0
- oscura/analyzers/statistics/correlation.py +630 -0
- oscura/analyzers/statistics/distribution.py +298 -0
- oscura/analyzers/statistics/outliers.py +463 -0
- oscura/analyzers/statistics/streaming.py +93 -0
- oscura/analyzers/statistics/trend.py +520 -0
- oscura/analyzers/validation.py +598 -0
- oscura/analyzers/waveform/__init__.py +36 -0
- oscura/analyzers/waveform/measurements.py +943 -0
- oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
- oscura/analyzers/waveform/spectral.py +1689 -0
- oscura/analyzers/waveform/wavelets.py +298 -0
- oscura/api/__init__.py +62 -0
- oscura/api/dsl.py +538 -0
- oscura/api/fluent.py +571 -0
- oscura/api/operators.py +498 -0
- oscura/api/optimization.py +392 -0
- oscura/api/profiling.py +396 -0
- oscura/automotive/__init__.py +73 -0
- oscura/automotive/can/__init__.py +52 -0
- oscura/automotive/can/analysis.py +356 -0
- oscura/automotive/can/checksum.py +250 -0
- oscura/automotive/can/correlation.py +212 -0
- oscura/automotive/can/discovery.py +355 -0
- oscura/automotive/can/message_wrapper.py +375 -0
- oscura/automotive/can/models.py +385 -0
- oscura/automotive/can/patterns.py +381 -0
- oscura/automotive/can/session.py +452 -0
- oscura/automotive/can/state_machine.py +300 -0
- oscura/automotive/can/stimulus_response.py +461 -0
- oscura/automotive/dbc/__init__.py +15 -0
- oscura/automotive/dbc/generator.py +156 -0
- oscura/automotive/dbc/parser.py +146 -0
- oscura/automotive/dtc/__init__.py +30 -0
- oscura/automotive/dtc/database.py +3036 -0
- oscura/automotive/j1939/__init__.py +14 -0
- oscura/automotive/j1939/decoder.py +745 -0
- oscura/automotive/loaders/__init__.py +35 -0
- oscura/automotive/loaders/asc.py +98 -0
- oscura/automotive/loaders/blf.py +77 -0
- oscura/automotive/loaders/csv_can.py +136 -0
- oscura/automotive/loaders/dispatcher.py +136 -0
- oscura/automotive/loaders/mdf.py +331 -0
- oscura/automotive/loaders/pcap.py +132 -0
- oscura/automotive/obd/__init__.py +14 -0
- oscura/automotive/obd/decoder.py +707 -0
- oscura/automotive/uds/__init__.py +48 -0
- oscura/automotive/uds/decoder.py +265 -0
- oscura/automotive/uds/models.py +64 -0
- oscura/automotive/visualization.py +369 -0
- oscura/batch/__init__.py +55 -0
- oscura/batch/advanced.py +627 -0
- oscura/batch/aggregate.py +300 -0
- oscura/batch/analyze.py +139 -0
- oscura/batch/logging.py +487 -0
- oscura/batch/metrics.py +556 -0
- oscura/builders/__init__.py +41 -0
- oscura/builders/signal_builder.py +1131 -0
- oscura/cli/__init__.py +14 -0
- oscura/cli/batch.py +339 -0
- oscura/cli/characterize.py +273 -0
- oscura/cli/compare.py +775 -0
- oscura/cli/decode.py +551 -0
- oscura/cli/main.py +247 -0
- oscura/cli/shell.py +350 -0
- oscura/comparison/__init__.py +66 -0
- oscura/comparison/compare.py +397 -0
- oscura/comparison/golden.py +487 -0
- oscura/comparison/limits.py +391 -0
- oscura/comparison/mask.py +434 -0
- oscura/comparison/trace_diff.py +30 -0
- oscura/comparison/visualization.py +481 -0
- oscura/compliance/__init__.py +70 -0
- oscura/compliance/advanced.py +756 -0
- oscura/compliance/masks.py +363 -0
- oscura/compliance/reporting.py +483 -0
- oscura/compliance/testing.py +298 -0
- oscura/component/__init__.py +38 -0
- oscura/component/impedance.py +365 -0
- oscura/component/reactive.py +598 -0
- oscura/component/transmission_line.py +312 -0
- oscura/config/__init__.py +191 -0
- oscura/config/defaults.py +254 -0
- oscura/config/loader.py +348 -0
- oscura/config/memory.py +271 -0
- oscura/config/migration.py +458 -0
- oscura/config/pipeline.py +1077 -0
- oscura/config/preferences.py +530 -0
- oscura/config/protocol.py +875 -0
- oscura/config/schema.py +713 -0
- oscura/config/settings.py +420 -0
- oscura/config/thresholds.py +599 -0
- oscura/convenience.py +457 -0
- oscura/core/__init__.py +299 -0
- oscura/core/audit.py +457 -0
- oscura/core/backend_selector.py +405 -0
- oscura/core/cache.py +590 -0
- oscura/core/cancellation.py +439 -0
- oscura/core/confidence.py +225 -0
- oscura/core/config.py +506 -0
- oscura/core/correlation.py +216 -0
- oscura/core/cross_domain.py +422 -0
- oscura/core/debug.py +301 -0
- oscura/core/edge_cases.py +541 -0
- oscura/core/exceptions.py +535 -0
- oscura/core/gpu_backend.py +523 -0
- oscura/core/lazy.py +832 -0
- oscura/core/log_query.py +540 -0
- oscura/core/logging.py +931 -0
- oscura/core/logging_advanced.py +952 -0
- oscura/core/memoize.py +171 -0
- oscura/core/memory_check.py +274 -0
- oscura/core/memory_guard.py +290 -0
- oscura/core/memory_limits.py +336 -0
- oscura/core/memory_monitor.py +453 -0
- oscura/core/memory_progress.py +465 -0
- oscura/core/memory_warnings.py +315 -0
- oscura/core/numba_backend.py +362 -0
- oscura/core/performance.py +352 -0
- oscura/core/progress.py +524 -0
- oscura/core/provenance.py +358 -0
- oscura/core/results.py +331 -0
- oscura/core/types.py +504 -0
- oscura/core/uncertainty.py +383 -0
- oscura/discovery/__init__.py +52 -0
- oscura/discovery/anomaly_detector.py +672 -0
- oscura/discovery/auto_decoder.py +415 -0
- oscura/discovery/comparison.py +497 -0
- oscura/discovery/quality_validator.py +528 -0
- oscura/discovery/signal_detector.py +769 -0
- oscura/dsl/__init__.py +73 -0
- oscura/dsl/commands.py +246 -0
- oscura/dsl/interpreter.py +455 -0
- oscura/dsl/parser.py +689 -0
- oscura/dsl/repl.py +172 -0
- oscura/exceptions.py +59 -0
- oscura/exploratory/__init__.py +111 -0
- oscura/exploratory/error_recovery.py +642 -0
- oscura/exploratory/fuzzy.py +513 -0
- oscura/exploratory/fuzzy_advanced.py +786 -0
- oscura/exploratory/legacy.py +831 -0
- oscura/exploratory/parse.py +358 -0
- oscura/exploratory/recovery.py +275 -0
- oscura/exploratory/sync.py +382 -0
- oscura/exploratory/unknown.py +707 -0
- oscura/export/__init__.py +25 -0
- oscura/export/wireshark/README.md +265 -0
- oscura/export/wireshark/__init__.py +47 -0
- oscura/export/wireshark/generator.py +312 -0
- oscura/export/wireshark/lua_builder.py +159 -0
- oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
- oscura/export/wireshark/type_mapping.py +165 -0
- oscura/export/wireshark/validator.py +105 -0
- oscura/exporters/__init__.py +94 -0
- oscura/exporters/csv.py +303 -0
- oscura/exporters/exporters.py +44 -0
- oscura/exporters/hdf5.py +219 -0
- oscura/exporters/html_export.py +701 -0
- oscura/exporters/json_export.py +291 -0
- oscura/exporters/markdown_export.py +367 -0
- oscura/exporters/matlab_export.py +354 -0
- oscura/exporters/npz_export.py +219 -0
- oscura/exporters/spice_export.py +210 -0
- oscura/extensibility/__init__.py +131 -0
- oscura/extensibility/docs.py +752 -0
- oscura/extensibility/extensions.py +1125 -0
- oscura/extensibility/logging.py +259 -0
- oscura/extensibility/measurements.py +485 -0
- oscura/extensibility/plugins.py +414 -0
- oscura/extensibility/registry.py +346 -0
- oscura/extensibility/templates.py +913 -0
- oscura/extensibility/validation.py +651 -0
- oscura/filtering/__init__.py +89 -0
- oscura/filtering/base.py +563 -0
- oscura/filtering/convenience.py +564 -0
- oscura/filtering/design.py +725 -0
- oscura/filtering/filters.py +32 -0
- oscura/filtering/introspection.py +605 -0
- oscura/guidance/__init__.py +24 -0
- oscura/guidance/recommender.py +429 -0
- oscura/guidance/wizard.py +518 -0
- oscura/inference/__init__.py +251 -0
- oscura/inference/active_learning/README.md +153 -0
- oscura/inference/active_learning/__init__.py +38 -0
- oscura/inference/active_learning/lstar.py +257 -0
- oscura/inference/active_learning/observation_table.py +230 -0
- oscura/inference/active_learning/oracle.py +78 -0
- oscura/inference/active_learning/teachers/__init__.py +15 -0
- oscura/inference/active_learning/teachers/simulator.py +192 -0
- oscura/inference/adaptive_tuning.py +453 -0
- oscura/inference/alignment.py +653 -0
- oscura/inference/bayesian.py +943 -0
- oscura/inference/binary.py +1016 -0
- oscura/inference/crc_reverse.py +711 -0
- oscura/inference/logic.py +288 -0
- oscura/inference/message_format.py +1305 -0
- oscura/inference/protocol.py +417 -0
- oscura/inference/protocol_dsl.py +1084 -0
- oscura/inference/protocol_library.py +1230 -0
- oscura/inference/sequences.py +809 -0
- oscura/inference/signal_intelligence.py +1509 -0
- oscura/inference/spectral.py +215 -0
- oscura/inference/state_machine.py +634 -0
- oscura/inference/stream.py +918 -0
- oscura/integrations/__init__.py +59 -0
- oscura/integrations/llm.py +1827 -0
- oscura/jupyter/__init__.py +32 -0
- oscura/jupyter/display.py +268 -0
- oscura/jupyter/magic.py +334 -0
- oscura/loaders/__init__.py +526 -0
- oscura/loaders/binary.py +69 -0
- oscura/loaders/configurable.py +1255 -0
- oscura/loaders/csv.py +26 -0
- oscura/loaders/csv_loader.py +473 -0
- oscura/loaders/hdf5.py +9 -0
- oscura/loaders/hdf5_loader.py +510 -0
- oscura/loaders/lazy.py +370 -0
- oscura/loaders/mmap_loader.py +583 -0
- oscura/loaders/numpy_loader.py +436 -0
- oscura/loaders/pcap.py +432 -0
- oscura/loaders/preprocessing.py +368 -0
- oscura/loaders/rigol.py +287 -0
- oscura/loaders/sigrok.py +321 -0
- oscura/loaders/tdms.py +367 -0
- oscura/loaders/tektronix.py +711 -0
- oscura/loaders/validation.py +584 -0
- oscura/loaders/vcd.py +464 -0
- oscura/loaders/wav.py +233 -0
- oscura/math/__init__.py +45 -0
- oscura/math/arithmetic.py +824 -0
- oscura/math/interpolation.py +413 -0
- oscura/onboarding/__init__.py +39 -0
- oscura/onboarding/help.py +498 -0
- oscura/onboarding/tutorials.py +405 -0
- oscura/onboarding/wizard.py +466 -0
- oscura/optimization/__init__.py +19 -0
- oscura/optimization/parallel.py +440 -0
- oscura/optimization/search.py +532 -0
- oscura/pipeline/__init__.py +43 -0
- oscura/pipeline/base.py +338 -0
- oscura/pipeline/composition.py +242 -0
- oscura/pipeline/parallel.py +448 -0
- oscura/pipeline/pipeline.py +375 -0
- oscura/pipeline/reverse_engineering.py +1119 -0
- oscura/plugins/__init__.py +122 -0
- oscura/plugins/base.py +272 -0
- oscura/plugins/cli.py +497 -0
- oscura/plugins/discovery.py +411 -0
- oscura/plugins/isolation.py +418 -0
- oscura/plugins/lifecycle.py +959 -0
- oscura/plugins/manager.py +493 -0
- oscura/plugins/registry.py +421 -0
- oscura/plugins/versioning.py +372 -0
- oscura/py.typed +0 -0
- oscura/quality/__init__.py +65 -0
- oscura/quality/ensemble.py +740 -0
- oscura/quality/explainer.py +338 -0
- oscura/quality/scoring.py +616 -0
- oscura/quality/warnings.py +456 -0
- oscura/reporting/__init__.py +248 -0
- oscura/reporting/advanced.py +1234 -0
- oscura/reporting/analyze.py +448 -0
- oscura/reporting/argument_preparer.py +596 -0
- oscura/reporting/auto_report.py +507 -0
- oscura/reporting/batch.py +615 -0
- oscura/reporting/chart_selection.py +223 -0
- oscura/reporting/comparison.py +330 -0
- oscura/reporting/config.py +615 -0
- oscura/reporting/content/__init__.py +39 -0
- oscura/reporting/content/executive.py +127 -0
- oscura/reporting/content/filtering.py +191 -0
- oscura/reporting/content/minimal.py +257 -0
- oscura/reporting/content/verbosity.py +162 -0
- oscura/reporting/core.py +508 -0
- oscura/reporting/core_formats/__init__.py +17 -0
- oscura/reporting/core_formats/multi_format.py +210 -0
- oscura/reporting/engine.py +836 -0
- oscura/reporting/export.py +366 -0
- oscura/reporting/formatting/__init__.py +129 -0
- oscura/reporting/formatting/emphasis.py +81 -0
- oscura/reporting/formatting/numbers.py +403 -0
- oscura/reporting/formatting/standards.py +55 -0
- oscura/reporting/formatting.py +466 -0
- oscura/reporting/html.py +578 -0
- oscura/reporting/index.py +590 -0
- oscura/reporting/multichannel.py +296 -0
- oscura/reporting/output.py +379 -0
- oscura/reporting/pdf.py +373 -0
- oscura/reporting/plots.py +731 -0
- oscura/reporting/pptx_export.py +360 -0
- oscura/reporting/renderers/__init__.py +11 -0
- oscura/reporting/renderers/pdf.py +94 -0
- oscura/reporting/sections.py +471 -0
- oscura/reporting/standards.py +680 -0
- oscura/reporting/summary_generator.py +368 -0
- oscura/reporting/tables.py +397 -0
- oscura/reporting/template_system.py +724 -0
- oscura/reporting/templates/__init__.py +15 -0
- oscura/reporting/templates/definition.py +205 -0
- oscura/reporting/templates/index.html +649 -0
- oscura/reporting/templates/index.md +173 -0
- oscura/schemas/__init__.py +158 -0
- oscura/schemas/bus_configuration.json +322 -0
- oscura/schemas/device_mapping.json +182 -0
- oscura/schemas/packet_format.json +418 -0
- oscura/schemas/protocol_definition.json +363 -0
- oscura/search/__init__.py +16 -0
- oscura/search/anomaly.py +292 -0
- oscura/search/context.py +149 -0
- oscura/search/pattern.py +160 -0
- oscura/session/__init__.py +34 -0
- oscura/session/annotations.py +289 -0
- oscura/session/history.py +313 -0
- oscura/session/session.py +445 -0
- oscura/streaming/__init__.py +43 -0
- oscura/streaming/chunked.py +611 -0
- oscura/streaming/progressive.py +393 -0
- oscura/streaming/realtime.py +622 -0
- oscura/testing/__init__.py +54 -0
- oscura/testing/synthetic.py +808 -0
- oscura/triggering/__init__.py +68 -0
- oscura/triggering/base.py +229 -0
- oscura/triggering/edge.py +353 -0
- oscura/triggering/pattern.py +344 -0
- oscura/triggering/pulse.py +581 -0
- oscura/triggering/window.py +453 -0
- oscura/ui/__init__.py +48 -0
- oscura/ui/formatters.py +526 -0
- oscura/ui/progressive_display.py +340 -0
- oscura/utils/__init__.py +99 -0
- oscura/utils/autodetect.py +338 -0
- oscura/utils/buffer.py +389 -0
- oscura/utils/lazy.py +407 -0
- oscura/utils/lazy_imports.py +147 -0
- oscura/utils/memory.py +836 -0
- oscura/utils/memory_advanced.py +1326 -0
- oscura/utils/memory_extensions.py +465 -0
- oscura/utils/progressive.py +352 -0
- oscura/utils/windowing.py +362 -0
- oscura/visualization/__init__.py +321 -0
- oscura/visualization/accessibility.py +526 -0
- oscura/visualization/annotations.py +374 -0
- oscura/visualization/axis_scaling.py +305 -0
- oscura/visualization/colors.py +453 -0
- oscura/visualization/digital.py +337 -0
- oscura/visualization/eye.py +420 -0
- oscura/visualization/histogram.py +281 -0
- oscura/visualization/interactive.py +858 -0
- oscura/visualization/jitter.py +702 -0
- oscura/visualization/keyboard.py +394 -0
- oscura/visualization/layout.py +365 -0
- oscura/visualization/optimization.py +1028 -0
- oscura/visualization/palettes.py +446 -0
- oscura/visualization/plot.py +92 -0
- oscura/visualization/power.py +290 -0
- oscura/visualization/power_extended.py +626 -0
- oscura/visualization/presets.py +467 -0
- oscura/visualization/protocols.py +932 -0
- oscura/visualization/render.py +207 -0
- oscura/visualization/rendering.py +444 -0
- oscura/visualization/reverse_engineering.py +791 -0
- oscura/visualization/signal_integrity.py +808 -0
- oscura/visualization/specialized.py +553 -0
- oscura/visualization/spectral.py +811 -0
- oscura/visualization/styles.py +381 -0
- oscura/visualization/thumbnails.py +311 -0
- oscura/visualization/time_axis.py +351 -0
- oscura/visualization/waveform.py +367 -0
- oscura/workflow/__init__.py +13 -0
- oscura/workflow/dag.py +377 -0
- oscura/workflows/__init__.py +58 -0
- oscura/workflows/compliance.py +280 -0
- oscura/workflows/digital.py +272 -0
- oscura/workflows/multi_trace.py +502 -0
- oscura/workflows/power.py +178 -0
- oscura/workflows/protocol.py +492 -0
- oscura/workflows/reverse_engineering.py +639 -0
- oscura/workflows/signal_integrity.py +227 -0
- oscura-0.1.1.dist-info/METADATA +300 -0
- oscura-0.1.1.dist-info/RECORD +463 -0
- oscura-0.1.1.dist-info/entry_points.txt +2 -0
- {oscura-0.0.1.dist-info → oscura-0.1.1.dist-info}/licenses/LICENSE +1 -1
- oscura-0.0.1.dist-info/METADATA +0 -63
- oscura-0.0.1.dist-info/RECORD +0 -5
- {oscura-0.0.1.dist-info → oscura-0.1.1.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,918 @@
|
|
|
1
|
+
"""Stream reassembly and message framing for network protocols.
|
|
2
|
+
|
|
3
|
+
- RE-STR-001: UDP Stream Reconstruction
|
|
4
|
+
- RE-STR-002: TCP Stream Reassembly
|
|
5
|
+
- RE-STR-003: Message Framing and Segmentation
|
|
6
|
+
|
|
7
|
+
This module provides tools for reconstructing application-layer data from
|
|
8
|
+
transport-layer segments, handling out-of-order delivery, gaps, and
|
|
9
|
+
message boundaries.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from collections import defaultdict
|
|
15
|
+
from collections.abc import Callable, Sequence
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from typing import Any, Literal
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class StreamSegment:
    """One transport-layer segment belonging to a stream.

    Implements RE-STR-001, RE-STR-002: Stream segment.

    Only ``sequence_number`` and ``data`` are required; every other field
    has a neutral default so a segment can be built from a bare payload.

    Attributes:
        sequence_number: Sequence number (TCP) or packet number (UDP).
        data: Payload bytes carried by the segment.
        timestamp: Capture timestamp; defaults to ``0.0``.
        src: Source address; defaults to the empty string.
        dst: Destination address; defaults to the empty string.
        flags: Protocol flags; defaults to ``0``.
        is_retransmit: Whether the segment is a retransmission.
    """

    # Required fields come first (dataclass ordering rule).
    sequence_number: int
    data: bytes

    # Optional metadata.
    timestamp: float = 0.0
    src: str = ""
    dst: str = ""
    flags: int = 0
    is_retransmit: bool = False
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class ReassembledStream:
    """The outcome of reassembling all segments of one flow.

    Implements RE-STR-001, RE-STR-002: Reassembled stream.

    Attributes:
        data: Complete reassembled data.
        src: Source address.
        dst: Destination address.
        start_time: Stream start time.
        end_time: Stream end time.
        segments: Number of segments.
        gaps: List of ``(start, end)`` gap ranges; a fresh empty list per
            instance when not supplied.
        retransmits: Number of retransmissions detected.
        out_of_order: Number of out-of-order segments.
    """

    # Mandatory description of the stream.
    data: bytes
    src: str
    dst: str
    start_time: float
    end_time: float
    segments: int

    # Reassembly diagnostics.
    gaps: list[tuple[int, int]] = field(default_factory=list)
    retransmits: int = 0
    out_of_order: int = 0
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class MessageFrame:
    """A single message carved out of stream data.

    Implements RE-STR-003: Message frame.

    Attributes:
        data: Message data.
        offset: Offset in stream.
        length: Message length.
        frame_type: Detected frame type; ``"unknown"`` by default.
        is_complete: Whether message is complete; ``True`` by default.
        sequence: Message sequence number if detected, otherwise ``None``.
    """

    # Where the message sits in the stream and what it contains.
    data: bytes
    offset: int
    length: int

    # Classification details filled in by the framer.
    frame_type: str = "unknown"
    is_complete: bool = True
    sequence: int | None = None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@dataclass
class FramingResult:
    """Everything produced by one message-framing pass.

    Implements RE-STR-003: Framing result.

    Attributes:
        messages: List of extracted messages.
        framing_type: Detected framing type.
        delimiter: Detected delimiter if applicable, otherwise ``None``.
        length_field_offset: Length field offset if applicable.
        length_field_size: Length field size if applicable.
        remaining: Unframed bytes at end; empty by default.
    """

    # Always present.
    messages: list[MessageFrame]
    framing_type: str

    # Populated only for the framing strategy they belong to.
    delimiter: bytes | None = None
    length_field_offset: int | None = None
    length_field_size: int | None = None

    # Trailing bytes that did not form a message.
    remaining: bytes = b""
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class UDPStreamReassembler:
    """Reassemble UDP datagram streams.

    Implements RE-STR-001: UDP Stream Reconstruction.

    UDP doesn't guarantee order, so this reassembler groups datagrams by
    flow, orders each flow by sequence number, and reports gaps and
    out-of-order arrivals.

    Example:
        >>> reassembler = UDPStreamReassembler()
        >>> for packet in packets:
        ...     reassembler.add_segment(packet)
        >>> stream = reassembler.get_stream()
    """

    def __init__(
        self,
        sequence_key: Callable[[Any], int] | None = None,
        max_gap: int = 1000,
    ) -> None:
        """Initialize UDP reassembler.

        Args:
            sequence_key: Function to extract sequence number from packet.
                Only applied to dict packets; when absent, dict packets
                get sequence number 0.
            max_gap: Maximum sequence gap before treating as new stream.
                NOTE(review): stored but not consulted by any method of
                this class.
        """
        self.sequence_key = sequence_key
        self.max_gap = max_gap
        # Segments per flow, kept in arrival order.
        self._segments: dict[str, list[StreamSegment]] = defaultdict(list)

    @staticmethod
    def _resolve_flow_key(flow_key: str | None, src: str, dst: str) -> str:
        """Return the storage key: the explicit key if truthy, else ``"src-dst"``."""
        return flow_key or f"{src}-{dst}"

    @staticmethod
    def _empty_stream() -> ReassembledStream:
        """Return the placeholder stream used when a flow has no segments."""
        return ReassembledStream(
            data=b"",
            src="",
            dst="",
            start_time=0.0,
            end_time=0.0,
            segments=0,
        )

    def add_segment(
        self,
        packet: dict[str, Any] | bytes,
        flow_key: str | None = None,
    ) -> None:
        """Add a UDP datagram to the reassembler.

        Raw ``bytes`` packets carry no metadata, so they are numbered by
        their arrival position within the flow they are stored under. Dict
        packets take their sequence number from ``sequence_key`` and their
        payload, timestamp and addresses from the ``data``/``payload``,
        ``timestamp``, ``src``/``src_ip`` and ``dst``/``dst_ip`` entries.

        Args:
            packet: Packet data or dict with metadata.
            flow_key: Optional flow identifier. When omitted, the flow is
                keyed by ``"<src>-<dst>"`` (``"-"`` for raw bytes).
        """
        if isinstance(packet, bytes):
            key = self._resolve_flow_key(flow_key, "", "")
            # Count within the flow the segment is actually stored under;
            # counting a different bucket would leave every datagram at 0.
            segment = StreamSegment(
                sequence_number=len(self._segments[key]),
                data=packet,
            )
        else:
            seq = 0
            if self.sequence_key is not None:
                try:
                    seq = self.sequence_key(packet)
                except (KeyError, TypeError):
                    # Best effort: a packet lacking the sequence field is
                    # still recorded, with sequence number 0.
                    pass

            segment = StreamSegment(
                sequence_number=seq,
                data=packet.get("data", packet.get("payload", b"")),
                timestamp=packet.get("timestamp", 0.0),
                src=packet.get("src", packet.get("src_ip", "")),
                dst=packet.get("dst", packet.get("dst_ip", "")),
            )
            key = self._resolve_flow_key(flow_key, segment.src, segment.dst)

        self._segments[key].append(segment)

    def get_stream(self, flow_key: str | None = None) -> ReassembledStream:
        """Get reassembled stream for a flow.

        Implements RE-STR-001: UDP stream reconstruction.

        Args:
            flow_key: Flow identifier. When ``None``, the first flow that
                was added is used.

        Returns:
            ReassembledStream with the payloads concatenated in sequence
            order. An empty stream is returned when the flow is unknown or
            nothing has been added.
        """
        if flow_key is None:
            if not self._segments:
                return self._empty_stream()
            flow_key = next(iter(self._segments))

        # .get() avoids creating an empty bucket for an unknown flow.
        segments = self._segments.get(flow_key, [])
        if not segments:
            return self._empty_stream()

        # sorted() is stable, so equal sequence numbers keep arrival order.
        ordered = sorted(segments, key=lambda s: s.sequence_number)
        data = b"".join(s.data for s in ordered)

        # Out-of-order: a segment that arrived after a higher sequence number.
        out_of_order = 0
        max_seq_seen = -1
        for segment in segments:
            if segment.sequence_number < max_seq_seen:
                out_of_order += 1
            max_seq_seen = max(max_seq_seen, segment.sequence_number)

        # Gap: the next sequence number lies beyond the previous sequence
        # number plus the previous payload length.
        gaps = []
        for previous, current in zip(ordered, ordered[1:]):
            expected = previous.sequence_number + len(previous.data)
            if current.sequence_number > expected:
                gaps.append((expected, current.sequence_number))

        # Timestamps <= 0 are ignored when computing the time span.
        timestamps = [s.timestamp for s in ordered if s.timestamp > 0]

        return ReassembledStream(
            data=data,
            src=ordered[0].src,
            dst=ordered[0].dst,
            start_time=min(timestamps) if timestamps else 0.0,
            end_time=max(timestamps) if timestamps else 0.0,
            segments=len(ordered),
            gaps=gaps,
            retransmits=0,
            out_of_order=out_of_order,
        )

    def get_all_streams(self) -> dict[str, ReassembledStream]:
        """Get all reassembled streams.

        Returns:
            Dictionary mapping flow keys to streams.
        """
        return {key: self.get_stream(key) for key in self._segments}

    def clear(self) -> None:
        """Clear all segments."""
        self._segments.clear()
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
class TCPStreamReassembler:
|
|
270
|
+
"""Reassemble TCP byte streams.
|
|
271
|
+
|
|
272
|
+
Implements RE-STR-002: TCP Stream Reassembly.
|
|
273
|
+
|
|
274
|
+
Handles TCP sequence numbers, retransmissions, and ordering
|
|
275
|
+
to reconstruct the original byte stream.
|
|
276
|
+
|
|
277
|
+
Example:
|
|
278
|
+
>>> reassembler = TCPStreamReassembler()
|
|
279
|
+
>>> for segment in tcp_segments:
|
|
280
|
+
... reassembler.add_segment(segment)
|
|
281
|
+
>>> stream = reassembler.get_stream()
|
|
282
|
+
"""
|
|
283
|
+
|
|
284
|
+
def __init__(
|
|
285
|
+
self,
|
|
286
|
+
initial_sequence: int | None = None,
|
|
287
|
+
max_buffer_size: int = 10 * 1024 * 1024,
|
|
288
|
+
) -> None:
|
|
289
|
+
"""Initialize TCP reassembler.
|
|
290
|
+
|
|
291
|
+
Args:
|
|
292
|
+
initial_sequence: Initial sequence number (auto-detect if None).
|
|
293
|
+
max_buffer_size: Maximum buffer size in bytes.
|
|
294
|
+
"""
|
|
295
|
+
self.initial_sequence = initial_sequence
|
|
296
|
+
self.max_buffer_size = max_buffer_size
|
|
297
|
+
|
|
298
|
+
self._segments: dict[str, list[StreamSegment]] = defaultdict(list)
|
|
299
|
+
self._isn: dict[str, int | None] = {} # Initial sequence numbers
|
|
300
|
+
self._seen_seqs: dict[str, set[int]] = defaultdict(set) # Track seen sequence numbers
|
|
301
|
+
|
|
302
|
+
def add_segment(
|
|
303
|
+
self,
|
|
304
|
+
segment: dict[str, Any] | StreamSegment,
|
|
305
|
+
flow_key: str | None = None,
|
|
306
|
+
) -> None:
|
|
307
|
+
"""Add a TCP segment to the reassembler.
|
|
308
|
+
|
|
309
|
+
Args:
|
|
310
|
+
segment: TCP segment data or StreamSegment.
|
|
311
|
+
flow_key: Optional flow identifier.
|
|
312
|
+
"""
|
|
313
|
+
if isinstance(segment, dict):
|
|
314
|
+
seq_num = segment.get("seq") or segment.get("sequence_number") or 0
|
|
315
|
+
seg_data = segment.get("data") or segment.get("payload") or b""
|
|
316
|
+
seg = StreamSegment(
|
|
317
|
+
sequence_number=seq_num,
|
|
318
|
+
data=seg_data if isinstance(seg_data, bytes) else b"",
|
|
319
|
+
timestamp=segment.get("timestamp", 0.0),
|
|
320
|
+
src=segment.get("src", ""),
|
|
321
|
+
dst=segment.get("dst", ""),
|
|
322
|
+
flags=segment.get("flags", 0),
|
|
323
|
+
)
|
|
324
|
+
else:
|
|
325
|
+
seg = segment
|
|
326
|
+
|
|
327
|
+
key = flow_key or f"{seg.src}-{seg.dst}"
|
|
328
|
+
|
|
329
|
+
# Detect initial sequence number (SYN)
|
|
330
|
+
if key not in self._isn or self._isn[key] is None:
|
|
331
|
+
if seg.flags & 0x02: # SYN flag
|
|
332
|
+
# SYN consumes one sequence number, so ISN+1 is first data byte
|
|
333
|
+
self._isn[key] = seg.sequence_number + 1
|
|
334
|
+
return # Don't store SYN itself
|
|
335
|
+
|
|
336
|
+
if self.initial_sequence is not None:
|
|
337
|
+
self._isn[key] = self.initial_sequence
|
|
338
|
+
else:
|
|
339
|
+
# Use first data segment's sequence as initial
|
|
340
|
+
self._isn[key] = seg.sequence_number
|
|
341
|
+
|
|
342
|
+
# Check for retransmit: same sequence number seen before WITH data
|
|
343
|
+
# Empty segments (ACK-only) shouldn't cause data segments to be marked as retransmits
|
|
344
|
+
if seg.sequence_number in self._seen_seqs[key] and seg.data:
|
|
345
|
+
# Check if there's already a segment with data at this sequence
|
|
346
|
+
has_data_at_seq = any(
|
|
347
|
+
s.sequence_number == seg.sequence_number and s.data for s in self._segments[key]
|
|
348
|
+
)
|
|
349
|
+
if has_data_at_seq:
|
|
350
|
+
seg.is_retransmit = True
|
|
351
|
+
|
|
352
|
+
if seg.data: # Only track sequences with data
|
|
353
|
+
self._seen_seqs[key].add(seg.sequence_number)
|
|
354
|
+
|
|
355
|
+
self._segments[key].append(seg)
|
|
356
|
+
|
|
357
|
+
def get_stream(self, flow_key: str | None = None) -> ReassembledStream:
    """Get reassembled TCP stream.

    Implements RE-STR-002: TCP stream reassembly.

    Segments of the flow are ordered by their sequence number relative to
    the flow's initial sequence number. Segments flagged as retransmits are
    left out of the data, bytes that overlap already-written data are
    trimmed, and missing byte ranges are zero-filled and reported as gaps.

    Args:
        flow_key: Flow identifier. When ``None``, the first flow that has
            stored segments is used.

    Returns:
        ReassembledStream with complete data. An empty stream (no data,
        zero segments) is returned when the flow has no stored segments.
    """
    if flow_key is None and self._segments:
        flow_key = next(iter(self._segments))

    segments = self._segments.get(flow_key, [])
    if not segments:
        return ReassembledStream(
            data=b"",
            src="",
            dst="",
            start_time=0.0,
            end_time=0.0,
            segments=0,
        )

    # If ISN wasn't detected via SYN (or it lies above the lowest stored
    # sequence number), fall back to the minimum sequence number.
    lowest_seq = min(s.sequence_number for s in segments)
    isn = self._isn.get(flow_key, 0) or 0
    if isn == 0 or isn > lowest_seq:
        isn = lowest_seq

    # Count retransmits over all stored segments (before filtering)
    retransmits = sum(1 for seg in segments if seg.is_retransmit)

    # A segment is out of order when some earlier-arrived segment carries a
    # higher sequence number. Tracking the running maximum gives the same
    # count as comparing against every earlier segment, in a single pass.
    out_of_order = 0
    highest_seen = None
    for seg in segments:
        seq = seg.sequence_number
        if highest_seen is not None and highest_seen > seq:
            out_of_order += 1
        else:
            highest_seen = seq

    def relative_seq(seg) -> int:
        # Offset from the ISN, wrapped to the 32-bit sequence space
        return (seg.sequence_number - isn) % (2**32)

    sorted_segments = sorted(segments, key=relative_seq)

    # Build stream handling overlaps and gaps
    data_buffer = bytearray()
    current_offset = 0
    gaps: list[tuple[int, int]] = []

    for seg in sorted_segments:
        if seg.is_retransmit:
            continue  # Skip retransmits when building data

        rel_seq = relative_seq(seg)

        if rel_seq > current_offset:
            # Gap detected: record it and fill it with zeros
            gaps.append((current_offset, rel_seq))
            data_buffer.extend(b"\x00" * (rel_seq - current_offset))
            current_offset = rel_seq

        # Number of leading bytes of this segment already present in the
        # buffer (0 when the segment starts exactly at the write position).
        overlap = current_offset - rel_seq
        if overlap < len(seg.data):
            data_buffer.extend(seg.data[overlap:])
            current_offset += len(seg.data) - overlap

    # Timestamps of 0 (or below) are treated as "not set" and ignored
    timestamps = [s.timestamp for s in sorted_segments if s.timestamp > 0]
    first = sorted_segments[0]

    return ReassembledStream(
        data=bytes(data_buffer),
        src=first.src,
        dst=first.dst,
        start_time=min(timestamps) if timestamps else 0.0,
        end_time=max(timestamps) if timestamps else 0.0,
        segments=len(sorted_segments),
        gaps=gaps,
        retransmits=retransmits,
        out_of_order=out_of_order,
    )
|
|
454
|
+
|
|
455
|
+
def get_all_streams(self) -> dict[str, ReassembledStream]:
|
|
456
|
+
"""Get all reassembled TCP streams."""
|
|
457
|
+
return {key: self.get_stream(key) for key in self._segments}
|
|
458
|
+
|
|
459
|
+
def clear(self) -> None:
    """Drop all stored segments, initial sequence numbers and seen sequences."""
    for store in (self._segments, self._isn, self._seen_seqs):
        store.clear()
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
class MessageFramer:
    """Extract framed messages from stream data.

    Implements RE-STR-003: Message Framing and Segmentation.

    Supports multiple framing methods: delimiter-based, length-prefixed,
    and fixed-size. With ``framing_type="auto"`` the framing is guessed from
    the data itself (see :meth:`detect_framing`).

    Example:
        >>> framer = MessageFramer(framing_type='delimiter', delimiter=b'\\r\\n')
        >>> result = framer.frame(stream_data)
        >>> for msg in result.messages:
        ...     print(msg.data)
    """

    # Delimiters probed by auto-detection, in priority order.
    _COMMON_DELIMITERS = (b"\r\n", b"\n", b"\x00", b"\r")

    def __init__(
        self,
        framing_type: Literal["delimiter", "length_prefix", "fixed", "auto"] = "auto",
        delimiter: bytes | None = None,
        length_field_offset: int = 0,
        length_field_size: int = 2,
        length_field_endian: Literal["big", "little"] = "big",
        length_includes_header: bool = False,
        fixed_size: int = 0,
    ) -> None:
        """Initialize message framer.

        Args:
            framing_type: Type of framing to use.
            delimiter: Delimiter bytes for delimiter-based framing.
            length_field_offset: Offset of length field.
            length_field_size: Size of length field in bytes.
            length_field_endian: Endianness of length field.
            length_includes_header: Whether length includes header.
            fixed_size: Fixed message size.
        """
        self.framing_type = framing_type
        self.delimiter = delimiter
        self.length_field_offset = length_field_offset
        self.length_field_size = length_field_size
        self.length_field_endian = length_field_endian
        self.length_includes_header = length_includes_header
        self.fixed_size = fixed_size

    def frame(self, data: bytes) -> FramingResult:
        """Extract framed messages from data.

        Implements RE-STR-003: Message framing workflow.

        Args:
            data: Stream data to frame.

        Returns:
            FramingResult with extracted messages.

        Example:
            >>> result = framer.frame(stream_data)
            >>> print(f"Found {len(result.messages)} messages")
        """
        if self.framing_type == "auto":
            return self._auto_frame(data)
        elif self.framing_type == "delimiter":
            return self._frame_by_delimiter(data)
        elif self.framing_type == "length_prefix":
            return self._frame_by_length(data)
        else:  # framing_type == "fixed"
            return self._frame_fixed(data)

    def detect_framing(self, data: bytes) -> str:
        """Detect framing type from data.

        Implements RE-STR-003: Framing detection.

        The heuristics are tried in a fixed order: delimiter, then length
        prefix, then fixed size. The first one that matches wins.

        Args:
            data: Sample data.

        Returns:
            Detected framing type string: ``"delimiter"``,
            ``"length_prefix"``, ``"fixed"`` or ``"unknown"``.
        """
        if self._detect_delimiter(data) is not None:
            return "delimiter"
        if self._looks_length_prefixed(data):
            return "length_prefix"
        if self._detect_fixed_size(data) is not None:
            return "fixed"
        return "unknown"

    def _detect_delimiter(self, data: bytes) -> bytes | None:
        """Return the first common delimiter that splits data regularly.

        A delimiter qualifies when it occurs at least three times and the
        non-empty parts it produces have at most three distinct lengths.
        """
        for delim in self._COMMON_DELIMITERS:
            if data.count(delim) < 3:
                continue
            part_lengths = {len(part) for part in data.split(delim) if part}
            if len(part_lengths) <= 3:
                return delim
        return None

    def _looks_length_prefixed(self, data: bytes) -> bool:
        """Return True when data looks like chained 2-byte big-endian lengths.

        For each of the first (up to eight) offsets, the two bytes there are
        read as a length; the guess is accepted when jumping ahead by that
        length lands on another plausible length value.
        """
        total = len(data)
        if total < 4:
            return False
        for offset in range(min(8, total - 2)):
            length = int.from_bytes(data[offset : offset + 2], "big")
            if not 4 < length < total:
                continue
            next_offset = offset + length
            if next_offset + 2 < total:
                next_length = int.from_bytes(data[next_offset : next_offset + 2], "big")
                if 4 < next_length < total:
                    return True
        return False

    def _detect_fixed_size(self, data: bytes) -> int | None:
        """Return the smallest record size whose records share a 4-byte prefix.

        A size in ``4..127`` qualifies when it divides the data into at least
        three records and the number of later records starting with the same
        four bytes as the first is at least half the record count.
        """
        total = len(data)
        if total < 32:
            return None
        prefix = data[:4]
        for size in range(4, 128):
            count, leftover = divmod(total, size)
            if leftover or count < 3:
                continue
            matches = sum(
                1 for pos in range(size, total, size) if data.startswith(prefix, pos)
            )
            if matches >= count * 0.5:
                return size
        return None

    def _auto_frame(self, data: bytes) -> FramingResult:
        """Automatically detect and apply framing.

        The delimiter or fixed size used for framing is the one that passed
        the detection heuristic, and it is stored on the instance
        (``self.delimiter`` / ``self.fixed_size``).

        Args:
            data: Stream data.

        Returns:
            FramingResult with detected framing. When nothing is detected,
            the whole input is returned as a single ``"unknown"`` message.
        """
        framing_type = self.detect_framing(data)

        if framing_type == "delimiter":
            # Use the delimiter that actually passed the spacing check
            # rather than merely the first one that occurs three times.
            delim = self._detect_delimiter(data)
            if delim is not None:
                self.delimiter = delim
            return self._frame_by_delimiter(data)

        elif framing_type == "length_prefix":
            return self._frame_by_length(data)

        elif framing_type == "fixed":
            # Use the record size that passed the prefix-similarity check
            # rather than merely the first size that divides the data.
            size = self._detect_fixed_size(data)
            if size is not None:
                self.fixed_size = size
            return self._frame_fixed(data)

        else:
            # Return as single message
            return FramingResult(
                messages=[
                    MessageFrame(
                        data=data,
                        offset=0,
                        length=len(data),
                        frame_type="unknown",
                    )
                ],
                framing_type="unknown",
            )

    def _frame_by_delimiter(self, data: bytes) -> FramingResult:
        """Frame by delimiter.

        Empty parts between consecutive delimiters are skipped. A trailing
        part that is not terminated by the delimiter is reported both as the
        last message and as ``remaining``.

        Args:
            data: Stream data.

        Returns:
            FramingResult. It contains no messages when no delimiter (or an
            empty one) is configured.
        """
        if not self.delimiter:
            # bytes.split() raises ValueError on an empty separator, so an
            # empty delimiter is treated like a missing one.
            return FramingResult(messages=[], framing_type="delimiter")

        delimiter = self.delimiter
        step = len(delimiter)
        messages = []
        offset = 0
        parts = data.split(delimiter)

        for index, part in enumerate(parts):
            if part:  # Skip empty parts
                messages.append(
                    MessageFrame(
                        data=part,
                        offset=offset,
                        length=len(part),
                        frame_type="delimited",
                        sequence=index,
                    )
                )
            offset += len(part) + step

        # Bytes after the final delimiter are still unterminated
        remaining = b"" if data.endswith(delimiter) else parts[-1]

        return FramingResult(
            messages=messages,
            framing_type="delimiter",
            delimiter=delimiter,
            remaining=remaining,
        )

    def _frame_by_length(self, data: bytes) -> FramingResult:
        """Frame by length prefix.

        Framing stops at the first incomplete message, or at the first
        length field describing a message smaller than its own header; the
        unconsumed bytes are returned as ``remaining``.

        Args:
            data: Stream data.

        Returns:
            FramingResult.
        """
        header_size = self.length_field_offset + self.length_field_size
        # Anything other than "big" is read as little-endian
        byteorder = "big" if self.length_field_endian == "big" else "little"

        messages = []
        offset = 0
        sequence = 0

        while offset + header_size <= len(data):
            # Read length field
            length_start = offset + self.length_field_offset
            length_bytes = data[length_start : length_start + self.length_field_size]
            length = int.from_bytes(length_bytes, byteorder)

            # Calculate total message size
            msg_size = length if self.length_includes_header else header_size + length

            # A message can never be smaller than its header, and a
            # zero-sized message would never advance the offset (the loop
            # would not terminate). Treat the rest as unframed data.
            if msg_size < max(header_size, 1):
                break

            # Check if complete message available
            if offset + msg_size > len(data):
                break

            messages.append(
                MessageFrame(
                    data=data[offset : offset + msg_size],
                    offset=offset,
                    length=msg_size,
                    frame_type="length_prefixed",
                    sequence=sequence,
                )
            )

            offset += msg_size
            sequence += 1

        remaining = data[offset:]

        return FramingResult(
            messages=messages,
            framing_type="length_prefix",
            length_field_offset=self.length_field_offset,
            length_field_size=self.length_field_size,
            remaining=remaining,
        )

    def _frame_fixed(self, data: bytes) -> FramingResult:
        """Frame by fixed size.

        Args:
            data: Stream data.

        Returns:
            FramingResult. It contains no messages when ``fixed_size`` is not
            positive; trailing bytes that do not fill a whole frame are
            returned as ``remaining``.
        """
        size = self.fixed_size
        if size <= 0:
            return FramingResult(messages=[], framing_type="fixed")

        full_frames = len(data) // size
        messages = [
            MessageFrame(
                data=data[index * size : (index + 1) * size],
                offset=index * size,
                length=size,
                frame_type="fixed",
                sequence=index,
            )
            for index in range(full_frames)
        ]

        remaining = data[full_frames * size :]

        return FramingResult(
            messages=messages,
            framing_type="fixed",
            remaining=remaining,
        )
|
|
765
|
+
|
|
766
|
+
|
|
767
|
+
# =============================================================================
|
|
768
|
+
# Convenience functions
|
|
769
|
+
# =============================================================================
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
def reassemble_udp_stream(
    packets: Sequence[dict[str, Any] | bytes],
    sequence_key: Callable[[Any], int] | None = None,
) -> ReassembledStream:
    """Reassemble UDP datagram stream.

    Implements RE-STR-001: UDP Stream Reconstruction.

    Convenience wrapper: feeds every packet, in the given order, to a new
    ``UDPStreamReassembler`` and returns the stream it produces.

    Args:
        packets: List of UDP packets.
        sequence_key: Function to extract sequence number.

    Returns:
        ReassembledStream with ordered data.

    Example:
        >>> stream = reassemble_udp_stream(udp_packets)
        >>> print(f"Reassembled {len(stream.data)} bytes")
    """
    udp_reassembler = UDPStreamReassembler(sequence_key=sequence_key)
    feed = udp_reassembler.add_segment
    for datagram in packets:
        feed(datagram)
    return udp_reassembler.get_stream()
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
def reassemble_tcp_stream(
    segments: Sequence[dict[str, Any]],
    flow_key: str | None = None,
) -> ReassembledStream:
    """Reassemble TCP byte stream.

    Implements RE-STR-002: TCP Stream Reassembly.

    Convenience wrapper: feeds every segment, in the given order, to a new
    ``TCPStreamReassembler`` under ``flow_key`` and returns that flow's
    stream.

    Args:
        segments: List of TCP segments.
        flow_key: Optional flow identifier.

    Returns:
        ReassembledStream with complete data.

    Example:
        >>> stream = reassemble_tcp_stream(tcp_segments)
        >>> print(f"Reassembled {len(stream.data)} bytes with {len(stream.gaps)} gaps")
    """
    tcp_reassembler = TCPStreamReassembler()
    feed = tcp_reassembler.add_segment
    for tcp_segment in segments:
        feed(tcp_segment, flow_key)
    return tcp_reassembler.get_stream(flow_key)
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
def extract_messages(
    data: bytes,
    framing_type: Literal["auto", "delimiter", "length_prefix", "fixed"] = "auto",
    delimiter: bytes | None = None,
    length_field_offset: int = 0,
    length_field_size: int = 2,
    fixed_size: int = 0,
    length_field_endian: Literal["big", "little"] = "big",
    length_includes_header: bool = False,
) -> FramingResult:
    """Extract framed messages from stream data.

    Implements RE-STR-003: Message Framing and Segmentation.

    Convenience wrapper that builds a ``MessageFramer`` from the arguments
    and frames ``data`` with it.

    Args:
        data: Stream data.
        framing_type: Type of framing.
        delimiter: Delimiter for delimiter-based framing.
        length_field_offset: Length field offset.
        length_field_size: Length field size.
        fixed_size: Fixed message size.
        length_field_endian: Endianness of the length field.
        length_includes_header: Whether the length value includes the header.

    Returns:
        FramingResult with extracted messages.

    Example:
        >>> result = extract_messages(data, framing_type='delimiter', delimiter=b'\\r\\n')
        >>> for msg in result.messages:
        ...     print(msg.data)
    """
    framer = MessageFramer(
        framing_type=framing_type,
        delimiter=delimiter,
        length_field_offset=length_field_offset,
        length_field_size=length_field_size,
        length_field_endian=length_field_endian,
        length_includes_header=length_includes_header,
        fixed_size=fixed_size,
    )
    return framer.frame(data)
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
def detect_message_framing(data: bytes) -> dict[str, Any]:
    """Detect message framing type in data.

    Implements RE-STR-003: Framing detection.

    Args:
        data: Stream data sample.

    Returns:
        Dictionary with detected framing parameters. ``"type"`` is always
        present. ``"delimiter"`` and ``"message_count"`` are added for
        delimiter framing, ``"length_field_offset"`` and
        ``"length_field_size"`` for length-prefix framing, and
        ``"fixed_size"`` and ``"message_count"`` for fixed-size framing.

    Example:
        >>> framing = detect_message_framing(stream_data)
        >>> print(f"Detected: {framing['type']}")
    """
    framer = MessageFramer()
    framing_type = framer.detect_framing(data)

    result: dict[str, Any] = {"type": framing_type}

    if framing_type == "delimiter":
        candidates = [
            delim for delim in (b"\r\n", b"\n", b"\x00", b"\r") if data.count(delim) >= 3
        ]
        # Report the delimiter that satisfies the same regular-spacing rule
        # the detector applies (at most three distinct non-empty part
        # lengths), not merely the first one that occurs three times.
        chosen = next(
            (
                delim
                for delim in candidates
                if len({len(part) for part in data.split(delim) if part}) <= 3
            ),
            None,
        )
        if chosen is None and candidates:
            chosen = candidates[0]
        if chosen is not None:
            result["delimiter"] = chosen
            result["message_count"] = data.count(chosen)

    elif framing_type == "length_prefix":
        # NOTE: these are the MessageFramer defaults, not measured values.
        result["length_field_offset"] = 0
        result["length_field_size"] = 2

    elif framing_type == "fixed":
        total = len(data)
        prefix = data[:4]
        fallback_size = None
        chosen_size = None
        for size in range(4, 128):
            count, leftover = divmod(total, size)
            if leftover or count < 3:
                continue
            if fallback_size is None:
                fallback_size = size
            # Same similarity rule as the detector: the number of later
            # records starting with the first record's 4-byte prefix must be
            # at least half the record count.
            matches = sum(
                1 for pos in range(size, total, size) if data.startswith(prefix, pos)
            )
            if matches >= count * 0.5:
                chosen_size = size
                break
        if chosen_size is None:
            chosen_size = fallback_size
        if chosen_size is not None:
            result["fixed_size"] = chosen_size
            result["message_count"] = total // chosen_size

    return result
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
__all__ = [
    # Classes and data classes (CamelCase names sort first in ASCII order)
    "FramingResult",
    "MessageFrame",
    "MessageFramer",
    "ReassembledStream",
    "StreamSegment",
    "TCPStreamReassembler",
    "UDPStreamReassembler",
    # Functions
    "detect_message_framing",
    "extract_messages",
    "reassemble_tcp_stream",
    "reassemble_udp_stream",
]
|