oscura 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscura/__init__.py +813 -8
- oscura/__main__.py +392 -0
- oscura/analyzers/__init__.py +37 -0
- oscura/analyzers/digital/__init__.py +177 -0
- oscura/analyzers/digital/bus.py +691 -0
- oscura/analyzers/digital/clock.py +805 -0
- oscura/analyzers/digital/correlation.py +720 -0
- oscura/analyzers/digital/edges.py +632 -0
- oscura/analyzers/digital/extraction.py +413 -0
- oscura/analyzers/digital/quality.py +878 -0
- oscura/analyzers/digital/signal_quality.py +877 -0
- oscura/analyzers/digital/thresholds.py +708 -0
- oscura/analyzers/digital/timing.py +1104 -0
- oscura/analyzers/eye/__init__.py +46 -0
- oscura/analyzers/eye/diagram.py +434 -0
- oscura/analyzers/eye/metrics.py +555 -0
- oscura/analyzers/jitter/__init__.py +83 -0
- oscura/analyzers/jitter/ber.py +333 -0
- oscura/analyzers/jitter/decomposition.py +759 -0
- oscura/analyzers/jitter/measurements.py +413 -0
- oscura/analyzers/jitter/spectrum.py +220 -0
- oscura/analyzers/measurements.py +40 -0
- oscura/analyzers/packet/__init__.py +171 -0
- oscura/analyzers/packet/daq.py +1077 -0
- oscura/analyzers/packet/metrics.py +437 -0
- oscura/analyzers/packet/parser.py +327 -0
- oscura/analyzers/packet/payload.py +2156 -0
- oscura/analyzers/packet/payload_analysis.py +1312 -0
- oscura/analyzers/packet/payload_extraction.py +236 -0
- oscura/analyzers/packet/payload_patterns.py +670 -0
- oscura/analyzers/packet/stream.py +359 -0
- oscura/analyzers/patterns/__init__.py +266 -0
- oscura/analyzers/patterns/clustering.py +1036 -0
- oscura/analyzers/patterns/discovery.py +539 -0
- oscura/analyzers/patterns/learning.py +797 -0
- oscura/analyzers/patterns/matching.py +1091 -0
- oscura/analyzers/patterns/periodic.py +650 -0
- oscura/analyzers/patterns/sequences.py +767 -0
- oscura/analyzers/power/__init__.py +116 -0
- oscura/analyzers/power/ac_power.py +391 -0
- oscura/analyzers/power/basic.py +383 -0
- oscura/analyzers/power/conduction.py +314 -0
- oscura/analyzers/power/efficiency.py +297 -0
- oscura/analyzers/power/ripple.py +356 -0
- oscura/analyzers/power/soa.py +372 -0
- oscura/analyzers/power/switching.py +479 -0
- oscura/analyzers/protocol/__init__.py +150 -0
- oscura/analyzers/protocols/__init__.py +150 -0
- oscura/analyzers/protocols/base.py +500 -0
- oscura/analyzers/protocols/can.py +620 -0
- oscura/analyzers/protocols/can_fd.py +448 -0
- oscura/analyzers/protocols/flexray.py +405 -0
- oscura/analyzers/protocols/hdlc.py +399 -0
- oscura/analyzers/protocols/i2c.py +368 -0
- oscura/analyzers/protocols/i2s.py +296 -0
- oscura/analyzers/protocols/jtag.py +393 -0
- oscura/analyzers/protocols/lin.py +445 -0
- oscura/analyzers/protocols/manchester.py +333 -0
- oscura/analyzers/protocols/onewire.py +501 -0
- oscura/analyzers/protocols/spi.py +334 -0
- oscura/analyzers/protocols/swd.py +325 -0
- oscura/analyzers/protocols/uart.py +393 -0
- oscura/analyzers/protocols/usb.py +495 -0
- oscura/analyzers/signal_integrity/__init__.py +63 -0
- oscura/analyzers/signal_integrity/embedding.py +294 -0
- oscura/analyzers/signal_integrity/equalization.py +370 -0
- oscura/analyzers/signal_integrity/sparams.py +484 -0
- oscura/analyzers/spectral/__init__.py +53 -0
- oscura/analyzers/spectral/chunked.py +273 -0
- oscura/analyzers/spectral/chunked_fft.py +571 -0
- oscura/analyzers/spectral/chunked_wavelet.py +391 -0
- oscura/analyzers/spectral/fft.py +92 -0
- oscura/analyzers/statistical/__init__.py +250 -0
- oscura/analyzers/statistical/checksum.py +923 -0
- oscura/analyzers/statistical/chunked_corr.py +228 -0
- oscura/analyzers/statistical/classification.py +778 -0
- oscura/analyzers/statistical/entropy.py +1113 -0
- oscura/analyzers/statistical/ngrams.py +614 -0
- oscura/analyzers/statistics/__init__.py +119 -0
- oscura/analyzers/statistics/advanced.py +885 -0
- oscura/analyzers/statistics/basic.py +263 -0
- oscura/analyzers/statistics/correlation.py +630 -0
- oscura/analyzers/statistics/distribution.py +298 -0
- oscura/analyzers/statistics/outliers.py +463 -0
- oscura/analyzers/statistics/streaming.py +93 -0
- oscura/analyzers/statistics/trend.py +520 -0
- oscura/analyzers/validation.py +598 -0
- oscura/analyzers/waveform/__init__.py +36 -0
- oscura/analyzers/waveform/measurements.py +943 -0
- oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
- oscura/analyzers/waveform/spectral.py +1689 -0
- oscura/analyzers/waveform/wavelets.py +298 -0
- oscura/api/__init__.py +62 -0
- oscura/api/dsl.py +538 -0
- oscura/api/fluent.py +571 -0
- oscura/api/operators.py +498 -0
- oscura/api/optimization.py +392 -0
- oscura/api/profiling.py +396 -0
- oscura/automotive/__init__.py +73 -0
- oscura/automotive/can/__init__.py +52 -0
- oscura/automotive/can/analysis.py +356 -0
- oscura/automotive/can/checksum.py +250 -0
- oscura/automotive/can/correlation.py +212 -0
- oscura/automotive/can/discovery.py +355 -0
- oscura/automotive/can/message_wrapper.py +375 -0
- oscura/automotive/can/models.py +385 -0
- oscura/automotive/can/patterns.py +381 -0
- oscura/automotive/can/session.py +452 -0
- oscura/automotive/can/state_machine.py +300 -0
- oscura/automotive/can/stimulus_response.py +461 -0
- oscura/automotive/dbc/__init__.py +15 -0
- oscura/automotive/dbc/generator.py +156 -0
- oscura/automotive/dbc/parser.py +146 -0
- oscura/automotive/dtc/__init__.py +30 -0
- oscura/automotive/dtc/database.py +3036 -0
- oscura/automotive/j1939/__init__.py +14 -0
- oscura/automotive/j1939/decoder.py +745 -0
- oscura/automotive/loaders/__init__.py +35 -0
- oscura/automotive/loaders/asc.py +98 -0
- oscura/automotive/loaders/blf.py +77 -0
- oscura/automotive/loaders/csv_can.py +136 -0
- oscura/automotive/loaders/dispatcher.py +136 -0
- oscura/automotive/loaders/mdf.py +331 -0
- oscura/automotive/loaders/pcap.py +132 -0
- oscura/automotive/obd/__init__.py +14 -0
- oscura/automotive/obd/decoder.py +707 -0
- oscura/automotive/uds/__init__.py +48 -0
- oscura/automotive/uds/decoder.py +265 -0
- oscura/automotive/uds/models.py +64 -0
- oscura/automotive/visualization.py +369 -0
- oscura/batch/__init__.py +55 -0
- oscura/batch/advanced.py +627 -0
- oscura/batch/aggregate.py +300 -0
- oscura/batch/analyze.py +139 -0
- oscura/batch/logging.py +487 -0
- oscura/batch/metrics.py +556 -0
- oscura/builders/__init__.py +41 -0
- oscura/builders/signal_builder.py +1131 -0
- oscura/cli/__init__.py +14 -0
- oscura/cli/batch.py +339 -0
- oscura/cli/characterize.py +273 -0
- oscura/cli/compare.py +775 -0
- oscura/cli/decode.py +551 -0
- oscura/cli/main.py +247 -0
- oscura/cli/shell.py +350 -0
- oscura/comparison/__init__.py +66 -0
- oscura/comparison/compare.py +397 -0
- oscura/comparison/golden.py +487 -0
- oscura/comparison/limits.py +391 -0
- oscura/comparison/mask.py +434 -0
- oscura/comparison/trace_diff.py +30 -0
- oscura/comparison/visualization.py +481 -0
- oscura/compliance/__init__.py +70 -0
- oscura/compliance/advanced.py +756 -0
- oscura/compliance/masks.py +363 -0
- oscura/compliance/reporting.py +483 -0
- oscura/compliance/testing.py +298 -0
- oscura/component/__init__.py +38 -0
- oscura/component/impedance.py +365 -0
- oscura/component/reactive.py +598 -0
- oscura/component/transmission_line.py +312 -0
- oscura/config/__init__.py +191 -0
- oscura/config/defaults.py +254 -0
- oscura/config/loader.py +348 -0
- oscura/config/memory.py +271 -0
- oscura/config/migration.py +458 -0
- oscura/config/pipeline.py +1077 -0
- oscura/config/preferences.py +530 -0
- oscura/config/protocol.py +875 -0
- oscura/config/schema.py +713 -0
- oscura/config/settings.py +420 -0
- oscura/config/thresholds.py +599 -0
- oscura/convenience.py +457 -0
- oscura/core/__init__.py +299 -0
- oscura/core/audit.py +457 -0
- oscura/core/backend_selector.py +405 -0
- oscura/core/cache.py +590 -0
- oscura/core/cancellation.py +439 -0
- oscura/core/confidence.py +225 -0
- oscura/core/config.py +506 -0
- oscura/core/correlation.py +216 -0
- oscura/core/cross_domain.py +422 -0
- oscura/core/debug.py +301 -0
- oscura/core/edge_cases.py +541 -0
- oscura/core/exceptions.py +535 -0
- oscura/core/gpu_backend.py +523 -0
- oscura/core/lazy.py +832 -0
- oscura/core/log_query.py +540 -0
- oscura/core/logging.py +931 -0
- oscura/core/logging_advanced.py +952 -0
- oscura/core/memoize.py +171 -0
- oscura/core/memory_check.py +274 -0
- oscura/core/memory_guard.py +290 -0
- oscura/core/memory_limits.py +336 -0
- oscura/core/memory_monitor.py +453 -0
- oscura/core/memory_progress.py +465 -0
- oscura/core/memory_warnings.py +315 -0
- oscura/core/numba_backend.py +362 -0
- oscura/core/performance.py +352 -0
- oscura/core/progress.py +524 -0
- oscura/core/provenance.py +358 -0
- oscura/core/results.py +331 -0
- oscura/core/types.py +504 -0
- oscura/core/uncertainty.py +383 -0
- oscura/discovery/__init__.py +52 -0
- oscura/discovery/anomaly_detector.py +672 -0
- oscura/discovery/auto_decoder.py +415 -0
- oscura/discovery/comparison.py +497 -0
- oscura/discovery/quality_validator.py +528 -0
- oscura/discovery/signal_detector.py +769 -0
- oscura/dsl/__init__.py +73 -0
- oscura/dsl/commands.py +246 -0
- oscura/dsl/interpreter.py +455 -0
- oscura/dsl/parser.py +689 -0
- oscura/dsl/repl.py +172 -0
- oscura/exceptions.py +59 -0
- oscura/exploratory/__init__.py +111 -0
- oscura/exploratory/error_recovery.py +642 -0
- oscura/exploratory/fuzzy.py +513 -0
- oscura/exploratory/fuzzy_advanced.py +786 -0
- oscura/exploratory/legacy.py +831 -0
- oscura/exploratory/parse.py +358 -0
- oscura/exploratory/recovery.py +275 -0
- oscura/exploratory/sync.py +382 -0
- oscura/exploratory/unknown.py +707 -0
- oscura/export/__init__.py +25 -0
- oscura/export/wireshark/README.md +265 -0
- oscura/export/wireshark/__init__.py +47 -0
- oscura/export/wireshark/generator.py +312 -0
- oscura/export/wireshark/lua_builder.py +159 -0
- oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
- oscura/export/wireshark/type_mapping.py +165 -0
- oscura/export/wireshark/validator.py +105 -0
- oscura/exporters/__init__.py +94 -0
- oscura/exporters/csv.py +303 -0
- oscura/exporters/exporters.py +44 -0
- oscura/exporters/hdf5.py +219 -0
- oscura/exporters/html_export.py +701 -0
- oscura/exporters/json_export.py +291 -0
- oscura/exporters/markdown_export.py +367 -0
- oscura/exporters/matlab_export.py +354 -0
- oscura/exporters/npz_export.py +219 -0
- oscura/exporters/spice_export.py +210 -0
- oscura/extensibility/__init__.py +131 -0
- oscura/extensibility/docs.py +752 -0
- oscura/extensibility/extensions.py +1125 -0
- oscura/extensibility/logging.py +259 -0
- oscura/extensibility/measurements.py +485 -0
- oscura/extensibility/plugins.py +414 -0
- oscura/extensibility/registry.py +346 -0
- oscura/extensibility/templates.py +913 -0
- oscura/extensibility/validation.py +651 -0
- oscura/filtering/__init__.py +89 -0
- oscura/filtering/base.py +563 -0
- oscura/filtering/convenience.py +564 -0
- oscura/filtering/design.py +725 -0
- oscura/filtering/filters.py +32 -0
- oscura/filtering/introspection.py +605 -0
- oscura/guidance/__init__.py +24 -0
- oscura/guidance/recommender.py +429 -0
- oscura/guidance/wizard.py +518 -0
- oscura/inference/__init__.py +251 -0
- oscura/inference/active_learning/README.md +153 -0
- oscura/inference/active_learning/__init__.py +38 -0
- oscura/inference/active_learning/lstar.py +257 -0
- oscura/inference/active_learning/observation_table.py +230 -0
- oscura/inference/active_learning/oracle.py +78 -0
- oscura/inference/active_learning/teachers/__init__.py +15 -0
- oscura/inference/active_learning/teachers/simulator.py +192 -0
- oscura/inference/adaptive_tuning.py +453 -0
- oscura/inference/alignment.py +653 -0
- oscura/inference/bayesian.py +943 -0
- oscura/inference/binary.py +1016 -0
- oscura/inference/crc_reverse.py +711 -0
- oscura/inference/logic.py +288 -0
- oscura/inference/message_format.py +1305 -0
- oscura/inference/protocol.py +417 -0
- oscura/inference/protocol_dsl.py +1084 -0
- oscura/inference/protocol_library.py +1230 -0
- oscura/inference/sequences.py +809 -0
- oscura/inference/signal_intelligence.py +1509 -0
- oscura/inference/spectral.py +215 -0
- oscura/inference/state_machine.py +634 -0
- oscura/inference/stream.py +918 -0
- oscura/integrations/__init__.py +59 -0
- oscura/integrations/llm.py +1827 -0
- oscura/jupyter/__init__.py +32 -0
- oscura/jupyter/display.py +268 -0
- oscura/jupyter/magic.py +334 -0
- oscura/loaders/__init__.py +526 -0
- oscura/loaders/binary.py +69 -0
- oscura/loaders/configurable.py +1255 -0
- oscura/loaders/csv.py +26 -0
- oscura/loaders/csv_loader.py +473 -0
- oscura/loaders/hdf5.py +9 -0
- oscura/loaders/hdf5_loader.py +510 -0
- oscura/loaders/lazy.py +370 -0
- oscura/loaders/mmap_loader.py +583 -0
- oscura/loaders/numpy_loader.py +436 -0
- oscura/loaders/pcap.py +432 -0
- oscura/loaders/preprocessing.py +368 -0
- oscura/loaders/rigol.py +287 -0
- oscura/loaders/sigrok.py +321 -0
- oscura/loaders/tdms.py +367 -0
- oscura/loaders/tektronix.py +711 -0
- oscura/loaders/validation.py +584 -0
- oscura/loaders/vcd.py +464 -0
- oscura/loaders/wav.py +233 -0
- oscura/math/__init__.py +45 -0
- oscura/math/arithmetic.py +824 -0
- oscura/math/interpolation.py +413 -0
- oscura/onboarding/__init__.py +39 -0
- oscura/onboarding/help.py +498 -0
- oscura/onboarding/tutorials.py +405 -0
- oscura/onboarding/wizard.py +466 -0
- oscura/optimization/__init__.py +19 -0
- oscura/optimization/parallel.py +440 -0
- oscura/optimization/search.py +532 -0
- oscura/pipeline/__init__.py +43 -0
- oscura/pipeline/base.py +338 -0
- oscura/pipeline/composition.py +242 -0
- oscura/pipeline/parallel.py +448 -0
- oscura/pipeline/pipeline.py +375 -0
- oscura/pipeline/reverse_engineering.py +1119 -0
- oscura/plugins/__init__.py +122 -0
- oscura/plugins/base.py +272 -0
- oscura/plugins/cli.py +497 -0
- oscura/plugins/discovery.py +411 -0
- oscura/plugins/isolation.py +418 -0
- oscura/plugins/lifecycle.py +959 -0
- oscura/plugins/manager.py +493 -0
- oscura/plugins/registry.py +421 -0
- oscura/plugins/versioning.py +372 -0
- oscura/py.typed +0 -0
- oscura/quality/__init__.py +65 -0
- oscura/quality/ensemble.py +740 -0
- oscura/quality/explainer.py +338 -0
- oscura/quality/scoring.py +616 -0
- oscura/quality/warnings.py +456 -0
- oscura/reporting/__init__.py +248 -0
- oscura/reporting/advanced.py +1234 -0
- oscura/reporting/analyze.py +448 -0
- oscura/reporting/argument_preparer.py +596 -0
- oscura/reporting/auto_report.py +507 -0
- oscura/reporting/batch.py +615 -0
- oscura/reporting/chart_selection.py +223 -0
- oscura/reporting/comparison.py +330 -0
- oscura/reporting/config.py +615 -0
- oscura/reporting/content/__init__.py +39 -0
- oscura/reporting/content/executive.py +127 -0
- oscura/reporting/content/filtering.py +191 -0
- oscura/reporting/content/minimal.py +257 -0
- oscura/reporting/content/verbosity.py +162 -0
- oscura/reporting/core.py +508 -0
- oscura/reporting/core_formats/__init__.py +17 -0
- oscura/reporting/core_formats/multi_format.py +210 -0
- oscura/reporting/engine.py +836 -0
- oscura/reporting/export.py +366 -0
- oscura/reporting/formatting/__init__.py +129 -0
- oscura/reporting/formatting/emphasis.py +81 -0
- oscura/reporting/formatting/numbers.py +403 -0
- oscura/reporting/formatting/standards.py +55 -0
- oscura/reporting/formatting.py +466 -0
- oscura/reporting/html.py +578 -0
- oscura/reporting/index.py +590 -0
- oscura/reporting/multichannel.py +296 -0
- oscura/reporting/output.py +379 -0
- oscura/reporting/pdf.py +373 -0
- oscura/reporting/plots.py +731 -0
- oscura/reporting/pptx_export.py +360 -0
- oscura/reporting/renderers/__init__.py +11 -0
- oscura/reporting/renderers/pdf.py +94 -0
- oscura/reporting/sections.py +471 -0
- oscura/reporting/standards.py +680 -0
- oscura/reporting/summary_generator.py +368 -0
- oscura/reporting/tables.py +397 -0
- oscura/reporting/template_system.py +724 -0
- oscura/reporting/templates/__init__.py +15 -0
- oscura/reporting/templates/definition.py +205 -0
- oscura/reporting/templates/index.html +649 -0
- oscura/reporting/templates/index.md +173 -0
- oscura/schemas/__init__.py +158 -0
- oscura/schemas/bus_configuration.json +322 -0
- oscura/schemas/device_mapping.json +182 -0
- oscura/schemas/packet_format.json +418 -0
- oscura/schemas/protocol_definition.json +363 -0
- oscura/search/__init__.py +16 -0
- oscura/search/anomaly.py +292 -0
- oscura/search/context.py +149 -0
- oscura/search/pattern.py +160 -0
- oscura/session/__init__.py +34 -0
- oscura/session/annotations.py +289 -0
- oscura/session/history.py +313 -0
- oscura/session/session.py +445 -0
- oscura/streaming/__init__.py +43 -0
- oscura/streaming/chunked.py +611 -0
- oscura/streaming/progressive.py +393 -0
- oscura/streaming/realtime.py +622 -0
- oscura/testing/__init__.py +54 -0
- oscura/testing/synthetic.py +808 -0
- oscura/triggering/__init__.py +68 -0
- oscura/triggering/base.py +229 -0
- oscura/triggering/edge.py +353 -0
- oscura/triggering/pattern.py +344 -0
- oscura/triggering/pulse.py +581 -0
- oscura/triggering/window.py +453 -0
- oscura/ui/__init__.py +48 -0
- oscura/ui/formatters.py +526 -0
- oscura/ui/progressive_display.py +340 -0
- oscura/utils/__init__.py +99 -0
- oscura/utils/autodetect.py +338 -0
- oscura/utils/buffer.py +389 -0
- oscura/utils/lazy.py +407 -0
- oscura/utils/lazy_imports.py +147 -0
- oscura/utils/memory.py +836 -0
- oscura/utils/memory_advanced.py +1326 -0
- oscura/utils/memory_extensions.py +465 -0
- oscura/utils/progressive.py +352 -0
- oscura/utils/windowing.py +362 -0
- oscura/visualization/__init__.py +321 -0
- oscura/visualization/accessibility.py +526 -0
- oscura/visualization/annotations.py +374 -0
- oscura/visualization/axis_scaling.py +305 -0
- oscura/visualization/colors.py +453 -0
- oscura/visualization/digital.py +337 -0
- oscura/visualization/eye.py +420 -0
- oscura/visualization/histogram.py +281 -0
- oscura/visualization/interactive.py +858 -0
- oscura/visualization/jitter.py +702 -0
- oscura/visualization/keyboard.py +394 -0
- oscura/visualization/layout.py +365 -0
- oscura/visualization/optimization.py +1028 -0
- oscura/visualization/palettes.py +446 -0
- oscura/visualization/plot.py +92 -0
- oscura/visualization/power.py +290 -0
- oscura/visualization/power_extended.py +626 -0
- oscura/visualization/presets.py +467 -0
- oscura/visualization/protocols.py +932 -0
- oscura/visualization/render.py +207 -0
- oscura/visualization/rendering.py +444 -0
- oscura/visualization/reverse_engineering.py +791 -0
- oscura/visualization/signal_integrity.py +808 -0
- oscura/visualization/specialized.py +553 -0
- oscura/visualization/spectral.py +811 -0
- oscura/visualization/styles.py +381 -0
- oscura/visualization/thumbnails.py +311 -0
- oscura/visualization/time_axis.py +351 -0
- oscura/visualization/waveform.py +367 -0
- oscura/workflow/__init__.py +13 -0
- oscura/workflow/dag.py +377 -0
- oscura/workflows/__init__.py +58 -0
- oscura/workflows/compliance.py +280 -0
- oscura/workflows/digital.py +272 -0
- oscura/workflows/multi_trace.py +502 -0
- oscura/workflows/power.py +178 -0
- oscura/workflows/protocol.py +492 -0
- oscura/workflows/reverse_engineering.py +639 -0
- oscura/workflows/signal_integrity.py +227 -0
- oscura-0.1.0.dist-info/METADATA +300 -0
- oscura-0.1.0.dist-info/RECORD +463 -0
- oscura-0.1.0.dist-info/entry_points.txt +2 -0
- {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/licenses/LICENSE +1 -1
- oscura-0.0.1.dist-info/METADATA +0 -63
- oscura-0.0.1.dist-info/RECORD +0 -5
- {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,670 @@
|
|
|
1
|
+
"""Payload pattern search and delimiter detection.
|
|
2
|
+
|
|
3
|
+
RE-PAY-002: Payload Pattern Search
|
|
4
|
+
RE-PAY-003: Payload Delimiter Detection
|
|
5
|
+
|
|
6
|
+
This module provides pattern matching, delimiter detection, and
|
|
7
|
+
message boundary finding for binary payloads.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import re
|
|
13
|
+
from collections.abc import Sequence
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from typing import Any, Literal, cast
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from oscura.analyzers.packet.payload_extraction import PayloadExtractor
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class PatternMatch:
    """A single hit produced by a payload pattern search.

    Implements RE-PAY-002: Pattern match with location info.

    Attributes:
        pattern_name: Label of the pattern that produced this hit.
        offset: Byte position of the hit inside the payload.
        matched: The bytes that satisfied the pattern.
        packet_index: Index of the packet the payload was taken from.
        context: Bytes surrounding the hit; empty unless supplied.
    """

    # Required location/identity fields come first; only ``context`` is optional.
    pattern_name: str
    offset: int
    matched: bytes
    packet_index: int
    context: bytes = b""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class DelimiterResult:
    """Outcome of a message-delimiter detection pass.

    Implements RE-PAY-003: Delimiter detection result.

    Attributes:
        delimiter: The delimiter byte sequence that was selected.
        delimiter_type: Kind of delimiter (fixed, length_prefix, pattern).
        confidence: Confidence in the detection, in the range 0-1.
        occurrences: How many times the delimiter was seen.
        positions: Offsets at which the delimiter was found.
    """

    delimiter: bytes
    delimiter_type: Literal["fixed", "length_prefix", "pattern"]
    confidence: float
    occurrences: int
    # default_factory gives every instance its own list.
    positions: list[int] = field(default_factory=list)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass
class LengthPrefixResult:
    """Outcome of a length-prefix format detection pass.

    Implements RE-PAY-003: Length prefix format detection.

    Attributes:
        detected: True when a length prefix was found.
        length_bytes: Width of the length field in bytes.
        endian: Byte order of the length field (big or little).
        offset: Position of the length field relative to the message start.
        includes_length: True when the encoded length also counts the
            length field itself.
        confidence: Confidence in the detection, in the range 0-1.
    """

    detected: bool
    # Everything below defaults to a "nothing found" value so that
    # ``LengthPrefixResult(detected=False)`` is a complete negative result.
    length_bytes: int = 0
    endian: Literal["big", "little"] = "big"
    offset: int = 0
    includes_length: bool = False
    confidence: float = 0.0
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@dataclass
class MessageBoundary:
    """Location and content of one message within a byte stream.

    Implements RE-PAY-003: Message boundary detection.

    Attributes:
        start: Offset at which the message begins.
        end: Offset at which the message ends.
        length: Length of the message.
        data: The message bytes.
        index: Ordinal of the message.
    """

    # All fields are required; no consistency check between them is performed here.
    start: int
    end: int
    length: int
    data: bytes
    index: int
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# =============================================================================
|
|
109
|
+
# RE-PAY-002: Pattern Search Functions
|
|
110
|
+
# =============================================================================
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def search_pattern(
    packets: Sequence[dict[str, Any] | bytes],
    pattern: bytes | str,
    pattern_type: Literal["exact", "wildcard", "regex"] = "exact",
    context_bytes: int = 8,
) -> list[PatternMatch]:
    """Locate every occurrence of one pattern across packet payloads.

    Implements RE-PAY-002: Payload Pattern Search.

    Args:
        packets: Packets whose payloads are searched, in order.
        pattern: The pattern to look for.
        pattern_type: How ``pattern`` is interpreted when matching.
        context_bytes: How many bytes before and after each hit are
            captured as context (clipped to the payload bounds).

    Returns:
        One PatternMatch per hit, ordered by packet and then by the order
        in which the matcher reported the hits.

    Example:
        >>> matches = search_pattern(packets, b'\\x00\\x01\\x00\\x00')
        >>> for m in matches:
        ...     print(f"Found at packet {m.packet_index}, offset {m.offset}")
    """
    payload_source = PayloadExtractor()
    found = []

    for packet_idx, packet in enumerate(packets):
        raw = payload_source.extract_payload(packet)
        # Buffer-like payloads are copied to bytes before matching and slicing.
        if isinstance(raw, memoryview | np.ndarray):
            raw = bytes(raw)

        for hit_offset, hit_bytes in _find_pattern_in_data(raw, pattern, pattern_type):
            # Context window around the hit, clipped to the payload.
            window_start = max(0, hit_offset - context_bytes)
            window_end = min(len(raw), hit_offset + len(hit_bytes) + context_bytes)

            # Byte patterns are labelled by their hex form, others by str().
            if isinstance(pattern, bytes):
                label = pattern.hex()
            else:
                label = str(pattern)

            found.append(
                PatternMatch(
                    pattern_name=label,
                    offset=hit_offset,
                    matched=hit_bytes,
                    packet_index=packet_idx,
                    context=raw[window_start:window_end],
                )
            )

    return found
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def search_patterns(
    packets: Sequence[dict[str, Any] | bytes],
    patterns: dict[str, bytes | str],
    context_bytes: int = 8,
) -> dict[str, list[PatternMatch]]:
    """Search for multiple patterns simultaneously.

    Implements RE-PAY-002: Multi-pattern search.

    The matching mode of each pattern is inferred from its value: a ``bytes``
    pattern containing ``??`` is matched as a wildcard pattern, any other
    ``bytes`` pattern is matched exactly, and a non-bytes pattern is matched
    as a regex.

    Args:
        packets: Sequence of packets to search.
        patterns: Dictionary mapping names to patterns.
        context_bytes: Number of context bytes around match.

    Returns:
        Dictionary mapping pattern names to match lists. Every name in
        ``patterns`` is present, with an empty list when nothing matched.

    Example:
        >>> signatures = {
        ...     "header_a": b'\\xAA\\x55',
        ...     "header_b": b'\\xDE\\xAD',
        ... }
        >>> results = search_patterns(packets, signatures)
        >>> for name, matches in results.items():
        ...     print(f"{name}: {len(matches)} matches")
    """
    results: dict[str, list[PatternMatch]] = {name: [] for name in patterns}

    # The matching mode depends only on the pattern, so classify each pattern
    # once here rather than once per packet.
    pattern_kinds: dict[str, Literal["exact", "wildcard", "regex"]] = {}
    for name, pattern in patterns.items():
        if not isinstance(pattern, bytes):
            pattern_kinds[name] = "regex"
        elif b"??" in pattern:
            # A literal "\x??" also contains "??", so this one test covers both.
            pattern_kinds[name] = "wildcard"
        else:
            pattern_kinds[name] = "exact"

    extractor = PayloadExtractor()

    for i, packet in enumerate(packets):
        payload = extractor.extract_payload(packet)
        # Buffer-like payloads are copied to bytes before matching and slicing.
        if isinstance(payload, memoryview | np.ndarray):
            payload = bytes(payload)

        for name, pattern in patterns.items():
            matches = _find_pattern_in_data(payload, pattern, pattern_kinds[name])

            for offset, matched in matches:
                # Context window around the hit, clipped to the payload.
                start = max(0, offset - context_bytes)
                end = min(len(payload), offset + len(matched) + context_bytes)

                results[name].append(
                    PatternMatch(
                        pattern_name=name,
                        offset=offset,
                        matched=matched,
                        packet_index=i,
                        context=payload[start:end],
                    )
                )

    return results
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def filter_by_pattern(
    packets: Sequence[dict[str, Any] | bytes],
    pattern: bytes | str,
    pattern_type: Literal["exact", "wildcard", "regex"] = "exact",
) -> list[dict[str, Any] | bytes]:
    """Keep only the packets whose payload contains a pattern.

    Implements RE-PAY-002: Pattern-based filtering.

    Args:
        packets: Packets to examine, in order.
        pattern: The pattern each payload is tested against.
        pattern_type: How ``pattern`` is interpreted when matching.

    Returns:
        The original packet objects (not their payloads) that produced at
        least one match, in input order.
    """
    payload_source = PayloadExtractor()
    kept = []

    for packet in packets:
        raw = payload_source.extract_payload(packet)
        # Buffer-like payloads are copied to bytes before matching.
        if isinstance(raw, memoryview | np.ndarray):
            raw = bytes(raw)

        hits = _find_pattern_in_data(raw, pattern, pattern_type)
        if len(hits) == 0:
            continue
        kept.append(packet)

    return kept
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
# =============================================================================
|
|
263
|
+
# RE-PAY-003: Delimiter Detection Functions
|
|
264
|
+
# =============================================================================
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def detect_delimiter(
    payloads: Sequence[bytes] | bytes,
    candidates: list[bytes] | None = None,
) -> DelimiterResult:
    """Automatically detect message delimiter.

    Implements RE-PAY-003: Delimiter detection.

    Each candidate is scored by how often it occurs (non-overlapping) and by
    how evenly spaced the occurrences are. The candidate with the highest
    score wins; on a tie the candidate listed first is kept.

    Args:
        payloads: A single byte buffer, or a sequence of byte chunks that is
            concatenated before analysis.
        candidates: Optional list of candidate delimiters to test. Empty
            candidates are ignored. Defaults to CRLF, LF, NUL and CR.

    Returns:
        DelimiterResult with detected delimiter info. When the data is empty
        or no candidate occurs at least twice, the result has an empty
        delimiter, zero confidence and zero occurrences.

    Example:
        >>> data = b'msg1\\r\\nmsg2\\r\\nmsg3\\r\\n'
        >>> result = detect_delimiter(data)
        >>> print(f"Delimiter: {result.delimiter!r}")
    """
    # A single buffer is analysed as-is; anything else is treated as a
    # sequence of chunks (not just list/tuple) and concatenated.
    if isinstance(payloads, bytes | bytearray | memoryview):
        data = bytes(payloads)
    else:
        data = b"".join(payloads)

    if not data:
        return DelimiterResult(
            delimiter=b"",
            delimiter_type="fixed",
            confidence=0.0,
            occurrences=0,
        )

    if candidates is None:
        # CRLF is listed before LF and CR so it wins the tie on CRLF data.
        candidates = [
            b"\r\n",  # CRLF
            b"\n",  # LF
            b"\x00",  # Null
            b"\r",  # CR
        ]

    best_result = None
    best_score = 0.0

    for delim in candidates:
        if len(delim) == 0:
            continue

        # Collect non-overlapping occurrences in one pass; the number of
        # positions is the occurrence count.
        positions = []
        pos = data.find(delim)
        while pos != -1:
            positions.append(pos)
            pos = data.find(delim, pos + len(delim))

        count = len(positions)
        if count < 2:
            # A delimiter must repeat to be considered.
            continue

        # Regularity from the coefficient of variation of the gaps between
        # occurrences. Gaps are always >= len(delim) > 0, so the mean is
        # never zero.
        intervals = [later - earlier for earlier, later in zip(positions, positions[1:])]
        mean_interval = sum(intervals) / len(intervals)
        variance = sum((x - mean_interval) ** 2 for x in intervals) / len(intervals)
        cv = (variance**0.5) / mean_interval
        regularity = 1.0 / (1.0 + cv)

        # Score combines frequency and regularity
        score = count * (0.5 + 0.5 * regularity)

        if score > best_score:
            best_score = score
            best_result = DelimiterResult(
                delimiter=delim,
                delimiter_type="fixed",
                confidence=min(1.0, regularity * 0.8 + 0.2 * min(1.0, count / 10)),
                occurrences=count,
                positions=positions,
            )

    if best_result is None:
        return DelimiterResult(
            delimiter=b"",
            delimiter_type="fixed",
            confidence=0.0,
            occurrences=0,
        )

    return best_result
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def detect_length_prefix(
    payloads: Sequence[bytes],
    max_length_bytes: int = 4,
) -> LengthPrefixResult:
    """Detect length-prefixed message format.

    Implements RE-PAY-003: Length prefix detection.

    The payloads are concatenated into one stream and every combination of
    length-field width (4, 2, 1 bytes), byte order, header offset (0-7) and
    "length counts the header" flag is scored with ``_test_length_prefix``.
    The highest-scoring combination that matched at least three consecutive
    messages is reported.

    Args:
        payloads: List of payload samples.
        max_length_bytes: Maximum length field size to test.

    Returns:
        LengthPrefixResult with detected format, or a result with
        ``detected=False`` when no candidate qualifies.

    Example:
        >>> result = detect_length_prefix(payloads)
        >>> if result.detected:
        ...     print(f"Length field: {result.length_bytes} bytes, {result.endian}")
    """
    if not payloads:
        return LengthPrefixResult(detected=False)

    # Analyse the samples as one contiguous stream.
    stream = b"".join(payloads)

    winner = LengthPrefixResult(detected=False)
    top_score = 0.0
    byte_orders: tuple[Literal["big", "little"], ...] = ("big", "little")

    # Wider fields are evaluated first and a candidate only replaces the
    # current winner on a strictly higher score, so ties resolve in favour of
    # the wider length field.
    widths = [width for width in (4, 2, 1) if width <= max_length_bytes]

    for width in widths:
        # A non-positive limit makes range() empty, which skips widths that
        # do not fit into the stream at all.
        offset_limit = min(8, len(stream) - width)

        for byte_order in byte_orders:
            for header_offset in range(offset_limit):
                for counts_header in (False, True):
                    candidate_score, message_count = _test_length_prefix(
                        stream, width, byte_order, header_offset, counts_header
                    )

                    if message_count >= 3 and candidate_score > top_score:
                        top_score = candidate_score
                        winner = LengthPrefixResult(
                            detected=True,
                            length_bytes=width,
                            endian=byte_order,
                            offset=header_offset,
                            includes_length=counts_header,
                            confidence=candidate_score,
                        )

    return winner
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def find_message_boundaries(
    payloads: Sequence[bytes] | bytes,
    delimiter: bytes | DelimiterResult | None = None,
    length_prefix: LengthPrefixResult | None = None,
) -> list[MessageBoundary]:
    """Find message boundaries in payload data.

    Implements RE-PAY-003: Message boundary detection.

    Length-prefix framing is tried first; delimiter splitting is only used
    when no length prefix is detected or when length-prefixed extraction
    yields no messages.

    Args:
        payloads: Payload data or list of payloads. Any sequence of byte
            strings is concatenated; a single bytes-like object is used as is.
        delimiter: Delimiter to use (auto-detect if None).
        length_prefix: Length prefix format (test if None).

    Returns:
        List of MessageBoundary objects. Empty when the input is empty or
        consists solely of delimiters.

    Example:
        >>> boundaries = find_message_boundaries(data)
        >>> for b in boundaries:
        ...     print(f"Message {b.index}: {b.length} bytes")
    """
    # Normalise the input to one contiguous bytes object. Checking for the
    # bytes-like case (instead of list/tuple) lets every other sequence of
    # payloads be joined rather than failing later on.
    if isinstance(payloads, (bytes, bytearray, memoryview)):
        data = bytes(payloads)
    else:
        data = b"".join(payloads)

    if not data:
        return []

    # Try length prefix first
    if length_prefix is None:
        length_prefix = detect_length_prefix([data])

    if length_prefix.detected:
        framed = _extract_length_prefixed_messages(data, length_prefix)
        if framed:
            return framed

    # Fall back to delimiter
    if delimiter is None:
        delimiter = detect_delimiter(data)

    delim = delimiter.delimiter if isinstance(delimiter, DelimiterResult) else delimiter

    if not delim:
        # No delimiter found, return whole data as one message
        return [MessageBoundary(start=0, end=len(data), length=len(data), data=data, index=0)]

    boundaries: list[MessageBoundary] = []
    current_offset = 0

    for part in data.split(delim):
        if part:  # Skip empty parts
            boundaries.append(
                MessageBoundary(
                    start=current_offset,
                    end=current_offset + len(part),
                    length=len(part),
                    data=part,
                    index=len(boundaries),
                )
            )
        # Empty parts still consume one delimiter in the original stream, so
        # the offset advances for them as well.
        current_offset += len(part) + len(delim)

    return boundaries
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def segment_messages(
    payloads: Sequence[bytes] | bytes,
    delimiter: bytes | None = None,
    length_prefix: LengthPrefixResult | None = None,
) -> list[bytes]:
    """Segment stream into individual messages.

    Implements RE-PAY-003: Message segmentation.

    Convenience wrapper around ``find_message_boundaries`` that keeps only
    the raw bytes of each detected message.

    Args:
        payloads: Payload data or list of payloads.
        delimiter: Delimiter to use (auto-detect if None).
        length_prefix: Length prefix format (auto-detect if None).

    Returns:
        List of message bytes.
    """
    return [
        boundary.data
        for boundary in find_message_boundaries(
            payloads,
            delimiter=delimiter,
            length_prefix=length_prefix,
        )
    ]
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
# =============================================================================
|
|
530
|
+
# Helper Functions
|
|
531
|
+
# =============================================================================
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def _find_pattern_in_data(
|
|
535
|
+
data: bytes,
|
|
536
|
+
pattern: bytes | str,
|
|
537
|
+
pattern_type: str,
|
|
538
|
+
) -> list[tuple[int, bytes]]:
|
|
539
|
+
"""Find pattern occurrences in data."""
|
|
540
|
+
matches = []
|
|
541
|
+
|
|
542
|
+
if pattern_type == "exact":
|
|
543
|
+
if isinstance(pattern, str):
|
|
544
|
+
pattern = pattern.encode()
|
|
545
|
+
pos = 0
|
|
546
|
+
while True:
|
|
547
|
+
pos = data.find(pattern, pos)
|
|
548
|
+
if pos == -1:
|
|
549
|
+
break
|
|
550
|
+
matches.append((pos, pattern))
|
|
551
|
+
pos += 1
|
|
552
|
+
|
|
553
|
+
elif pattern_type == "wildcard":
|
|
554
|
+
# Convert wildcard pattern to regex
|
|
555
|
+
if isinstance(pattern, bytes):
|
|
556
|
+
# Replace ?? with . for single byte match
|
|
557
|
+
regex_pattern = pattern.replace(b"??", b".")
|
|
558
|
+
try:
|
|
559
|
+
for match in re.finditer(regex_pattern, data, re.DOTALL):
|
|
560
|
+
matches.append((match.start(), match.group()))
|
|
561
|
+
except re.error:
|
|
562
|
+
pass
|
|
563
|
+
|
|
564
|
+
elif pattern_type == "regex":
|
|
565
|
+
if isinstance(pattern, str):
|
|
566
|
+
pattern = pattern.encode()
|
|
567
|
+
try:
|
|
568
|
+
for match in re.finditer(pattern, data, re.DOTALL):
|
|
569
|
+
matches.append((match.start(), match.group()))
|
|
570
|
+
except re.error:
|
|
571
|
+
pass
|
|
572
|
+
|
|
573
|
+
return matches
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def _test_length_prefix(
|
|
577
|
+
data: bytes,
|
|
578
|
+
length_bytes: int,
|
|
579
|
+
endian: str,
|
|
580
|
+
offset: int,
|
|
581
|
+
includes_length: bool,
|
|
582
|
+
) -> tuple[float, int]:
|
|
583
|
+
"""Test if data follows a length-prefix pattern."""
|
|
584
|
+
matches = 0
|
|
585
|
+
pos = 0
|
|
586
|
+
|
|
587
|
+
while pos + offset + length_bytes <= len(data):
|
|
588
|
+
# Read length field
|
|
589
|
+
length_data = data[pos + offset : pos + offset + length_bytes]
|
|
590
|
+
if endian == "big":
|
|
591
|
+
length = int.from_bytes(length_data, "big")
|
|
592
|
+
else:
|
|
593
|
+
length = int.from_bytes(length_data, "little")
|
|
594
|
+
|
|
595
|
+
if includes_length:
|
|
596
|
+
expected_end = pos + length
|
|
597
|
+
else:
|
|
598
|
+
expected_end = pos + offset + length_bytes + length
|
|
599
|
+
|
|
600
|
+
# Check if this makes sense
|
|
601
|
+
if 0 < length < 65536 and expected_end <= len(data):
|
|
602
|
+
matches += 1
|
|
603
|
+
pos = expected_end
|
|
604
|
+
else:
|
|
605
|
+
break
|
|
606
|
+
|
|
607
|
+
# Score based on matches and coverage
|
|
608
|
+
coverage = pos / len(data) if len(data) > 0 else 0
|
|
609
|
+
score = min(1.0, matches / 5) * coverage
|
|
610
|
+
|
|
611
|
+
return score, matches
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
def _extract_length_prefixed_messages(
    data: bytes,
    length_prefix: LengthPrefixResult,
) -> list[MessageBoundary]:
    """Extract messages using detected length prefix format.

    Parses ``data`` as back-to-back messages and stops at the first message
    whose length is invalid or which would run past the end of ``data``; any
    trailing bytes after that point are not reported.

    Args:
        data: Byte stream to split.
        length_prefix: Framing description; its ``offset``, ``length_bytes``,
            ``endian`` and ``includes_length`` attributes are used.

    Returns:
        List of MessageBoundary objects. Each entry spans the whole message,
        header included, and ``index`` counts from zero.
    """
    offset = length_prefix.offset
    header_size = offset + length_prefix.length_bytes
    byteorder = "big" if length_prefix.endian == "big" else "little"
    includes_length = length_prefix.includes_length

    # A length that counts the header must at least cover the header itself;
    # otherwise the extracted slice would not contain its own length field.
    min_length = max(1, header_size) if includes_length else 1

    boundaries: list[MessageBoundary] = []
    pos = 0

    while pos + header_size <= len(data):
        length = int.from_bytes(data[pos + offset : pos + header_size], byteorder)

        if includes_length:
            end = pos + length
        else:
            end = pos + header_size + length

        if length < min_length or end > len(data):
            break

        boundaries.append(
            MessageBoundary(
                start=pos,
                end=end,
                length=end - pos,
                data=data[pos:end],
                index=len(boundaries),
            )
        )
        pos = end

    return boundaries
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
# Names exported by ``from <this module> import *``.
__all__ = [
    "DelimiterResult",
    "LengthPrefixResult",
    "MessageBoundary",
    "PatternMatch",
    "detect_delimiter",
    "detect_length_prefix",
    "filter_by_pattern",
    "find_message_boundaries",
    "search_pattern",
    "search_patterns",
    "segment_messages",
]
|