oscura 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscura/__init__.py +813 -8
- oscura/__main__.py +392 -0
- oscura/analyzers/__init__.py +37 -0
- oscura/analyzers/digital/__init__.py +177 -0
- oscura/analyzers/digital/bus.py +691 -0
- oscura/analyzers/digital/clock.py +805 -0
- oscura/analyzers/digital/correlation.py +720 -0
- oscura/analyzers/digital/edges.py +632 -0
- oscura/analyzers/digital/extraction.py +413 -0
- oscura/analyzers/digital/quality.py +878 -0
- oscura/analyzers/digital/signal_quality.py +877 -0
- oscura/analyzers/digital/thresholds.py +708 -0
- oscura/analyzers/digital/timing.py +1104 -0
- oscura/analyzers/eye/__init__.py +46 -0
- oscura/analyzers/eye/diagram.py +434 -0
- oscura/analyzers/eye/metrics.py +555 -0
- oscura/analyzers/jitter/__init__.py +83 -0
- oscura/analyzers/jitter/ber.py +333 -0
- oscura/analyzers/jitter/decomposition.py +759 -0
- oscura/analyzers/jitter/measurements.py +413 -0
- oscura/analyzers/jitter/spectrum.py +220 -0
- oscura/analyzers/measurements.py +40 -0
- oscura/analyzers/packet/__init__.py +171 -0
- oscura/analyzers/packet/daq.py +1077 -0
- oscura/analyzers/packet/metrics.py +437 -0
- oscura/analyzers/packet/parser.py +327 -0
- oscura/analyzers/packet/payload.py +2156 -0
- oscura/analyzers/packet/payload_analysis.py +1312 -0
- oscura/analyzers/packet/payload_extraction.py +236 -0
- oscura/analyzers/packet/payload_patterns.py +670 -0
- oscura/analyzers/packet/stream.py +359 -0
- oscura/analyzers/patterns/__init__.py +266 -0
- oscura/analyzers/patterns/clustering.py +1036 -0
- oscura/analyzers/patterns/discovery.py +539 -0
- oscura/analyzers/patterns/learning.py +797 -0
- oscura/analyzers/patterns/matching.py +1091 -0
- oscura/analyzers/patterns/periodic.py +650 -0
- oscura/analyzers/patterns/sequences.py +767 -0
- oscura/analyzers/power/__init__.py +116 -0
- oscura/analyzers/power/ac_power.py +391 -0
- oscura/analyzers/power/basic.py +383 -0
- oscura/analyzers/power/conduction.py +314 -0
- oscura/analyzers/power/efficiency.py +297 -0
- oscura/analyzers/power/ripple.py +356 -0
- oscura/analyzers/power/soa.py +372 -0
- oscura/analyzers/power/switching.py +479 -0
- oscura/analyzers/protocol/__init__.py +150 -0
- oscura/analyzers/protocols/__init__.py +150 -0
- oscura/analyzers/protocols/base.py +500 -0
- oscura/analyzers/protocols/can.py +620 -0
- oscura/analyzers/protocols/can_fd.py +448 -0
- oscura/analyzers/protocols/flexray.py +405 -0
- oscura/analyzers/protocols/hdlc.py +399 -0
- oscura/analyzers/protocols/i2c.py +368 -0
- oscura/analyzers/protocols/i2s.py +296 -0
- oscura/analyzers/protocols/jtag.py +393 -0
- oscura/analyzers/protocols/lin.py +445 -0
- oscura/analyzers/protocols/manchester.py +333 -0
- oscura/analyzers/protocols/onewire.py +501 -0
- oscura/analyzers/protocols/spi.py +334 -0
- oscura/analyzers/protocols/swd.py +325 -0
- oscura/analyzers/protocols/uart.py +393 -0
- oscura/analyzers/protocols/usb.py +495 -0
- oscura/analyzers/signal_integrity/__init__.py +63 -0
- oscura/analyzers/signal_integrity/embedding.py +294 -0
- oscura/analyzers/signal_integrity/equalization.py +370 -0
- oscura/analyzers/signal_integrity/sparams.py +484 -0
- oscura/analyzers/spectral/__init__.py +53 -0
- oscura/analyzers/spectral/chunked.py +273 -0
- oscura/analyzers/spectral/chunked_fft.py +571 -0
- oscura/analyzers/spectral/chunked_wavelet.py +391 -0
- oscura/analyzers/spectral/fft.py +92 -0
- oscura/analyzers/statistical/__init__.py +250 -0
- oscura/analyzers/statistical/checksum.py +923 -0
- oscura/analyzers/statistical/chunked_corr.py +228 -0
- oscura/analyzers/statistical/classification.py +778 -0
- oscura/analyzers/statistical/entropy.py +1113 -0
- oscura/analyzers/statistical/ngrams.py +614 -0
- oscura/analyzers/statistics/__init__.py +119 -0
- oscura/analyzers/statistics/advanced.py +885 -0
- oscura/analyzers/statistics/basic.py +263 -0
- oscura/analyzers/statistics/correlation.py +630 -0
- oscura/analyzers/statistics/distribution.py +298 -0
- oscura/analyzers/statistics/outliers.py +463 -0
- oscura/analyzers/statistics/streaming.py +93 -0
- oscura/analyzers/statistics/trend.py +520 -0
- oscura/analyzers/validation.py +598 -0
- oscura/analyzers/waveform/__init__.py +36 -0
- oscura/analyzers/waveform/measurements.py +943 -0
- oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
- oscura/analyzers/waveform/spectral.py +1689 -0
- oscura/analyzers/waveform/wavelets.py +298 -0
- oscura/api/__init__.py +62 -0
- oscura/api/dsl.py +538 -0
- oscura/api/fluent.py +571 -0
- oscura/api/operators.py +498 -0
- oscura/api/optimization.py +392 -0
- oscura/api/profiling.py +396 -0
- oscura/automotive/__init__.py +73 -0
- oscura/automotive/can/__init__.py +52 -0
- oscura/automotive/can/analysis.py +356 -0
- oscura/automotive/can/checksum.py +250 -0
- oscura/automotive/can/correlation.py +212 -0
- oscura/automotive/can/discovery.py +355 -0
- oscura/automotive/can/message_wrapper.py +375 -0
- oscura/automotive/can/models.py +385 -0
- oscura/automotive/can/patterns.py +381 -0
- oscura/automotive/can/session.py +452 -0
- oscura/automotive/can/state_machine.py +300 -0
- oscura/automotive/can/stimulus_response.py +461 -0
- oscura/automotive/dbc/__init__.py +15 -0
- oscura/automotive/dbc/generator.py +156 -0
- oscura/automotive/dbc/parser.py +146 -0
- oscura/automotive/dtc/__init__.py +30 -0
- oscura/automotive/dtc/database.py +3036 -0
- oscura/automotive/j1939/__init__.py +14 -0
- oscura/automotive/j1939/decoder.py +745 -0
- oscura/automotive/loaders/__init__.py +35 -0
- oscura/automotive/loaders/asc.py +98 -0
- oscura/automotive/loaders/blf.py +77 -0
- oscura/automotive/loaders/csv_can.py +136 -0
- oscura/automotive/loaders/dispatcher.py +136 -0
- oscura/automotive/loaders/mdf.py +331 -0
- oscura/automotive/loaders/pcap.py +132 -0
- oscura/automotive/obd/__init__.py +14 -0
- oscura/automotive/obd/decoder.py +707 -0
- oscura/automotive/uds/__init__.py +48 -0
- oscura/automotive/uds/decoder.py +265 -0
- oscura/automotive/uds/models.py +64 -0
- oscura/automotive/visualization.py +369 -0
- oscura/batch/__init__.py +55 -0
- oscura/batch/advanced.py +627 -0
- oscura/batch/aggregate.py +300 -0
- oscura/batch/analyze.py +139 -0
- oscura/batch/logging.py +487 -0
- oscura/batch/metrics.py +556 -0
- oscura/builders/__init__.py +41 -0
- oscura/builders/signal_builder.py +1131 -0
- oscura/cli/__init__.py +14 -0
- oscura/cli/batch.py +339 -0
- oscura/cli/characterize.py +273 -0
- oscura/cli/compare.py +775 -0
- oscura/cli/decode.py +551 -0
- oscura/cli/main.py +247 -0
- oscura/cli/shell.py +350 -0
- oscura/comparison/__init__.py +66 -0
- oscura/comparison/compare.py +397 -0
- oscura/comparison/golden.py +487 -0
- oscura/comparison/limits.py +391 -0
- oscura/comparison/mask.py +434 -0
- oscura/comparison/trace_diff.py +30 -0
- oscura/comparison/visualization.py +481 -0
- oscura/compliance/__init__.py +70 -0
- oscura/compliance/advanced.py +756 -0
- oscura/compliance/masks.py +363 -0
- oscura/compliance/reporting.py +483 -0
- oscura/compliance/testing.py +298 -0
- oscura/component/__init__.py +38 -0
- oscura/component/impedance.py +365 -0
- oscura/component/reactive.py +598 -0
- oscura/component/transmission_line.py +312 -0
- oscura/config/__init__.py +191 -0
- oscura/config/defaults.py +254 -0
- oscura/config/loader.py +348 -0
- oscura/config/memory.py +271 -0
- oscura/config/migration.py +458 -0
- oscura/config/pipeline.py +1077 -0
- oscura/config/preferences.py +530 -0
- oscura/config/protocol.py +875 -0
- oscura/config/schema.py +713 -0
- oscura/config/settings.py +420 -0
- oscura/config/thresholds.py +599 -0
- oscura/convenience.py +457 -0
- oscura/core/__init__.py +299 -0
- oscura/core/audit.py +457 -0
- oscura/core/backend_selector.py +405 -0
- oscura/core/cache.py +590 -0
- oscura/core/cancellation.py +439 -0
- oscura/core/confidence.py +225 -0
- oscura/core/config.py +506 -0
- oscura/core/correlation.py +216 -0
- oscura/core/cross_domain.py +422 -0
- oscura/core/debug.py +301 -0
- oscura/core/edge_cases.py +541 -0
- oscura/core/exceptions.py +535 -0
- oscura/core/gpu_backend.py +523 -0
- oscura/core/lazy.py +832 -0
- oscura/core/log_query.py +540 -0
- oscura/core/logging.py +931 -0
- oscura/core/logging_advanced.py +952 -0
- oscura/core/memoize.py +171 -0
- oscura/core/memory_check.py +274 -0
- oscura/core/memory_guard.py +290 -0
- oscura/core/memory_limits.py +336 -0
- oscura/core/memory_monitor.py +453 -0
- oscura/core/memory_progress.py +465 -0
- oscura/core/memory_warnings.py +315 -0
- oscura/core/numba_backend.py +362 -0
- oscura/core/performance.py +352 -0
- oscura/core/progress.py +524 -0
- oscura/core/provenance.py +358 -0
- oscura/core/results.py +331 -0
- oscura/core/types.py +504 -0
- oscura/core/uncertainty.py +383 -0
- oscura/discovery/__init__.py +52 -0
- oscura/discovery/anomaly_detector.py +672 -0
- oscura/discovery/auto_decoder.py +415 -0
- oscura/discovery/comparison.py +497 -0
- oscura/discovery/quality_validator.py +528 -0
- oscura/discovery/signal_detector.py +769 -0
- oscura/dsl/__init__.py +73 -0
- oscura/dsl/commands.py +246 -0
- oscura/dsl/interpreter.py +455 -0
- oscura/dsl/parser.py +689 -0
- oscura/dsl/repl.py +172 -0
- oscura/exceptions.py +59 -0
- oscura/exploratory/__init__.py +111 -0
- oscura/exploratory/error_recovery.py +642 -0
- oscura/exploratory/fuzzy.py +513 -0
- oscura/exploratory/fuzzy_advanced.py +786 -0
- oscura/exploratory/legacy.py +831 -0
- oscura/exploratory/parse.py +358 -0
- oscura/exploratory/recovery.py +275 -0
- oscura/exploratory/sync.py +382 -0
- oscura/exploratory/unknown.py +707 -0
- oscura/export/__init__.py +25 -0
- oscura/export/wireshark/README.md +265 -0
- oscura/export/wireshark/__init__.py +47 -0
- oscura/export/wireshark/generator.py +312 -0
- oscura/export/wireshark/lua_builder.py +159 -0
- oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
- oscura/export/wireshark/type_mapping.py +165 -0
- oscura/export/wireshark/validator.py +105 -0
- oscura/exporters/__init__.py +94 -0
- oscura/exporters/csv.py +303 -0
- oscura/exporters/exporters.py +44 -0
- oscura/exporters/hdf5.py +219 -0
- oscura/exporters/html_export.py +701 -0
- oscura/exporters/json_export.py +291 -0
- oscura/exporters/markdown_export.py +367 -0
- oscura/exporters/matlab_export.py +354 -0
- oscura/exporters/npz_export.py +219 -0
- oscura/exporters/spice_export.py +210 -0
- oscura/extensibility/__init__.py +131 -0
- oscura/extensibility/docs.py +752 -0
- oscura/extensibility/extensions.py +1125 -0
- oscura/extensibility/logging.py +259 -0
- oscura/extensibility/measurements.py +485 -0
- oscura/extensibility/plugins.py +414 -0
- oscura/extensibility/registry.py +346 -0
- oscura/extensibility/templates.py +913 -0
- oscura/extensibility/validation.py +651 -0
- oscura/filtering/__init__.py +89 -0
- oscura/filtering/base.py +563 -0
- oscura/filtering/convenience.py +564 -0
- oscura/filtering/design.py +725 -0
- oscura/filtering/filters.py +32 -0
- oscura/filtering/introspection.py +605 -0
- oscura/guidance/__init__.py +24 -0
- oscura/guidance/recommender.py +429 -0
- oscura/guidance/wizard.py +518 -0
- oscura/inference/__init__.py +251 -0
- oscura/inference/active_learning/README.md +153 -0
- oscura/inference/active_learning/__init__.py +38 -0
- oscura/inference/active_learning/lstar.py +257 -0
- oscura/inference/active_learning/observation_table.py +230 -0
- oscura/inference/active_learning/oracle.py +78 -0
- oscura/inference/active_learning/teachers/__init__.py +15 -0
- oscura/inference/active_learning/teachers/simulator.py +192 -0
- oscura/inference/adaptive_tuning.py +453 -0
- oscura/inference/alignment.py +653 -0
- oscura/inference/bayesian.py +943 -0
- oscura/inference/binary.py +1016 -0
- oscura/inference/crc_reverse.py +711 -0
- oscura/inference/logic.py +288 -0
- oscura/inference/message_format.py +1305 -0
- oscura/inference/protocol.py +417 -0
- oscura/inference/protocol_dsl.py +1084 -0
- oscura/inference/protocol_library.py +1230 -0
- oscura/inference/sequences.py +809 -0
- oscura/inference/signal_intelligence.py +1509 -0
- oscura/inference/spectral.py +215 -0
- oscura/inference/state_machine.py +634 -0
- oscura/inference/stream.py +918 -0
- oscura/integrations/__init__.py +59 -0
- oscura/integrations/llm.py +1827 -0
- oscura/jupyter/__init__.py +32 -0
- oscura/jupyter/display.py +268 -0
- oscura/jupyter/magic.py +334 -0
- oscura/loaders/__init__.py +526 -0
- oscura/loaders/binary.py +69 -0
- oscura/loaders/configurable.py +1255 -0
- oscura/loaders/csv.py +26 -0
- oscura/loaders/csv_loader.py +473 -0
- oscura/loaders/hdf5.py +9 -0
- oscura/loaders/hdf5_loader.py +510 -0
- oscura/loaders/lazy.py +370 -0
- oscura/loaders/mmap_loader.py +583 -0
- oscura/loaders/numpy_loader.py +436 -0
- oscura/loaders/pcap.py +432 -0
- oscura/loaders/preprocessing.py +368 -0
- oscura/loaders/rigol.py +287 -0
- oscura/loaders/sigrok.py +321 -0
- oscura/loaders/tdms.py +367 -0
- oscura/loaders/tektronix.py +711 -0
- oscura/loaders/validation.py +584 -0
- oscura/loaders/vcd.py +464 -0
- oscura/loaders/wav.py +233 -0
- oscura/math/__init__.py +45 -0
- oscura/math/arithmetic.py +824 -0
- oscura/math/interpolation.py +413 -0
- oscura/onboarding/__init__.py +39 -0
- oscura/onboarding/help.py +498 -0
- oscura/onboarding/tutorials.py +405 -0
- oscura/onboarding/wizard.py +466 -0
- oscura/optimization/__init__.py +19 -0
- oscura/optimization/parallel.py +440 -0
- oscura/optimization/search.py +532 -0
- oscura/pipeline/__init__.py +43 -0
- oscura/pipeline/base.py +338 -0
- oscura/pipeline/composition.py +242 -0
- oscura/pipeline/parallel.py +448 -0
- oscura/pipeline/pipeline.py +375 -0
- oscura/pipeline/reverse_engineering.py +1119 -0
- oscura/plugins/__init__.py +122 -0
- oscura/plugins/base.py +272 -0
- oscura/plugins/cli.py +497 -0
- oscura/plugins/discovery.py +411 -0
- oscura/plugins/isolation.py +418 -0
- oscura/plugins/lifecycle.py +959 -0
- oscura/plugins/manager.py +493 -0
- oscura/plugins/registry.py +421 -0
- oscura/plugins/versioning.py +372 -0
- oscura/py.typed +0 -0
- oscura/quality/__init__.py +65 -0
- oscura/quality/ensemble.py +740 -0
- oscura/quality/explainer.py +338 -0
- oscura/quality/scoring.py +616 -0
- oscura/quality/warnings.py +456 -0
- oscura/reporting/__init__.py +248 -0
- oscura/reporting/advanced.py +1234 -0
- oscura/reporting/analyze.py +448 -0
- oscura/reporting/argument_preparer.py +596 -0
- oscura/reporting/auto_report.py +507 -0
- oscura/reporting/batch.py +615 -0
- oscura/reporting/chart_selection.py +223 -0
- oscura/reporting/comparison.py +330 -0
- oscura/reporting/config.py +615 -0
- oscura/reporting/content/__init__.py +39 -0
- oscura/reporting/content/executive.py +127 -0
- oscura/reporting/content/filtering.py +191 -0
- oscura/reporting/content/minimal.py +257 -0
- oscura/reporting/content/verbosity.py +162 -0
- oscura/reporting/core.py +508 -0
- oscura/reporting/core_formats/__init__.py +17 -0
- oscura/reporting/core_formats/multi_format.py +210 -0
- oscura/reporting/engine.py +836 -0
- oscura/reporting/export.py +366 -0
- oscura/reporting/formatting/__init__.py +129 -0
- oscura/reporting/formatting/emphasis.py +81 -0
- oscura/reporting/formatting/numbers.py +403 -0
- oscura/reporting/formatting/standards.py +55 -0
- oscura/reporting/formatting.py +466 -0
- oscura/reporting/html.py +578 -0
- oscura/reporting/index.py +590 -0
- oscura/reporting/multichannel.py +296 -0
- oscura/reporting/output.py +379 -0
- oscura/reporting/pdf.py +373 -0
- oscura/reporting/plots.py +731 -0
- oscura/reporting/pptx_export.py +360 -0
- oscura/reporting/renderers/__init__.py +11 -0
- oscura/reporting/renderers/pdf.py +94 -0
- oscura/reporting/sections.py +471 -0
- oscura/reporting/standards.py +680 -0
- oscura/reporting/summary_generator.py +368 -0
- oscura/reporting/tables.py +397 -0
- oscura/reporting/template_system.py +724 -0
- oscura/reporting/templates/__init__.py +15 -0
- oscura/reporting/templates/definition.py +205 -0
- oscura/reporting/templates/index.html +649 -0
- oscura/reporting/templates/index.md +173 -0
- oscura/schemas/__init__.py +158 -0
- oscura/schemas/bus_configuration.json +322 -0
- oscura/schemas/device_mapping.json +182 -0
- oscura/schemas/packet_format.json +418 -0
- oscura/schemas/protocol_definition.json +363 -0
- oscura/search/__init__.py +16 -0
- oscura/search/anomaly.py +292 -0
- oscura/search/context.py +149 -0
- oscura/search/pattern.py +160 -0
- oscura/session/__init__.py +34 -0
- oscura/session/annotations.py +289 -0
- oscura/session/history.py +313 -0
- oscura/session/session.py +445 -0
- oscura/streaming/__init__.py +43 -0
- oscura/streaming/chunked.py +611 -0
- oscura/streaming/progressive.py +393 -0
- oscura/streaming/realtime.py +622 -0
- oscura/testing/__init__.py +54 -0
- oscura/testing/synthetic.py +808 -0
- oscura/triggering/__init__.py +68 -0
- oscura/triggering/base.py +229 -0
- oscura/triggering/edge.py +353 -0
- oscura/triggering/pattern.py +344 -0
- oscura/triggering/pulse.py +581 -0
- oscura/triggering/window.py +453 -0
- oscura/ui/__init__.py +48 -0
- oscura/ui/formatters.py +526 -0
- oscura/ui/progressive_display.py +340 -0
- oscura/utils/__init__.py +99 -0
- oscura/utils/autodetect.py +338 -0
- oscura/utils/buffer.py +389 -0
- oscura/utils/lazy.py +407 -0
- oscura/utils/lazy_imports.py +147 -0
- oscura/utils/memory.py +836 -0
- oscura/utils/memory_advanced.py +1326 -0
- oscura/utils/memory_extensions.py +465 -0
- oscura/utils/progressive.py +352 -0
- oscura/utils/windowing.py +362 -0
- oscura/visualization/__init__.py +321 -0
- oscura/visualization/accessibility.py +526 -0
- oscura/visualization/annotations.py +374 -0
- oscura/visualization/axis_scaling.py +305 -0
- oscura/visualization/colors.py +453 -0
- oscura/visualization/digital.py +337 -0
- oscura/visualization/eye.py +420 -0
- oscura/visualization/histogram.py +281 -0
- oscura/visualization/interactive.py +858 -0
- oscura/visualization/jitter.py +702 -0
- oscura/visualization/keyboard.py +394 -0
- oscura/visualization/layout.py +365 -0
- oscura/visualization/optimization.py +1028 -0
- oscura/visualization/palettes.py +446 -0
- oscura/visualization/plot.py +92 -0
- oscura/visualization/power.py +290 -0
- oscura/visualization/power_extended.py +626 -0
- oscura/visualization/presets.py +467 -0
- oscura/visualization/protocols.py +932 -0
- oscura/visualization/render.py +207 -0
- oscura/visualization/rendering.py +444 -0
- oscura/visualization/reverse_engineering.py +791 -0
- oscura/visualization/signal_integrity.py +808 -0
- oscura/visualization/specialized.py +553 -0
- oscura/visualization/spectral.py +811 -0
- oscura/visualization/styles.py +381 -0
- oscura/visualization/thumbnails.py +311 -0
- oscura/visualization/time_axis.py +351 -0
- oscura/visualization/waveform.py +367 -0
- oscura/workflow/__init__.py +13 -0
- oscura/workflow/dag.py +377 -0
- oscura/workflows/__init__.py +58 -0
- oscura/workflows/compliance.py +280 -0
- oscura/workflows/digital.py +272 -0
- oscura/workflows/multi_trace.py +502 -0
- oscura/workflows/power.py +178 -0
- oscura/workflows/protocol.py +492 -0
- oscura/workflows/reverse_engineering.py +639 -0
- oscura/workflows/signal_integrity.py +227 -0
- oscura-0.1.1.dist-info/METADATA +300 -0
- oscura-0.1.1.dist-info/RECORD +463 -0
- oscura-0.1.1.dist-info/entry_points.txt +2 -0
- {oscura-0.0.1.dist-info → oscura-0.1.1.dist-info}/licenses/LICENSE +1 -1
- oscura-0.0.1.dist-info/METADATA +0 -63
- oscura-0.0.1.dist-info/RECORD +0 -5
- {oscura-0.0.1.dist-info → oscura-0.1.1.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,539 @@
|
|
|
1
|
+
"""Automatic signature and delimiter discovery.
|
|
2
|
+
|
|
3
|
+
This module implements algorithms for automatically discovering candidate
|
|
4
|
+
signatures, headers, and delimiters in binary data through statistical analysis.
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
Author: Oscura Development Team
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import math
|
|
13
|
+
from collections import Counter, defaultdict
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from typing import TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from numpy.typing import NDArray
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class CandidateSignature:
    """A candidate signature/header pattern together with its statistics.

    Attributes:
        pattern: The signature byte pattern.
        length: Length of the signature in bytes; must equal ``len(pattern)``.
        occurrences: Number of occurrences in the data.
        positions: Start positions of each occurrence.
        interval_mean: Mean interval between occurrences (samples).
        interval_std: Standard deviation of intervals (consistency measure).
        entropy: Pattern entropy (low = more structured).
        score: Overall distinctiveness score (0-1, higher = better).
    """

    pattern: bytes
    length: int
    occurrences: int
    positions: list[int]
    interval_mean: float
    interval_std: float
    entropy: float
    score: float

    def __post_init__(self) -> None:
        """Validate the field values right after construction.

        Raises:
            ValueError: If ``length`` is not positive, ``occurrences`` is
                negative, ``len(pattern)`` differs from ``length``, or
                ``score`` is NaN or lies outside [0, 1].
        """
        if self.length <= 0:
            raise ValueError("length must be positive")
        if self.occurrences < 0:
            raise ValueError("occurrences must be non-negative")
        if len(self.pattern) != self.length:
            raise ValueError("pattern length must match length field")
        # Negated chained comparison on purpose: every comparison with NaN is
        # False, so "score < 0 or score > 1" would silently accept a NaN score
        # and make score-based sorting unreliable.
        if not (0 <= self.score <= 1):
            raise ValueError("score must be in range [0, 1]")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class SignatureDiscovery:
|
|
60
|
+
"""Automatic signature and header discovery.
|
|
61
|
+
|
|
62
|
+
: Automatic Signature Discovery
|
|
63
|
+
|
|
64
|
+
This class analyzes binary data to automatically identify candidate
|
|
65
|
+
signatures, headers, and delimiters based on statistical patterns.
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
def __init__(self, min_length: int = 4, max_length: int = 16, min_occurrences: int = 2):
|
|
69
|
+
"""Initialize signature discovery.
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
min_length: Minimum signature length in bytes
|
|
73
|
+
max_length: Maximum signature length in bytes
|
|
74
|
+
min_occurrences: Minimum number of times a pattern must occur to be considered
|
|
75
|
+
|
|
76
|
+
Raises:
|
|
77
|
+
ValueError: If min_length or max_length are invalid
|
|
78
|
+
"""
|
|
79
|
+
if min_length < 1:
|
|
80
|
+
raise ValueError("min_length must be at least 1")
|
|
81
|
+
if max_length < min_length:
|
|
82
|
+
raise ValueError("max_length must be >= min_length")
|
|
83
|
+
if min_occurrences < 1:
|
|
84
|
+
raise ValueError("min_occurrences must be at least 1")
|
|
85
|
+
|
|
86
|
+
self.min_length = min_length
|
|
87
|
+
self.max_length = max_length
|
|
88
|
+
self.min_occurrences = min_occurrences
|
|
89
|
+
|
|
90
|
+
def discover_signatures(
|
|
91
|
+
self, data: bytes | NDArray[np.uint8] | list[bytes]
|
|
92
|
+
) -> list[CandidateSignature]:
|
|
93
|
+
"""Discover candidate signatures in data.
|
|
94
|
+
|
|
95
|
+
: General signature discovery
|
|
96
|
+
|
|
97
|
+
Finds byte patterns that appear regularly throughout the data,
|
|
98
|
+
suggesting they may be headers, sync markers, or delimiters.
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
data: Input binary data or list of messages
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
List of CandidateSignature sorted by score (best first)
|
|
105
|
+
|
|
106
|
+
Examples:
|
|
107
|
+
>>> data = b"\\xAA\\x55DATA" * 100
|
|
108
|
+
>>> discovery = SignatureDiscovery(min_length=2, max_length=8)
|
|
109
|
+
>>> sigs = discovery.discover_signatures(data)
|
|
110
|
+
>>> assert any(s.pattern == b"\\xAA\\x55" for s in sigs)
|
|
111
|
+
"""
|
|
112
|
+
# Handle list of messages
|
|
113
|
+
if isinstance(data, list):
|
|
114
|
+
# Concatenate all messages for analysis
|
|
115
|
+
data_bytes = b"".join(_to_bytes(msg) for msg in data)
|
|
116
|
+
else:
|
|
117
|
+
data_bytes = _to_bytes(data)
|
|
118
|
+
n = len(data_bytes)
|
|
119
|
+
|
|
120
|
+
if n < self.min_length:
|
|
121
|
+
return []
|
|
122
|
+
|
|
123
|
+
# Find all repeating patterns
|
|
124
|
+
pattern_dict = defaultdict(list)
|
|
125
|
+
|
|
126
|
+
for length in range(self.min_length, min(self.max_length + 1, n + 1)):
|
|
127
|
+
for i in range(n - length + 1):
|
|
128
|
+
pattern = data_bytes[i : i + length]
|
|
129
|
+
pattern_dict[pattern].append(i)
|
|
130
|
+
|
|
131
|
+
# Analyze each pattern
|
|
132
|
+
candidates = []
|
|
133
|
+
for pattern, positions in pattern_dict.items():
|
|
134
|
+
# Filter by min_occurrences
|
|
135
|
+
if len(positions) < self.min_occurrences:
|
|
136
|
+
continue
|
|
137
|
+
|
|
138
|
+
# Calculate statistics
|
|
139
|
+
intervals = np.diff(positions)
|
|
140
|
+
interval_mean = float(np.mean(intervals)) if len(intervals) > 0 else 0.0
|
|
141
|
+
interval_std = float(np.std(intervals)) if len(intervals) > 0 else 0.0
|
|
142
|
+
|
|
143
|
+
# Calculate pattern entropy
|
|
144
|
+
entropy = _calculate_entropy(pattern)
|
|
145
|
+
|
|
146
|
+
# Calculate distinctiveness score
|
|
147
|
+
score = self._calculate_score(
|
|
148
|
+
pattern=pattern,
|
|
149
|
+
occurrences=len(positions),
|
|
150
|
+
interval_mean=interval_mean,
|
|
151
|
+
interval_std=interval_std,
|
|
152
|
+
entropy=entropy,
|
|
153
|
+
data_length=n,
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
candidates.append(
|
|
157
|
+
CandidateSignature(
|
|
158
|
+
pattern=pattern,
|
|
159
|
+
length=len(pattern),
|
|
160
|
+
occurrences=len(positions),
|
|
161
|
+
positions=sorted(positions),
|
|
162
|
+
interval_mean=interval_mean,
|
|
163
|
+
interval_std=interval_std,
|
|
164
|
+
entropy=entropy,
|
|
165
|
+
score=score,
|
|
166
|
+
)
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
# Sort by score (descending)
|
|
170
|
+
candidates.sort(key=lambda x: x.score, reverse=True)
|
|
171
|
+
|
|
172
|
+
return candidates
|
|
173
|
+
|
|
174
|
+
def find_header_candidates(
|
|
175
|
+
self, data: bytes | NDArray[np.uint8], max_candidates: int = 20
|
|
176
|
+
) -> list[CandidateSignature]:
|
|
177
|
+
"""Find patterns likely to be message headers.
|
|
178
|
+
|
|
179
|
+
: Header candidate detection
|
|
180
|
+
|
|
181
|
+
Headers typically:
|
|
182
|
+
- Have low entropy (structured, not random)
|
|
183
|
+
- Appear at regular intervals
|
|
184
|
+
- Are relatively short (2-16 bytes)
|
|
185
|
+
- May contain magic bytes or sync markers
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
data: Input binary data
|
|
189
|
+
max_candidates: Maximum number of candidates to return
|
|
190
|
+
|
|
191
|
+
Returns:
|
|
192
|
+
List of CandidateSignature sorted by likelihood (best first)
|
|
193
|
+
"""
|
|
194
|
+
# Discover all signatures
|
|
195
|
+
candidates = self.discover_signatures(data)
|
|
196
|
+
|
|
197
|
+
# Filter and rank for header characteristics
|
|
198
|
+
header_candidates = []
|
|
199
|
+
for sig in candidates:
|
|
200
|
+
# Headers should have low entropy (structured)
|
|
201
|
+
if sig.entropy > 6.0:
|
|
202
|
+
continue
|
|
203
|
+
|
|
204
|
+
# Headers should be reasonably frequent
|
|
205
|
+
if sig.occurrences < 3:
|
|
206
|
+
continue
|
|
207
|
+
|
|
208
|
+
# Prefer regular intervals (low std deviation)
|
|
209
|
+
regularity = 1.0 / (1.0 + sig.interval_std / max(sig.interval_mean, 1.0))
|
|
210
|
+
|
|
211
|
+
# Combine score with header-specific features
|
|
212
|
+
header_score = sig.score * 0.6 + (1.0 - sig.entropy / 8.0) * 0.2 + regularity * 0.2
|
|
213
|
+
|
|
214
|
+
header_candidates.append(
|
|
215
|
+
CandidateSignature(
|
|
216
|
+
pattern=sig.pattern,
|
|
217
|
+
length=sig.length,
|
|
218
|
+
occurrences=sig.occurrences,
|
|
219
|
+
positions=sig.positions,
|
|
220
|
+
interval_mean=sig.interval_mean,
|
|
221
|
+
interval_std=sig.interval_std,
|
|
222
|
+
entropy=sig.entropy,
|
|
223
|
+
score=header_score,
|
|
224
|
+
)
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
# Sort by header score
|
|
228
|
+
header_candidates.sort(key=lambda x: x.score, reverse=True)
|
|
229
|
+
|
|
230
|
+
return header_candidates[:max_candidates]
|
|
231
|
+
|
|
232
|
+
def find_delimiter_candidates(
    self,
    data: bytes | NDArray[np.uint8],
    *,
    min_occurrences: int = 5,
    max_candidates: int = 20,
) -> list[CandidateSignature]:
    """Find patterns likely to be message delimiters.

    Delimiters typically:
    - Are short (1-4 bytes)
    - Have very low entropy (often single byte like \\n, \\0, etc.)
    - Appear frequently
    - May have variable intervals

    Only pattern lengths from 1 to ``min(4, self.max_length)`` are scanned.
    The instance's ``min_length`` and ``min_occurrences`` settings are not
    consulted here; the frequency threshold is the ``min_occurrences``
    argument of this method.

    Args:
        data: Input binary data
        min_occurrences: Minimum number of times a pattern must appear to
            be considered a delimiter (default 5)
        max_candidates: Maximum number of candidates to return (default 20)

    Returns:
        List of CandidateSignature sorted by delimiter score (best first),
        truncated to ``max_candidates`` entries. Empty when the data holds
        fewer than 2 bytes.

    Raises:
        ValueError: If min_occurrences is below 1 or max_candidates is
            negative
    """
    if min_occurrences < 1:
        raise ValueError("min_occurrences must be at least 1")
    if max_candidates < 0:
        raise ValueError("max_candidates must be non-negative")

    data_bytes = _to_bytes(data)
    n = len(data_bytes)

    if n < 2:
        return []

    # Focus on short patterns (typical delimiters)
    max_delim_length = min(4, self.max_length)

    # Loop-invariant: a pattern occurring once per 100 bytes (or more often)
    # earns the full frequency score.
    full_frequency = n / 100.0

    delimiter_candidates = []
    for length in range(1, max_delim_length + 1):
        # Shorter patterns score higher; identical for every pattern of
        # this length.
        length_score = 1.0 - (length - 1) / max_delim_length

        pattern_positions = defaultdict(list)
        for i in range(n - length + 1):
            pattern_positions[data_bytes[i : i + length]].append(i)

        for pattern, positions in pattern_positions.items():
            if len(positions) < min_occurrences:  # Delimiters should be frequent
                continue

            # Interval statistics between consecutive occurrences
            intervals = np.diff(positions)
            interval_mean = float(np.mean(intervals)) if len(intervals) > 0 else 0.0
            interval_std = float(np.std(intervals)) if len(intervals) > 0 else 0.0
            entropy = _calculate_entropy(pattern)

            # Delimiters should have very low entropy.
            # NOTE(review): if _calculate_entropy is a per-byte Shannon
            # entropy of the pattern, a pattern of at most 4 bytes cannot
            # exceed 2 bits and this filter never triggers - confirm
            # against the helper.
            if entropy > 3.0:
                continue

            # High frequency + low entropy + short length = good delimiter
            frequency_score = min(len(positions) / full_frequency, 1.0)
            entropy_score = 1.0 - entropy / 8.0

            delimiter_score = frequency_score * 0.5 + entropy_score * 0.3 + length_score * 0.2

            delimiter_candidates.append(
                CandidateSignature(
                    pattern=pattern,
                    length=length,
                    occurrences=len(positions),
                    positions=sorted(positions),
                    interval_mean=interval_mean,
                    interval_std=interval_std,
                    entropy=entropy,
                    score=delimiter_score,
                )
            )

    # Sort by delimiter score (stable, so ties keep discovery order)
    delimiter_candidates.sort(key=lambda x: x.score, reverse=True)

    return delimiter_candidates[:max_candidates]
|
|
307
|
+
|
|
308
|
+
def rank_signatures(self, candidates: list[CandidateSignature]) -> list[CandidateSignature]:
    """Re-score candidate signatures and return them best-first.

    Every candidate gets a new score that blends three terms:

    - regularity (weight 0.4): ``1 / (1 + interval_std / interval_mean)``,
      or 0.0 when ``interval_mean`` is not positive
    - entropy (weight 0.3): ``1 - entropy / 8``, floored at 0.0
    - frequency (weight 0.3): ``occurrences / 100``, capped at 1.0

    The incoming objects are only read; the result is made of new
    ``CandidateSignature`` instances carrying the same field values plus
    the recomputed score.

    Args:
        candidates: List of candidate signatures

    Returns:
        New list of CandidateSignature ordered by descending score
        (empty list when no candidates are given)
    """
    if not candidates:
        return []

    rescored = []
    for candidate in candidates:
        mean_gap = candidate.interval_mean

        # Evenly spaced occurrences (std == 0) give 1.0; wider spread
        # relative to the mean pushes the value down.
        if mean_gap > 0:
            steadiness = 1.0 / (1.0 + candidate.interval_std / mean_gap)
        else:
            steadiness = 0.0

        # Low-entropy patterns are preferred.
        compactness = max(0.0, 1.0 - candidate.entropy / 8.0)

        # Saturates at 100 occurrences.
        abundance = min(candidate.occurrences / 100.0, 1.0)

        rescored.append(
            CandidateSignature(
                pattern=candidate.pattern,
                length=candidate.length,
                occurrences=candidate.occurrences,
                positions=candidate.positions,
                interval_mean=mean_gap,
                interval_std=candidate.interval_std,
                entropy=candidate.entropy,
                score=steadiness * 0.4 + compactness * 0.3 + abundance * 0.3,
            )
        )

    # sorted() is stable, so ties keep their input order.
    return sorted(rescored, key=lambda item: item.score, reverse=True)
|
|
363
|
+
|
|
364
|
+
def _calculate_score(
|
|
365
|
+
self,
|
|
366
|
+
pattern: bytes,
|
|
367
|
+
occurrences: int,
|
|
368
|
+
interval_mean: float,
|
|
369
|
+
interval_std: float,
|
|
370
|
+
entropy: float,
|
|
371
|
+
data_length: int,
|
|
372
|
+
) -> float:
|
|
373
|
+
"""Calculate distinctiveness score for a pattern.
|
|
374
|
+
|
|
375
|
+
Args:
|
|
376
|
+
pattern: The byte pattern
|
|
377
|
+
occurrences: Number of occurrences
|
|
378
|
+
interval_mean: Mean interval between occurrences
|
|
379
|
+
interval_std: Standard deviation of intervals
|
|
380
|
+
entropy: Pattern entropy
|
|
381
|
+
data_length: Total data length
|
|
382
|
+
|
|
383
|
+
Returns:
|
|
384
|
+
Score in range [0, 1], higher is more distinctive
|
|
385
|
+
"""
|
|
386
|
+
# Frequency score (normalized)
|
|
387
|
+
frequency_score = min(occurrences / 50.0, 1.0)
|
|
388
|
+
|
|
389
|
+
# Regularity score (prefer consistent intervals)
|
|
390
|
+
if interval_mean > 0:
|
|
391
|
+
regularity_score = 1.0 / (1.0 + interval_std / interval_mean)
|
|
392
|
+
else:
|
|
393
|
+
regularity_score = 0.0
|
|
394
|
+
|
|
395
|
+
# Entropy score (prefer structured patterns, not random)
|
|
396
|
+
entropy_score = max(0.0, 1.0 - entropy / 8.0)
|
|
397
|
+
|
|
398
|
+
# Length score (prefer medium-length patterns)
|
|
399
|
+
optimal_length = 4.0
|
|
400
|
+
length_score = 1.0 - abs(len(pattern) - optimal_length) / 8.0
|
|
401
|
+
length_score = max(0.0, length_score)
|
|
402
|
+
|
|
403
|
+
# Combine scores
|
|
404
|
+
score = (
|
|
405
|
+
frequency_score * 0.3
|
|
406
|
+
+ regularity_score * 0.4
|
|
407
|
+
+ entropy_score * 0.2
|
|
408
|
+
+ length_score * 0.1
|
|
409
|
+
)
|
|
410
|
+
|
|
411
|
+
return min(1.0, max(0.0, score))
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
# Convenience functions
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def discover_signatures(
    data: bytes | NDArray[np.uint8] | list[bytes],
    min_length: int = 4,
    max_length: int = 16,
    min_occurrences: int = 2,
) -> list[CandidateSignature]:
    """Run signature discovery without building a SignatureDiscovery by hand.

    Constructs a ``SignatureDiscovery`` from the three tuning arguments
    and forwards ``data`` to its ``discover_signatures`` method.

    Args:
        data: Input binary data or list of messages
        min_length: Minimum signature length
        max_length: Maximum signature length
        min_occurrences: Minimum number of times a pattern must occur

    Returns:
        List of CandidateSignature sorted by score

    Examples:
        >>> data = b"\\xFF\\xFF" + b"DATA" * 50
        >>> signatures = discover_signatures(data, min_length=2)
    """
    return SignatureDiscovery(min_length, max_length, min_occurrences).discover_signatures(data)
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def find_header_candidates(data: bytes | NDArray[np.uint8]) -> list[CandidateSignature]:
    """Search binary data for likely header patterns.

    Delegates to ``SignatureDiscovery.find_header_candidates`` using a
    discovery object configured for patterns of 2 to 16 bytes.

    Args:
        data: Input binary data

    Returns:
        List of header candidates

    Examples:
        >>> data = b"HDR" + b"payload" * 20
        >>> headers = find_header_candidates(data)
    """
    return SignatureDiscovery(min_length=2, max_length=16).find_header_candidates(data)
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def find_delimiter_candidates(data: bytes | NDArray[np.uint8]) -> list[CandidateSignature]:
    """Search binary data for likely field or record delimiters.

    Delegates to ``SignatureDiscovery.find_delimiter_candidates`` using a
    discovery object configured for patterns of 1 to 4 bytes.

    Note that the underlying search discards any pattern seen fewer than
    five times, so a delimiter must repeat at least that often to be
    reported.

    Args:
        data: Input binary data

    Returns:
        List of delimiter candidates

    Examples:
        >>> data = b"f1,f2,f3,f4,f5,f6"  # the comma occurs five times
        >>> delimiters = find_delimiter_candidates(data)
        >>> assert any(d.pattern == b"," for d in delimiters)
    """
    return SignatureDiscovery(min_length=1, max_length=4).find_delimiter_candidates(data)
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
# Helper functions
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def _to_bytes(data: bytes | NDArray[np.uint8] | memoryview | bytearray) -> bytes:
|
|
487
|
+
"""Convert input data to bytes.
|
|
488
|
+
|
|
489
|
+
Args:
|
|
490
|
+
data: Input data (bytes, bytearray, memoryview, or numpy array)
|
|
491
|
+
|
|
492
|
+
Returns:
|
|
493
|
+
Bytes representation
|
|
494
|
+
|
|
495
|
+
Raises:
|
|
496
|
+
TypeError: If data type is not supported
|
|
497
|
+
"""
|
|
498
|
+
if isinstance(data, bytes):
|
|
499
|
+
return data
|
|
500
|
+
elif isinstance(data, bytearray | memoryview):
|
|
501
|
+
return bytes(data)
|
|
502
|
+
elif isinstance(data, np.ndarray):
|
|
503
|
+
return data.astype(np.uint8).tobytes() # type: ignore[no-any-return]
|
|
504
|
+
else:
|
|
505
|
+
raise TypeError(f"Unsupported data type: {type(data)}")
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def _calculate_entropy(data: bytes) -> float:
|
|
509
|
+
"""Calculate Shannon entropy of byte sequence.
|
|
510
|
+
|
|
511
|
+
: Entropy calculation
|
|
512
|
+
|
|
513
|
+
Args:
|
|
514
|
+
data: Byte sequence
|
|
515
|
+
|
|
516
|
+
Returns:
|
|
517
|
+
Entropy in bits (0-8 for byte data)
|
|
518
|
+
|
|
519
|
+
Examples:
|
|
520
|
+
>>> _calculate_entropy(b"\\x00" * 100) # All same byte
|
|
521
|
+
0.0
|
|
522
|
+
>>> entropy = _calculate_entropy(bytes(range(256))) # Uniform distribution
|
|
523
|
+
>>> assert entropy > 7.9 # Close to 8.0
|
|
524
|
+
"""
|
|
525
|
+
if len(data) == 0:
|
|
526
|
+
return 0.0
|
|
527
|
+
|
|
528
|
+
# Count byte frequencies
|
|
529
|
+
byte_counts = Counter(data)
|
|
530
|
+
n = len(data)
|
|
531
|
+
|
|
532
|
+
# Calculate entropy
|
|
533
|
+
entropy = 0.0
|
|
534
|
+
for count in byte_counts.values():
|
|
535
|
+
if count > 0:
|
|
536
|
+
prob = count / n
|
|
537
|
+
entropy -= prob * math.log2(prob)
|
|
538
|
+
|
|
539
|
+
return entropy
|