oscura 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscura/__init__.py +813 -8
- oscura/__main__.py +392 -0
- oscura/analyzers/__init__.py +37 -0
- oscura/analyzers/digital/__init__.py +177 -0
- oscura/analyzers/digital/bus.py +691 -0
- oscura/analyzers/digital/clock.py +805 -0
- oscura/analyzers/digital/correlation.py +720 -0
- oscura/analyzers/digital/edges.py +632 -0
- oscura/analyzers/digital/extraction.py +413 -0
- oscura/analyzers/digital/quality.py +878 -0
- oscura/analyzers/digital/signal_quality.py +877 -0
- oscura/analyzers/digital/thresholds.py +708 -0
- oscura/analyzers/digital/timing.py +1104 -0
- oscura/analyzers/eye/__init__.py +46 -0
- oscura/analyzers/eye/diagram.py +434 -0
- oscura/analyzers/eye/metrics.py +555 -0
- oscura/analyzers/jitter/__init__.py +83 -0
- oscura/analyzers/jitter/ber.py +333 -0
- oscura/analyzers/jitter/decomposition.py +759 -0
- oscura/analyzers/jitter/measurements.py +413 -0
- oscura/analyzers/jitter/spectrum.py +220 -0
- oscura/analyzers/measurements.py +40 -0
- oscura/analyzers/packet/__init__.py +171 -0
- oscura/analyzers/packet/daq.py +1077 -0
- oscura/analyzers/packet/metrics.py +437 -0
- oscura/analyzers/packet/parser.py +327 -0
- oscura/analyzers/packet/payload.py +2156 -0
- oscura/analyzers/packet/payload_analysis.py +1312 -0
- oscura/analyzers/packet/payload_extraction.py +236 -0
- oscura/analyzers/packet/payload_patterns.py +670 -0
- oscura/analyzers/packet/stream.py +359 -0
- oscura/analyzers/patterns/__init__.py +266 -0
- oscura/analyzers/patterns/clustering.py +1036 -0
- oscura/analyzers/patterns/discovery.py +539 -0
- oscura/analyzers/patterns/learning.py +797 -0
- oscura/analyzers/patterns/matching.py +1091 -0
- oscura/analyzers/patterns/periodic.py +650 -0
- oscura/analyzers/patterns/sequences.py +767 -0
- oscura/analyzers/power/__init__.py +116 -0
- oscura/analyzers/power/ac_power.py +391 -0
- oscura/analyzers/power/basic.py +383 -0
- oscura/analyzers/power/conduction.py +314 -0
- oscura/analyzers/power/efficiency.py +297 -0
- oscura/analyzers/power/ripple.py +356 -0
- oscura/analyzers/power/soa.py +372 -0
- oscura/analyzers/power/switching.py +479 -0
- oscura/analyzers/protocol/__init__.py +150 -0
- oscura/analyzers/protocols/__init__.py +150 -0
- oscura/analyzers/protocols/base.py +500 -0
- oscura/analyzers/protocols/can.py +620 -0
- oscura/analyzers/protocols/can_fd.py +448 -0
- oscura/analyzers/protocols/flexray.py +405 -0
- oscura/analyzers/protocols/hdlc.py +399 -0
- oscura/analyzers/protocols/i2c.py +368 -0
- oscura/analyzers/protocols/i2s.py +296 -0
- oscura/analyzers/protocols/jtag.py +393 -0
- oscura/analyzers/protocols/lin.py +445 -0
- oscura/analyzers/protocols/manchester.py +333 -0
- oscura/analyzers/protocols/onewire.py +501 -0
- oscura/analyzers/protocols/spi.py +334 -0
- oscura/analyzers/protocols/swd.py +325 -0
- oscura/analyzers/protocols/uart.py +393 -0
- oscura/analyzers/protocols/usb.py +495 -0
- oscura/analyzers/signal_integrity/__init__.py +63 -0
- oscura/analyzers/signal_integrity/embedding.py +294 -0
- oscura/analyzers/signal_integrity/equalization.py +370 -0
- oscura/analyzers/signal_integrity/sparams.py +484 -0
- oscura/analyzers/spectral/__init__.py +53 -0
- oscura/analyzers/spectral/chunked.py +273 -0
- oscura/analyzers/spectral/chunked_fft.py +571 -0
- oscura/analyzers/spectral/chunked_wavelet.py +391 -0
- oscura/analyzers/spectral/fft.py +92 -0
- oscura/analyzers/statistical/__init__.py +250 -0
- oscura/analyzers/statistical/checksum.py +923 -0
- oscura/analyzers/statistical/chunked_corr.py +228 -0
- oscura/analyzers/statistical/classification.py +778 -0
- oscura/analyzers/statistical/entropy.py +1113 -0
- oscura/analyzers/statistical/ngrams.py +614 -0
- oscura/analyzers/statistics/__init__.py +119 -0
- oscura/analyzers/statistics/advanced.py +885 -0
- oscura/analyzers/statistics/basic.py +263 -0
- oscura/analyzers/statistics/correlation.py +630 -0
- oscura/analyzers/statistics/distribution.py +298 -0
- oscura/analyzers/statistics/outliers.py +463 -0
- oscura/analyzers/statistics/streaming.py +93 -0
- oscura/analyzers/statistics/trend.py +520 -0
- oscura/analyzers/validation.py +598 -0
- oscura/analyzers/waveform/__init__.py +36 -0
- oscura/analyzers/waveform/measurements.py +943 -0
- oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
- oscura/analyzers/waveform/spectral.py +1689 -0
- oscura/analyzers/waveform/wavelets.py +298 -0
- oscura/api/__init__.py +62 -0
- oscura/api/dsl.py +538 -0
- oscura/api/fluent.py +571 -0
- oscura/api/operators.py +498 -0
- oscura/api/optimization.py +392 -0
- oscura/api/profiling.py +396 -0
- oscura/automotive/__init__.py +73 -0
- oscura/automotive/can/__init__.py +52 -0
- oscura/automotive/can/analysis.py +356 -0
- oscura/automotive/can/checksum.py +250 -0
- oscura/automotive/can/correlation.py +212 -0
- oscura/automotive/can/discovery.py +355 -0
- oscura/automotive/can/message_wrapper.py +375 -0
- oscura/automotive/can/models.py +385 -0
- oscura/automotive/can/patterns.py +381 -0
- oscura/automotive/can/session.py +452 -0
- oscura/automotive/can/state_machine.py +300 -0
- oscura/automotive/can/stimulus_response.py +461 -0
- oscura/automotive/dbc/__init__.py +15 -0
- oscura/automotive/dbc/generator.py +156 -0
- oscura/automotive/dbc/parser.py +146 -0
- oscura/automotive/dtc/__init__.py +30 -0
- oscura/automotive/dtc/database.py +3036 -0
- oscura/automotive/j1939/__init__.py +14 -0
- oscura/automotive/j1939/decoder.py +745 -0
- oscura/automotive/loaders/__init__.py +35 -0
- oscura/automotive/loaders/asc.py +98 -0
- oscura/automotive/loaders/blf.py +77 -0
- oscura/automotive/loaders/csv_can.py +136 -0
- oscura/automotive/loaders/dispatcher.py +136 -0
- oscura/automotive/loaders/mdf.py +331 -0
- oscura/automotive/loaders/pcap.py +132 -0
- oscura/automotive/obd/__init__.py +14 -0
- oscura/automotive/obd/decoder.py +707 -0
- oscura/automotive/uds/__init__.py +48 -0
- oscura/automotive/uds/decoder.py +265 -0
- oscura/automotive/uds/models.py +64 -0
- oscura/automotive/visualization.py +369 -0
- oscura/batch/__init__.py +55 -0
- oscura/batch/advanced.py +627 -0
- oscura/batch/aggregate.py +300 -0
- oscura/batch/analyze.py +139 -0
- oscura/batch/logging.py +487 -0
- oscura/batch/metrics.py +556 -0
- oscura/builders/__init__.py +41 -0
- oscura/builders/signal_builder.py +1131 -0
- oscura/cli/__init__.py +14 -0
- oscura/cli/batch.py +339 -0
- oscura/cli/characterize.py +273 -0
- oscura/cli/compare.py +775 -0
- oscura/cli/decode.py +551 -0
- oscura/cli/main.py +247 -0
- oscura/cli/shell.py +350 -0
- oscura/comparison/__init__.py +66 -0
- oscura/comparison/compare.py +397 -0
- oscura/comparison/golden.py +487 -0
- oscura/comparison/limits.py +391 -0
- oscura/comparison/mask.py +434 -0
- oscura/comparison/trace_diff.py +30 -0
- oscura/comparison/visualization.py +481 -0
- oscura/compliance/__init__.py +70 -0
- oscura/compliance/advanced.py +756 -0
- oscura/compliance/masks.py +363 -0
- oscura/compliance/reporting.py +483 -0
- oscura/compliance/testing.py +298 -0
- oscura/component/__init__.py +38 -0
- oscura/component/impedance.py +365 -0
- oscura/component/reactive.py +598 -0
- oscura/component/transmission_line.py +312 -0
- oscura/config/__init__.py +191 -0
- oscura/config/defaults.py +254 -0
- oscura/config/loader.py +348 -0
- oscura/config/memory.py +271 -0
- oscura/config/migration.py +458 -0
- oscura/config/pipeline.py +1077 -0
- oscura/config/preferences.py +530 -0
- oscura/config/protocol.py +875 -0
- oscura/config/schema.py +713 -0
- oscura/config/settings.py +420 -0
- oscura/config/thresholds.py +599 -0
- oscura/convenience.py +457 -0
- oscura/core/__init__.py +299 -0
- oscura/core/audit.py +457 -0
- oscura/core/backend_selector.py +405 -0
- oscura/core/cache.py +590 -0
- oscura/core/cancellation.py +439 -0
- oscura/core/confidence.py +225 -0
- oscura/core/config.py +506 -0
- oscura/core/correlation.py +216 -0
- oscura/core/cross_domain.py +422 -0
- oscura/core/debug.py +301 -0
- oscura/core/edge_cases.py +541 -0
- oscura/core/exceptions.py +535 -0
- oscura/core/gpu_backend.py +523 -0
- oscura/core/lazy.py +832 -0
- oscura/core/log_query.py +540 -0
- oscura/core/logging.py +931 -0
- oscura/core/logging_advanced.py +952 -0
- oscura/core/memoize.py +171 -0
- oscura/core/memory_check.py +274 -0
- oscura/core/memory_guard.py +290 -0
- oscura/core/memory_limits.py +336 -0
- oscura/core/memory_monitor.py +453 -0
- oscura/core/memory_progress.py +465 -0
- oscura/core/memory_warnings.py +315 -0
- oscura/core/numba_backend.py +362 -0
- oscura/core/performance.py +352 -0
- oscura/core/progress.py +524 -0
- oscura/core/provenance.py +358 -0
- oscura/core/results.py +331 -0
- oscura/core/types.py +504 -0
- oscura/core/uncertainty.py +383 -0
- oscura/discovery/__init__.py +52 -0
- oscura/discovery/anomaly_detector.py +672 -0
- oscura/discovery/auto_decoder.py +415 -0
- oscura/discovery/comparison.py +497 -0
- oscura/discovery/quality_validator.py +528 -0
- oscura/discovery/signal_detector.py +769 -0
- oscura/dsl/__init__.py +73 -0
- oscura/dsl/commands.py +246 -0
- oscura/dsl/interpreter.py +455 -0
- oscura/dsl/parser.py +689 -0
- oscura/dsl/repl.py +172 -0
- oscura/exceptions.py +59 -0
- oscura/exploratory/__init__.py +111 -0
- oscura/exploratory/error_recovery.py +642 -0
- oscura/exploratory/fuzzy.py +513 -0
- oscura/exploratory/fuzzy_advanced.py +786 -0
- oscura/exploratory/legacy.py +831 -0
- oscura/exploratory/parse.py +358 -0
- oscura/exploratory/recovery.py +275 -0
- oscura/exploratory/sync.py +382 -0
- oscura/exploratory/unknown.py +707 -0
- oscura/export/__init__.py +25 -0
- oscura/export/wireshark/README.md +265 -0
- oscura/export/wireshark/__init__.py +47 -0
- oscura/export/wireshark/generator.py +312 -0
- oscura/export/wireshark/lua_builder.py +159 -0
- oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
- oscura/export/wireshark/type_mapping.py +165 -0
- oscura/export/wireshark/validator.py +105 -0
- oscura/exporters/__init__.py +94 -0
- oscura/exporters/csv.py +303 -0
- oscura/exporters/exporters.py +44 -0
- oscura/exporters/hdf5.py +219 -0
- oscura/exporters/html_export.py +701 -0
- oscura/exporters/json_export.py +291 -0
- oscura/exporters/markdown_export.py +367 -0
- oscura/exporters/matlab_export.py +354 -0
- oscura/exporters/npz_export.py +219 -0
- oscura/exporters/spice_export.py +210 -0
- oscura/extensibility/__init__.py +131 -0
- oscura/extensibility/docs.py +752 -0
- oscura/extensibility/extensions.py +1125 -0
- oscura/extensibility/logging.py +259 -0
- oscura/extensibility/measurements.py +485 -0
- oscura/extensibility/plugins.py +414 -0
- oscura/extensibility/registry.py +346 -0
- oscura/extensibility/templates.py +913 -0
- oscura/extensibility/validation.py +651 -0
- oscura/filtering/__init__.py +89 -0
- oscura/filtering/base.py +563 -0
- oscura/filtering/convenience.py +564 -0
- oscura/filtering/design.py +725 -0
- oscura/filtering/filters.py +32 -0
- oscura/filtering/introspection.py +605 -0
- oscura/guidance/__init__.py +24 -0
- oscura/guidance/recommender.py +429 -0
- oscura/guidance/wizard.py +518 -0
- oscura/inference/__init__.py +251 -0
- oscura/inference/active_learning/README.md +153 -0
- oscura/inference/active_learning/__init__.py +38 -0
- oscura/inference/active_learning/lstar.py +257 -0
- oscura/inference/active_learning/observation_table.py +230 -0
- oscura/inference/active_learning/oracle.py +78 -0
- oscura/inference/active_learning/teachers/__init__.py +15 -0
- oscura/inference/active_learning/teachers/simulator.py +192 -0
- oscura/inference/adaptive_tuning.py +453 -0
- oscura/inference/alignment.py +653 -0
- oscura/inference/bayesian.py +943 -0
- oscura/inference/binary.py +1016 -0
- oscura/inference/crc_reverse.py +711 -0
- oscura/inference/logic.py +288 -0
- oscura/inference/message_format.py +1305 -0
- oscura/inference/protocol.py +417 -0
- oscura/inference/protocol_dsl.py +1084 -0
- oscura/inference/protocol_library.py +1230 -0
- oscura/inference/sequences.py +809 -0
- oscura/inference/signal_intelligence.py +1509 -0
- oscura/inference/spectral.py +215 -0
- oscura/inference/state_machine.py +634 -0
- oscura/inference/stream.py +918 -0
- oscura/integrations/__init__.py +59 -0
- oscura/integrations/llm.py +1827 -0
- oscura/jupyter/__init__.py +32 -0
- oscura/jupyter/display.py +268 -0
- oscura/jupyter/magic.py +334 -0
- oscura/loaders/__init__.py +526 -0
- oscura/loaders/binary.py +69 -0
- oscura/loaders/configurable.py +1255 -0
- oscura/loaders/csv.py +26 -0
- oscura/loaders/csv_loader.py +473 -0
- oscura/loaders/hdf5.py +9 -0
- oscura/loaders/hdf5_loader.py +510 -0
- oscura/loaders/lazy.py +370 -0
- oscura/loaders/mmap_loader.py +583 -0
- oscura/loaders/numpy_loader.py +436 -0
- oscura/loaders/pcap.py +432 -0
- oscura/loaders/preprocessing.py +368 -0
- oscura/loaders/rigol.py +287 -0
- oscura/loaders/sigrok.py +321 -0
- oscura/loaders/tdms.py +367 -0
- oscura/loaders/tektronix.py +711 -0
- oscura/loaders/validation.py +584 -0
- oscura/loaders/vcd.py +464 -0
- oscura/loaders/wav.py +233 -0
- oscura/math/__init__.py +45 -0
- oscura/math/arithmetic.py +824 -0
- oscura/math/interpolation.py +413 -0
- oscura/onboarding/__init__.py +39 -0
- oscura/onboarding/help.py +498 -0
- oscura/onboarding/tutorials.py +405 -0
- oscura/onboarding/wizard.py +466 -0
- oscura/optimization/__init__.py +19 -0
- oscura/optimization/parallel.py +440 -0
- oscura/optimization/search.py +532 -0
- oscura/pipeline/__init__.py +43 -0
- oscura/pipeline/base.py +338 -0
- oscura/pipeline/composition.py +242 -0
- oscura/pipeline/parallel.py +448 -0
- oscura/pipeline/pipeline.py +375 -0
- oscura/pipeline/reverse_engineering.py +1119 -0
- oscura/plugins/__init__.py +122 -0
- oscura/plugins/base.py +272 -0
- oscura/plugins/cli.py +497 -0
- oscura/plugins/discovery.py +411 -0
- oscura/plugins/isolation.py +418 -0
- oscura/plugins/lifecycle.py +959 -0
- oscura/plugins/manager.py +493 -0
- oscura/plugins/registry.py +421 -0
- oscura/plugins/versioning.py +372 -0
- oscura/py.typed +0 -0
- oscura/quality/__init__.py +65 -0
- oscura/quality/ensemble.py +740 -0
- oscura/quality/explainer.py +338 -0
- oscura/quality/scoring.py +616 -0
- oscura/quality/warnings.py +456 -0
- oscura/reporting/__init__.py +248 -0
- oscura/reporting/advanced.py +1234 -0
- oscura/reporting/analyze.py +448 -0
- oscura/reporting/argument_preparer.py +596 -0
- oscura/reporting/auto_report.py +507 -0
- oscura/reporting/batch.py +615 -0
- oscura/reporting/chart_selection.py +223 -0
- oscura/reporting/comparison.py +330 -0
- oscura/reporting/config.py +615 -0
- oscura/reporting/content/__init__.py +39 -0
- oscura/reporting/content/executive.py +127 -0
- oscura/reporting/content/filtering.py +191 -0
- oscura/reporting/content/minimal.py +257 -0
- oscura/reporting/content/verbosity.py +162 -0
- oscura/reporting/core.py +508 -0
- oscura/reporting/core_formats/__init__.py +17 -0
- oscura/reporting/core_formats/multi_format.py +210 -0
- oscura/reporting/engine.py +836 -0
- oscura/reporting/export.py +366 -0
- oscura/reporting/formatting/__init__.py +129 -0
- oscura/reporting/formatting/emphasis.py +81 -0
- oscura/reporting/formatting/numbers.py +403 -0
- oscura/reporting/formatting/standards.py +55 -0
- oscura/reporting/formatting.py +466 -0
- oscura/reporting/html.py +578 -0
- oscura/reporting/index.py +590 -0
- oscura/reporting/multichannel.py +296 -0
- oscura/reporting/output.py +379 -0
- oscura/reporting/pdf.py +373 -0
- oscura/reporting/plots.py +731 -0
- oscura/reporting/pptx_export.py +360 -0
- oscura/reporting/renderers/__init__.py +11 -0
- oscura/reporting/renderers/pdf.py +94 -0
- oscura/reporting/sections.py +471 -0
- oscura/reporting/standards.py +680 -0
- oscura/reporting/summary_generator.py +368 -0
- oscura/reporting/tables.py +397 -0
- oscura/reporting/template_system.py +724 -0
- oscura/reporting/templates/__init__.py +15 -0
- oscura/reporting/templates/definition.py +205 -0
- oscura/reporting/templates/index.html +649 -0
- oscura/reporting/templates/index.md +173 -0
- oscura/schemas/__init__.py +158 -0
- oscura/schemas/bus_configuration.json +322 -0
- oscura/schemas/device_mapping.json +182 -0
- oscura/schemas/packet_format.json +418 -0
- oscura/schemas/protocol_definition.json +363 -0
- oscura/search/__init__.py +16 -0
- oscura/search/anomaly.py +292 -0
- oscura/search/context.py +149 -0
- oscura/search/pattern.py +160 -0
- oscura/session/__init__.py +34 -0
- oscura/session/annotations.py +289 -0
- oscura/session/history.py +313 -0
- oscura/session/session.py +445 -0
- oscura/streaming/__init__.py +43 -0
- oscura/streaming/chunked.py +611 -0
- oscura/streaming/progressive.py +393 -0
- oscura/streaming/realtime.py +622 -0
- oscura/testing/__init__.py +54 -0
- oscura/testing/synthetic.py +808 -0
- oscura/triggering/__init__.py +68 -0
- oscura/triggering/base.py +229 -0
- oscura/triggering/edge.py +353 -0
- oscura/triggering/pattern.py +344 -0
- oscura/triggering/pulse.py +581 -0
- oscura/triggering/window.py +453 -0
- oscura/ui/__init__.py +48 -0
- oscura/ui/formatters.py +526 -0
- oscura/ui/progressive_display.py +340 -0
- oscura/utils/__init__.py +99 -0
- oscura/utils/autodetect.py +338 -0
- oscura/utils/buffer.py +389 -0
- oscura/utils/lazy.py +407 -0
- oscura/utils/lazy_imports.py +147 -0
- oscura/utils/memory.py +836 -0
- oscura/utils/memory_advanced.py +1326 -0
- oscura/utils/memory_extensions.py +465 -0
- oscura/utils/progressive.py +352 -0
- oscura/utils/windowing.py +362 -0
- oscura/visualization/__init__.py +321 -0
- oscura/visualization/accessibility.py +526 -0
- oscura/visualization/annotations.py +374 -0
- oscura/visualization/axis_scaling.py +305 -0
- oscura/visualization/colors.py +453 -0
- oscura/visualization/digital.py +337 -0
- oscura/visualization/eye.py +420 -0
- oscura/visualization/histogram.py +281 -0
- oscura/visualization/interactive.py +858 -0
- oscura/visualization/jitter.py +702 -0
- oscura/visualization/keyboard.py +394 -0
- oscura/visualization/layout.py +365 -0
- oscura/visualization/optimization.py +1028 -0
- oscura/visualization/palettes.py +446 -0
- oscura/visualization/plot.py +92 -0
- oscura/visualization/power.py +290 -0
- oscura/visualization/power_extended.py +626 -0
- oscura/visualization/presets.py +467 -0
- oscura/visualization/protocols.py +932 -0
- oscura/visualization/render.py +207 -0
- oscura/visualization/rendering.py +444 -0
- oscura/visualization/reverse_engineering.py +791 -0
- oscura/visualization/signal_integrity.py +808 -0
- oscura/visualization/specialized.py +553 -0
- oscura/visualization/spectral.py +811 -0
- oscura/visualization/styles.py +381 -0
- oscura/visualization/thumbnails.py +311 -0
- oscura/visualization/time_axis.py +351 -0
- oscura/visualization/waveform.py +367 -0
- oscura/workflow/__init__.py +13 -0
- oscura/workflow/dag.py +377 -0
- oscura/workflows/__init__.py +58 -0
- oscura/workflows/compliance.py +280 -0
- oscura/workflows/digital.py +272 -0
- oscura/workflows/multi_trace.py +502 -0
- oscura/workflows/power.py +178 -0
- oscura/workflows/protocol.py +492 -0
- oscura/workflows/reverse_engineering.py +639 -0
- oscura/workflows/signal_integrity.py +227 -0
- oscura-0.1.0.dist-info/METADATA +300 -0
- oscura-0.1.0.dist-info/RECORD +463 -0
- oscura-0.1.0.dist-info/entry_points.txt +2 -0
- {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/licenses/LICENSE +1 -1
- oscura-0.0.1.dist-info/METADATA +0 -63
- oscura-0.0.1.dist-info/RECORD +0 -5
- {oscura-0.0.1.dist-info → oscura-0.1.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
"""Outlier detection for signal analysis.
|
|
2
|
+
|
|
3
|
+
This module provides multiple outlier detection methods suitable for
|
|
4
|
+
different data distributions and contamination levels.
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
>>> from oscura.analyzers.statistics.outliers import (
|
|
9
|
+
... zscore_outliers, modified_zscore_outliers, iqr_outliers
|
|
10
|
+
... )
|
|
11
|
+
>>> outliers = zscore_outliers(trace, threshold=3.0)
|
|
12
|
+
>>> robust_outliers = modified_zscore_outliers(trace, threshold=3.5)
|
|
13
|
+
>>> iqr_result = iqr_outliers(trace, multiplier=1.5)
|
|
14
|
+
|
|
15
|
+
References:
|
|
16
|
+
Iglewicz, B. & Hoaglin, D. (1993). How to Detect and Handle Outliers
|
|
17
|
+
NIST Engineering Statistics Handbook
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from dataclasses import dataclass
|
|
23
|
+
from typing import TYPE_CHECKING, Any
|
|
24
|
+
|
|
25
|
+
import numpy as np
|
|
26
|
+
|
|
27
|
+
from oscura.core.types import WaveformTrace
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from numpy.typing import NDArray
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class OutlierResult:
    """Container describing the outcome of one outlier-detection pass.

    Attributes:
        indices: Positions in the input at which outliers were found.
        values: Input samples located at ``indices``.
        scores: Score of each flagged sample (a z-score or comparable
            measure), in the same order as ``indices``.
        mask: Boolean array over the input; True marks an outlier.
        count: Total number of flagged samples.
        method: Name of the detection method that produced the result.
        threshold: Threshold (or fence multiplier) the method was run with.
    """

    # Field order is part of the positional constructor signature.
    indices: NDArray[np.intp]
    values: NDArray[np.float64]
    scores: NDArray[np.float64]
    mask: NDArray[np.bool_]
    count: int
    method: str
    threshold: float
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def zscore_outliers(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    *,
    threshold: float = 3.0,
    return_scores: bool = False,
) -> OutlierResult | tuple[OutlierResult, NDArray[np.float64]]:
    """Detect outliers using the Z-score method.

    A sample is flagged when the absolute value of its z-score (distance
    from the mean in units of the sample standard deviation, ``ddof=1``)
    exceeds ``threshold``. Best suited to roughly normal data without
    heavy contamination, because both the mean and the standard deviation
    are themselves pulled by outliers.

    Args:
        trace: Input trace or array-like of samples. The samples are
            converted to float64 before any statistics are computed.
        threshold: Z-score threshold for outlier detection (default 3.0).
        return_scores: If True, also return the z-score of every sample.

    Returns:
        OutlierResult containing outlier information.
        If return_scores=True, a ``(result, zscores)`` tuple where
        ``zscores`` is a float64 array with one entry per input sample.
        Inputs with fewer than 3 samples, or with a standard deviation
        below 1e-12, yield no outliers and all-zero z-scores.

    Example:
        >>> result = zscore_outliers(trace, threshold=3.0)
        >>> print(f"Found {result.count} outliers")
        >>> print(f"Outlier indices: {result.indices}")

    References:
        NIST Engineering Statistics Handbook Section 1.3.5.17
    """
    raw = trace.data if isinstance(trace, WaveformTrace) else trace
    # Normalise to float64 once: keeps the result dtypes in line with the
    # annotations for float32/integer input, avoids reduced-precision
    # mean/std accumulation, and lets plain sequences be indexed by mask.
    data = np.asarray(raw, dtype=np.float64)

    n = len(data)
    if n < 3:
        # Too few samples: report nothing regardless of the threshold.
        zscores = np.zeros(n, dtype=np.float64)
        mask = np.zeros(n, dtype=np.bool_)
    else:
        mean = np.mean(data)
        std = np.std(data, ddof=1)

        if std < 1e-12:
            # No variation, no outliers
            zscores = np.zeros(n, dtype=np.float64)
        else:
            zscores = (data - mean) / std

        mask = np.abs(zscores) > threshold

    result = OutlierResult(
        indices=np.where(mask)[0],
        values=data[mask],
        scores=zscores[mask],
        mask=mask,
        count=int(np.sum(mask)),
        method="zscore",
        threshold=threshold,
    )

    if return_scores:
        return result, zscores
    return result
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def modified_zscore_outliers(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    *,
    threshold: float = 3.5,
    return_scores: bool = False,
) -> OutlierResult | tuple[OutlierResult, NDArray[np.float64]]:
    """Detect outliers using the Modified Z-score (MAD-based) method.

    The score of a sample is ``0.6745 * (x - median) / MAD``, where MAD is
    the median absolute deviation from the median. Because it is built on
    medians, it is more resistant to contaminated data than the standard
    z-score.

    When the MAD is below 1e-12 (at least half of the samples sit on the
    median) the regular formula cannot be used. In that case deviations
    are rescaled so that the largest one maps to ``2 * threshold``, which
    flags every sample deviating by more than half of the largest
    deviation. If all samples are identical nothing is flagged.

    Args:
        trace: Input trace or array-like of samples. The samples are
            converted to float64 before any statistics are computed.
        threshold: Modified z-score threshold (default 3.5, per Iglewicz & Hoaglin).
        return_scores: If True, also return the score of every sample.

    Returns:
        OutlierResult containing outlier information. Scores are signed:
        negative for samples below the median, in both the regular and
        the low-MAD fallback path.
        If return_scores=True, a ``(result, scores)`` tuple where
        ``scores`` is a float64 array with one entry per input sample.
        Inputs with fewer than 3 samples yield no outliers.

    Example:
        >>> result = modified_zscore_outliers(trace, threshold=3.5)
        >>> print(f"Found {result.count} outliers")
        >>> # Robust to up to ~50% contamination

    References:
        Iglewicz, B. & Hoaglin, D. (1993). How to Detect and Handle Outliers
    """
    raw = trace.data if isinstance(trace, WaveformTrace) else trace
    # Normalise to float64 once so result.scores / result.values match
    # their annotations for float32 or integer input as well.
    data = np.asarray(raw, dtype=np.float64)

    n = len(data)
    if n < 3:
        empty_result = OutlierResult(
            indices=np.array([], dtype=np.intp),
            values=np.array([], dtype=np.float64),
            scores=np.array([], dtype=np.float64),
            mask=np.zeros(n, dtype=np.bool_),
            count=0,
            method="modified_zscore",
            threshold=threshold,
        )
        if return_scores:
            return empty_result, np.zeros(n, dtype=np.float64)
        return empty_result

    # Signed and absolute deviations from the median; the MAD is the
    # median of the absolute ones.
    median = np.median(data)
    signed_dev = data - median
    abs_dev = np.abs(signed_dev)
    mad = np.median(abs_dev)

    # Consistency constant for normal distribution
    # MAD = 0.6745 * sigma for normal distribution
    k = 0.6745

    if mad < 1e-12:
        # Very low spread - the regular formula would divide by ~0.
        max_dev = np.max(abs_dev)
        if max_dev < 1e-12:
            # All points identical, no outliers
            modified_zscores = np.zeros(n, dtype=np.float64)
        else:
            # Map the largest deviation to 2 * threshold so that outliers
            # in nearly-constant data are still detected. The signed
            # deviation is used so scores carry direction, as in the
            # regular path below.
            scale = max_dev / (threshold * 2)  # Conservative scaling
            modified_zscores = signed_dev / scale
    else:
        modified_zscores = k * signed_dev / mad

    # Find outliers
    mask = np.abs(modified_zscores) > threshold

    result = OutlierResult(
        indices=np.where(mask)[0],
        values=data[mask],
        scores=modified_zscores[mask],
        mask=mask,
        count=int(np.sum(mask)),
        method="modified_zscore",
        threshold=threshold,
    )

    if return_scores:
        return result, modified_zscores
    return result
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def iqr_outliers(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    *,
    multiplier: float = 1.5,
    return_fences: bool = False,
) -> OutlierResult | tuple[OutlierResult, dict[str, float]]:
    """Detect outliers with Tukey's interquartile-range fences.

    Samples lying outside ``[Q1 - k*IQR, Q3 + k*IQR]`` are flagged, where
    ``k`` is ``multiplier``. The method makes no symmetry assumption, so
    it copes well with skewed distributions.

    Args:
        trace: Input trace or numpy array.
        multiplier: Factor applied to the IQR when placing the fences
            (default 1.5). Use 3.0 for "extreme" outliers.
        return_fences: If True, also return the quartiles and fences.

    Returns:
        OutlierResult containing outlier information. Each score is the
        distance beyond the violated fence divided by the IQR, or 0 when
        the IQR is not positive.
        If return_fences=True, a ``(result, fences)`` tuple where
        ``fences`` has the keys ``q1``, ``q3``, ``iqr``, ``lower_fence``
        and ``upper_fence`` (all NaN when fewer than 4 samples are given,
        in which case no outliers are reported).

    Example:
        >>> result = iqr_outliers(trace, multiplier=1.5)
        >>> print(f"Found {result.count} outliers")

        >>> # Get fence values
        >>> result, fences = iqr_outliers(trace, return_fences=True)
        >>> print(f"Lower fence: {fences['lower_fence']}")

    References:
        Tukey, J. W. (1977). Exploratory Data Analysis
    """
    samples = trace.data if isinstance(trace, WaveformTrace) else trace
    total = len(samples)

    # Guard clause: with fewer than 4 samples no quartiles are computed.
    if total < 4:
        nothing_found = OutlierResult(
            indices=np.array([], dtype=np.intp),
            values=np.array([], dtype=np.float64),
            scores=np.array([], dtype=np.float64),
            mask=np.zeros(total, dtype=np.bool_),
            count=0,
            method="iqr",
            threshold=multiplier,
        )
        if not return_fences:
            return nothing_found
        undefined = dict.fromkeys(
            ("q1", "q3", "iqr", "lower_fence", "upper_fence"), np.nan
        )
        return nothing_found, undefined

    # Quartiles and the fences derived from them.
    q1, q3 = (float(np.percentile(samples, pct)) for pct in (25, 75))
    iqr = q3 - q1
    lower_fence = q1 - multiplier * iqr
    upper_fence = q3 + multiplier * iqr

    # One mask per violated fence; their union is the outlier mask.
    too_low = samples < lower_fence
    too_high = samples > upper_fence
    mask = too_low | too_high
    indices = np.nonzero(mask)[0]

    if iqr > 0:
        # Distance past the violated fence, in units of the IQR.
        distance = np.zeros(total, dtype=np.float64)
        distance[too_low] = (lower_fence - samples[too_low]) / iqr
        distance[too_high] = (samples[too_high] - upper_fence) / iqr
        outlier_scores = distance[mask]
    else:
        # Degenerate IQR: normalisation is impossible, report zeros.
        outlier_scores = np.zeros(len(indices), dtype=np.float64)

    result = OutlierResult(
        indices=indices,
        values=samples[mask].astype(np.float64),
        scores=outlier_scores,
        mask=mask,
        count=int(np.sum(mask)),
        method="iqr",
        threshold=multiplier,
    )

    if not return_fences:
        return result
    return result, {
        "q1": q1,
        "q3": q3,
        "iqr": iqr,
        "lower_fence": lower_fence,
        "upper_fence": upper_fence,
    }
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def detect_outliers(
    trace: WaveformTrace | NDArray[np.floating[Any]],  # type: ignore[name-defined]
    *,
    method: str = "modified_zscore",
    **kwargs: Any,  # type: ignore[name-defined]
) -> OutlierResult:
    """Run the outlier detector selected by name.

    Thin dispatcher: looks up the detector registered under ``method`` and
    forwards ``trace`` and ``kwargs`` to it unchanged.

    Args:
        trace: Input trace or numpy array.
        method: Name of the detector to run. One of:
            - "zscore": Standard z-score method
            - "modified_zscore": MAD-based robust method (default)
            - "iqr": Interquartile range method
        **kwargs: Extra keyword arguments handed to the selected detector.

    Returns:
        The OutlierResult produced by the detector. When the detector
        returns a tuple (as iqr_outliers does when asked for its fences),
        only the first element is returned.

    Raises:
        ValueError: If method is not one of the supported names.

    Example:
        >>> result = detect_outliers(trace, method="iqr", multiplier=2.0)
        >>> print(f"Method: {result.method}, Count: {result.count}")
    """
    dispatch = {
        "zscore": zscore_outliers,  # type: ignore[dict-item]
        "modified_zscore": modified_zscore_outliers,  # type: ignore[dict-item]
        "iqr": iqr_outliers,
    }

    detector = dispatch.get(method)
    if detector is None:
        raise ValueError(f"Unknown method: {method}. Available: {', '.join(dispatch)}")

    outcome = detector(trace, **kwargs)

    # Some detectors hand back (result, extras); callers of this helper only
    # ever get the result object.
    return outcome[0] if isinstance(outcome, tuple) else outcome
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def remove_outliers(  # type: ignore[no-untyped-def]
    trace: WaveformTrace | NDArray[np.floating[Any]],
    *,
    method: str = "modified_zscore",
    replacement: str = "nan",
    **kwargs,
) -> NDArray[np.float64]:
    """Remove or replace outliers in data.

    Outliers are located with detect_outliers and then rewritten in a
    float64 copy of the samples; the input is never modified.

    Args:
        trace: Input trace or numpy array.
        method: Detection method (see detect_outliers).
        replacement: How to handle outliers:
            - "nan": Replace with NaN
            - "clip": Clip to nearest fence/threshold. For "iqr" the fences
              reported by iqr_outliers are used; for every other method the
              bounds are mean +/- threshold * std of the non-outlier samples
              (of all samples when every sample is flagged).
            - "interpolate": Linear interpolation from the nearest
              non-outlier neighbors; outliers before the first or after the
              last non-outlier sample take that sample's value.
        **kwargs: Additional arguments for detection method.

    Returns:
        Array with outliers handled according to replacement method.

    Raises:
        ValueError: If replacement method is not one of the supported types.
            This is checked before detection runs, so it is raised even when
            the data contains no outliers.

    Example:
        >>> cleaned = remove_outliers(trace, method="iqr", replacement="nan")
        >>> # Use for analysis that can handle NaN values
    """
    # Validate first: otherwise a typo in `replacement` is silently accepted
    # whenever the data happens to contain no outliers.
    if replacement not in ("nan", "clip", "interpolate"):
        raise ValueError(f"Unknown replacement method: {replacement}")

    # np.array always copies; forcing float64 lets NaN be stored even when the
    # underlying samples are integers and matches the declared return type.
    samples = trace.data if isinstance(trace, WaveformTrace) else trace
    data = np.array(samples, dtype=np.float64)

    result = detect_outliers(trace, method=method, **kwargs)

    if result.count == 0:
        return data

    if replacement == "nan":
        data[result.mask] = np.nan

    elif replacement == "clip":
        if method == "iqr":
            # Re-run the detector to obtain the fence values. Drop any
            # caller-supplied return_fences so it is not passed twice.
            fence_kwargs = {key: val for key, val in kwargs.items() if key != "return_fences"}
            _, fences = iqr_outliers(trace, return_fences=True, **fence_kwargs)  # type: ignore[misc]
            data = np.clip(data, fences["lower_fence"], fences["upper_fence"])
        else:
            # For z-score methods, clip to mean +/- threshold * std, estimated
            # from the samples that were not flagged when any exist.
            keep = ~result.mask
            reference = data[keep] if np.any(keep) else data
            mean = np.mean(reference)
            std = np.std(reference, ddof=1)
            threshold = result.threshold
            data = np.clip(data, mean - threshold * std, mean + threshold * std)

    else:  # replacement == "interpolate"
        valid_indices = np.where(~result.mask)[0]

        # With no valid samples there is nothing to interpolate from; the
        # data is returned unchanged in that case.
        if len(valid_indices) > 0:
            # np.interp interpolates linearly between the bracketing valid
            # samples and holds the first/last valid value beyond the ends.
            data[result.indices] = np.interp(
                result.indices, valid_indices, data[valid_indices]
            )

    return data
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
# Names exported by `from <module> import *`.
__all__ = [
    "OutlierResult", "detect_outliers", "iqr_outliers",
    "modified_zscore_outliers", "remove_outliers", "zscore_outliers",
]
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Streaming statistics computation for large datasets.
|
|
2
|
+
|
|
3
|
+
This module provides online/streaming algorithms for computing statistics
|
|
4
|
+
incrementally without loading entire datasets into memory.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
from numpy.typing import NDArray
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class StreamingStatsResult:
    """Summary statistics produced by a streaming computation.

    Attributes:
        mean: Mean of the observed values.
        variance: Variance of the observed values.
        std: Standard deviation of the observed values.
        min: Smallest observed value.
        max: Largest observed value.
        count: Number of values observed.
    """

    mean: float
    variance: float
    std: float
    min: float
    max: float
    count: int
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class StreamingStats:
    """Compute statistics incrementally, one data chunk at a time.

    Mean, variance, minimum, maximum and sample count are maintained as
    running values, so the full dataset never has to be held in memory.
    Each chunk is summarised with vectorised NumPy operations and folded
    into the running state with the pairwise (Chan et al.) generalisation
    of Welford's online algorithm.

    Attributes:
        count: Number of samples seen so far.
        mean: Running mean of all samples seen so far.
        m2: Running sum of squared differences from the mean.
        min_val: Smallest sample seen so far (+inf before any data).
        max_val: Largest sample seen so far (-inf before any data).

    Example:
        >>> stats = StreamingStats()
        >>> stats.update(np.array([1, 2, 3]))
        >>> stats.update(np.array([4, 5, 6]))
        >>> result = stats.finalize()
        >>> print(result.mean, result.std)
    """

    def __init__(self) -> None:
        """Initialize streaming statistics tracker."""
        self.count = 0
        self.mean = 0.0
        self.m2 = 0.0  # Sum of squared differences from mean
        self.min_val = float("inf")
        self.max_val = float("-inf")

    def update(self, data: NDArray[np.floating[Any]]) -> None:
        """Update statistics with new data chunk.

        The chunk is flattened, summarised in one vectorised pass and merged
        into the running state. An empty chunk leaves the state untouched.

        Args:
            data: New data chunk to incorporate. Any shape is accepted.
        """
        chunk = np.asarray(data, dtype=np.float64).ravel()
        chunk_count = int(chunk.size)
        if chunk_count == 0:
            return

        chunk_mean = float(chunk.mean())
        chunk_m2 = float(np.sum((chunk - chunk_mean) ** 2))

        if self.count == 0:
            # First data seen: adopt the chunk statistics exactly.
            self.mean = chunk_mean
            self.m2 = chunk_m2
        else:
            # Pairwise merge of (count, mean, m2) for two disjoint sample sets.
            total = self.count + chunk_count
            delta = chunk_mean - self.mean
            self.mean += delta * chunk_count / total
            self.m2 += chunk_m2 + delta * delta * self.count * chunk_count / total
        self.count += chunk_count

        # fmin/fmax skip NaN samples, so a NaN never becomes the running
        # extreme; an all-NaN chunk yields NaN, which fails both comparisons.
        chunk_min = float(np.fmin.reduce(chunk))
        chunk_max = float(np.fmax.reduce(chunk))
        if chunk_min < self.min_val:
            self.min_val = chunk_min
        if chunk_max > self.max_val:
            self.max_val = chunk_max

    def finalize(self) -> StreamingStatsResult:
        """Finalize and return computed statistics.

        The tracker itself is not reset, so more data may be added afterwards
        and finalize called again.

        Returns:
            StreamingStatsResult with mean, variance, std, min, max, count.
            Variance is the sample variance (divisor ``count - 1``); it and
            std are reported as 0.0 when fewer than two samples were seen.
            min and max are reported as 0.0 while they still hold their
            initial infinite sentinels.
        """
        if self.count < 2:
            variance = 0.0
            std = 0.0
        else:
            variance = float(self.m2 / (self.count - 1))  # Sample variance
            std = float(np.sqrt(variance))

        return StreamingStatsResult(
            mean=float(self.mean),
            variance=variance,
            std=std,
            min=float(self.min_val) if self.min_val != float("inf") else 0.0,
            max=float(self.max_val) if self.max_val != float("-inf") else 0.0,
            count=self.count,
        )
|