oscura 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oscura/__init__.py +813 -8
- oscura/__main__.py +392 -0
- oscura/analyzers/__init__.py +37 -0
- oscura/analyzers/digital/__init__.py +177 -0
- oscura/analyzers/digital/bus.py +691 -0
- oscura/analyzers/digital/clock.py +805 -0
- oscura/analyzers/digital/correlation.py +720 -0
- oscura/analyzers/digital/edges.py +632 -0
- oscura/analyzers/digital/extraction.py +413 -0
- oscura/analyzers/digital/quality.py +878 -0
- oscura/analyzers/digital/signal_quality.py +877 -0
- oscura/analyzers/digital/thresholds.py +708 -0
- oscura/analyzers/digital/timing.py +1104 -0
- oscura/analyzers/eye/__init__.py +46 -0
- oscura/analyzers/eye/diagram.py +434 -0
- oscura/analyzers/eye/metrics.py +555 -0
- oscura/analyzers/jitter/__init__.py +83 -0
- oscura/analyzers/jitter/ber.py +333 -0
- oscura/analyzers/jitter/decomposition.py +759 -0
- oscura/analyzers/jitter/measurements.py +413 -0
- oscura/analyzers/jitter/spectrum.py +220 -0
- oscura/analyzers/measurements.py +40 -0
- oscura/analyzers/packet/__init__.py +171 -0
- oscura/analyzers/packet/daq.py +1077 -0
- oscura/analyzers/packet/metrics.py +437 -0
- oscura/analyzers/packet/parser.py +327 -0
- oscura/analyzers/packet/payload.py +2156 -0
- oscura/analyzers/packet/payload_analysis.py +1312 -0
- oscura/analyzers/packet/payload_extraction.py +236 -0
- oscura/analyzers/packet/payload_patterns.py +670 -0
- oscura/analyzers/packet/stream.py +359 -0
- oscura/analyzers/patterns/__init__.py +266 -0
- oscura/analyzers/patterns/clustering.py +1036 -0
- oscura/analyzers/patterns/discovery.py +539 -0
- oscura/analyzers/patterns/learning.py +797 -0
- oscura/analyzers/patterns/matching.py +1091 -0
- oscura/analyzers/patterns/periodic.py +650 -0
- oscura/analyzers/patterns/sequences.py +767 -0
- oscura/analyzers/power/__init__.py +116 -0
- oscura/analyzers/power/ac_power.py +391 -0
- oscura/analyzers/power/basic.py +383 -0
- oscura/analyzers/power/conduction.py +314 -0
- oscura/analyzers/power/efficiency.py +297 -0
- oscura/analyzers/power/ripple.py +356 -0
- oscura/analyzers/power/soa.py +372 -0
- oscura/analyzers/power/switching.py +479 -0
- oscura/analyzers/protocol/__init__.py +150 -0
- oscura/analyzers/protocols/__init__.py +150 -0
- oscura/analyzers/protocols/base.py +500 -0
- oscura/analyzers/protocols/can.py +620 -0
- oscura/analyzers/protocols/can_fd.py +448 -0
- oscura/analyzers/protocols/flexray.py +405 -0
- oscura/analyzers/protocols/hdlc.py +399 -0
- oscura/analyzers/protocols/i2c.py +368 -0
- oscura/analyzers/protocols/i2s.py +296 -0
- oscura/analyzers/protocols/jtag.py +393 -0
- oscura/analyzers/protocols/lin.py +445 -0
- oscura/analyzers/protocols/manchester.py +333 -0
- oscura/analyzers/protocols/onewire.py +501 -0
- oscura/analyzers/protocols/spi.py +334 -0
- oscura/analyzers/protocols/swd.py +325 -0
- oscura/analyzers/protocols/uart.py +393 -0
- oscura/analyzers/protocols/usb.py +495 -0
- oscura/analyzers/signal_integrity/__init__.py +63 -0
- oscura/analyzers/signal_integrity/embedding.py +294 -0
- oscura/analyzers/signal_integrity/equalization.py +370 -0
- oscura/analyzers/signal_integrity/sparams.py +484 -0
- oscura/analyzers/spectral/__init__.py +53 -0
- oscura/analyzers/spectral/chunked.py +273 -0
- oscura/analyzers/spectral/chunked_fft.py +571 -0
- oscura/analyzers/spectral/chunked_wavelet.py +391 -0
- oscura/analyzers/spectral/fft.py +92 -0
- oscura/analyzers/statistical/__init__.py +250 -0
- oscura/analyzers/statistical/checksum.py +923 -0
- oscura/analyzers/statistical/chunked_corr.py +228 -0
- oscura/analyzers/statistical/classification.py +778 -0
- oscura/analyzers/statistical/entropy.py +1113 -0
- oscura/analyzers/statistical/ngrams.py +614 -0
- oscura/analyzers/statistics/__init__.py +119 -0
- oscura/analyzers/statistics/advanced.py +885 -0
- oscura/analyzers/statistics/basic.py +263 -0
- oscura/analyzers/statistics/correlation.py +630 -0
- oscura/analyzers/statistics/distribution.py +298 -0
- oscura/analyzers/statistics/outliers.py +463 -0
- oscura/analyzers/statistics/streaming.py +93 -0
- oscura/analyzers/statistics/trend.py +520 -0
- oscura/analyzers/validation.py +598 -0
- oscura/analyzers/waveform/__init__.py +36 -0
- oscura/analyzers/waveform/measurements.py +943 -0
- oscura/analyzers/waveform/measurements_with_uncertainty.py +371 -0
- oscura/analyzers/waveform/spectral.py +1689 -0
- oscura/analyzers/waveform/wavelets.py +298 -0
- oscura/api/__init__.py +62 -0
- oscura/api/dsl.py +538 -0
- oscura/api/fluent.py +571 -0
- oscura/api/operators.py +498 -0
- oscura/api/optimization.py +392 -0
- oscura/api/profiling.py +396 -0
- oscura/automotive/__init__.py +73 -0
- oscura/automotive/can/__init__.py +52 -0
- oscura/automotive/can/analysis.py +356 -0
- oscura/automotive/can/checksum.py +250 -0
- oscura/automotive/can/correlation.py +212 -0
- oscura/automotive/can/discovery.py +355 -0
- oscura/automotive/can/message_wrapper.py +375 -0
- oscura/automotive/can/models.py +385 -0
- oscura/automotive/can/patterns.py +381 -0
- oscura/automotive/can/session.py +452 -0
- oscura/automotive/can/state_machine.py +300 -0
- oscura/automotive/can/stimulus_response.py +461 -0
- oscura/automotive/dbc/__init__.py +15 -0
- oscura/automotive/dbc/generator.py +156 -0
- oscura/automotive/dbc/parser.py +146 -0
- oscura/automotive/dtc/__init__.py +30 -0
- oscura/automotive/dtc/database.py +3036 -0
- oscura/automotive/j1939/__init__.py +14 -0
- oscura/automotive/j1939/decoder.py +745 -0
- oscura/automotive/loaders/__init__.py +35 -0
- oscura/automotive/loaders/asc.py +98 -0
- oscura/automotive/loaders/blf.py +77 -0
- oscura/automotive/loaders/csv_can.py +136 -0
- oscura/automotive/loaders/dispatcher.py +136 -0
- oscura/automotive/loaders/mdf.py +331 -0
- oscura/automotive/loaders/pcap.py +132 -0
- oscura/automotive/obd/__init__.py +14 -0
- oscura/automotive/obd/decoder.py +707 -0
- oscura/automotive/uds/__init__.py +48 -0
- oscura/automotive/uds/decoder.py +265 -0
- oscura/automotive/uds/models.py +64 -0
- oscura/automotive/visualization.py +369 -0
- oscura/batch/__init__.py +55 -0
- oscura/batch/advanced.py +627 -0
- oscura/batch/aggregate.py +300 -0
- oscura/batch/analyze.py +139 -0
- oscura/batch/logging.py +487 -0
- oscura/batch/metrics.py +556 -0
- oscura/builders/__init__.py +41 -0
- oscura/builders/signal_builder.py +1131 -0
- oscura/cli/__init__.py +14 -0
- oscura/cli/batch.py +339 -0
- oscura/cli/characterize.py +273 -0
- oscura/cli/compare.py +775 -0
- oscura/cli/decode.py +551 -0
- oscura/cli/main.py +247 -0
- oscura/cli/shell.py +350 -0
- oscura/comparison/__init__.py +66 -0
- oscura/comparison/compare.py +397 -0
- oscura/comparison/golden.py +487 -0
- oscura/comparison/limits.py +391 -0
- oscura/comparison/mask.py +434 -0
- oscura/comparison/trace_diff.py +30 -0
- oscura/comparison/visualization.py +481 -0
- oscura/compliance/__init__.py +70 -0
- oscura/compliance/advanced.py +756 -0
- oscura/compliance/masks.py +363 -0
- oscura/compliance/reporting.py +483 -0
- oscura/compliance/testing.py +298 -0
- oscura/component/__init__.py +38 -0
- oscura/component/impedance.py +365 -0
- oscura/component/reactive.py +598 -0
- oscura/component/transmission_line.py +312 -0
- oscura/config/__init__.py +191 -0
- oscura/config/defaults.py +254 -0
- oscura/config/loader.py +348 -0
- oscura/config/memory.py +271 -0
- oscura/config/migration.py +458 -0
- oscura/config/pipeline.py +1077 -0
- oscura/config/preferences.py +530 -0
- oscura/config/protocol.py +875 -0
- oscura/config/schema.py +713 -0
- oscura/config/settings.py +420 -0
- oscura/config/thresholds.py +599 -0
- oscura/convenience.py +457 -0
- oscura/core/__init__.py +299 -0
- oscura/core/audit.py +457 -0
- oscura/core/backend_selector.py +405 -0
- oscura/core/cache.py +590 -0
- oscura/core/cancellation.py +439 -0
- oscura/core/confidence.py +225 -0
- oscura/core/config.py +506 -0
- oscura/core/correlation.py +216 -0
- oscura/core/cross_domain.py +422 -0
- oscura/core/debug.py +301 -0
- oscura/core/edge_cases.py +541 -0
- oscura/core/exceptions.py +535 -0
- oscura/core/gpu_backend.py +523 -0
- oscura/core/lazy.py +832 -0
- oscura/core/log_query.py +540 -0
- oscura/core/logging.py +931 -0
- oscura/core/logging_advanced.py +952 -0
- oscura/core/memoize.py +171 -0
- oscura/core/memory_check.py +274 -0
- oscura/core/memory_guard.py +290 -0
- oscura/core/memory_limits.py +336 -0
- oscura/core/memory_monitor.py +453 -0
- oscura/core/memory_progress.py +465 -0
- oscura/core/memory_warnings.py +315 -0
- oscura/core/numba_backend.py +362 -0
- oscura/core/performance.py +352 -0
- oscura/core/progress.py +524 -0
- oscura/core/provenance.py +358 -0
- oscura/core/results.py +331 -0
- oscura/core/types.py +504 -0
- oscura/core/uncertainty.py +383 -0
- oscura/discovery/__init__.py +52 -0
- oscura/discovery/anomaly_detector.py +672 -0
- oscura/discovery/auto_decoder.py +415 -0
- oscura/discovery/comparison.py +497 -0
- oscura/discovery/quality_validator.py +528 -0
- oscura/discovery/signal_detector.py +769 -0
- oscura/dsl/__init__.py +73 -0
- oscura/dsl/commands.py +246 -0
- oscura/dsl/interpreter.py +455 -0
- oscura/dsl/parser.py +689 -0
- oscura/dsl/repl.py +172 -0
- oscura/exceptions.py +59 -0
- oscura/exploratory/__init__.py +111 -0
- oscura/exploratory/error_recovery.py +642 -0
- oscura/exploratory/fuzzy.py +513 -0
- oscura/exploratory/fuzzy_advanced.py +786 -0
- oscura/exploratory/legacy.py +831 -0
- oscura/exploratory/parse.py +358 -0
- oscura/exploratory/recovery.py +275 -0
- oscura/exploratory/sync.py +382 -0
- oscura/exploratory/unknown.py +707 -0
- oscura/export/__init__.py +25 -0
- oscura/export/wireshark/README.md +265 -0
- oscura/export/wireshark/__init__.py +47 -0
- oscura/export/wireshark/generator.py +312 -0
- oscura/export/wireshark/lua_builder.py +159 -0
- oscura/export/wireshark/templates/dissector.lua.j2 +92 -0
- oscura/export/wireshark/type_mapping.py +165 -0
- oscura/export/wireshark/validator.py +105 -0
- oscura/exporters/__init__.py +94 -0
- oscura/exporters/csv.py +303 -0
- oscura/exporters/exporters.py +44 -0
- oscura/exporters/hdf5.py +219 -0
- oscura/exporters/html_export.py +701 -0
- oscura/exporters/json_export.py +291 -0
- oscura/exporters/markdown_export.py +367 -0
- oscura/exporters/matlab_export.py +354 -0
- oscura/exporters/npz_export.py +219 -0
- oscura/exporters/spice_export.py +210 -0
- oscura/extensibility/__init__.py +131 -0
- oscura/extensibility/docs.py +752 -0
- oscura/extensibility/extensions.py +1125 -0
- oscura/extensibility/logging.py +259 -0
- oscura/extensibility/measurements.py +485 -0
- oscura/extensibility/plugins.py +414 -0
- oscura/extensibility/registry.py +346 -0
- oscura/extensibility/templates.py +913 -0
- oscura/extensibility/validation.py +651 -0
- oscura/filtering/__init__.py +89 -0
- oscura/filtering/base.py +563 -0
- oscura/filtering/convenience.py +564 -0
- oscura/filtering/design.py +725 -0
- oscura/filtering/filters.py +32 -0
- oscura/filtering/introspection.py +605 -0
- oscura/guidance/__init__.py +24 -0
- oscura/guidance/recommender.py +429 -0
- oscura/guidance/wizard.py +518 -0
- oscura/inference/__init__.py +251 -0
- oscura/inference/active_learning/README.md +153 -0
- oscura/inference/active_learning/__init__.py +38 -0
- oscura/inference/active_learning/lstar.py +257 -0
- oscura/inference/active_learning/observation_table.py +230 -0
- oscura/inference/active_learning/oracle.py +78 -0
- oscura/inference/active_learning/teachers/__init__.py +15 -0
- oscura/inference/active_learning/teachers/simulator.py +192 -0
- oscura/inference/adaptive_tuning.py +453 -0
- oscura/inference/alignment.py +653 -0
- oscura/inference/bayesian.py +943 -0
- oscura/inference/binary.py +1016 -0
- oscura/inference/crc_reverse.py +711 -0
- oscura/inference/logic.py +288 -0
- oscura/inference/message_format.py +1305 -0
- oscura/inference/protocol.py +417 -0
- oscura/inference/protocol_dsl.py +1084 -0
- oscura/inference/protocol_library.py +1230 -0
- oscura/inference/sequences.py +809 -0
- oscura/inference/signal_intelligence.py +1509 -0
- oscura/inference/spectral.py +215 -0
- oscura/inference/state_machine.py +634 -0
- oscura/inference/stream.py +918 -0
- oscura/integrations/__init__.py +59 -0
- oscura/integrations/llm.py +1827 -0
- oscura/jupyter/__init__.py +32 -0
- oscura/jupyter/display.py +268 -0
- oscura/jupyter/magic.py +334 -0
- oscura/loaders/__init__.py +526 -0
- oscura/loaders/binary.py +69 -0
- oscura/loaders/configurable.py +1255 -0
- oscura/loaders/csv.py +26 -0
- oscura/loaders/csv_loader.py +473 -0
- oscura/loaders/hdf5.py +9 -0
- oscura/loaders/hdf5_loader.py +510 -0
- oscura/loaders/lazy.py +370 -0
- oscura/loaders/mmap_loader.py +583 -0
- oscura/loaders/numpy_loader.py +436 -0
- oscura/loaders/pcap.py +432 -0
- oscura/loaders/preprocessing.py +368 -0
- oscura/loaders/rigol.py +287 -0
- oscura/loaders/sigrok.py +321 -0
- oscura/loaders/tdms.py +367 -0
- oscura/loaders/tektronix.py +711 -0
- oscura/loaders/validation.py +584 -0
- oscura/loaders/vcd.py +464 -0
- oscura/loaders/wav.py +233 -0
- oscura/math/__init__.py +45 -0
- oscura/math/arithmetic.py +824 -0
- oscura/math/interpolation.py +413 -0
- oscura/onboarding/__init__.py +39 -0
- oscura/onboarding/help.py +498 -0
- oscura/onboarding/tutorials.py +405 -0
- oscura/onboarding/wizard.py +466 -0
- oscura/optimization/__init__.py +19 -0
- oscura/optimization/parallel.py +440 -0
- oscura/optimization/search.py +532 -0
- oscura/pipeline/__init__.py +43 -0
- oscura/pipeline/base.py +338 -0
- oscura/pipeline/composition.py +242 -0
- oscura/pipeline/parallel.py +448 -0
- oscura/pipeline/pipeline.py +375 -0
- oscura/pipeline/reverse_engineering.py +1119 -0
- oscura/plugins/__init__.py +122 -0
- oscura/plugins/base.py +272 -0
- oscura/plugins/cli.py +497 -0
- oscura/plugins/discovery.py +411 -0
- oscura/plugins/isolation.py +418 -0
- oscura/plugins/lifecycle.py +959 -0
- oscura/plugins/manager.py +493 -0
- oscura/plugins/registry.py +421 -0
- oscura/plugins/versioning.py +372 -0
- oscura/py.typed +0 -0
- oscura/quality/__init__.py +65 -0
- oscura/quality/ensemble.py +740 -0
- oscura/quality/explainer.py +338 -0
- oscura/quality/scoring.py +616 -0
- oscura/quality/warnings.py +456 -0
- oscura/reporting/__init__.py +248 -0
- oscura/reporting/advanced.py +1234 -0
- oscura/reporting/analyze.py +448 -0
- oscura/reporting/argument_preparer.py +596 -0
- oscura/reporting/auto_report.py +507 -0
- oscura/reporting/batch.py +615 -0
- oscura/reporting/chart_selection.py +223 -0
- oscura/reporting/comparison.py +330 -0
- oscura/reporting/config.py +615 -0
- oscura/reporting/content/__init__.py +39 -0
- oscura/reporting/content/executive.py +127 -0
- oscura/reporting/content/filtering.py +191 -0
- oscura/reporting/content/minimal.py +257 -0
- oscura/reporting/content/verbosity.py +162 -0
- oscura/reporting/core.py +508 -0
- oscura/reporting/core_formats/__init__.py +17 -0
- oscura/reporting/core_formats/multi_format.py +210 -0
- oscura/reporting/engine.py +836 -0
- oscura/reporting/export.py +366 -0
- oscura/reporting/formatting/__init__.py +129 -0
- oscura/reporting/formatting/emphasis.py +81 -0
- oscura/reporting/formatting/numbers.py +403 -0
- oscura/reporting/formatting/standards.py +55 -0
- oscura/reporting/formatting.py +466 -0
- oscura/reporting/html.py +578 -0
- oscura/reporting/index.py +590 -0
- oscura/reporting/multichannel.py +296 -0
- oscura/reporting/output.py +379 -0
- oscura/reporting/pdf.py +373 -0
- oscura/reporting/plots.py +731 -0
- oscura/reporting/pptx_export.py +360 -0
- oscura/reporting/renderers/__init__.py +11 -0
- oscura/reporting/renderers/pdf.py +94 -0
- oscura/reporting/sections.py +471 -0
- oscura/reporting/standards.py +680 -0
- oscura/reporting/summary_generator.py +368 -0
- oscura/reporting/tables.py +397 -0
- oscura/reporting/template_system.py +724 -0
- oscura/reporting/templates/__init__.py +15 -0
- oscura/reporting/templates/definition.py +205 -0
- oscura/reporting/templates/index.html +649 -0
- oscura/reporting/templates/index.md +173 -0
- oscura/schemas/__init__.py +158 -0
- oscura/schemas/bus_configuration.json +322 -0
- oscura/schemas/device_mapping.json +182 -0
- oscura/schemas/packet_format.json +418 -0
- oscura/schemas/protocol_definition.json +363 -0
- oscura/search/__init__.py +16 -0
- oscura/search/anomaly.py +292 -0
- oscura/search/context.py +149 -0
- oscura/search/pattern.py +160 -0
- oscura/session/__init__.py +34 -0
- oscura/session/annotations.py +289 -0
- oscura/session/history.py +313 -0
- oscura/session/session.py +445 -0
- oscura/streaming/__init__.py +43 -0
- oscura/streaming/chunked.py +611 -0
- oscura/streaming/progressive.py +393 -0
- oscura/streaming/realtime.py +622 -0
- oscura/testing/__init__.py +54 -0
- oscura/testing/synthetic.py +808 -0
- oscura/triggering/__init__.py +68 -0
- oscura/triggering/base.py +229 -0
- oscura/triggering/edge.py +353 -0
- oscura/triggering/pattern.py +344 -0
- oscura/triggering/pulse.py +581 -0
- oscura/triggering/window.py +453 -0
- oscura/ui/__init__.py +48 -0
- oscura/ui/formatters.py +526 -0
- oscura/ui/progressive_display.py +340 -0
- oscura/utils/__init__.py +99 -0
- oscura/utils/autodetect.py +338 -0
- oscura/utils/buffer.py +389 -0
- oscura/utils/lazy.py +407 -0
- oscura/utils/lazy_imports.py +147 -0
- oscura/utils/memory.py +836 -0
- oscura/utils/memory_advanced.py +1326 -0
- oscura/utils/memory_extensions.py +465 -0
- oscura/utils/progressive.py +352 -0
- oscura/utils/windowing.py +362 -0
- oscura/visualization/__init__.py +321 -0
- oscura/visualization/accessibility.py +526 -0
- oscura/visualization/annotations.py +374 -0
- oscura/visualization/axis_scaling.py +305 -0
- oscura/visualization/colors.py +453 -0
- oscura/visualization/digital.py +337 -0
- oscura/visualization/eye.py +420 -0
- oscura/visualization/histogram.py +281 -0
- oscura/visualization/interactive.py +858 -0
- oscura/visualization/jitter.py +702 -0
- oscura/visualization/keyboard.py +394 -0
- oscura/visualization/layout.py +365 -0
- oscura/visualization/optimization.py +1028 -0
- oscura/visualization/palettes.py +446 -0
- oscura/visualization/plot.py +92 -0
- oscura/visualization/power.py +290 -0
- oscura/visualization/power_extended.py +626 -0
- oscura/visualization/presets.py +467 -0
- oscura/visualization/protocols.py +932 -0
- oscura/visualization/render.py +207 -0
- oscura/visualization/rendering.py +444 -0
- oscura/visualization/reverse_engineering.py +791 -0
- oscura/visualization/signal_integrity.py +808 -0
- oscura/visualization/specialized.py +553 -0
- oscura/visualization/spectral.py +811 -0
- oscura/visualization/styles.py +381 -0
- oscura/visualization/thumbnails.py +311 -0
- oscura/visualization/time_axis.py +351 -0
- oscura/visualization/waveform.py +367 -0
- oscura/workflow/__init__.py +13 -0
- oscura/workflow/dag.py +377 -0
- oscura/workflows/__init__.py +58 -0
- oscura/workflows/compliance.py +280 -0
- oscura/workflows/digital.py +272 -0
- oscura/workflows/multi_trace.py +502 -0
- oscura/workflows/power.py +178 -0
- oscura/workflows/protocol.py +492 -0
- oscura/workflows/reverse_engineering.py +639 -0
- oscura/workflows/signal_integrity.py +227 -0
- oscura-0.1.1.dist-info/METADATA +300 -0
- oscura-0.1.1.dist-info/RECORD +463 -0
- oscura-0.1.1.dist-info/entry_points.txt +2 -0
- {oscura-0.0.1.dist-info → oscura-0.1.1.dist-info}/licenses/LICENSE +1 -1
- oscura-0.0.1.dist-info/METADATA +0 -63
- oscura-0.0.1.dist-info/RECORD +0 -5
- {oscura-0.0.1.dist-info → oscura-0.1.1.dist-info}/WHEEL +0 -0
oscura/batch/advanced.py
ADDED
|
@@ -0,0 +1,627 @@
|
|
|
1
|
+
"""Advanced batch processing with checkpointing and error handling.
|
|
2
|
+
|
|
3
|
+
This module provides enhanced batch processing capabilities including
|
|
4
|
+
checkpointing for long-running jobs, resume functionality, and sophisticated
|
|
5
|
+
error handling.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import concurrent.futures
|
|
11
|
+
import json
|
|
12
|
+
import threading
|
|
13
|
+
import traceback
|
|
14
|
+
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
|
|
15
|
+
from dataclasses import asdict, dataclass, field
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
18
|
+
|
|
19
|
+
import pandas as pd
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from collections.abc import Callable
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
from tqdm import tqdm # type: ignore[import-untyped]
|
|
26
|
+
|
|
27
|
+
HAS_TQDM = True
|
|
28
|
+
except ImportError:
|
|
29
|
+
HAS_TQDM = False
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class TimeoutError(Exception):
    """Raised when a function execution exceeds timeout.

    NOTE(review): this class shadows the builtin ``TimeoutError`` inside this
    module. Because it subclasses ``Exception`` directly, ``except`` clauses
    written against the *builtin* ``TimeoutError`` will NOT catch it — confirm
    this is intended before changing either the name or the base class.
    """
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class BatchConfig:
    """Configuration for advanced batch processing.

    Attributes:
        on_error: Error handling strategy:
            - 'skip': Skip failed files, continue processing
            - 'stop': Stop processing on first error
            - 'warn': Log warning but continue (default)
        checkpoint_dir: Directory to save checkpoints. If None, no checkpointing.
        checkpoint_interval: Save checkpoint every N files (default: 10).
        max_workers: Maximum number of parallel workers. None uses CPU count.
        memory_limit: Maximum memory per worker in MB (not enforced, for documentation).
        timeout_per_file: Timeout in seconds per file. None for no timeout.
            When specified, the per-file call runs on a daemon thread that is
            joined with this timeout (see ``_run_with_timeout``); due to the
            GIL this reliably bounds I/O-bound work only.
        use_threads: Use threads instead of processes for parallelization.
        progress_bar: Show progress bar (requires tqdm).

    Example:
        >>> config = BatchConfig(
        ...     on_error='skip',
        ...     checkpoint_dir='./checkpoints',
        ...     checkpoint_interval=5,
        ...     max_workers=4
        ... )

    References:
        API-012: Advanced Batch Control
    """

    on_error: Literal["skip", "stop", "warn"] = "warn"
    checkpoint_dir: Path | str | None = None
    checkpoint_interval: int = 10
    max_workers: int | None = None
    memory_limit: float | None = None  # MB; informational only, never enforced
    timeout_per_file: float | None = None  # seconds; enforced via daemon thread + join
    use_threads: bool = False
    progress_bar: bool = True
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass
class FileResult:
    """Result from processing a single file.

    Attributes:
        file: Path to the file.
        success: Whether processing succeeded.
        result: Analysis result dictionary if successful.
        error: Error message if failed.
        traceback: Full traceback if failed.
        duration: Processing time in seconds.
        timed_out: Whether processing was terminated due to timeout.

    Example:
        >>> result = FileResult(
        ...     file='trace001.wfm',
        ...     success=True,
        ...     result={'rise_time': 1.2e-9},
        ...     duration=0.5
        ... )

    References:
        API-012: Advanced Batch Control
    """

    file: str  # stored as str (not Path) so the record is directly JSON-serializable
    success: bool = True
    result: dict[str, Any] = field(default_factory=dict)
    error: str | None = None  # populated only on failure
    traceback: str | None = None  # full formatted traceback, only on failure
    duration: float = 0.0  # wall-clock seconds spent on this file
    timed_out: bool = False  # True when the per-file timeout expired
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@dataclass
class BatchCheckpoint:
    """Checkpoint state for batch processing.

    Captures which files have been processed so an interrupted batch can be
    resumed without redoing completed work. Round-trips losslessly through
    JSON via :meth:`save` / :meth:`load`.

    Attributes:
        completed_files: List of successfully completed files.
        failed_files: List of failed file paths.
        results: List of FileResult objects.
        total_files: Total number of files in batch.
        config: Batch configuration used.

    Example:
        >>> checkpoint = BatchCheckpoint(
        ...     completed_files=['file1.wfm', 'file2.wfm'],
        ...     total_files=10,
        ...     config=config
        ... )

    References:
        API-012: Advanced Batch Control
    """

    completed_files: list[str] = field(default_factory=list)
    failed_files: list[str] = field(default_factory=list)
    results: list[FileResult] = field(default_factory=list)
    total_files: int = 0
    config: BatchConfig | None = None

    def save(self, checkpoint_path: Path) -> None:
        """Save checkpoint to JSON file.

        Creates parent directories as needed and writes UTF-8 JSON.

        Args:
            checkpoint_path: Path to save checkpoint file.

        Example:
            >>> checkpoint.save(Path('checkpoints/batch_001.json'))
        """
        checkpoint_path.parent.mkdir(parents=True, exist_ok=True)

        # Serialize the config via dataclasses.asdict, then coerce the one
        # field that may not be JSON-serializable (checkpoint_dir can be a
        # Path) to str. Everything else in BatchConfig is a JSON scalar.
        config_dict: dict[str, Any] | None = None
        if self.config:
            config_dict = asdict(self.config)
            if config_dict["checkpoint_dir"] is not None:
                config_dict["checkpoint_dir"] = str(config_dict["checkpoint_dir"])

        data = {
            "completed_files": self.completed_files,
            "failed_files": self.failed_files,
            "results": [asdict(r) for r in self.results],
            "total_files": self.total_files,
            "config": config_dict,
        }

        # Explicit encoding so checkpoints are portable across platforms
        # (the platform default encoding is not UTF-8 everywhere).
        with open(checkpoint_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    @classmethod
    def load(cls, checkpoint_path: Path) -> BatchCheckpoint:
        """Load checkpoint from JSON file.

        Args:
            checkpoint_path: Path to checkpoint file.

        Returns:
            Loaded BatchCheckpoint object.

        Example:
            >>> checkpoint = BatchCheckpoint.load(Path('checkpoints/batch_001.json'))
        """
        with open(checkpoint_path, encoding="utf-8") as f:
            data = json.load(f)

        # Reconstruct FileResult objects from their dict form.
        results = [FileResult(**r) for r in data.get("results", [])]

        # Reconstruct BatchConfig if present. checkpoint_dir deliberately
        # stays a string (BatchConfig accepts Path | str | None) so that a
        # subsequent save() remains JSON-serializable without special cases.
        config = None
        if data.get("config"):
            config = BatchConfig(**data["config"])

        return cls(
            completed_files=data.get("completed_files", []),
            failed_files=data.get("failed_files", []),
            results=results,
            total_files=data.get("total_files", 0),
            config=config,
        )
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _run_with_timeout(
|
|
219
|
+
func: Callable[..., Any],
|
|
220
|
+
args: tuple[Any, ...],
|
|
221
|
+
kwargs: dict[str, Any],
|
|
222
|
+
timeout: float,
|
|
223
|
+
) -> tuple[Any, bool]:
|
|
224
|
+
"""Run a function with true timeout enforcement using threading.
|
|
225
|
+
|
|
226
|
+
This function wraps the target function in a separate thread and uses
|
|
227
|
+
threading.Timer to interrupt it if it exceeds the timeout. This provides
|
|
228
|
+
actual timeout enforcement rather than just post-hoc checking.
|
|
229
|
+
|
|
230
|
+
If the wrapped function raises any exception, it will be re-raised by
|
|
231
|
+
this wrapper.
|
|
232
|
+
|
|
233
|
+
Args:
|
|
234
|
+
func: Function to execute.
|
|
235
|
+
args: Positional arguments for the function.
|
|
236
|
+
kwargs: Keyword arguments for the function.
|
|
237
|
+
timeout: Timeout in seconds.
|
|
238
|
+
|
|
239
|
+
Returns:
|
|
240
|
+
Tuple of (result, timed_out) where result is the function return value
|
|
241
|
+
(or None if timed out) and timed_out indicates whether timeout occurred.
|
|
242
|
+
If the wrapped function raised an exception, that exception is re-raised.
|
|
243
|
+
|
|
244
|
+
Note:
|
|
245
|
+
This uses concurrent.futures.ThreadPoolExecutor internally which can
|
|
246
|
+
only interrupt I/O-bound operations. CPU-bound functions in Python
|
|
247
|
+
cannot be truly interrupted due to the GIL. For CPU-bound timeouts,
|
|
248
|
+
consider using ProcessPoolExecutor with timeout on future.result().
|
|
249
|
+
"""
|
|
250
|
+
result_container: dict[str, Any] = {"result": None, "error": None}
|
|
251
|
+
|
|
252
|
+
def target() -> None:
|
|
253
|
+
try:
|
|
254
|
+
result_container["result"] = func(*args, **kwargs)
|
|
255
|
+
except Exception as e:
|
|
256
|
+
result_container["error"] = e
|
|
257
|
+
|
|
258
|
+
# Use a thread with explicit timeout
|
|
259
|
+
thread = threading.Thread(target=target, daemon=True)
|
|
260
|
+
thread.start()
|
|
261
|
+
thread.join(timeout=timeout)
|
|
262
|
+
|
|
263
|
+
if thread.is_alive():
|
|
264
|
+
# Thread is still running - timeout occurred
|
|
265
|
+
# Note: We can't truly kill the thread, but we can mark it as timed out
|
|
266
|
+
# and move on. The daemon=True ensures it won't block process exit.
|
|
267
|
+
return None, True
|
|
268
|
+
|
|
269
|
+
if result_container["error"] is not None:
|
|
270
|
+
raise result_container["error"]
|
|
271
|
+
|
|
272
|
+
return result_container["result"], False # type: ignore[return-value]
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class AdvancedBatchProcessor:
    """Advanced batch processor with checkpointing and error handling.

    Provides robust batch processing with checkpoint/resume capability,
    per-file error isolation, progress tracking, and resource limits.

    Timeout enforcement:
        When `timeout_per_file` is configured, this processor uses actual
        timeout enforcement (via threading.Timer or concurrent.futures timeout)
        rather than just post-hoc duration checking. This means:
        - Long-running operations will be interrupted
        - Results will be marked with `timed_out=True`
        - Processing continues to the next file

    Example:
        >>> from oscura.batch.advanced import AdvancedBatchProcessor, BatchConfig
        >>> config = BatchConfig(
        ...     on_error='skip',
        ...     checkpoint_dir='./checkpoints',
        ...     max_workers=4,
        ...     timeout_per_file=60.0  # Enforced timeout
        ... )
        >>> processor = AdvancedBatchProcessor(config)
        >>> results = processor.process(files, analysis_fn)

    References:
        API-012: Advanced Batch Control
    """

    def __init__(self, config: BatchConfig | None = None) -> None:
        """Initialize batch processor.

        Args:
            config: Batch configuration. Uses defaults if None.
        """
        self.config = config or BatchConfig()
        # Populated either by resuming from disk (_resume_or_start) or
        # lazily created on the first call to process().
        self.checkpoint: BatchCheckpoint | None = None

    def process(
        self,
        files: list[str | Path],
        analysis_fn: Callable[[str | Path], dict[str, Any]],
        *,
        checkpoint_name: str = "batch_checkpoint",
        **kwargs: Any,
    ) -> pd.DataFrame:
        """Process files with checkpointing and error handling.

        Args:
            files: List of file paths to process.
            analysis_fn: Analysis function to apply to each file.
            checkpoint_name: Name for checkpoint file (default: 'batch_checkpoint').
            **kwargs: Additional arguments passed to analysis_fn.

        Returns:
            DataFrame with results and error information (one row per file,
            including rows for files that failed or timed out).

        Example:
            >>> results = processor.process(
            ...     files=['trace1.wfm', 'trace2.wfm'],
            ...     analysis_fn=analyze_trace
            ... )

        References:
            API-012: Advanced Batch Control
        """
        # Try to resume from checkpoint
        remaining_files = self._resume_or_start(files, checkpoint_name)

        # Initialize checkpoint if not resumed
        if self.checkpoint is None:
            self.checkpoint = BatchCheckpoint(
                total_files=len(files),
                config=self.config,
            )

        # Process remaining files
        self._process_files(remaining_files, analysis_fn, checkpoint_name, **kwargs)

        # Convert results to DataFrame
        return self._results_to_dataframe()

    def _resume_or_start(self, files: list[str | Path], checkpoint_name: str) -> list[str | Path]:
        """Try to resume from checkpoint or start fresh.

        Args:
            files: Full list of files to process.
            checkpoint_name: Name of checkpoint file.

        Returns:
            List of files remaining to be processed (the full list when no
            checkpoint directory is configured or no checkpoint file exists).
        """
        if self.config.checkpoint_dir is None:
            return files

        checkpoint_dir = Path(self.config.checkpoint_dir)
        checkpoint_path = checkpoint_dir / f"{checkpoint_name}.json"

        if checkpoint_path.exists():
            # Load checkpoint
            self.checkpoint = BatchCheckpoint.load(checkpoint_path)

            # Determine remaining files: anything already recorded as either
            # completed or failed is skipped on resume (failed files are NOT
            # retried).
            completed_set = set(self.checkpoint.completed_files)
            failed_set = set(self.checkpoint.failed_files)
            processed_set = completed_set | failed_set

            # Comparison is done on str(path), so mixed str/Path inputs match
            # the string paths stored in the checkpoint.
            remaining = [str(f) for f in files if str(f) not in processed_set]

            # NOTE(review): progress reporting via print(); consider the
            # logging module if the rest of the package uses it — confirm.
            print(
                f"Resuming from checkpoint: "
                f"{len(self.checkpoint.completed_files)} completed, "
                f"{len(self.checkpoint.failed_files)} failed, "
                f"{len(remaining)} remaining"
            )

            return [Path(f) for f in remaining]

        return files

    def _process_files(
        self,
        files: list[str | Path],
        analysis_fn: Callable[[str | Path], dict[str, Any]],
        checkpoint_name: str,
        **kwargs: Any,
    ) -> None:
        """Process files with parallel execution and checkpointing.

        Args:
            files: Files to process.
            analysis_fn: Analysis function.
            checkpoint_name: Checkpoint file name.
            **kwargs: Additional arguments for analysis_fn.

        Raises:
            RuntimeError: If processing is stopped due to error and on_error is "stop".
        """
        if not files:
            return

        # Create progress bar if requested
        pbar = None
        if self.config.progress_bar and HAS_TQDM:
            total = self.checkpoint.total_files if self.checkpoint else len(files)
            # Start the bar at the already-processed count so resumed runs
            # show cumulative progress, not just this session's.
            initial = (
                len(self.checkpoint.completed_files) + len(self.checkpoint.failed_files)
                if self.checkpoint
                else 0
            )
            pbar = tqdm(total=total, initial=initial, desc="Processing files")

        # Wrapper for file processing with error handling and timeout.
        # Returns a FileResult in every case; exceptions from analysis_fn are
        # captured per-file rather than propagated, so one bad file cannot
        # abort the batch (unless on_error == "stop" below).
        def _process_one(file_path: str | Path) -> FileResult:
            import time

            start_time = time.time()
            timed_out = False

            try:
                # Apply timeout if configured
                if self.config.timeout_per_file is not None:
                    result, timed_out = _run_with_timeout(
                        analysis_fn,
                        (file_path,),
                        kwargs,
                        self.config.timeout_per_file,
                    )
                    if timed_out:
                        duration = time.time() - start_time
                        return FileResult(
                            file=str(file_path),
                            success=False,
                            error=f"Processing timed out after {self.config.timeout_per_file}s",
                            duration=duration,
                            timed_out=True,
                        )
                else:
                    result = analysis_fn(file_path, **kwargs)

                duration = time.time() - start_time
                return FileResult(
                    file=str(file_path),
                    success=True,
                    # Non-dict return values are wrapped so downstream
                    # DataFrame construction always sees a mapping.
                    result=result if isinstance(result, dict) else {"result": result},
                    duration=duration,
                )
            except Exception as e:
                duration = time.time() - start_time
                return FileResult(
                    file=str(file_path),
                    success=False,
                    error=str(e),
                    traceback=traceback.format_exc(),
                    duration=duration,
                )

        # Process files
        processed_count = 0
        # NOTE(review): _process_one is a local closure; ProcessPoolExecutor
        # requires picklable callables, so the use_threads=False path will
        # likely fail with a pickling error on most platforms — confirm and
        # consider a module-level worker function if process pools are needed.
        executor_class = ThreadPoolExecutor if self.config.use_threads else ProcessPoolExecutor

        with executor_class(max_workers=self.config.max_workers) as executor:
            # Submit all tasks
            futures = {executor.submit(_process_one, f): f for f in files}

            # Process results as they complete
            # Use future.result() with timeout for additional enforcement layer
            for future in as_completed(futures):
                file_path = futures[future]
                try:
                    # Apply timeout on result retrieval as backup enforcement
                    # This catches cases where the thread-based timeout didn't work
                    # (e.g., CPU-bound operations that don't release the GIL)
                    # NOTE(review): as_completed() only yields futures that are
                    # already done, so this result() timeout can never fire in
                    # practice; passing timeout= to as_completed() itself would
                    # be needed for real backup enforcement — confirm intent.
                    retrieval_timeout = (
                        self.config.timeout_per_file * 1.1 if self.config.timeout_per_file else None
                    )
                    file_result = future.result(timeout=retrieval_timeout)
                except concurrent.futures.TimeoutError:
                    # Backup timeout triggered - future.result() timed out
                    file_result = FileResult(
                        file=str(file_path),
                        success=False,
                        error=f"Processing timed out (backup enforcement) "
                        f"after {self.config.timeout_per_file}s",
                        timed_out=True,
                    )
                except Exception as e:
                    # Unexpected error during result retrieval
                    file_result = FileResult(
                        file=str(file_path),
                        success=False,
                        error=f"Error retrieving result: {e}",
                        traceback=traceback.format_exc(),
                    )

                # Update checkpoint
                if self.checkpoint:
                    self.checkpoint.results.append(file_result)
                    if file_result.success:
                        self.checkpoint.completed_files.append(file_result.file)
                    else:
                        self.checkpoint.failed_files.append(file_result.file)

                # Handle errors per the configured policy: "stop" raises,
                # "warn" prints and continues; any other value skips silently.
                if not file_result.success:
                    if self.config.on_error == "stop":
                        if pbar:
                            pbar.close()
                        # NOTE: already-submitted futures keep running until the
                        # executor's context-manager exit during unwinding.
                        raise RuntimeError(
                            f"Processing stopped due to error in {file_result.file}: "
                            f"{file_result.error}"
                        )
                    elif self.config.on_error == "warn":
                        timeout_note = " (TIMEOUT)" if file_result.timed_out else ""
                        print(
                            f"Warning: Error processing {file_result.file}{timeout_note}: "
                            f"{file_result.error}"
                        )

                # Update progress
                processed_count += 1
                if pbar:
                    pbar.update(1)

                # Save checkpoint periodically (every checkpoint_interval files)
                # so a crash mid-batch loses at most one interval of work.
                if (
                    self.config.checkpoint_dir
                    and processed_count % self.config.checkpoint_interval == 0
                ):
                    self._save_checkpoint(checkpoint_name)

        if pbar:
            pbar.close()

        # Final checkpoint save
        if self.config.checkpoint_dir:
            self._save_checkpoint(checkpoint_name)

    def _save_checkpoint(self, checkpoint_name: str) -> None:
        """Save current checkpoint.

        No-op when no checkpoint exists or no checkpoint directory is
        configured.

        Args:
            checkpoint_name: Name for checkpoint file.
        """
        if self.checkpoint and self.config.checkpoint_dir:
            checkpoint_dir = Path(self.config.checkpoint_dir)
            checkpoint_path = checkpoint_dir / f"{checkpoint_name}.json"
            # NOTE(review): assumes checkpoint_dir already exists or that
            # BatchCheckpoint.save() creates it — confirm.
            self.checkpoint.save(checkpoint_path)

    def _results_to_dataframe(self) -> pd.DataFrame:
        """Convert checkpoint results to DataFrame.

        Returns:
            DataFrame with one row per processed file: `file`, `success`,
            `timed_out`, `duration`, the analysis result keys for successes,
            and `error`/`traceback` for failures. Empty DataFrame when there
            are no results.
        """
        if not self.checkpoint or not self.checkpoint.results:
            return pd.DataFrame()

        # Build rows
        rows = []
        for file_result in self.checkpoint.results:
            row = {
                "file": file_result.file,
                "success": file_result.success,
                "timed_out": file_result.timed_out,
            }

            if file_result.success:
                # Merge analysis result keys directly into the row; a result
                # key named "file"/"success"/"timed_out" would overwrite the
                # metadata columns above.
                row.update(file_result.result)
                row["error"] = None
            else:
                row["error"] = file_result.error
                row["traceback"] = file_result.traceback

            row["duration"] = file_result.duration
            rows.append(row)

        return pd.DataFrame(rows)
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def resume_batch(
    checkpoint_dir: str | Path, checkpoint_name: str = "batch_checkpoint"
) -> BatchCheckpoint:
    """Resume a batch job from checkpoint directory.

    Convenience function to load checkpoint and inspect state.

    Args:
        checkpoint_dir: Directory containing checkpoint.
        checkpoint_name: Name of checkpoint file.

    Returns:
        Loaded checkpoint.

    Example:
        >>> checkpoint = resume_batch('./checkpoints')
        >>> print(f"Completed: {len(checkpoint.completed_files)}")

    References:
        API-012: Advanced Batch Control
    """
    # The checkpoint lives at <checkpoint_dir>/<checkpoint_name>.json.
    base = Path(checkpoint_dir)
    return BatchCheckpoint.load(base / f"{checkpoint_name}.json")
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
# Public API of this module.
__all__ = [
    "AdvancedBatchProcessor",
    "BatchCheckpoint",
    "BatchConfig",
    "FileResult",
    # NOTE(review): exporting "TimeoutError" shadows the builtin of the same
    # name under `from ... import *` — confirm this is intentional.
    "TimeoutError",
    "resume_batch",
]
|