blastbox 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. blastbox/__init__.py +13 -0
  2. blastbox/contract/__init__.py +30 -0
  3. blastbox/contract/envelope.py +173 -0
  4. blastbox/contract/leaf.py +68 -0
  5. blastbox/contract/nodes.py +172 -0
  6. blastbox/contract/walk.py +35 -0
  7. blastbox/errors.py +75 -0
  8. blastbox/host/__init__.py +0 -0
  9. blastbox/host/cli.py +142 -0
  10. blastbox/host/dispatch.py +632 -0
  11. blastbox/host/ingress/__init__.py +0 -0
  12. blastbox/host/ingress/app.py +561 -0
  13. blastbox/host/ingress/middleware.py +139 -0
  14. blastbox/host/jobs/__init__.py +17 -0
  15. blastbox/host/jobs/base.py +110 -0
  16. blastbox/host/jobs/memory.py +62 -0
  17. blastbox/host/jobs/redis_store.py +131 -0
  18. blastbox/host/jobs/retention.py +163 -0
  19. blastbox/host/jobs/sql_store.py +309 -0
  20. blastbox/host/pool.py +555 -0
  21. blastbox/host/pool_config.py +105 -0
  22. blastbox/host/runtime/__init__.py +42 -0
  23. blastbox/host/runtime/docker.py +408 -0
  24. blastbox/host/runtime/firecracker.py +1072 -0
  25. blastbox/host/runtime/host_limits.py +233 -0
  26. blastbox/host/trust.py +145 -0
  27. blastbox/limits.py +106 -0
  28. blastbox/observability/__init__.py +23 -0
  29. blastbox/observability/logging.py +41 -0
  30. blastbox/observability/metrics.py +131 -0
  31. blastbox/worker/__init__.py +25 -0
  32. blastbox/worker/engine.py +89 -0
  33. blastbox/worker/fc_guest.py +167 -0
  34. blastbox/worker/fc_warm.py +173 -0
  35. blastbox/worker/harness.py +249 -0
  36. blastbox/worker/sandbox/__init__.py +31 -0
  37. blastbox/worker/sandbox/base.py +97 -0
  38. blastbox/worker/sandbox/bwrap.py +423 -0
  39. blastbox/worker/sandbox/container.py +303 -0
  40. blastbox/worker/sandbox/detect.py +226 -0
  41. blastbox/worker/sandbox/nsjail.py +339 -0
  42. blastbox/worker/warm.py +370 -0
  43. blastbox-0.1.0.dist-info/METADATA +158 -0
  44. blastbox-0.1.0.dist-info/RECORD +48 -0
  45. blastbox-0.1.0.dist-info/WHEEL +5 -0
  46. blastbox-0.1.0.dist-info/entry_points.txt +2 -0
  47. blastbox-0.1.0.dist-info/licenses/LICENSE +21 -0
  48. blastbox-0.1.0.dist-info/top_level.txt +1 -0
blastbox/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ """blastbox — reusable detonation framework for untrusted documents.
2
+
3
+ Engine authors need only the lean core (`pip install blastbox`): implement the
4
+ ``Engine`` protocol's ``detonate()`` and return a ``DetonationResult``; the
5
+ host orchestrator (``blastbox[host]``) handles ingress, disposable-worker
6
+ launch, output-trust validation, and serving.
7
+ """
8
+ from blastbox.worker.engine import DetonationResult, Engine
9
+ from blastbox.worker.harness import run_detonation
10
+
11
+ __version__ = "0.1.0"
12
+
13
+ __all__ = ["Engine", "DetonationResult", "run_detonation", "__version__"]
@@ -0,0 +1,30 @@
1
+ """Typed data contract for the detonation framework.
2
+
3
+ Engines emit a typed payload tree + declared artifacts; the worker SDK seals
4
+ them into an Envelope (hashes, sizes, path-confinement); the host re-validates.
5
+ """
6
+ from .leaf import Hash, Detection, Warning, ArtifactRef, Dimensions, Lang
7
+ from .nodes import (
8
+ Record, ExtractedText, Page, EmbeddedResource,
9
+ parse_node, register_node_type, rebuild_node_union,
10
+ )
11
+ from .envelope import (
12
+ DeclaredArtifact, Artifact, Envelope,
13
+ seal_envelope, validate_envelope, envelope_from_json,
14
+ )
15
+ from .walk import iter_nodes, find_by_type
16
+
17
+
18
def json_schema() -> dict:
    """Return the canonical JSON Schema of ``Envelope``.

    Provided for engines that are not written in Python, so they can check
    the documents they emit against the same contract.
    """
    envelope_schema = Envelope.model_json_schema()
    return envelope_schema
21
+
22
+
23
+ __all__ = [
24
+ "Hash", "Detection", "Warning", "ArtifactRef", "Dimensions", "Lang",
25
+ "Record", "ExtractedText", "Page", "EmbeddedResource",
26
+ "parse_node", "register_node_type", "rebuild_node_union",
27
+ "DeclaredArtifact", "Artifact", "Envelope",
28
+ "seal_envelope", "validate_envelope", "envelope_from_json",
29
+ "iter_nodes", "find_by_type", "json_schema",
30
+ ]
@@ -0,0 +1,173 @@
1
+ """The security envelope: sealed by the worker SDK, re-validated by the host."""
2
+ from __future__ import annotations
3
+
4
+ import hashlib
5
+ from pathlib import Path
6
+ from typing import Annotated, Literal
7
+
8
+ from pydantic import BaseModel, ConfigDict, Field
9
+
10
+ from .leaf import Detection, Warning
11
+ from .nodes import ChildNode, _REBUILD_CALLBACKS, parse_node
12
+
13
+
14
class DeclaredArtifact(BaseModel):
    """What an engine declares; the SDK turns it into a sealed Artifact."""
    # Instances are frozen and unknown fields are rejected.
    model_config = ConfigDict(frozen=True, extra="forbid")
    # 1-128 characters drawn from letters, digits, ".", "_" and "-".
    id: str = Field(pattern=r"^[A-Za-z0-9._-]{1,128}$")
    path: str = Field(max_length=4096)  # outdir-relative
    # Artifact category label, 1-64 characters.
    kind: str = Field(min_length=1, max_length=64)
20
+
21
+
22
class Artifact(BaseModel):
    # A sealed artifact: a DeclaredArtifact plus the sha256 and byte count
    # computed by seal_envelope(). Documented with comments rather than a
    # docstring so the generated JSON Schema gains no new "description".
    model_config = ConfigDict(frozen=True, extra="forbid")
    # id / path / kind carry the same bounds as DeclaredArtifact. Envelopes
    # are parsed from worker-emitted JSON, so these fields must not accept
    # values that an engine could never have declared in the first place.
    id: str = Field(pattern=r"^[A-Za-z0-9._-]{1,128}$")
    path: str = Field(max_length=4096)  # outdir-relative
    kind: str = Field(min_length=1, max_length=64)
    # Lower-case hex SHA-256 digest of the artifact file.
    sha256: str = Field(pattern=r"^[0-9a-f]{64}$")
    # Size of the artifact file in bytes.
    bytes: int = Field(ge=0)
29
+
30
+
31
class Envelope(BaseModel):
    """A sealed job result envelope.

    The ``payload`` field is declared as ``Annotated[ChildNode, ...]`` at class
    definition time. ``_rebuild_envelope()`` is registered in
    ``nodes._REBUILD_CALLBACKS`` and therefore runs on every
    ``rebuild_node_union()`` call (including the one made by
    ``register_node_type()``). It replaces
    ``model_fields["payload"].annotation`` with the live ``nodes.Node`` union
    and calls ``Envelope.model_rebuild(force=True)``, so registered engine
    node types are accepted as payloads — without any top-level circular
    import.
    """
    # Unknown top-level keys are rejected.
    model_config = ConfigDict(extra="forbid")
    engine: str = Field(min_length=1, max_length=64)
    status: Literal["ok", "rejected", "engine_error"] = "ok"
    # Lower-case hex SHA-256 of the input.
    input_sha256: str = Field(pattern=r"^[0-9a-f]{64}$")
    detected: Detection
    artifacts: list[Artifact] = Field(default_factory=list)
    warnings: list[Warning] = Field(default_factory=list)
    # Initial annotation uses ChildNode (the base union); _rebuild_envelope()
    # replaces model_fields["payload"].annotation with the live Node union after
    # each register_node_type() call so engine subtypes are also accepted.
    payload: Annotated[ChildNode, Field(discriminator="type")]
52
+
53
+
54
def _rebuild_envelope() -> None:
    """Re-bind ``Envelope.payload`` to the current live ``Node`` union.

    Invoked by ``nodes.rebuild_node_union()`` through ``_REBUILD_CALLBACKS``.
    The nodes module is imported inside the function so that no circular
    import is needed at module top level. The ``payload`` field annotation is
    overwritten with the live union and the model is force-rebuilt so pydantic
    regenerates the discriminated-union validator.
    """
    import blastbox.contract.nodes as _nodes

    payload_field = Envelope.model_fields["payload"]
    payload_field.annotation = _nodes.Node  # type: ignore[assignment]
    Envelope.model_rebuild(force=True)


# Hook into rebuild_node_union() (which register_node_type triggers) so the
# Envelope discriminated union is refreshed on every rebuild.
_REBUILD_CALLBACKS.append(_rebuild_envelope)
# Run once right away so the initial union is in place.
_rebuild_envelope()
72
+
73
+
74
def _collect_refs(node) -> set[str]:
    """Return the ids of every ``ArtifactRef`` reachable from *node*.

    Traversal is iterative (explicit stack). It descends into the declared
    fields of pydantic models, the items of lists and tuples, and the values
    of dicts; anything else is ignored.
    """
    from .leaf import ArtifactRef as _ArtifactRef

    found: set[str] = set()
    pending: list = [node]
    while pending:
        current = pending.pop()
        if isinstance(current, _ArtifactRef):
            found.add(current.id)
            continue
        if isinstance(current, BaseModel):
            pending.extend(
                getattr(current, name) for name in type(current).model_fields
            )
        elif isinstance(current, (list, tuple)):
            pending.extend(current)
        elif isinstance(current, dict):
            pending.extend(current.values())
    return found
94
+
95
+
96
def _sha256_and_size(path: Path, *, chunk_size: int = 1024 * 1024) -> tuple[str, int]:
    """Return ``(sha256 hexdigest, byte count)`` of *path*, read in chunks.

    Streaming keeps memory use bounded by *chunk_size* rather than by the
    artifact size; the digest and the size come from the same bytes read.
    """
    digest = hashlib.sha256()
    size = 0
    with path.open("rb") as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest(), size


def seal_envelope(*, engine: str, outdir: Path, input_sha256: str,
                  detected: Detection, declared: list[DeclaredArtifact],
                  warnings: list[Warning], payload: ChildNode,
                  status: Literal["ok", "rejected", "engine_error"] = "ok") -> Envelope:
    """Seal declared artifacts + payload into a validated Envelope.

    For every declared artifact the path is resolved and confined under
    *outdir*, the file is required to be a regular file, and its sha256 and
    byte count are computed from disk (streamed, so large artifacts are not
    loaded into memory). Every ArtifactRef found in *payload* must resolve to
    a declared id.

    Raises:
        ValueError: on a duplicate artifact id, a path escaping *outdir*, a
            missing / non-regular file, or an unresolved ArtifactRef — the
            worker must not emit on failure.
    """
    outdir_resolved = outdir.resolve(strict=False)
    artifacts: list[Artifact] = []
    declared_ids: set[str] = set()
    for d in declared:
        if d.id in declared_ids:
            raise ValueError(f"duplicate artifact id: {d.id}")
        declared_ids.add(d.id)
        # resolve() collapses ".." and follows symlinks, so the confinement
        # check below is made against the real on-disk location.
        target = (outdir / d.path).resolve(strict=False)
        if outdir_resolved != target and outdir_resolved not in target.parents:
            raise ValueError(f"artifact path not confined to outdir: {d.path}")
        if not target.is_file():
            raise ValueError(f"declared artifact file missing or not a regular file: {d.path}")
        sha256_hex, size = _sha256_and_size(target)
        artifacts.append(Artifact(id=d.id, path=d.path, kind=d.kind,
                                  sha256=sha256_hex, bytes=size))
    unresolved = _collect_refs(payload) - declared_ids
    if unresolved:
        raise ValueError(f"payload has unresolved ArtifactRef(s): {sorted(unresolved)}")
    return Envelope(engine=engine, status=status, input_sha256=input_sha256,
                    detected=detected, artifacts=artifacts, warnings=warnings,
                    payload=payload)
128
+
129
+
130
def validate_envelope(env: Envelope, *, outdir: Path, max_artifact_bytes: int,
                      max_total_bytes: int, max_artifacts: int) -> Envelope:
    """Host-side re-validation of artifact count, confinement and sizes.

    Each artifact path is resolved under *outdir*, must be a regular file,
    and its on-disk ``st_size`` must equal the ``bytes`` recorded in the
    envelope (so a tampered worker-reported size is caught). Per-artifact and
    cumulative size limits are enforced as the artifacts are visited.

    Returns *env* unchanged on success; raises ValueError on any violation.
    """
    count = len(env.artifacts)
    if count > max_artifacts:
        raise ValueError(f"artifact count {count} exceeds {max_artifacts}")

    root = outdir.resolve(strict=False)
    running_total = 0
    for art in env.artifacts:
        resolved = (outdir / art.path).resolve(strict=False)
        confined = resolved == root or root in resolved.parents
        if not confined:
            raise ValueError(f"artifact path not confined to outdir: {art.path}")
        if not resolved.is_file():
            raise ValueError(f"artifact file missing or not a regular file: {art.path}")

        on_disk = resolved.stat().st_size
        if on_disk != art.bytes:
            raise ValueError(
                f"artifact {art.id} declared bytes={art.bytes} but on-disk size={on_disk}"
            )
        if on_disk > max_artifact_bytes:
            raise ValueError(f"artifact {art.id} bytes {on_disk} exceeds {max_artifact_bytes}")

        running_total += on_disk
        if running_total > max_total_bytes:
            raise ValueError(f"total artifact bytes {running_total} exceeds {max_total_bytes}")
    return env
159
+
160
+
161
def envelope_from_json(raw: bytes, *, max_bytes: int = 4 * 1024 * 1024) -> Envelope:
    """Parse a worker-emitted metadata.json into an Envelope (size-bounded).

    The size limit is checked before any parsing. The decoded document must
    be a JSON object with a non-null ``payload``; the payload is parsed with
    ``parse_node`` and the whole document is then validated as an Envelope.
    Raises ValueError when the size, shape or payload check fails.
    """
    size = len(raw)
    if size > max_bytes:
        raise ValueError(f"metadata json {size} bytes exceeds {max_bytes}")

    import json

    document = json.loads(raw)
    if not isinstance(document, dict):
        raise ValueError("envelope JSON must be a JSON object")

    raw_payload = document.get("payload")
    if raw_payload is None:
        raise ValueError("envelope JSON missing required 'payload' field")

    document["payload"] = parse_node(raw_payload)
    return Envelope.model_validate(document)
@@ -0,0 +1,68 @@
1
+ """Leaf types: the shared vocabulary every engine can reuse."""
2
+ from __future__ import annotations
3
+
4
+ import re
5
+ from typing import Literal
6
+
7
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
8
+
9
+ _HEX_RE = re.compile(r"\A[0-9a-fA-F]+\Z")
10
+ _SAFE_ID_RE = re.compile(r"\A[A-Za-z0-9._-]{1,128}\Z")
11
+ # Expected hex length per hash algorithm (None = any positive hex length).
12
+ _HASH_HEXLEN: dict[str, int | None] = {
13
+ "sha256": 64, "phash": 16, "dhash": 16, "ahash": 16, "colorhash": None,
14
+ }
15
+
16
+
17
class _Frozen(BaseModel):
    # Shared base for the leaf types in this module: configured with
    # frozen=True and extra="forbid" (unknown fields are rejected).
    model_config = ConfigDict(frozen=True, extra="forbid")
19
+
20
+
21
class Hash(_Frozen):
    # A hash value tagged with the algorithm that produced it.
    algo: Literal["sha256", "phash", "dhash", "ahash", "colorhash"]
    value: str

    @field_validator("value")
    @classmethod
    def _hex(cls, v: str, info) -> str:
        # The value must be hex; it is normalised to lower case on return.
        if _HEX_RE.match(v) is None:
            raise ValueError("hash value must be hex")
        # _HASH_HEXLEN maps an algorithm to its required hex length; a None
        # entry (or an algo that did not validate) means no length check.
        required_len = _HASH_HEXLEN.get(info.data.get("algo"))
        if required_len is None or len(v) == required_len:
            return v.lower()
        raise ValueError(f"expected {required_len} hex chars, got {len(v)}")
34
+
35
+
36
class ArtifactRef(_Frozen):
    """A reference into the Envelope's artifact set by id (never a path)."""
    id: str

    @field_validator("id")
    @classmethod
    def _safe(cls, v: str) -> str:
        # Accept only ids matching _SAFE_ID_RE; the value is returned unchanged.
        if _SAFE_ID_RE.match(v):
            return v
        raise ValueError("artifact id must match [A-Za-z0-9._-]{1,128}")
46
+
47
+
48
class Detection(_Frozen):
    # Detection record carried in the Envelope's ``detected`` field.
    label: str = Field(min_length=1, max_length=64)
    mime: str = Field(max_length=255)
    # Bounded to the closed interval [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)
    source: str = Field(min_length=1, max_length=32)
53
+
54
+
55
class Warning(_Frozen):
    # NOTE(review): this name shadows the builtin ``Warning`` in any module
    # that imports it by name.
    code: str = Field(min_length=1, max_length=64)
    message: str = Field(max_length=2000)
    # Optional; at most 255 characters when present.
    context: str | None = Field(default=None, max_length=255)
59
+
60
+
61
class Dimensions(_Frozen):
    # Width and height must be strictly positive; ``unit`` says which of the
    # three allowed units they are expressed in.
    width: float = Field(gt=0)
    height: float = Field(gt=0)
    unit: Literal["mm", "px", "pt"]
65
+
66
+
67
class Lang(_Frozen):
    # Language code string, 2-64 characters; no further format is enforced here.
    code: str = Field(min_length=2, max_length=64)
@@ -0,0 +1,172 @@
1
+ """Typed payload nodes: a recursive tree with a generic Record floor."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Annotated, Any, Callable, Literal, Union, get_args
5
+
6
+ from pydantic import BaseModel, BeforeValidator, ConfigDict, Field, TypeAdapter
7
+
8
+ from .leaf import ArtifactRef, Dimensions, Hash, Lang
9
+
10
+ Scalar = Union[str, int, float, bool, None]
11
+ # A Record field value is a scalar, a list of scalars, or a nested Record.
12
+ RecordValue = Union[Scalar, list[Scalar], "Record"]
13
+
14
+
15
class _Node(BaseModel):
    # Base class of every payload node: unknown fields are rejected, and
    # populate_by_name=True lets aliased fields (the ``_type`` discriminator on
    # the subclasses) be supplied under either the alias or the field name.
    model_config = ConfigDict(extra="forbid", populate_by_name=True)
17
+
18
+
19
def _parse_children(value: Any) -> Any:
    """Parse each child through the *live*, registry-aware node adapter.

    Used as a ``BeforeValidator`` on the ``children`` fields, so child parsing
    does not depend on the static ``ChildNode`` union captured when the model
    classes were defined. Reassigning the module-global union is not enough:
    pydantic bakes a discriminated-union core schema into the field when the
    model is built and does not re-resolve a reassigned global on
    ``model_rebuild``, which left registered engine subtypes unreachable as
    children. Delegating to ``_NODE_ADAPTER`` (the adapter ``parse_node`` uses,
    rebuilt by ``rebuild_node_union`` to include engine types) consults the
    registry at validation time instead.

    ``_Node`` instances pass through untouched; dicts are validated by the
    live adapter (which rejects unknown ``_type`` and enforces
    ``extra="forbid"``). A non-list *value*, and any item that is neither a
    node nor a dict, is returned unchanged so pydantic reports its normal
    type/length errors.
    """
    if not isinstance(value, list):
        return value
    if _NODE_ADAPTER is None:
        rebuild_node_union()
    adapter = _NODE_ADAPTER
    assert adapter is not None

    def _convert(item: Any) -> Any:
        # Only plain dicts need parsing; everything else is passed along as is.
        if isinstance(item, _Node) or not isinstance(item, dict):
            return item
        return adapter.validate_python(item)

    return [_convert(item) for item in value]


# Children list type: items are parsed by the live registry-aware adapter
# (through _parse_children) instead of a static union. It is typed list[Any]
# so pydantic does not re-narrow the already-validated _Node instances back to
# the 4 base types; max_length is still enforced because BeforeValidator runs
# first.
ChildList = Annotated[list[Any], BeforeValidator(_parse_children)]
59
+
60
+
61
class Record(_Node):
    """The generic floor: a typed bag for engine data not worth a named type."""
    # Discriminator, fixed to "record"; aliased as "_type".
    type: Literal["record"] = Field(default="record", alias="_type")
    # At most 4096 entries; each value is a scalar, a list of scalars, or a
    # nested Record (see RecordValue).
    fields: dict[str, RecordValue] = Field(default_factory=dict, max_length=4096)
65
+
66
+
67
class ExtractedText(_Node):
    # Discriminator, fixed to "extracted_text"; aliased as "_type".
    type: Literal["extracted_text"] = Field(default="extracted_text", alias="_type")
    # Capped at 10,000,000 characters.
    text: str = Field(max_length=10_000_000)
    # NOTE(review): nothing here checks that char_count equals len(text) —
    # confirm whether the producer is trusted for this value.
    char_count: int = Field(ge=0)
    lang: Lang | None = None
72
+
73
+
74
class Page(_Node):
    # Discriminator, fixed to "page"; aliased as "_type".
    type: Literal["page"] = Field(default="page", alias="_type")
    # Non-negative page index.
    index: int = Field(ge=0)
    dims: Dimensions
    # Reference (by id) to an artifact in the enclosing Envelope.
    image: ArtifactRef
    hashes: list[Hash] = Field(default_factory=list, max_length=32)
    # Items are parsed by _parse_children through the live node adapter;
    # at most 10000 children.
    children: ChildList = Field(default_factory=list, max_length=10000)
81
+
82
+
83
class EmbeddedResource(_Node):
    # Discriminator, fixed to "embedded_resource"; aliased as "_type".
    type: Literal["embedded_resource"] = Field(default="embedded_resource", alias="_type")
    embedded_path: str = Field(max_length=4096)
    content_type: str = Field(max_length=255)
    # Bounded to 0..64 inclusive.
    depth: int = Field(ge=0, le=64)
    metadata: Record | None = None
    # Items are parsed by _parse_children through the live node adapter;
    # at most 10000 children.
    children: ChildList = Field(default_factory=list, max_length=10000)
90
+
91
+
92
+ # Static union of the four built-in node types. Engine types are NOT added to this alias; register_node_type() adds them to the live ``Node`` union rebuilt by rebuild_node_union().
93
+ ChildNode = Union[Page, EmbeddedResource, ExtractedText, Record]
94
+
95
+ # Module-level Node type and adapter; rebuilt by rebuild_node_union().
96
+ Node: Any = Annotated[ChildNode, Field(discriminator="type")]
97
+ _NODE_ADAPTER: TypeAdapter[Any] | None = None
98
+
99
+ _ENGINE_NODE_TYPES: list[type[_Node]] = []
100
+
101
+ # Callbacks invoked after every rebuild_node_union() — allows envelope and
102
+ # other modules to re-bind their own models to the live union without
103
+ # introducing a circular top-level import (they register lazily at their
104
+ # own module-init time).
105
+ _REBUILD_CALLBACKS: list[Callable[[], None]] = []
106
+
107
+
108
def rebuild_node_union() -> None:
    """(Re)build the live ``Node`` union and its adapter.

    Call after registering types. The union is made of the four built-in
    node classes followed by every registered engine class. Each member is
    rebuilt, the module-level ``Node`` and ``_NODE_ADAPTER`` are replaced, and
    finally every callback in ``_REBUILD_CALLBACKS`` is invoked.
    """
    global _NODE_ADAPTER, Node
    node_types: tuple[type[_Node], ...] = (
        Page, EmbeddedResource, ExtractedText, Record,
    ) + tuple(_ENGINE_NODE_TYPES)
    for node_type in node_types:
        node_type.model_rebuild()
    combined: Any = (
        Union[node_types]  # type: ignore[arg-type]
        if len(node_types) > 1
        else node_types[0]
    )
    Node = Annotated[combined, Field(discriminator="type")]
    _NODE_ADAPTER = TypeAdapter(Node)
    for callback in _REBUILD_CALLBACKS:
        callback()
124
+
125
+
126
def parse_node(data: dict[str, Any]) -> ChildNode:
    """Validate an untyped dict into the node selected by its ``_type``.

    The adapter is built on first use if it does not exist yet.
    """
    if _NODE_ADAPTER is None:
        rebuild_node_union()
    adapter = _NODE_ADAPTER
    assert adapter is not None
    return adapter.validate_python(data)  # type: ignore[return-value]
133
+
134
+
135
def _discriminator_value(cls: type[_Node]) -> Any:
    """Return the discriminator value declared by *cls*'s ``type`` field.

    ``None`` when the class has no ``type`` field. Otherwise the first
    argument of the field's annotation (the ``Literal`` value), falling back
    to the field default when the annotation carries no arguments.
    """
    type_field = cls.model_fields.get("type")
    if type_field is None:
        return None
    literal_values = get_args(type_field.annotation)
    if literal_values:
        return literal_values[0]
    return type_field.default
142
+
143
+
144
def register_node_type(cls: type[_Node]) -> type[_Node]:
    """Register an engine-specific node subclass into the parse union.

    The class MUST carry a unique Literal `type` discriminator. After
    registration the union is rebuilt so parse_node() accepts it.

    Idempotent: re-registering the same class only rebuilds the union, and
    registering a class whose discriminator value matches an already-registered
    engine type *replaces* the prior one. Without the replace, two classes
    sharing a ``_type`` would both enter the union and pydantic would reject
    the build with "mapped to multiple choices" — so this keeps the
    discriminated union well-formed under reloads / duplicate registrations.

    Transactional: if rebuilding the union fails for the new class (for
    example its discriminator collides with a built-in node type, or it has
    no ``type`` field), the registry is restored to its previous contents,
    the union is rebuilt from that known-good state, and the original
    exception is re-raised. A rejected class therefore cannot poison later
    registrations.

    Returns *cls*, so the function can be used as a class decorator.
    """
    if cls in _ENGINE_NODE_TYPES:
        rebuild_node_union()
        return cls
    disc = _discriminator_value(cls)
    # Snapshot for rollback; the registry list is mutated in place below.
    previous = list(_ENGINE_NODE_TYPES)
    for i, existing in enumerate(_ENGINE_NODE_TYPES):
        if _discriminator_value(existing) == disc:
            _ENGINE_NODE_TYPES[i] = cls
            break
    else:
        _ENGINE_NODE_TYPES.append(cls)
    try:
        rebuild_node_union()
    except Exception:
        # Slice-assign so every holder of the list object sees the rollback.
        _ENGINE_NODE_TYPES[:] = previous
        rebuild_node_union()
        raise
    return cls
169
+
170
+
171
+ # Bootstrap: rebuild after all forward refs are defined.
172
+ rebuild_node_union()
@@ -0,0 +1,35 @@
1
+ """Generic, engine-agnostic walkers over the typed payload tree."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Iterator, TypeVar
5
+
6
+ _T = TypeVar("_T")
7
+
8
_MAX_DEPTH = 128


def iter_nodes(root, *, _max_depth: int = _MAX_DEPTH) -> Iterator[object]:
    """Yield *root* followed by all of its descendants, in pre-order.

    Traversal uses an explicit stack instead of recursion, so deep trees
    cannot hit Python's recursion limit. A node nested deeper than
    *_max_depth* (default 128) raises ``ValueError`` rather than a
    ``RecursionError``. Children are read from a ``children`` attribute; a
    missing or empty attribute means the node is a leaf.
    """
    # Each entry pairs a node with its depth below the root.
    pending: list[tuple[object, int]] = [(root, 0)]
    while pending:
        current, level = pending.pop()
        if level > _max_depth:
            raise ValueError(
                f"payload tree exceeds maximum nesting depth of {_max_depth}"
            )
        yield current
        kids = getattr(current, "children", None) or []
        # Push right-to-left so the leftmost child is popped (visited) first.
        pending.extend((kid, level + 1) for kid in reversed(list(kids)))
31
+
32
+
33
def find_by_type(root, node_type: type[_T]) -> list[_T]:
    """Collect every node under *root* that is an instance of *node_type*.

    Matching uses ``isinstance``, so subclasses are included. Results are in
    the pre-order produced by ``iter_nodes``.
    """
    matches: list[_T] = []
    for candidate in iter_nodes(root):
        if isinstance(candidate, node_type):
            matches.append(candidate)
    return matches
blastbox/errors.py ADDED
@@ -0,0 +1,75 @@
1
+ """Exception hierarchy for the blastbox framework."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+
8
# Pattern for internal filesystem paths that must not appear in public-facing
# error messages. It is root-agnostic: any absolute POSIX path of two or more
# segments matches (``/var/lib/blastbox/x``, ``/etc/passwd``,
# ``/proc/self/mem``) instead of a denylist of specific roots. A lone ``/``
# between words (``and/or``) is not treated as a path, because nothing after
# it ends in a segment separator.
_INTERNAL_PATH_RE = re.compile(r"/(?:[A-Za-z0-9._+-]+/)+[A-Za-z0-9._+-]*")


def sanitize_public_error(msg: str) -> str:
    """Return *msg* with each internal filesystem path replaced by ``<path>``."""
    redacted = _INTERNAL_PATH_RE.sub("<path>", msg)
    return redacted
20
+
21
+
22
class BlastboxError(Exception):
    """Root of the blastbox exception hierarchy.

    Every other exception class in this module derives from it.
    """
24
+
25
+
26
class DetectionError(BlastboxError):
    """Raised when the detector rejects an input.

    The exception message is ``reason`` alone, or ``"reason: detail"`` when a
    non-empty *detail* is given; both values are also kept as attributes.
    """

    def __init__(self, reason: str, detail: str = "") -> None:
        message = reason if not detail else f"{reason}: {detail}"
        super().__init__(message)
        self.reason = reason
        self.detail = detail
33
+
34
+
35
class SandboxError(BlastboxError):
    """Setting up or running the sandbox failed.

    Base class for the more specific sandbox errors in this module.
    """
37
+
38
+
39
class SandboxTimeout(SandboxError):
    """The sandboxed process ran past its wall-clock timeout."""
41
+
42
+
43
class SandboxUnavailable(SandboxError):
    """This host has no usable sandbox backend."""
45
+
46
+
47
class EngineError(BlastboxError):
    """A failure specific to an engine (e.g. LibreOffice, Tika, …)."""
49
+
50
+
51
class DetonationError(BlastboxError):
    """Top-level detonation failure that may wrap an underlying cause.

    When *cause* is given it is stored on ``self.cause`` and also set as
    ``__cause__``, so it shows up as the chained exception in tracebacks.
    """

    def __init__(self, message: str, cause: Exception | None = None) -> None:
        super().__init__(message)
        self.cause = cause
        if cause is None:
            return
        self.__cause__ = cause


# ConversionError stays available as an alias so engines can use the familiar
# name without pulling engine-specific terms into the generic layer.
ConversionError = DetonationError
64
+
65
+
66
class ValidationError(BlastboxError):
    """Envelope / metadata validation failed.

    NOTE(review): shares its name with pydantic's ``ValidationError`` —
    import with care in modules that use both.
    """
68
+
69
+
70
class OutputTrustError(BlastboxError):
    """Worker output did not pass the host-side trust validation."""
72
+
73
+
74
class WarmTimeout(BlastboxError):
    """A warm worker slot received no job within its idle-timeout window."""
File without changes