agentforge-core 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge_core/__init__.py +228 -0
- agentforge_core/_bm25.py +132 -0
- agentforge_core/config/__init__.py +62 -0
- agentforge_core/config/loader.py +239 -0
- agentforge_core/config/module_schemas.py +208 -0
- agentforge_core/config/schema.py +424 -0
- agentforge_core/contracts/__init__.py +52 -0
- agentforge_core/contracts/auth.py +33 -0
- agentforge_core/contracts/chat.py +118 -0
- agentforge_core/contracts/embedding.py +71 -0
- agentforge_core/contracts/evaluator.py +56 -0
- agentforge_core/contracts/finding.py +39 -0
- agentforge_core/contracts/graph_store.py +180 -0
- agentforge_core/contracts/guardrails.py +129 -0
- agentforge_core/contracts/llm.py +152 -0
- agentforge_core/contracts/memory.py +113 -0
- agentforge_core/contracts/migrator.py +120 -0
- agentforge_core/contracts/renderer.py +57 -0
- agentforge_core/contracts/reranker.py +91 -0
- agentforge_core/contracts/strategy.py +70 -0
- agentforge_core/contracts/task.py +73 -0
- agentforge_core/contracts/tool.py +71 -0
- agentforge_core/contracts/vector_store.py +151 -0
- agentforge_core/migrations/__init__.py +14 -0
- agentforge_core/migrations/discover.py +77 -0
- agentforge_core/migrations/template.py +34 -0
- agentforge_core/observability/__init__.py +18 -0
- agentforge_core/observability/tracing.py +37 -0
- agentforge_core/production/__init__.py +77 -0
- agentforge_core/production/budget.py +134 -0
- agentforge_core/production/exceptions.py +136 -0
- agentforge_core/production/fallback.py +321 -0
- agentforge_core/production/log_filter.py +49 -0
- agentforge_core/production/log_format.py +117 -0
- agentforge_core/production/run_context.py +108 -0
- agentforge_core/py.typed +0 -0
- agentforge_core/resolver/__init__.py +38 -0
- agentforge_core/resolver/discover.py +145 -0
- agentforge_core/resolver/resolve.py +168 -0
- agentforge_core/testing/__init__.py +45 -0
- agentforge_core/testing/conformance.py +1138 -0
- agentforge_core/values/__init__.py +103 -0
- agentforge_core/values/auth.py +20 -0
- agentforge_core/values/chat.py +131 -0
- agentforge_core/values/claim.py +30 -0
- agentforge_core/values/graph.py +136 -0
- agentforge_core/values/guardrails.py +49 -0
- agentforge_core/values/manifest.py +129 -0
- agentforge_core/values/messages.py +153 -0
- agentforge_core/values/module.py +40 -0
- agentforge_core/values/pipeline.py +43 -0
- agentforge_core/values/retrieval.py +53 -0
- agentforge_core/values/state.py +118 -0
- agentforge_core/values/vector.py +59 -0
- agentforge_core-0.2.1.dist-info/METADATA +66 -0
- agentforge_core-0.2.1.dist-info/RECORD +58 -0
- agentforge_core-0.2.1.dist-info/WHEEL +4 -0
- agentforge_core-0.2.1.dist-info/licenses/LICENSE +202 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""`Tool` — the locked tool ABC.
|
|
2
|
+
|
|
3
|
+
feat-001 ships the explicit-`input_schema` form. feat-004 layers a
|
|
4
|
+
`@tool` decorator on top that infers the schema from a typed function's
|
|
5
|
+
signature; the resulting object is still a `Tool` subclass under the
|
|
6
|
+
hood.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import inspect
|
|
12
|
+
from abc import ABC, abstractmethod
|
|
13
|
+
from typing import Any, ClassVar
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel
|
|
16
|
+
|
|
17
|
+
from agentforge_core.values.messages import ToolSpec
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Tool(ABC):
    """Abstract base for a typed callable that an agent can invoke.

    A concrete subclass provides three class-level attributes and an
    implementation of `run`:

        name: str           — unique identifier the LLM sees
        description: str    — human-readable usage description
        input_schema: type[BaseModel]
                            — Pydantic v2 model for inputs

    `capabilities` is optional and defaults to an empty frozenset.
    Subclasses that are still abstract are not checked, so an
    intermediate base may leave the attributes to its descendants.
    """

    name: ClassVar[str]
    description: ClassVar[str]
    input_schema: ClassVar[type[BaseModel]]
    capabilities: ClassVar[frozenset[str]] = frozenset()

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """Reject concrete subclasses that omit a required class attribute.

        Raises:
            TypeError: a concrete subclass neither declares nor inherits
                (from a non-`Tool` ancestor) one of `name`,
                `description`, `input_schema`.
        """
        super().__init_subclass__(**kwargs)
        if not inspect.isabstract(cls):
            for attr in ("name", "description", "input_schema"):
                # Declared on the class itself, or on an ancestor other
                # than `Tool`: either satisfies the requirement.
                if attr in vars(cls) or _inherited_attr(cls, attr):
                    continue
                raise TypeError(
                    f"{cls.__name__} must declare class attribute '{attr}' (see Tool docstring)."
                )

    @abstractmethod
    async def run(self, **kwargs: Any) -> Any:
        """Execute the tool with kwargs validated against `input_schema`."""

    def to_spec(self) -> ToolSpec:
        """Build the provider-agnostic JSON-schema description for the LLM.

        The values are read from the class (`type(self)`), not from the
        instance.
        """
        tool_cls = type(self)
        spec_name = tool_cls.name
        spec_description = tool_cls.description
        spec_schema = tool_cls.input_schema.model_json_schema()
        return ToolSpec(
            name=spec_name,
            description=spec_description,
            schema=spec_schema,
        )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _inherited_attr(cls: type, attr: str) -> bool:
    """Report whether an ancestor of `cls` other than `Tool` declares `attr`.

    Only each ancestor's own namespace is inspected. `cls` itself (the
    first MRO entry), the abstract `Tool` base and `object` are skipped.
    """
    ancestors = cls.__mro__[1:]
    return any(
        attr in vars(base)
        for base in ancestors
        if base is not Tool and base is not object
    )
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""`VectorStore` — locked semantic-search ABC.
|
|
2
|
+
|
|
3
|
+
A vector store is distinct from `MemoryStore` (the claim audit log):
|
|
4
|
+
the shapes don't unify cleanly. Vectors search by similarity; claims
|
|
5
|
+
filter by structured metadata + monotonic ULID ordering. We keep two
|
|
6
|
+
separate ABCs and a user who wants similarity over claims puts the
|
|
7
|
+
claim text into a vector store with `metadata={"claim_id": <id>}`.
|
|
8
|
+
|
|
9
|
+
Per ADR-0007 the surface is locked at v0.1: adding a method is a
|
|
10
|
+
major version bump. Optional capabilities (e.g. native ANN indexes,
|
|
11
|
+
hybrid search) layer the same way as `LLMClient` capabilities —
|
|
12
|
+
declared via `capabilities()` and gated via `supports()`.
|
|
13
|
+
|
|
14
|
+
Conformance: every shipped or third-party driver must pass
|
|
15
|
+
`agentforge_core.testing.run_vector_conformance` (lands alongside
|
|
16
|
+
this contract).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from abc import ABC, abstractmethod
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
from agentforge_core.values.vector import VectorItem, VectorMatch
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class VectorStore(ABC):
    """Provider-agnostic vector index.

    What an implementation is expected to do:
      - expose a fixed `dimensions()`; every vector passed to `upsert`
        or `search` must have exactly that length
      - report scores as cosine similarity normalised to `[0, 1]`,
        converting from its internal distance metric at the boundary
      - treat `filter_metadata` as a conjunctive equality filter over
        every key/value pair supplied

    Cross-driver invariants enforced by the conformance suite:
      - a second upsert with the same id replaces the earlier record
        (write-through semantics)
      - `search` returns at most `limit` items, sorted by score
        descending
      - a dimension mismatch on upsert or search raises `ValueError`
    """

    @abstractmethod
    async def upsert(self, items: list[VectorItem]) -> None:
        """Insert or replace `items`.

        When several entries in `items` carry the same id, the last one
        wins. Callers wanting transactional all-or-nothing semantics
        should send everything in one `upsert` call, although a driver
        may still split the request internally.

        Raises:
            ValueError: a vector's length does not match `dimensions()`.
        """

    @abstractmethod
    async def search(
        self,
        query_vector: tuple[float, ...],
        *,
        limit: int = 5,
        filter_metadata: dict[str, Any] | None = None,
    ) -> list[VectorMatch]:
        """Return the top-`limit` items by cosine similarity.

        Args:
            query_vector: Length must equal `dimensions()`.
            limit: Upper bound on the number of results; drivers may
                return fewer.
            filter_metadata: Conjunctive equality filter applied to the
                items' `metadata` dict. `None` disables filtering.

        Raises:
            ValueError: dimension mismatch or `limit < 1`.
        """

    async def lexical_search(
        self,
        query: str,
        *,
        limit: int = 5,
        filter_metadata: dict[str, Any] | None = None,
    ) -> list[VectorMatch]:
        """Return the top-`limit` items by lexical (BM25-style) relevance.

        This base implementation always raises
        :class:`NotImplementedError` with a remediation message, so a
        caller sees a clear error instead of silently empty results.
        Drivers that declare the ``"hybrid_search"`` capability MUST
        override it.

        Overrides normalise scores to ``[0, 1]`` by dividing by the
        maximum score in the result set, so the top match scores 1.0;
        absolute BM25 magnitudes are not portable across corpora. These
        scores are NOT guaranteed to be comparable with `search()`
        scores — hybrid retrieval fuses by **rank**, not raw score (see
        ``Retriever`` for RRF fusion).

        Args:
            query: The user's text query. Tokenisation is
                driver-specific but typically lowercase + non-word
                split.
            limit: Upper bound on the number of results; drivers may
                return fewer.
            filter_metadata: Conjunctive equality filter applied to the
                items' ``metadata`` dict. ``None`` disables filtering.

        Raises:
            NotImplementedError: This driver does not support hybrid
                search (default behaviour).
            ValueError: ``limit < 1``.
        """
        driver_name = type(self).__name__
        raise NotImplementedError(
            f"{driver_name} does not support hybrid search. "
            "Either swap to a VectorStore that declares the "
            "'hybrid_search' capability (e.g. the built-in "
            "InMemoryVectorStore) or open an issue requesting native "
            "lexical support for this driver."
        )

    @abstractmethod
    async def delete(self, ids: list[str]) -> int:
        """Delete by id and return the number of items actually removed.

        Unknown ids are ignored without raising. An empty `ids` list
        returns 0.
        """

    @abstractmethod
    async def close(self) -> None:
        """Release backing resources (connections, file handles)."""

    @abstractmethod
    def dimensions(self) -> int:
        """Return the fixed vector dimensionality this store accepts.

        Kept synchronous so callers can size storage or validate input
        without a network round-trip.
        """

    def capabilities(self) -> set[str]:
        """Return the optional capabilities this driver supports.

        The base implementation returns an empty set. The vocabulary is
        closed (additions are minor bumps): `"native_ann"` (driver uses
        an ANN index rather than brute force), `"hybrid_search"` (BM25 +
        vector fusion), `"transactions"` (multi-statement atomic
        upserts).
        """
        return set()

    def supports(self, capability: str) -> bool:
        """Return True if `capabilities()` contains `capability`."""
        declared = self.capabilities()
        return capability in declared
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Migration discovery + checksum helpers (feat-024).
|
|
2
|
+
|
|
3
|
+
Drivers consume :func:`discover_migrations` to load their bundled
|
|
4
|
+
migration files at startup. The contract type :class:`Migration`
|
|
5
|
+
+ :class:`Migrator` Protocol live in
|
|
6
|
+
:mod:`agentforge_core.contracts.migrator`.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from agentforge_core.migrations.discover import _checksum, discover_migrations
|
|
12
|
+
from agentforge_core.migrations.template import render_migration_up
|
|
13
|
+
|
|
14
|
+
__all__ = ["_checksum", "discover_migrations", "render_migration_up"]
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Filesystem migration discovery (feat-024).
|
|
2
|
+
|
|
3
|
+
Drivers store migration files at
|
|
4
|
+
``<package>/migrations/NNNN_<snake_name>.<ext>`` where ``<ext>`` is
|
|
5
|
+
the driver's dialect (``sql`` / ``cypher`` / ``surql``). This
|
|
6
|
+
module loads every matching file from a directory, hashes its
|
|
7
|
+
contents, and returns a list of :class:`Migration` values sorted by
|
|
8
|
+
id ascending.
|
|
9
|
+
|
|
10
|
+
Filename convention is strict: the 4-digit prefix must be followed
|
|
11
|
+
by an underscore and a snake-case name (``[a-z0-9_]+``). Files that
|
|
12
|
+
don't match are silently ignored — operators can drop drafts and
|
|
13
|
+
notes alongside without breaking the discovery.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import hashlib
|
|
19
|
+
import re
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
from agentforge_core.contracts.migrator import Migration
|
|
23
|
+
|
|
24
|
+
_FILENAME_RE = re.compile(r"^(\d{4})_([a-z0-9_]+)$")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _checksum(text: str) -> str:
    """Return the SHA-256 hex digest of ``text`` with line endings folded to LF.

    CRLF pairs are collapsed first, then any remaining bare CR, so the
    digest is the same whichever line-ending convention the text uses.
    """
    unix_text = text.replace("\r\n", "\n")
    unix_text = unix_text.replace("\r", "\n")
    hasher = hashlib.sha256()
    hasher.update(unix_text.encode("utf-8"))
    return hasher.hexdigest()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def discover_migrations(path: Path, *, suffix: str) -> list[Migration]:
    """Load every ``NNNN_<name>.<suffix>`` file in ``path``.

    Args:
        path: Directory to scan. Non-existent or non-directory paths
            return an empty list (so a driver without bundled
            migrations is a no-op rather than an error).
        suffix: File extension without the dot — e.g. ``"sql"`` for
            Postgres, ``"cypher"`` for Neo4j.

    Returns:
        Migrations sorted by id ascending. Each migration's checksum is
        computed over the file body exactly as read.

    Raises:
        ValueError: two files in ``path`` share the same 4-digit id.
    """
    # `is_dir()` is already False for a missing path, so a single check
    # covers both the "does not exist" and "not a directory" cases.
    if not path.is_dir():
        return []

    seen_ids: set[str] = set()
    migrations: list[Migration] = []
    for file_path in sorted(path.glob(f"*.{suffix}")):
        match = _FILENAME_RE.match(file_path.stem)
        if match is None:
            # Drafts and notes that do not follow the naming convention
            # are ignored on purpose.
            continue
        if not file_path.is_file():
            # The glob also matches directories (and other non-regular
            # entries) with a migration-like name; reading one would
            # raise, so skip it like any other non-migration entry.
            continue
        migration_id, name = match.group(1), match.group(2)
        if migration_id in seen_ids:
            msg = (
                f"Duplicate migration id {migration_id!r} in {path}; "
                f"found at {file_path.name!r} but already seen."
            )
            raise ValueError(msg)
        seen_ids.add(migration_id)
        body = file_path.read_text(encoding="utf-8")
        migrations.append(
            Migration(
                id=migration_id,
                name=name,
                up=body,
                checksum=_checksum(body),
            )
        )

    migrations.sort(key=lambda m: m.id)
    return migrations
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Migration body templating (feat-024 v0.3 follow-up).
|
|
2
|
+
|
|
3
|
+
Per-driver migrators may need per-deployment variables in their
|
|
4
|
+
migration bodies — Postgres `vector(${dimensions})` and SurrealDB
|
|
5
|
+
`HNSW DIMENSION ${dimensions}` are the canonical cases. The
|
|
6
|
+
templating syntax is Python's :class:`string.Template` with
|
|
7
|
+
``${var}`` placeholders and ``$$`` for a literal ``$``.
|
|
8
|
+
|
|
9
|
+
Important invariant: the migration's checksum is computed over the
|
|
10
|
+
*un-substituted* template body. Re-deploying with a different
|
|
11
|
+
variable value (e.g. swapping a 768-dim embedder for a 1536-dim
|
|
12
|
+
one) produces the same checksum, so the framework's drift
|
|
13
|
+
detection stays correct.
|
|
14
|
+
|
|
15
|
+
Unknown placeholders are left untouched (`safe_substitute` semantics)
|
|
16
|
+
— template-key typos surface as SQL syntax errors at apply time
|
|
17
|
+
rather than silently empty replacements.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from string import Template
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def render_migration_up(body: str, variables: dict[str, str] | None) -> str:
    """Fill ``${var}`` placeholders in ``body`` from ``variables``.

    With ``variables`` set to ``None`` or an empty dict, ``body`` is
    returned as-is and no template processing happens at all — so a
    ``$$`` escape is only collapsed to ``$`` when at least one variable
    is supplied. Placeholders with no matching key are left in the
    output verbatim, which lets a template-key typo surface as an
    apply-time SQL error.
    """
    return Template(body).safe_substitute(**variables) if variables else body
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Observability primitives — tracer helper, span attribute helpers.
|
|
2
|
+
|
|
3
|
+
The OpenTelemetry **API** ships in core; the **SDK** + exporter ship
|
|
4
|
+
in the optional `agentforge-otel` package. Without the SDK, all
|
|
5
|
+
`tracer.start_*` calls degrade to the OpenTelemetry API's no-op tracer —
|
|
6
|
+
near-zero cost. Consumers that want real spans install
|
|
7
|
+
`agentforge-otel` and construct an `OpenTelemetryHook` (which
|
|
8
|
+
configures the SDK provider once).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from agentforge_core.observability.tracing import (
|
|
14
|
+
SCOPE_NAME,
|
|
15
|
+
get_tracer,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = ["SCOPE_NAME", "get_tracer"]
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""`get_tracer` — OpenTelemetry tracer accessor for framework spans.
|
|
2
|
+
|
|
3
|
+
feat-009 §4.3 defines a span tree per run:
|
|
4
|
+
|
|
5
|
+
span: agent.run
|
|
6
|
+
└── span: strategy.iteration
|
|
7
|
+
├── span: llm.call
|
|
8
|
+
├── span: tool.<name>
|
|
9
|
+
└── span: evaluator.<name>
|
|
10
|
+
|
|
11
|
+
The framework emits these spans unconditionally via the OTel API. When
|
|
12
|
+
no SDK provider is configured (the default), `start_as_current_span`
|
|
13
|
+
returns the no-op `NonRecordingSpan` and the cost is negligible. When
|
|
14
|
+
`agentforge-otel` configures a real provider, the same call sites
|
|
15
|
+
produce real spans + attributes that flow to the OTLP collector.
|
|
16
|
+
|
|
17
|
+
`SCOPE_NAME` is the OTel instrumentation scope used for every framework
|
|
18
|
+
span; `agentforge-otel` filters or routes on it.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
from opentelemetry import trace
|
|
24
|
+
from opentelemetry.trace import Tracer
|
|
25
|
+
|
|
26
|
+
SCOPE_NAME = "agentforge"
|
|
27
|
+
"""Instrumentation scope name for every framework-emitted span."""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_tracer() -> Tracer:
    """Fetch the tracer for the framework's instrumentation scope.

    Delegates to the OpenTelemetry API's `trace.get_tracer`, passing
    `SCOPE_NAME` as the instrumentation scope. Takes no arguments and
    can be called whether or not an SDK provider has been configured.
    """
    framework_tracer = trace.get_tracer(SCOPE_NAME)
    return framework_tracer
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Production-rails primitives.
|
|
2
|
+
|
|
3
|
+
Owned by the framework (per ADR-0010): every agent has these wired by
|
|
4
|
+
default. Includes per-run cost guarding (`BudgetPolicy`), correlation
|
|
5
|
+
context (`RunContext`, `current_run`), structured logging filter
|
|
6
|
+
(`RunIdFilter`), cross-provider failover (`FallbackChain`), and the
|
|
7
|
+
framework's exception hierarchy.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
# `FallbackChain` is intentionally NOT re-exported here because it
|
|
13
|
+
# imports `LLMClient`, which would create a circular import:
|
|
14
|
+
# `agentforge_core/__init__.py` → contracts.llm (imports
|
|
15
|
+
# `CapabilityNotSupported` from `production.exceptions`) →
|
|
16
|
+
# triggers `production/__init__.py` → fallback (imports
|
|
17
|
+
# LLMClient still being loaded). Users reach FallbackChain via the
|
|
18
|
+
# top-level `from agentforge_core import FallbackChain` (loaded
|
|
19
|
+
# after `production` finishes), or directly via
|
|
20
|
+
# `agentforge_core.production.fallback`.
|
|
21
|
+
from agentforge_core.production.budget import BudgetPolicy
|
|
22
|
+
from agentforge_core.production.exceptions import (
|
|
23
|
+
AgentForgeError,
|
|
24
|
+
AuthenticationError,
|
|
25
|
+
BudgetExceeded,
|
|
26
|
+
CapabilityNotSupported,
|
|
27
|
+
GuardrailViolation,
|
|
28
|
+
ModelNotFoundError,
|
|
29
|
+
ModuleError,
|
|
30
|
+
ProviderError,
|
|
31
|
+
RateLimitError,
|
|
32
|
+
ServiceError,
|
|
33
|
+
TimeoutError,
|
|
34
|
+
)
|
|
35
|
+
from agentforge_core.production.log_filter import (
|
|
36
|
+
RunIdFilter,
|
|
37
|
+
install_run_id_filter,
|
|
38
|
+
uninstall_run_id_filter,
|
|
39
|
+
)
|
|
40
|
+
from agentforge_core.production.log_format import (
|
|
41
|
+
JsonFormatter,
|
|
42
|
+
install_json_formatter,
|
|
43
|
+
uninstall_json_formatter,
|
|
44
|
+
)
|
|
45
|
+
from agentforge_core.production.run_context import (
|
|
46
|
+
RunContext,
|
|
47
|
+
bind_run,
|
|
48
|
+
current_run,
|
|
49
|
+
new_run,
|
|
50
|
+
reset_run,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
__all__ = [
|
|
54
|
+
"AgentForgeError",
|
|
55
|
+
"AuthenticationError",
|
|
56
|
+
"BudgetExceeded",
|
|
57
|
+
"BudgetPolicy",
|
|
58
|
+
"CapabilityNotSupported",
|
|
59
|
+
"GuardrailViolation",
|
|
60
|
+
"JsonFormatter",
|
|
61
|
+
"ModelNotFoundError",
|
|
62
|
+
"ModuleError",
|
|
63
|
+
"ProviderError",
|
|
64
|
+
"RateLimitError",
|
|
65
|
+
"RunContext",
|
|
66
|
+
"RunIdFilter",
|
|
67
|
+
"ServiceError",
|
|
68
|
+
"TimeoutError",
|
|
69
|
+
"bind_run",
|
|
70
|
+
"current_run",
|
|
71
|
+
"install_json_formatter",
|
|
72
|
+
"install_run_id_filter",
|
|
73
|
+
"new_run",
|
|
74
|
+
"reset_run",
|
|
75
|
+
"uninstall_json_formatter",
|
|
76
|
+
"uninstall_run_id_filter",
|
|
77
|
+
]
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""`BudgetPolicy` — per-run cost cap (per ADR-0010).
|
|
2
|
+
|
|
3
|
+
Every reasoning strategy must call `BudgetPolicy.check` before every
|
|
4
|
+
LLM call. Branching strategies (Tree-of-Thoughts, Multi-Agent
|
|
5
|
+
Supervisor in feat-002) call `reserve` before fanning out so collective
|
|
6
|
+
spend across parallel branches cannot exceed the cap.
|
|
7
|
+
|
|
8
|
+
The policy aggregates spend across every provider used in a run. A
|
|
9
|
+
multi-provider agent (e.g. reasoning model + cheap judge + embedding
|
|
10
|
+
model per ADR-0018) shares one policy.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
16
|
+
|
|
17
|
+
from agentforge_core.production.exceptions import (
|
|
18
|
+
BudgetExceeded,
|
|
19
|
+
GuardrailViolation,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class BudgetPolicy(BaseModel):
    """Cost and resource limits tracked over a single run.

    Attributes:
        usd: Ceiling on USD spend across all LLM/embedding providers
            used during the run. Defaults to 1.0.
        max_tokens: Ceiling on total tokens (input + output) consumed
            in the run. Defaults to 200_000.
        max_iterations: Ceiling on reasoning loop iterations.
            Defaults to 25.
        error_streak_limit: Number of consecutive tool/observation
            errors at which the loop aborts. Defaults to 3.
        spent_usd: USD committed so far via `commit`.
        reserved_usd: USD currently held by `reserve`.
        consumed_tokens: Tokens committed so far via `commit`.
        iteration: Iterations recorded via `increment_iteration`.
        error_streak: Current run of consecutive errors.
    """

    model_config = ConfigDict(strict=True, validate_assignment=True)

    # Configured caps.
    usd: float = Field(default=1.0, ge=0.0)
    max_tokens: int = Field(default=200_000, ge=0)
    max_iterations: int = Field(default=25, ge=1)
    error_streak_limit: int = Field(default=3, ge=1)

    # Running counters; every one starts at zero.
    spent_usd: float = Field(default=0.0, ge=0.0)
    reserved_usd: float = Field(default=0.0, ge=0.0)
    consumed_tokens: int = Field(default=0, ge=0)
    iteration: int = Field(default=0, ge=0)
    error_streak: int = Field(default=0, ge=0)

    def remaining_usd(self) -> float:
        """Return the USD left after committed spend and reservations.

        Never negative: an overdrawn budget reports 0.0.
        """
        headroom = self.usd - self.spent_usd - self.reserved_usd
        return max(0.0, headroom)

    def check(self) -> None:
        """Raise if any cap is breached. Called before every LLM call.

        The caps are tested in a fixed order — USD, tokens, iterations,
        error streak — and the first breach raises. Reserved USD is not
        part of the USD test; only committed spend is compared.

        Raises:
            BudgetExceeded: USD or token cap exhausted.
            GuardrailViolation: iteration or error-streak limit reached.
        """
        if self.spent_usd >= self.usd:
            usd_message = f"USD budget exhausted: spent ${self.spent_usd:.4f} of ${self.usd:.4f}"
            raise BudgetExceeded(usd_message)
        if self.consumed_tokens >= self.max_tokens:
            token_message = f"Token budget exhausted: {self.consumed_tokens} of {self.max_tokens}"
            raise BudgetExceeded(token_message)
        if self.iteration >= self.max_iterations:
            iteration_message = f"Iteration cap reached: {self.iteration} of {self.max_iterations}"
            raise GuardrailViolation(iteration_message)
        if self.error_streak >= self.error_streak_limit:
            streak_message = f"Error streak limit hit: {self.error_streak} consecutive errors"
            raise GuardrailViolation(streak_message)

    def reserve(self, usd: float) -> None:
        """Set aside USD budget ahead of a planned spend.

        Branching strategies that fan out reserve per branch before
        issuing the LLM call, so the sum of reservations cannot exceed
        the cap.

        Raises:
            ValueError: `usd` is negative.
            BudgetExceeded: reservation would exceed remaining budget.
        """
        if usd < 0:
            raise ValueError(f"Cannot reserve negative budget: {usd}")
        projected = self.spent_usd + self.reserved_usd + usd
        if projected > self.usd:
            raise BudgetExceeded(
                f"Cannot reserve ${usd:.4f}; only ${self.remaining_usd():.4f} "
                f"of ${self.usd:.4f} remains"
            )
        self.reserved_usd = self.reserved_usd + usd

    def commit(self, actual_usd: float, tokens: int = 0) -> None:
        """Record the actual cost of a completed call.

        Only spend and token counters are updated here; reservations
        are left untouched and must be released separately through
        `release_reservation`.

        Raises:
            ValueError: negative cost or token count.
        """
        if actual_usd < 0:
            raise ValueError(f"Cannot commit negative cost: {actual_usd}")
        if tokens < 0:
            raise ValueError(f"Cannot commit negative tokens: {tokens}")
        self.spent_usd = self.spent_usd + actual_usd
        self.consumed_tokens = self.consumed_tokens + tokens

    def release_reservation(self, usd: float) -> None:
        """Give back previously reserved budget (e.g. on cancellation).

        Releasing more than is currently reserved clamps the
        reservation to 0 rather than going negative.

        Raises:
            ValueError: `usd` is negative.
        """
        if usd < 0:
            raise ValueError(f"Cannot release negative reservation: {usd}")
        outstanding = self.reserved_usd - usd
        self.reserved_usd = max(0.0, outstanding)

    def increment_iteration(self) -> None:
        """Record one strategy iteration. Called by the strategy loop."""
        self.iteration = self.iteration + 1

    def record_error(self) -> None:
        """Add one to the consecutive-error counter."""
        self.error_streak = self.error_streak + 1

    def record_success(self) -> None:
        """Reset the consecutive-error counter to zero."""
        self.error_streak = 0
|