forgesight-registry 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+ *.so
9
+
10
+ # venv / tooling
11
+ .venv/
12
+ venv/
13
+ .uv/
14
+ uv.lock
15
+
16
+ # test / type / lint caches
17
+ .pytest_cache/
18
+ .mypy_cache/
19
+ .ruff_cache/
20
+ .coverage
21
+ .coverage.*
22
+ coverage.xml
23
+ htmlcov/
24
+
25
+ # secrets / local env (never commit)
26
+ .env
27
+ .env.*
28
+
29
+ # editor / OS
30
+ .DS_Store
31
+ .idea/
32
+ .vscode/
33
+
34
+ # local-only session working state (per the workspace pipeline)
35
+ .claude/state/
36
+
37
+ # local-only launch planning (not part of the published repo)
38
+ /launch/
@@ -0,0 +1,101 @@
1
+ Metadata-Version: 2.4
2
+ Name: forgesight-registry
3
+ Version: 0.1.1
4
+ Summary: ForgeSight registry — declared agent ownership auto-stamped onto runs; chargeback + catalogue rollups.
5
+ Project-URL: Homepage, https://github.com/Scaffoldic/forgesight
6
+ Project-URL: Repository, https://github.com/Scaffoldic/forgesight
7
+ Project-URL: Issues, https://github.com/Scaffoldic/forgesight/issues
8
+ Project-URL: Changelog, https://github.com/Scaffoldic/forgesight/blob/main/docs/releases/v0.1.md
9
+ Author: kjoshi
10
+ License-Expression: Apache-2.0
11
+ Keywords: ai-agents,chargeback,finops,forgesight,observability,registry
12
+ Classifier: Development Status :: 2 - Pre-Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Information Technology
15
+ Classifier: License :: OSI Approved :: Apache Software License
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Topic :: System :: Monitoring
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.11
23
+ Requires-Dist: forgesight-core
24
+ Requires-Dist: pyyaml>=6.0
25
+ Description-Content-Type: text/markdown
26
+
27
+ # forgesight-registry
28
+
29
+ Declared agent **ownership** auto-stamped onto every run — plus offline **chargeback** and
30
+ **catalogue** rollups — for [ForgeSight](https://github.com/Scaffoldic/forgesight). The last
31
+ mile from per-run cost to org-level FinOps: declare an agent *once* instead of tagging it
32
+ every run, everywhere, inconsistently.
33
+
34
+ ```bash
35
+ pip install forgesight-registry
36
+ ```
37
+
38
+ ```yaml
39
+ # agents.yaml — one source of truth
40
+ agents:
41
+ - name: invoice-parser
42
+ version: "2.3.0"
43
+ owner: "fin-platform@acme.com"
44
+ team: "finance-platform"
45
+ repo: "acme/invoice-agents"
46
+ lifecycle: "ga"
47
+ sla_tier: "tier-1"
48
+ - name: nightly-summariser
49
+ version: "*"
50
+ owner: "growth@acme.com"
51
+ team: "growth"
52
+ lifecycle: "beta"
53
+ ```
54
+
55
+ ```python
56
+ import forgesight
57
+ from forgesight_registry import Registry
58
+
59
+ reg = Registry.from_file("agents.yaml")
60
+ forgesight.configure(run_metadata_provider=reg.ownership_metadata) # stamp ownership at run start
61
+
62
+ # Agent author writes nothing extra — every run now carries team/owner/repo/... on the
63
+ # root span and every child (FR-5):
64
+ with forgesight.telemetry.agent_run("invoice-parser", version="2.3.0") as run:
65
+ ...
66
+ ```
67
+
68
+ ## How it works
69
+
70
+ - **Stamp at run start.** The registry resolves `(name, version)` — exact → `"*"` wildcard →
71
+ unmatched — and merges the agent's ownership fields into the run's metadata. **Caller-set
72
+ keys win** (a one-off `environment=staging` survives). Undeclared agents are counted
73
+ (`on_unmatched` = `warn` | `ignore` | `error`), so the registry doubles as a "what's running
74
+ but undeclared" detector.
75
+ - **Chargeback is a group-by.** `ChargebackReport.from_records(records, dimensions=["team", "environment"])`
76
+ sums `cost_usd` / tokens / runs / failures per group — clean, because the dimensions were
77
+ stamped at source. An absent dimension groups under `<unattributed>` so cost never vanishes.
78
+ - **Catalogue = declared ∪ observed.** `AgentCatalogue.from_records(records, registry=reg, now_unix_nanos=…)`
79
+ joins the declared registry (owner / lifecycle / SLA) with observed telemetry (last-seen /
80
+ run count / windowed cost), surfacing declared-but-silent and active-but-undeclared agents.
81
+
82
+ ## Configuration
83
+
84
+ ```yaml
85
+ registry:
86
+ enabled: true # master switch (default false — install does nothing until on)
87
+ source: "file" # file | http (any other source: build it yourself via Registry.from_source(...))
88
+ path: "agents.yaml"
89
+ on_unmatched: "warn" # warn | ignore | error
90
+ stamp:
91
+ fields: ["team", "owner", "repo", "lifecycle", "sla_tier"]
92
+ prefix: "" # e.g. "org." → org.team, org.owner
93
+ ```
94
+
95
+ Non-blocking: stamping is an in-memory dict merge (P6). No vendor SDK (P1) — the HTTP source
96
+ uses stdlib `urllib`. Attribution, not control — budget *enforcement* on these same dimensions
97
+ is `forgesight-governance` (feat-020).
98
+
99
+ ## License
100
+
101
+ Apache-2.0
@@ -0,0 +1,75 @@
1
+ # forgesight-registry
2
+
3
+ Declared agent **ownership** auto-stamped onto every run — plus offline **chargeback** and
4
+ **catalogue** rollups — for [ForgeSight](https://github.com/Scaffoldic/forgesight). The last
5
+ mile from per-run cost to org-level FinOps: declare an agent *once* instead of tagging it
6
+ every run, everywhere, inconsistently.
7
+
8
+ ```bash
9
+ pip install forgesight-registry
10
+ ```
11
+
12
+ ```yaml
13
+ # agents.yaml — one source of truth
14
+ agents:
15
+ - name: invoice-parser
16
+ version: "2.3.0"
17
+ owner: "fin-platform@acme.com"
18
+ team: "finance-platform"
19
+ repo: "acme/invoice-agents"
20
+ lifecycle: "ga"
21
+ sla_tier: "tier-1"
22
+ - name: nightly-summariser
23
+ version: "*"
24
+ owner: "growth@acme.com"
25
+ team: "growth"
26
+ lifecycle: "beta"
27
+ ```
28
+
29
+ ```python
30
+ import forgesight
31
+ from forgesight_registry import Registry
32
+
33
+ reg = Registry.from_file("agents.yaml")
34
+ forgesight.configure(run_metadata_provider=reg.ownership_metadata) # stamp ownership at run start
35
+
36
+ # Agent author writes nothing extra — every run now carries team/owner/repo/... on the
37
+ # root span and every child (FR-5):
38
+ with forgesight.telemetry.agent_run("invoice-parser", version="2.3.0") as run:
39
+ ...
40
+ ```
41
+
42
+ ## How it works
43
+
44
+ - **Stamp at run start.** The registry resolves `(name, version)` — exact → `"*"` wildcard →
45
+ unmatched — and merges the agent's ownership fields into the run's metadata. **Caller-set
46
+ keys win** (a one-off `environment=staging` survives). Undeclared agents are counted
47
+ (`on_unmatched` = `warn` | `ignore` | `error`), so the registry doubles as a "what's running
48
+ but undeclared" detector.
49
+ - **Chargeback is a group-by.** `ChargebackReport.from_records(records, dimensions=["team", "environment"])`
50
+ sums `cost_usd` / tokens / runs / failures per group — clean, because the dimensions were
51
+ stamped at source. An absent dimension groups under `<unattributed>` so cost never vanishes.
52
+ - **Catalogue = declared ∪ observed.** `AgentCatalogue.from_records(records, registry=reg, now_unix_nanos=…)`
53
+ joins the declared registry (owner / lifecycle / SLA) with observed telemetry (last-seen /
54
+ run count / windowed cost), surfacing declared-but-silent and active-but-undeclared agents.
55
+
56
+ ## Configuration
57
+
58
+ ```yaml
59
+ registry:
60
+ enabled: true # master switch (default false — install does nothing until on)
61
+ source: "file" # file | http (any other source: build it yourself via Registry.from_source(...))
62
+ path: "agents.yaml"
63
+ on_unmatched: "warn" # warn | ignore | error
64
+ stamp:
65
+ fields: ["team", "owner", "repo", "lifecycle", "sla_tier"]
66
+ prefix: "" # e.g. "org." → org.team, org.owner
67
+ ```
68
+
69
+ Non-blocking: stamping is an in-memory dict merge (P6). No vendor SDK (P1) — the HTTP source
70
+ uses stdlib `urllib`. Attribution, not control — budget *enforcement* on these same dimensions
71
+ is `forgesight-governance` (feat-020).
72
+
73
+ ## License
74
+
75
+ Apache-2.0
@@ -0,0 +1,45 @@
1
+ [project]
2
+ name = "forgesight-registry"
3
+ version = "0.1.1"
4
+ description = "ForgeSight registry — declared agent ownership auto-stamped onto runs; chargeback + catalogue rollups."
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ license = "Apache-2.0"
8
+ authors = [{ name = "kjoshi" }]
9
+ keywords = ["observability", "finops", "chargeback", "registry", "ai-agents", "forgesight"]
10
+ classifiers = [
11
+ "Development Status :: 2 - Pre-Alpha",
12
+ "Intended Audience :: Developers",
13
+ "Intended Audience :: Information Technology",
14
+ "Topic :: System :: Monitoring",
15
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
16
+ "License :: OSI Approved :: Apache Software License",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Programming Language :: Python :: 3.13",
20
+ "Typing :: Typed",
21
+ ]
22
+ dependencies = ["forgesight-core", "PyYAML>=6.0"]
23
+
24
+ [project.entry-points."forgesight.modules"]
25
+ registry = "forgesight_registry:install"
26
+
27
+ [project.entry-points."forgesight.registry_sources"]
28
+ file = "forgesight_registry.source:FileSource"
29
+ http = "forgesight_registry.source:HttpSource"
30
+
31
+ [project.urls]
32
+ Homepage = "https://github.com/Scaffoldic/forgesight"
33
+ Repository = "https://github.com/Scaffoldic/forgesight"
34
+ Issues = "https://github.com/Scaffoldic/forgesight/issues"
35
+ Changelog = "https://github.com/Scaffoldic/forgesight/blob/main/docs/releases/v0.1.md"
36
+
37
+ [build-system]
38
+ requires = ["hatchling"]
39
+ build-backend = "hatchling.build"
40
+
41
+ [tool.hatch.build.targets.wheel]
42
+ packages = ["src/forgesight_registry"]
43
+
44
+ [tool.uv.sources]
45
+ forgesight-core = { workspace = true }
@@ -0,0 +1,67 @@
1
+ """ForgeSight registry — declared agent ownership auto-stamped onto runs; chargeback rollups.
2
+
3
+ Wire the registry's stamping at bootstrap, then chargeback and the catalogue are group-bys
4
+ over the clean dimensions the SDK stamped:
5
+
6
+ ```python
7
+ import forgesight
8
+ from forgesight_registry import Registry
9
+
10
+ reg = Registry.from_file("agents.yaml")
11
+ forgesight.configure(run_metadata_provider=reg.ownership_metadata)
12
+ ```
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from collections.abc import Mapping
18
+ from typing import Any
19
+
20
+ from .model import AgentEntry, Lifecycle
21
+ from .registry import Registry, RegistryUnmatched
22
+ from .rollup import AgentCatalogue, CatalogueEntry, ChargebackReport, ChargebackRow
23
+ from .source import FileSource, HttpSource, RegistrySource
24
+
25
# Keep in sync with ``version`` in pyproject.toml; the published distribution is 0.1.1.
__version__ = "0.1.1"

# Process-wide registry built by install(); None until install() runs or after a reset.
_installed: Registry | None = None


def install(config: Mapping[str, Any] | None = None) -> Registry:
    """The ``forgesight.modules`` entry point: build the registry from config and stash it.

    Wire it as the run-start provider with
    ``configure(run_metadata_provider=installed_registry().ownership_metadata)``.

    Args:
        config: Settings mapping handed to ``Registry.from_config``; ``None`` is passed
            through unchanged.

    Returns:
        The newly built registry, which replaces any previously installed one.
    """
    global _installed
    _installed = Registry.from_config(config)
    return _installed


def installed_registry() -> Registry | None:
    """The registry built by :func:`install`, or ``None`` if not installed."""
    return _installed


def reset_for_tests() -> None:
    """Forget the installed registry so :func:`installed_registry` returns ``None`` again."""
    global _installed
    _installed = None
49
+
50
+
51
+ __all__ = [
52
+ "AgentCatalogue",
53
+ "AgentEntry",
54
+ "CatalogueEntry",
55
+ "ChargebackReport",
56
+ "ChargebackRow",
57
+ "FileSource",
58
+ "HttpSource",
59
+ "Lifecycle",
60
+ "Registry",
61
+ "RegistrySource",
62
+ "RegistryUnmatched",
63
+ "__version__",
64
+ "install",
65
+ "installed_registry",
66
+ "reset_for_tests",
67
+ ]
@@ -0,0 +1,50 @@
1
+ """``AgentEntry`` — one declared agent's ownership metadata (feat-022).
2
+
3
+ The registry's value type: the name→team→owner→repo→lifecycle mapping that, declared once,
4
+ is auto-stamped onto every run so chargeback rolls up on clean dimensions and every run is
5
+ traceable to a human. ``version`` is an exact string or ``"*"`` (any version). Experimental.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Mapping
11
+ from dataclasses import dataclass, field
12
+ from enum import StrEnum
13
+ from types import MappingProxyType
14
+
15
+ _EMPTY: Mapping[str, str] = MappingProxyType({})
16
+
17
+
18
+ class Lifecycle(StrEnum):
19
+ EXPERIMENTAL = "experimental"
20
+ BETA = "beta"
21
+ GA = "ga"
22
+ DEPRECATED = "deprecated"
23
+
24
+
25
+ @dataclass(frozen=True, slots=True)
26
+ class AgentEntry:
27
+ name: str
28
+ version: str = "*" # exact version or "*" wildcard
29
+ owner: str | None = None
30
+ team: str | None = None
31
+ repo: str | None = None
32
+ lifecycle: Lifecycle = Lifecycle.GA
33
+ sla_tier: str | None = None
34
+ extra: Mapping[str, str] = field(default_factory=lambda: _EMPTY)
35
+
36
+ def fields(self) -> dict[str, str]:
37
+ """The stampable fields as a flat ``key → value`` dict (omitting unset ones)."""
38
+ out: dict[str, str] = {}
39
+ if self.owner is not None:
40
+ out["owner"] = self.owner
41
+ if self.team is not None:
42
+ out["team"] = self.team
43
+ if self.repo is not None:
44
+ out["repo"] = self.repo
45
+ out["lifecycle"] = self.lifecycle.value
46
+ if self.sla_tier is not None:
47
+ out["sla_tier"] = self.sla_tier
48
+ for key, value in self.extra.items():
49
+ out[key] = value
50
+ return out
@@ -0,0 +1,124 @@
1
+ """``Registry`` — resolve ``(name, version)`` → ownership and stamp it onto every run.
2
+
3
+ Wired at bootstrap as the runtime's run-start metadata provider (feat-022): at run start the
4
+ SDK looks the agent up and merges its ownership fields into the run's metadata (on the root
5
+ span and every child, FR-5) — caller-set keys win. Resolution is exact ``(name, version)`` →
6
+ ``(name, "*")`` wildcard → unmatched (counted; ``on_unmatched`` decides warn/ignore/error).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ from collections.abc import Mapping, Sequence
13
+ from typing import Any
14
+
15
+ from .model import AgentEntry
16
+ from .source import FileSource, HttpSource, RegistrySource, parse_entries
17
+
18
_log = logging.getLogger("forgesight.registry")
# The policies accepted for ``on_unmatched``; ``Registry.__init__`` rejects anything else.
_ON_UNMATCHED = ("warn", "ignore", "error")


class RegistryUnmatched(LookupError):
    """Raised at run start when ``on_unmatched='error'`` and an agent isn't declared."""


class Registry:
    """The declared agent registry: resolve ownership and produce run-start metadata.

    Lookup order is the exact ``(name, version)`` entry first, then the ``"*"`` wildcard
    entry for the name. Misses are counted in ``unmatched_count`` and handled according to
    ``on_unmatched``.
    """

    def __init__(
        self,
        entries: Sequence[AgentEntry],
        *,
        stamp_fields: Sequence[str] | None = None,
        prefix: str = "",
        on_unmatched: str = "warn",
    ) -> None:
        """Index ``entries`` for lookup.

        Args:
            entries: Declared agents. When two entries share a key, the later one wins.
            stamp_fields: If given, only these field names are stamped; ``None`` stamps all.
            prefix: String prepended to every stamped key (e.g. ``"org."``).
            on_unmatched: ``"warn"``, ``"ignore"`` or ``"error"``.

        Raises:
            ValueError: If ``on_unmatched`` is not one of the accepted policies.
        """
        if on_unmatched not in _ON_UNMATCHED:
            raise ValueError(f"on_unmatched must be one of {_ON_UNMATCHED}, got {on_unmatched!r}")
        # Materialise once and index from the copy, so a one-shot iterable is not exhausted
        # by the first pass and then silently produce an empty index.
        self._entries = list(entries)
        self._exact: dict[tuple[str, str], AgentEntry] = {}
        self._wildcard: dict[str, AgentEntry] = {}
        for entry in self._entries:
            if entry.version == "*":
                self._wildcard[entry.name] = entry
            else:
                self._exact[(entry.name, entry.version)] = entry
        self._fields = tuple(stamp_fields) if stamp_fields is not None else None
        self._prefix = prefix
        self._on_unmatched = on_unmatched
        self.unmatched_count = 0

    @property
    def entries(self) -> list[AgentEntry]:
        """A copy of the declared entries, in declaration order."""
        return list(self._entries)

    def resolve(self, name: str, version: str | None) -> AgentEntry | None:
        """Return the entry for ``(name, version)``: exact match, else wildcard, else ``None``."""
        if version is not None:
            exact = self._exact.get((name, version))
            if exact is not None:
                return exact
        return self._wildcard.get(name)

    def ownership_metadata(self, name: str, version: str | None = None) -> dict[str, str]:
        """The metadata to stamp on a run for ``(name, version)``. The run-start provider.

        Returns:
            The entry's fields, filtered by ``stamp_fields`` and prefixed with ``prefix``;
            an empty dict when the agent is undeclared.

        Raises:
            RegistryUnmatched: If the agent is undeclared and ``on_unmatched`` is ``"error"``.
        """
        entry = self.resolve(name, version)
        if entry is None:
            # Count every miss, whatever the policy, so undeclared agents stay visible.
            self.unmatched_count += 1
            if self._on_unmatched == "error":
                raise RegistryUnmatched(
                    f"agent {name!r} v{version} is not declared in the registry"
                )
            if self._on_unmatched == "warn":
                _log.warning("forgesight-registry: undeclared agent %r v%s", name, version)
            return {}
        fields = entry.fields()
        if self._fields is not None:
            fields = {k: v for k, v in fields.items() if k in self._fields}
        if self._prefix:
            fields = {f"{self._prefix}{k}": v for k, v in fields.items()}
        return fields

    # --- construction -----------------------------------------------------
    @classmethod
    def from_source(cls, source: RegistrySource, **kwargs: Any) -> Registry:
        """Build a registry from whatever ``source.load()`` returns."""
        return cls(source.load(), **kwargs)

    @classmethod
    def from_file(cls, path: str, **kwargs: Any) -> Registry:
        """Build a registry from a YAML/JSON file at ``path``."""
        return cls.from_source(FileSource(path), **kwargs)

    @classmethod
    def from_entries(cls, entries: Sequence[Mapping[str, Any]], **kwargs: Any) -> Registry:
        """Build a registry from raw entry mappings (parsed with ``parse_entries``)."""
        return cls(parse_entries(list(entries)), **kwargs)

    @classmethod
    def from_config(cls, settings: Mapping[str, Any] | None = None) -> Registry:
        """Build a registry from the ``registry`` block of a settings mapping.

        Args:
            settings: Settings mapping; when ``None`` the settings are loaded via
                ``forgesight_core.config.load_settings``.

        Returns:
            An empty registry when ``registry.enabled`` is falsy, otherwise one loaded from
            the configured ``file`` or ``http`` source.

        Raises:
            ValueError: If the source is unknown, or its ``path`` / ``url`` is missing.
        """
        if settings is None:
            # Imported lazily: the loader is only needed when no settings were passed in.
            from forgesight_core.config import load_settings

            resolved: Mapping[str, Any] = load_settings()
        else:
            resolved = settings
        block = resolved.get("registry")
        block = block if isinstance(block, Mapping) else {}
        stamp = block.get("stamp")
        stamp = stamp if isinstance(stamp, Mapping) else {}
        raw_fields = stamp.get("fields")
        stamp_fields: list[str] | None
        if isinstance(raw_fields, str):
            # A str is itself a Sequence; list("team") would yield single characters and
            # stamp nothing, so a bare string is treated as one field name.
            stamp_fields = [raw_fields]
        elif isinstance(raw_fields, Sequence):
            stamp_fields = list(raw_fields)
        else:
            stamp_fields = None
        kwargs: dict[str, Any] = {
            "on_unmatched": str(block.get("on_unmatched", "warn")),
            "prefix": str(stamp.get("prefix", "")),
            "stamp_fields": stamp_fields,
        }
        if not block.get("enabled", False):
            return cls([], **kwargs)  # installed but not switched on (P2) ⇒ stamps nothing
        source = str(block.get("source", "file"))
        if source == "file":
            path = block.get("path")
            if not path:
                raise ValueError("registry.source 'file' requires path")
            return cls.from_file(str(path), **kwargs)
        if source == "http":
            url = block.get("url")
            if not url:
                raise ValueError("registry.source 'http' requires url")
            return cls.from_source(HttpSource(str(url)), **kwargs)
        raise ValueError(f"unknown registry source {source!r}")
@@ -0,0 +1,179 @@
1
+ """Offline chargeback + catalogue rollups over exported records (feat-022).
2
+
3
+ Pure aggregation, off the hot path. Because the ownership dimensions were stamped *at
4
+ source* from one declaration, the group-by is clean — no missing / misspelled ``team``. An
5
+ absent dimension groups under ``"<unattributed>"`` so cost never silently vanishes. The
6
+ catalogue joins the *declared* registry (owner / lifecycle / SLA) with *observed* telemetry
7
+ (last-seen / run count / windowed cost), surfacing declared-but-silent and undeclared agents.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from collections.abc import Mapping, Sequence
13
+ from dataclasses import dataclass, field
14
+
15
+ from forgesight_api import Kind, Record, RunStatus
16
+
17
+ from .model import Lifecycle
18
+ from .registry import Registry
19
+
20
# Group label used when a record carries no value for a requested chargeback dimension.
UNATTRIBUTED = "<unattributed>"
# Nanoseconds in one day; turns ``window_days`` into a unix-nanos cutoff for the catalogue.
_NANOS_PER_DAY = 86_400 * 1_000_000_000
# Agent-run statuses that are NOT counted as failures by the chargeback rollup.
_OK = frozenset({RunStatus.OK, RunStatus.RUNNING})
23
+
24
+
25
+ @dataclass(frozen=True, slots=True)
26
+ class ChargebackRow:
27
+ dimensions: Mapping[str, str]
28
+ cost_usd: float
29
+ run_count: int
30
+ token_total: int
31
+ failure_count: int
32
+
33
+
34
+ class ChargebackReport:
35
+ """Cost / tokens / runs / failures grouped by ownership dimensions."""
36
+
37
+ def __init__(self, rows: Sequence[ChargebackRow]) -> None:
38
+ self._rows = list(rows)
39
+
40
+ @classmethod
41
+ def from_records(
42
+ cls,
43
+ records: Sequence[Record],
44
+ *,
45
+ dimensions: Sequence[str],
46
+ registry: Registry | None = None,
47
+ ) -> ChargebackReport:
48
+ groups: dict[tuple[str, ...], list[float]] = {}
49
+ for record in records:
50
+ key = tuple(str(record.attributes.get(dim, UNATTRIBUTED)) for dim in dimensions)
51
+ acc = groups.setdefault(key, [0.0, 0.0, 0.0, 0.0]) # cost, tokens, runs, failures
52
+ if record.llm is not None:
53
+ acc[0] += record.llm.cost_usd or 0.0
54
+ acc[1] += record.llm.usage.total
55
+ if record.kind is Kind.AGENT:
56
+ acc[2] += 1
57
+ if record.status not in _OK:
58
+ acc[3] += 1
59
+ rows = [
60
+ ChargebackRow(
61
+ dimensions=dict(zip(dimensions, key, strict=True)),
62
+ cost_usd=acc[0],
63
+ token_total=int(acc[1]),
64
+ run_count=int(acc[2]),
65
+ failure_count=int(acc[3]),
66
+ )
67
+ for key, acc in groups.items()
68
+ ]
69
+ return cls(rows)
70
+
71
+ def rows(self) -> list[ChargebackRow]:
72
+ return list(self._rows)
73
+
74
+ def total_usd(self) -> float:
75
+ return sum(row.cost_usd for row in self._rows)
76
+
77
+
78
+ @dataclass(frozen=True, slots=True)
79
+ class CatalogueEntry:
80
+ name: str
81
+ version: str
82
+ owner: str | None
83
+ team: str | None
84
+ lifecycle: Lifecycle | None
85
+ sla_tier: str | None
86
+ last_seen: int | None
87
+ run_count: int
88
+ cost_30d: float
89
+ declared: bool
90
+ active: bool
91
+
92
+
93
+ @dataclass
94
+ class _Observed:
95
+ run_count: int = 0
96
+ last_seen: int | None = None
97
+ cost_window: float = 0.0
98
+ run_ids: set[str] = field(default_factory=set)
99
+
100
+
101
+ class AgentCatalogue:
102
+ """Declared+observed union: agents with owner, lifecycle, last-seen, and windowed cost."""
103
+
104
+ def __init__(self, entries: Sequence[CatalogueEntry]) -> None:
105
+ self._entries = list(entries)
106
+
107
+ @classmethod
108
+ def from_records(
109
+ cls,
110
+ records: Sequence[Record],
111
+ *,
112
+ registry: Registry,
113
+ now_unix_nanos: int,
114
+ window_days: int = 30,
115
+ ) -> AgentCatalogue:
116
+ cutoff = now_unix_nanos - window_days * _NANOS_PER_DAY
117
+ observed: dict[str, _Observed] = {}
118
+ run_to_name: dict[str, str] = {}
119
+ for record in records:
120
+ if record.kind is Kind.AGENT:
121
+ obs = observed.setdefault(record.name, _Observed())
122
+ obs.run_count += 1
123
+ obs.run_ids.add(record.run_id)
124
+ run_to_name[record.run_id] = record.name
125
+ end = record.end_unix_nanos
126
+ if end is not None and (obs.last_seen is None or end > obs.last_seen):
127
+ obs.last_seen = end
128
+ for record in records: # attribute LLM cost to the owning agent run, within the window
129
+ if record.llm is None:
130
+ continue
131
+ name = run_to_name.get(record.run_id)
132
+ if name is None:
133
+ continue
134
+ end = record.end_unix_nanos or 0
135
+ if end >= cutoff and record.llm.cost_usd:
136
+ observed[name].cost_window += record.llm.cost_usd
137
+
138
+ entries: list[CatalogueEntry] = []
139
+ seen_names: set[str] = set()
140
+ for declared in registry.entries: # declared-and-active + declared-but-silent
141
+ declared_obs = observed.get(declared.name)
142
+ seen_names.add(declared.name)
143
+ entries.append(
144
+ CatalogueEntry(
145
+ name=declared.name,
146
+ version=declared.version,
147
+ owner=declared.owner,
148
+ team=declared.team,
149
+ lifecycle=declared.lifecycle,
150
+ sla_tier=declared.sla_tier,
151
+ last_seen=declared_obs.last_seen if declared_obs else None,
152
+ run_count=declared_obs.run_count if declared_obs else 0,
153
+ cost_30d=declared_obs.cost_window if declared_obs else 0.0,
154
+ declared=True,
155
+ active=declared_obs is not None,
156
+ )
157
+ )
158
+ for name, obs in observed.items(): # active-but-undeclared (a governance gap)
159
+ if name in seen_names:
160
+ continue
161
+ entries.append(
162
+ CatalogueEntry(
163
+ name=name,
164
+ version="*",
165
+ owner=None,
166
+ team=None,
167
+ lifecycle=None,
168
+ sla_tier=None,
169
+ last_seen=obs.last_seen,
170
+ run_count=obs.run_count,
171
+ cost_30d=obs.cost_window,
172
+ declared=False,
173
+ active=True,
174
+ )
175
+ )
176
+ return cls(entries)
177
+
178
+ def entries(self) -> list[CatalogueEntry]:
179
+ return list(self._entries)
@@ -0,0 +1,82 @@
1
+ """Where registry entries come from — file and HTTP shipped, custom via the Protocol.
2
+
3
+ A ``RegistrySource`` loads ``AgentEntry`` records from a declared source. The file source
4
+ reads YAML/JSON on each ``load()`` call; the HTTP source likewise fetches on each ``load()``
5
+ (no TTL refresh or last-good caching is implemented in this module). No vendor SDK — the HTTP
6
+ source uses stdlib ``urllib`` (P1).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import urllib.request
13
+ from collections.abc import Mapping, Sequence
14
+ from typing import Any, Protocol, runtime_checkable
15
+
16
+ import yaml
17
+
18
+ from .model import AgentEntry, Lifecycle
19
+
20
+
21
@runtime_checkable
class RegistrySource(Protocol):
    """Loads the declared registry entries. Shipped: file + HTTP; custom via this Protocol."""

    # Structural contract: any object exposing ``load()`` satisfies this Protocol, and
    # ``@runtime_checkable`` additionally permits ``isinstance(obj, RegistrySource)`` checks.
    def load(self) -> Sequence[AgentEntry]: ...
26
+
27
+
28
def parse_entries(raw: Any) -> list[AgentEntry]:
    """Parse the ``agents:`` list (or a bare list) into :class:`AgentEntry` records.

    Args:
        raw: A mapping with an ``agents`` key, or the list of agent mappings itself.

    Returns:
        One entry per item that is a mapping with a non-empty ``name``; other items are
        skipped. Returns ``[]`` when no list of agents is present.

    Raises:
        ValueError: If an item's ``lifecycle`` is not a valid :class:`Lifecycle` value.
    """
    items = raw.get("agents") if isinstance(raw, Mapping) else raw
    if not isinstance(items, Sequence):
        return []
    # Keys mapped onto dedicated AgentEntry attributes; every other key lands in ``extra``.
    known = {"name", "version", "owner", "team", "repo", "lifecycle", "sla_tier"}
    entries: list[AgentEntry] = []
    for item in items:
        if not isinstance(item, Mapping) or not item.get("name"):
            continue
        extra = {str(k): str(v) for k, v in item.items() if k not in known and k != "extra"}
        extra.update({str(k): str(v) for k, v in (item.get("extra") or {}).items()})
        # An explicit null (``version:`` with no value in YAML) must fall back to the default
        # like a missing key does; str(None) would give the literal "None", which never matches.
        raw_version = item.get("version")
        raw_lifecycle = item.get("lifecycle")
        entries.append(
            AgentEntry(
                name=str(item["name"]),
                version="*" if raw_version is None else str(raw_version),
                owner=_opt(item.get("owner")),
                team=_opt(item.get("team")),
                repo=_opt(item.get("repo")),
                lifecycle=Lifecycle("ga" if raw_lifecycle is None else str(raw_lifecycle)),
                sla_tier=_opt(item.get("sla_tier")),
                extra=extra,
            )
        )
    return entries
53
+
54
+
55
class FileSource:
    """Registry source backed by a local YAML or JSON document."""

    def __init__(self, path: str) -> None:
        self._path = path

    def load(self) -> Sequence[AgentEntry]:
        """Open the file as UTF-8, parse it with ``yaml.safe_load`` and return its entries."""
        with open(self._path, encoding="utf-8") as stream:
            document = yaml.safe_load(stream)  # JSON parses too: YAML is a JSON superset
        return parse_entries(document)
65
+
66
+
67
class HttpSource:  # pragma: no cover - requires a live endpoint
    """Registry source that fetches a JSON document over HTTP(S) with stdlib ``urllib``.

    Each ``load()`` call performs one request; this class keeps no cached copy.
    """

    def __init__(self, url: str, *, timeout: float = 5.0) -> None:
        self._url = url
        self._timeout = timeout

    def load(self) -> Sequence[AgentEntry]:
        """Request the URL (asking for JSON), decode the body as UTF-8 and parse its entries."""
        req = urllib.request.Request(self._url, headers={"Accept": "application/json"})
        with urllib.request.urlopen(req, timeout=self._timeout) as resp:
            payload = resp.read()
        document = json.loads(payload.decode("utf-8"))
        return parse_entries(document)


def _opt(value: Any) -> str | None:
    """Stringify ``value``, passing ``None`` through unchanged."""
    if value is None:
        return None
    return str(value)
@@ -0,0 +1,352 @@
1
+ """Tests for the registry: resolution, stamping, chargeback, catalogue, config."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Iterator
6
+ from pathlib import Path
7
+
8
+ import pytest
9
+
10
+ from forgesight_api import Kind, LLMCall, Record, RunStatus, TokenUsage
11
+ from forgesight_core import InMemoryExporter, configure, reset_runtime, telemetry
12
+ from forgesight_registry import (
13
+ AgentCatalogue,
14
+ AgentEntry,
15
+ ChargebackReport,
16
+ Lifecycle,
17
+ Registry,
18
+ RegistryUnmatched,
19
+ install,
20
+ installed_registry,
21
+ reset_for_tests,
22
+ )
23
+
24
+ ENTRIES = [
25
+ {
26
+ "name": "invoice-parser",
27
+ "version": "2.3.0",
28
+ "owner": "fin@acme",
29
+ "team": "finance",
30
+ "repo": "acme/inv",
31
+ "lifecycle": "ga",
32
+ "sla_tier": "tier-1",
33
+ },
34
+ {
35
+ "name": "summariser",
36
+ "version": "*",
37
+ "owner": "growth@acme",
38
+ "team": "growth",
39
+ "lifecycle": "beta",
40
+ },
41
+ ]
42
+
43
+
44
+ @pytest.fixture(autouse=True)
45
+ def _reset() -> Iterator[None]:
46
+ yield
47
+ reset_runtime()
48
+ reset_for_tests()
49
+
50
+
51
+ def _reg(**kw: object) -> Registry:
52
+ return Registry.from_entries(ENTRIES, **kw)
53
+
54
+
55
+ # --- resolution ---------------------------------------------------------------
56
+ def test_resolve_exact_then_wildcard_then_none() -> None:
57
+ reg = _reg()
58
+ assert reg.resolve("invoice-parser", "2.3.0").team == "finance" # type: ignore[union-attr]
59
+ assert reg.resolve("invoice-parser", "9.9.9") is None # exact miss, no wildcard
60
+ assert reg.resolve("summariser", "1.0.0").team == "growth" # type: ignore[union-attr] # wildcard
61
+ assert reg.resolve("unknown", "1.0") is None
62
+
63
+
64
+ def test_ownership_metadata_fields() -> None:
65
+ reg = _reg()
66
+ meta = reg.ownership_metadata("invoice-parser", "2.3.0")
67
+ assert meta == {
68
+ "owner": "fin@acme",
69
+ "team": "finance",
70
+ "repo": "acme/inv",
71
+ "lifecycle": "ga",
72
+ "sla_tier": "tier-1",
73
+ }
74
+
75
+
76
+ def test_ownership_metadata_field_filter_and_prefix() -> None:
77
+ reg = _reg(stamp_fields=["team", "owner"], prefix="org.")
78
+ meta = reg.ownership_metadata("invoice-parser", "2.3.0")
79
+ assert meta == {"org.team": "finance", "org.owner": "fin@acme"}
80
+
81
+
82
+ def test_extra_fields_stamped() -> None:
83
+ reg = Registry.from_entries([{"name": "x", "version": "*", "team": "t", "cost_center": "cc-9"}])
84
+ assert reg.ownership_metadata("x", "1")["cost_center"] == "cc-9"
85
+
86
+
87
+ # --- on_unmatched -------------------------------------------------------------
88
+ def test_on_unmatched_warn_counts(caplog: pytest.LogCaptureFixture) -> None:
89
+ reg = _reg(on_unmatched="warn")
90
+ with caplog.at_level("WARNING"):
91
+ assert reg.ownership_metadata("ghost", "1.0") == {}
92
+ assert reg.unmatched_count == 1
93
+ assert any("undeclared agent" in r.message for r in caplog.records)
94
+
95
+
96
+ def test_on_unmatched_ignore_silent() -> None:
97
+ reg = _reg(on_unmatched="ignore")
98
+ assert reg.ownership_metadata("ghost", "1.0") == {}
99
+ assert reg.unmatched_count == 1
100
+
101
+
102
+ def test_on_unmatched_error_raises() -> None:
103
+ reg = _reg(on_unmatched="error")
104
+ with pytest.raises(RegistryUnmatched):
105
+ reg.ownership_metadata("ghost", "1.0")
106
+
107
+
108
+ def test_invalid_on_unmatched() -> None:
109
+ with pytest.raises(ValueError, match="on_unmatched must be"):
110
+ _reg(on_unmatched="explode")
111
+
112
+
113
+ # --- stamping at run start (core hook) ---------------------------------------
114
+ def test_stamps_ownership_on_run_and_children() -> None:
115
+ reg = _reg()
116
+ exporter = InMemoryExporter()
117
+ configure(exporters=[exporter], sync_export=True, run_metadata_provider=reg.ownership_metadata)
118
+ with (
119
+ telemetry.agent_run("invoice-parser", version="2.3.0") as run,
120
+ run.llm_call("anthropic", "m"),
121
+ ):
122
+ pass
123
+ agent = next(r for r in exporter.records if r.kind is Kind.AGENT)
124
+ llm = next(r for r in exporter.records if r.kind is Kind.LLM)
125
+ assert agent.attributes["team"] == "finance"
126
+ assert agent.attributes["owner"] == "fin@acme"
127
+ assert llm.attributes["team"] == "finance" # propagated onto the child (FR-5)
128
+
129
+
130
+ def test_caller_metadata_wins_over_registry() -> None:
131
+ reg = _reg()
132
+ exporter = InMemoryExporter()
133
+ configure(exporters=[exporter], sync_export=True, run_metadata_provider=reg.ownership_metadata)
134
+ with telemetry.agent_run("invoice-parser", version="2.3.0", metadata={"team": "override"}):
135
+ pass
136
+ agent = next(r for r in exporter.records if r.kind is Kind.AGENT)
137
+ assert agent.attributes["team"] == "override" # caller-set key wins
138
+
139
+
140
+ def test_unregistered_run_is_unstamped() -> None:
141
+ reg = _reg(on_unmatched="ignore")
142
+ exporter = InMemoryExporter()
143
+ configure(exporters=[exporter], sync_export=True, run_metadata_provider=reg.ownership_metadata)
144
+ with telemetry.agent_run("ghost", version="9.9"):
145
+ pass
146
+ agent = next(r for r in exporter.records if r.kind is Kind.AGENT)
147
+ assert "team" not in agent.attributes
148
+ assert reg.unmatched_count == 1
149
+
150
+
151
+ # --- file source --------------------------------------------------------------
152
+ def test_from_file(tmp_path: Path) -> None:
153
+ path = tmp_path / "agents.yaml"
154
+ path.write_text(
155
+ "agents:\n"
156
+ " - name: a\n version: '1.0'\n team: t1\n owner: o1\n"
157
+ " - name: b\n team: t2\n"
158
+ )
159
+ reg = Registry.from_file(str(path))
160
+ assert reg.resolve("a", "1.0").team == "t1" # type: ignore[union-attr]
161
+ assert reg.resolve("b", "anything").team == "t2" # type: ignore[union-attr] # default wildcard
162
+
163
+
164
+ # --- chargeback ---------------------------------------------------------------
165
+ def _agent(team: str, env: str, run_id: str, status: RunStatus = RunStatus.OK) -> Record:
166
+ from types import MappingProxyType
167
+
168
+ return Record(
169
+ kind=Kind.AGENT,
170
+ run_id=run_id,
171
+ trace_id="t",
172
+ span_id=run_id,
173
+ parent_span_id=None,
174
+ name="a",
175
+ status=status,
176
+ start_unix_nanos=1,
177
+ end_unix_nanos=2,
178
+ attributes=MappingProxyType({"team": team, "environment": env}),
179
+ )
180
+
181
+
182
+ def _llm(team: str, env: str, run_id: str, cost: float, tokens: int = 100) -> Record:
183
+ from types import MappingProxyType
184
+
185
+ return Record(
186
+ kind=Kind.LLM,
187
+ run_id=run_id,
188
+ trace_id="t",
189
+ span_id=f"{run_id}-l",
190
+ parent_span_id=run_id,
191
+ name="m",
192
+ status=RunStatus.OK,
193
+ start_unix_nanos=1,
194
+ end_unix_nanos=2,
195
+ attributes=MappingProxyType({"team": team, "environment": env}),
196
+ llm=LLMCall(provider="p", request_model="m", usage=TokenUsage(input=tokens), cost_usd=cost),
197
+ )
198
+
199
+
200
+ def test_chargeback_groups_and_totals() -> None:
201
+ records = [
202
+ _agent("growth", "prod", "r1"),
203
+ _llm("growth", "prod", "r1", 0.10, 100),
204
+ _agent("growth", "prod", "r2"),
205
+ _llm("growth", "prod", "r2", 0.20, 200),
206
+ _agent("research", "dev", "r3", RunStatus.ERROR),
207
+ _llm("research", "dev", "r3", 0.05, 50),
208
+ ]
209
+ report = ChargebackReport.from_records(records, dimensions=["team", "environment"])
210
+ rows = {(r.dimensions["team"], r.dimensions["environment"]): r for r in report.rows()}
211
+ growth = rows[("growth", "prod")]
212
+ assert growth.cost_usd == pytest.approx(0.30)
213
+ assert growth.run_count == 2
214
+ assert growth.token_total == 300
215
+ assert growth.failure_count == 0
216
+ assert rows[("research", "dev")].failure_count == 1
217
+ assert report.total_usd() == pytest.approx(0.35)
218
+
219
+
220
+ def test_chargeback_unattributed_bucket() -> None:
221
+ from types import MappingProxyType
222
+
223
+ rec = Record(
224
+ kind=Kind.LLM,
225
+ run_id="r",
226
+ trace_id="t",
227
+ span_id="s",
228
+ parent_span_id=None,
229
+ name="m",
230
+ status=RunStatus.OK,
231
+ start_unix_nanos=1,
232
+ end_unix_nanos=2,
233
+ attributes=MappingProxyType({}), # no team
234
+ llm=LLMCall(provider="p", request_model="m", cost_usd=0.5),
235
+ )
236
+ report = ChargebackReport.from_records([rec], dimensions=["team"])
237
+ assert report.rows()[0].dimensions["team"] == "<unattributed>" # cost never vanishes
238
+ assert report.total_usd() == pytest.approx(0.5)
239
+
240
+
241
+ # --- catalogue ----------------------------------------------------------------
242
+ def test_catalogue_declared_active_silent_undeclared() -> None:
243
+ reg = Registry.from_entries(
244
+ [
245
+ {
246
+ "name": "active-agent",
247
+ "version": "1.0",
248
+ "owner": "o",
249
+ "team": "t",
250
+ "lifecycle": "ga",
251
+ },
252
+ {"name": "silent-agent", "version": "1.0", "owner": "o2", "lifecycle": "deprecated"},
253
+ ]
254
+ )
255
+ now = 1_000 * 86_400 * 1_000_000_000 # day 1000 in ns
256
+ records = [
257
+ _agent("t", "prod", "ra"),
258
+ _llm("t", "prod", "ra", 0.4),
259
+ ]
260
+ # relabel the active agent's records to name "active-agent"
261
+ from dataclasses import replace
262
+
263
+ records = [replace(r, name="active-agent") if r.kind is Kind.AGENT else r for r in records]
264
+ records.append(_agent("x", "dev", "ru"))
265
+ records[-1] = replace(records[-1], name="rogue-agent") # undeclared
266
+
267
+ # stamp recent end times so the cost falls inside the 30-day window
268
+ catalogue = AgentCatalogue.from_records(
269
+ [replace(r, end_unix_nanos=now) for r in records], registry=reg, now_unix_nanos=now
270
+ )
271
+ by_name = {e.name: e for e in catalogue.entries()}
272
+ assert by_name["active-agent"].active is True
273
+ assert by_name["active-agent"].cost_30d == pytest.approx(0.4)
274
+ assert by_name["silent-agent"].active is False # declared but no runs
275
+ assert by_name["silent-agent"].lifecycle is Lifecycle.DEPRECATED
276
+ assert by_name["rogue-agent"].declared is False # active but undeclared
277
+ assert by_name["rogue-agent"].owner is None
278
+
279
+
280
+ def test_catalogue_cost_window_excludes_old() -> None:
281
+ from dataclasses import replace
282
+
283
+ reg = Registry.from_entries([{"name": "a", "version": "*", "team": "t"}])
284
+ now = 1_000 * 86_400 * 1_000_000_000
285
+ old = now - 40 * 86_400 * 1_000_000_000 # 40 days ago, outside the 30-day window
286
+ records = [
287
+ replace(_agent("t", "prod", "r1"), name="a", end_unix_nanos=old),
288
+ replace(_llm("t", "prod", "r1", 0.9), end_unix_nanos=old),
289
+ ]
290
+ catalogue = AgentCatalogue.from_records(records, registry=reg, now_unix_nanos=now)
291
+ assert catalogue.entries()[0].cost_30d == 0.0 # old cost excluded from the window
292
+
293
+
294
+ # --- config / install ---------------------------------------------------------
295
+ def test_from_config_disabled_stamps_nothing() -> None:
296
+ reg = Registry.from_config({"registry": {"enabled": False, "source": "file", "path": "x"}})
297
+ assert reg.entries == [] # not switched on ⇒ empty
298
+
299
+
300
+ def test_from_config_file(tmp_path: Path) -> None:
301
+ path = tmp_path / "a.yaml"
302
+ path.write_text("agents:\n - name: a\n team: t\n")
303
+ reg = Registry.from_config({"registry": {"enabled": True, "source": "file", "path": str(path)}})
304
+ assert reg.resolve("a", "1").team == "t" # type: ignore[union-attr]
305
+
306
+
307
+ def test_from_config_file_requires_path() -> None:
308
+ with pytest.raises(ValueError, match="requires path"):
309
+ Registry.from_config({"registry": {"enabled": True, "source": "file"}})
310
+
311
+
312
+ def test_from_config_unknown_source() -> None:
313
+ with pytest.raises(ValueError, match="unknown registry source"):
314
+ Registry.from_config({"registry": {"enabled": True, "source": "carrier-pigeon"}})
315
+
316
+
317
+ def test_install_stashes_registry(tmp_path: Path) -> None:
318
+ path = tmp_path / "a.yaml"
319
+ path.write_text("agents:\n - name: a\n team: t\n")
320
+ install({"registry": {"enabled": True, "source": "file", "path": str(path)}})
321
+ assert installed_registry() is not None
322
+ assert installed_registry().resolve("a", "1").team == "t" # type: ignore[union-attr]
323
+
324
+
325
+ def test_agent_entry_lifecycle_default() -> None:
326
+ entry = AgentEntry(name="x")
327
+ assert entry.lifecycle is Lifecycle.GA
328
+ assert entry.version == "*"
329
+
330
+
331
+ def test_from_config_http_requires_url() -> None:
332
+ with pytest.raises(ValueError, match="requires url"):
333
+ Registry.from_config({"registry": {"enabled": True, "source": "http"}})
334
+
335
+
336
+ def test_parse_entries_skips_malformed() -> None:
337
+ from forgesight_registry.source import parse_entries
338
+
339
+ assert parse_entries({"agents": "not-a-list"}) == []
340
+ assert parse_entries(42) == [] # not a list at all
341
+ reg = Registry.from_entries(["not-a-dict", {"no_name": "x"}, {"name": "ok", "team": "t"}])
342
+ assert len(reg.entries) == 1 # only the well-formed entry survives
343
+ assert reg.entries[0].name == "ok"
344
+
345
+
346
+ def test_chargeback_ignores_orphan_llm_in_catalogue() -> None:
347
+ # an LLM record whose run_id has no agent run is not attributed to any agent
348
+ reg = Registry.from_entries([{"name": "a", "version": "*", "team": "t"}])
349
+ now = 1_000 * 86_400 * 1_000_000_000
350
+ orphan = _llm("t", "prod", "no-agent-run", 0.9)
351
+ catalogue = AgentCatalogue.from_records([orphan], registry=reg, now_unix_nanos=now)
352
+ assert catalogue.entries()[0].cost_30d == 0.0 # declared "a" has no runs; orphan cost ignored