askfaro-progressive-context 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- askfaro_progressive_context/__init__.py +57 -0
- askfaro_progressive_context/build/__init__.py +33 -0
- askfaro_progressive_context/build/_frontmatter.py +62 -0
- askfaro_progressive_context/build/adapters/__init__.py +21 -0
- askfaro_progressive_context/build/adapters/base.py +33 -0
- askfaro_progressive_context/build/adapters/docs.py +61 -0
- askfaro_progressive_context/build/adapters/memory.py +56 -0
- askfaro_progressive_context/build/adapters/skills.py +62 -0
- askfaro_progressive_context/build/adapters/tools.py +82 -0
- askfaro_progressive_context/build/compiler.py +71 -0
- askfaro_progressive_context/build/cost.py +47 -0
- askfaro_progressive_context/build/descriptors.py +329 -0
- askfaro_progressive_context/build/emit.py +120 -0
- askfaro_progressive_context/build/ir.py +69 -0
- askfaro_progressive_context/cli.py +175 -0
- askfaro_progressive_context/eval.py +141 -0
- askfaro_progressive_context/llm.py +132 -0
- askfaro_progressive_context/navigator.py +98 -0
- askfaro_progressive_context/py.typed +0 -0
- askfaro_progressive_context/runtime.py +288 -0
- askfaro_progressive_context/schema/pcx-0.1.schema.json +123 -0
- askfaro_progressive_context/session.py +100 -0
- askfaro_progressive_context/tokenizer.py +36 -0
- askfaro_progressive_context/types.py +167 -0
- askfaro_progressive_context/validate.py +112 -0
- askfaro_progressive_context-0.1.0.dist-info/METADATA +144 -0
- askfaro_progressive_context-0.1.0.dist-info/RECORD +30 -0
- askfaro_progressive_context-0.1.0.dist-info/WHEEL +4 -0
- askfaro_progressive_context-0.1.0.dist-info/entry_points.txt +2 -0
- askfaro_progressive_context-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""askfaro-progressive-context: compile any content into a tiered, budget-aware,
|
|
2
|
+
agent-navigable progressive-disclosure manifest, plus an expansion protocol."""
|
|
3
|
+
|
|
4
|
+
from .eval import CaseResult, EvalReport, NavCase, run_case, run_eval
|
|
5
|
+
from .llm import LLMClient, OpenAICompatibleClient
|
|
6
|
+
from .navigator import KeywordNavigator, LLMNavigator, Navigator
|
|
7
|
+
from .runtime import (
|
|
8
|
+
VIEW_LEVELS,
|
|
9
|
+
BudgetExceeded,
|
|
10
|
+
FrontierEntry,
|
|
11
|
+
LeafResolver,
|
|
12
|
+
Runtime,
|
|
13
|
+
SearchBackend,
|
|
14
|
+
dict_resolver,
|
|
15
|
+
render_descriptor,
|
|
16
|
+
)
|
|
17
|
+
from .session import LOCAL, REMOTE, ModeConfig, NavSession
|
|
18
|
+
from .tokenizer import make_tokenizer
|
|
19
|
+
from .types import PROTOCOL_USAGE, Manifest, Node, Payload, Variant, estimate_tokens
|
|
20
|
+
from .validate import schema_errors, structural_errors, validate
|
|
21
|
+
|
|
22
|
+
__version__ = "0.0.7"
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"BudgetExceeded",
|
|
26
|
+
"CaseResult",
|
|
27
|
+
"EvalReport",
|
|
28
|
+
"FrontierEntry",
|
|
29
|
+
"KeywordNavigator",
|
|
30
|
+
"LLMClient",
|
|
31
|
+
"LLMNavigator",
|
|
32
|
+
"LOCAL",
|
|
33
|
+
"REMOTE",
|
|
34
|
+
"LeafResolver",
|
|
35
|
+
"Manifest",
|
|
36
|
+
"ModeConfig",
|
|
37
|
+
"NavCase",
|
|
38
|
+
"NavSession",
|
|
39
|
+
"Navigator",
|
|
40
|
+
"Node",
|
|
41
|
+
"OpenAICompatibleClient",
|
|
42
|
+
"PROTOCOL_USAGE",
|
|
43
|
+
"Payload",
|
|
44
|
+
"Runtime",
|
|
45
|
+
"SearchBackend",
|
|
46
|
+
"VIEW_LEVELS",
|
|
47
|
+
"Variant",
|
|
48
|
+
"dict_resolver",
|
|
49
|
+
"estimate_tokens",
|
|
50
|
+
"make_tokenizer",
|
|
51
|
+
"render_descriptor",
|
|
52
|
+
"run_case",
|
|
53
|
+
"run_eval",
|
|
54
|
+
"schema_errors",
|
|
55
|
+
"structural_errors",
|
|
56
|
+
"validate",
|
|
57
|
+
]
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""The compiler: source content -> annotated tree -> pcx manifest variants.
|
|
2
|
+
|
|
3
|
+
Pipeline: an Adapter yields a SourceTree (native structure, verbatim leaves),
|
|
4
|
+
the descriptor engine generates what/when/keywords, cost annotation tokenizes
|
|
5
|
+
and rolls up subtree costs, and emit writes one manifest per budget variant
|
|
6
|
+
plus an llms.txt export.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .compiler import BuildResult, compile_source
|
|
10
|
+
from .descriptors import (
|
|
11
|
+
Descriptor,
|
|
12
|
+
DescriptorModel,
|
|
13
|
+
FakeDescriptorModel,
|
|
14
|
+
Grade,
|
|
15
|
+
LLMDescriptorModel,
|
|
16
|
+
cache_from_manifest,
|
|
17
|
+
generate_descriptors,
|
|
18
|
+
)
|
|
19
|
+
from .ir import SourceNode, SourceTree
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"BuildResult",
|
|
23
|
+
"Descriptor",
|
|
24
|
+
"DescriptorModel",
|
|
25
|
+
"FakeDescriptorModel",
|
|
26
|
+
"Grade",
|
|
27
|
+
"LLMDescriptorModel",
|
|
28
|
+
"SourceNode",
|
|
29
|
+
"SourceTree",
|
|
30
|
+
"cache_from_manifest",
|
|
31
|
+
"compile_source",
|
|
32
|
+
"generate_descriptors",
|
|
33
|
+
]
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Tiny frontmatter reader for markdown sources.
|
|
2
|
+
|
|
3
|
+
Uses PyYAML if available; otherwise a minimal parser that handles the shapes
|
|
4
|
+
our adapters need: top-level `key: value`, inline lists `[a, b]`, and a single
|
|
5
|
+
nested mapping block (e.g. a one-fact memory store's `metadata:`). Keeps the core
|
|
6
|
+
dependency-free.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def split_frontmatter(text: str) -> tuple[dict[str, Any], str]:
    """Split `text` into (frontmatter mapping, remaining body).

    The text counts as having frontmatter only when it begins with `---`
    and a later line begins with `---` as well. Otherwise an empty mapping
    and the untouched text are returned.
    """
    fence = "---"
    # Look for the closing fence only when the opening one is present.
    close_at = text.find("\n" + fence, len(fence)) if text.startswith(fence) else -1
    if close_at < 0:
        return {}, text

    header = text[len(fence):close_at].strip("\n")
    # Skip the newline plus the three fence characters, then leading blank lines.
    remainder = text[close_at + len(fence) + 1:].lstrip("\n")
    return _parse_yaml(header), remainder
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _parse_yaml(block: str) -> dict[str, Any]:
    """Parse a frontmatter block into a dict, preferring PyYAML when installed.

    Falls back to `_minimal_parse` when PyYAML is not importable, and also
    when PyYAML rejects the block (for example an unquoted value containing
    `: `). Without that second fallback the same file would parse on an
    install without PyYAML yet raise on one with it.

    Returns an empty dict when the YAML document is not a mapping.
    """
    # Keep the try body to the import alone so an ImportError raised from
    # anywhere else is not mistaken for "PyYAML is missing".
    try:
        import yaml  # type: ignore
    except ImportError:
        return _minimal_parse(block)

    try:
        data = yaml.safe_load(block)
    except yaml.YAMLError:
        # Best effort: one malformed header should not abort the whole load.
        return _minimal_parse(block)
    return data if isinstance(data, dict) else {}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _scalar(v: str) -> Any:
|
|
36
|
+
v = v.strip()
|
|
37
|
+
if v.startswith("[") and v.endswith("]"):
|
|
38
|
+
inner = v[1:-1].strip()
|
|
39
|
+
return [x.strip().strip("\"'") for x in inner.split(",")] if inner else []
|
|
40
|
+
return v.strip("\"'")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _minimal_parse(block: str) -> dict[str, Any]:
|
|
44
|
+
out: dict[str, Any] = {}
|
|
45
|
+
parent: str | None = None
|
|
46
|
+
for line in block.splitlines():
|
|
47
|
+
if not line.strip() or line.strip().startswith("#"):
|
|
48
|
+
continue
|
|
49
|
+
indented = line[0] in " \t"
|
|
50
|
+
key, _, val = line.strip().partition(":")
|
|
51
|
+
key = key.strip()
|
|
52
|
+
if indented and parent is not None:
|
|
53
|
+
if not isinstance(out.get(parent), dict):
|
|
54
|
+
out[parent] = {}
|
|
55
|
+
out[parent][key] = _scalar(val)
|
|
56
|
+
elif val.strip() == "":
|
|
57
|
+
out[key] = {}
|
|
58
|
+
parent = key
|
|
59
|
+
else:
|
|
60
|
+
out[key] = _scalar(val)
|
|
61
|
+
parent = None
|
|
62
|
+
return out
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Adapters turn a source on disk into a SourceTree.
|
|
2
|
+
|
|
3
|
+
Phase 1 ships the four already-hierarchical kinds (no structure inference):
|
|
4
|
+
docs, skills, tools, memory.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .base import Adapter, get_adapter, register_adapter
|
|
8
|
+
from .docs import DocsAdapter
|
|
9
|
+
from .memory import MemoryAdapter
|
|
10
|
+
from .skills import SkillsAdapter
|
|
11
|
+
from .tools import ToolsAdapter
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"Adapter",
|
|
15
|
+
"DocsAdapter",
|
|
16
|
+
"MemoryAdapter",
|
|
17
|
+
"SkillsAdapter",
|
|
18
|
+
"ToolsAdapter",
|
|
19
|
+
"get_adapter",
|
|
20
|
+
"register_adapter",
|
|
21
|
+
]
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
|
|
6
|
+
from ..ir import SourceTree
|
|
7
|
+
|
|
8
|
+
_REGISTRY: dict[str, "Adapter"] = {}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Adapter(Protocol):
    """Structural interface for objects that load a source path into a SourceTree."""

    # Key under which `register_adapter` stores the adapter and `get_adapter` finds it.
    kind: str

    def load(self, path: Path, *, source_id: str | None = None) -> SourceTree:
        """Build a SourceTree from `path`; `source_id` is an optional keyword-only id."""
        ...
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def register_adapter(adapter: "Adapter") -> "Adapter":
    """Store `adapter` in the module registry under its `kind` and return it.

    An adapter already registered under the same kind is replaced. Returning
    the adapter lets callers register and bind it in one expression.
    """
    _REGISTRY.update({adapter.kind: adapter})
    return adapter
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_adapter(kind: str) -> "Adapter":
    """Return the adapter registered for `kind`.

    Raises:
        KeyError: if no adapter is registered under `kind`; the message
            lists the known kinds in sorted order.
    """
    if kind in _REGISTRY:
        return _REGISTRY[kind]
    raise KeyError(f"unknown adapter kind {kind!r}; known: {sorted(_REGISTRY)}")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def slugify(text: str) -> str:
    """Lower-case `text` and reduce it to alphanumeric words joined by `-`.

    Every non-alphanumeric character acts as a separator, and runs of
    separators collapse to a single hyphen. When nothing alphanumeric is
    left, the placeholder `"node"` is returned.
    """
    hyphenated = "".join(ch if ch.isalnum() else "-" for ch in text.lower())
    # Dropping the empty pieces both trims edge hyphens and collapses runs.
    words = [word for word in hyphenated.split("-") if word]
    return "-".join(words) if words else "node"
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""docs adapter — a directory tree of markdown.
|
|
2
|
+
|
|
3
|
+
Mirrors the filesystem: directories become branches, `.md`/`.mdx` files become
|
|
4
|
+
leaves. Titles come from the first H1 if present, else the filename. The native
|
|
5
|
+
hierarchy is used as-is (no clustering).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from .._frontmatter import split_frontmatter
|
|
14
|
+
from ..ir import SourceNode, SourceTree
|
|
15
|
+
from .base import register_adapter, slugify
|
|
16
|
+
|
|
17
|
+
_H1 = re.compile(r"^#\s+(.+)$", re.MULTILINE)
|
|
18
|
+
_DOC_EXT = {".md", ".mdx", ".markdown"}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _title_of(body: str, fallback: str) -> str:
    """Return the text of the first `# ` heading line in `body`, else `fallback`."""
    heading = _H1.search(body)
    if heading is None:
        return fallback
    return heading.group(1).strip()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _humanize(name: str) -> str:
|
|
27
|
+
return name.replace("-", " ").replace("_", " ").strip().title()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class _DocsAdapter:
    """Adapter that mirrors a directory tree of markdown files.

    Directories become branch nodes and files whose suffix is in `_DOC_EXT`
    become leaves.
    """

    kind = "docs"

    def _file_node(self, file: Path) -> SourceNode:
        """Build the leaf node for one markdown file.

        The title comes from the `title` frontmatter key, then the first H1
        in the body, then the humanized file stem.
        """
        # Decode as UTF-8 explicitly: the default for read_text() follows the
        # platform locale, which mis-decodes non-ASCII docs on some systems.
        fm, body = split_frontmatter(file.read_text(encoding="utf-8"))
        title = fm.get("title") or _title_of(body, _humanize(file.stem))
        return SourceNode(
            id=slugify(file.stem),
            title=title,
            content=body.strip(),
            hint=fm.get("description"),
        )

    def _dir_node(self, directory: Path, node_id: str, title: str) -> SourceNode:
        """Recursively build the branch node for `directory`."""
        node = SourceNode(id=node_id, title=title, hint=f"Docs under {directory.name}/.")
        # Sort key puts directories (is_file() == False) before files, each by name.
        for child in sorted(directory.iterdir(), key=lambda p: (p.is_file(), p.name)):
            if child.is_dir():
                sub = self._dir_node(child, slugify(child.name), _humanize(child.name))
                # Drop subdirectories that ended up with no children.
                if sub.children:
                    node.children.append(sub)
            elif child.suffix.lower() in _DOC_EXT:
                node.children.append(self._file_node(child))
        return node

    def load(self, path: Path, *, source_id: str | None = None) -> SourceTree:
        """Load the tree rooted at `path`; `source_id` defaults to the directory name."""
        root_dir = Path(path)
        sid = source_id or root_dir.name
        root = self._dir_node(root_dir, "r", sid)
        return SourceTree(source_id=sid, kind=self.kind, root=root)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
DocsAdapter = register_adapter(_DocsAdapter())
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""memory adapter — a directory of one-fact markdown files with frontmatter.
|
|
2
|
+
|
|
3
|
+
Each file is a leaf; `name`/`description` frontmatter become the title/hint and
|
|
4
|
+
the body is the verbatim content. Files are grouped into tier-1 branches by
|
|
5
|
+
`metadata.type` (e.g. user / feedback / project / reference) when present —
|
|
6
|
+
matching a one-fact-per-file memory layout; generic to any such collection.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from .._frontmatter import split_frontmatter
|
|
14
|
+
from ..ir import SourceNode, SourceTree
|
|
15
|
+
from .base import register_adapter, slugify
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class _MemoryAdapter:
    """Adapter for a flat directory of one-fact markdown files.

    Each `*.md` file directly under the directory becomes a leaf, except the
    `MEMORY.md` index. Leaves are grouped into branches by the `type` key of
    their `metadata` frontmatter when more than one group exists.
    """

    kind = "memory"

    def load(self, path: Path, *, source_id: str | None = None) -> SourceTree:
        """Load the memory store at `path`; `source_id` defaults to the directory name."""
        root_dir = Path(path)
        sid = source_id or root_dir.name

        groups: dict[str, list[SourceNode]] = {}
        for md in sorted(root_dir.glob("*.md")):
            if md.name.upper() == "MEMORY.MD":
                continue  # the index, not a fact
            # Decode as UTF-8 explicitly rather than with the platform locale.
            fm, body = split_frontmatter(md.read_text(encoding="utf-8"))

            meta = fm.get("metadata")
            raw_type = meta.get("type") if isinstance(meta, dict) else None
            # YAML can yield None or a non-string here (e.g. `type:` left
            # empty). Group keys must all be strings or sorting them below
            # raises TypeError.
            mtype = str(raw_type) if raw_type else ""

            # Coerce for the same reason: `name: 404` parses as an int and
            # slugify() needs a string.
            name = str(fm.get("name") or md.stem)
            leaf = SourceNode(
                id=slugify(name),
                title=name,
                content=body.strip(),
                hint=fm.get("description"),
            )
            groups.setdefault(mtype, []).append(leaf)

        root = SourceNode(id="r", title=sid, hint="A memory store of discrete facts.")
        if len(groups) <= 1:
            # A single group adds no navigational value; attach leaves directly.
            for leaves in groups.values():
                root.children.extend(leaves)
        else:
            for mtype, leaves in sorted(groups.items()):
                if not mtype:
                    # Untyped facts stay at the top level next to the branches.
                    root.children.extend(leaves)
                    continue
                root.children.append(
                    SourceNode(id=slugify(mtype), title=mtype, hint=f"{mtype} memories.", children=leaves)
                )
        return SourceTree(source_id=sid, kind=self.kind, root=root)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
MemoryAdapter = register_adapter(_MemoryAdapter())
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""skills adapter — a directory of skills, one markdown file per skill.
|
|
2
|
+
|
|
3
|
+
Each skill file carries frontmatter (`name`, `description`, and optionally
|
|
4
|
+
`when`/`when_to_use` and `category`); the body is the verbatim skill content.
|
|
5
|
+
Skills are grouped into tier-1 branches by `category` when present. This is a
|
|
6
|
+
generic skills layout — a host-side shim maps your skill source
|
|
7
|
+
into it without this package importing the host app.
|
|
8
|
+
|
|
9
|
+
Skill *selection* is exactly the agent-navigated case: a name+purpose+when
|
|
10
|
+
manifest up front, the full skill body fetched on expand.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from .._frontmatter import split_frontmatter
|
|
18
|
+
from ..ir import SourceNode, SourceTree
|
|
19
|
+
from .base import register_adapter, slugify
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class _SkillsAdapter:
    """Adapter for a directory of skills, one markdown file per skill.

    Every `*.md` file found recursively becomes a leaf. Leaves are grouped
    into branches by the `category` frontmatter key when more than one
    group exists.
    """

    kind = "skills"

    def load(self, path: Path, *, source_id: str | None = None) -> SourceTree:
        """Load the skills under `path`; `source_id` defaults to the directory name."""
        root_dir = Path(path)
        sid = source_id or root_dir.name

        groups: dict[str, list[SourceNode]] = {}
        for md in sorted(root_dir.rglob("*.md")):
            # Decode as UTF-8 explicitly rather than with the platform locale.
            fm, body = split_frontmatter(md.read_text(encoding="utf-8"))

            # Coerce to str: `name: 404` parses as an int and slugify()
            # needs a string.
            name = str(fm.get("name") or md.stem)

            when_hint = fm.get("when") or fm.get("when_to_use")
            hint = fm.get("description")
            if when_hint:
                hint = f"{hint or ''}\nWhen to use: {when_hint}".strip()

            # A null, numeric or list-valued category would break the dict
            # grouping or the sort below; normalise it to a string key.
            category = str(fm.get("category") or "")

            keywords = fm.get("keywords")
            leaf = SourceNode(
                id=slugify(name),
                title=name,
                content=body.strip(),
                hint=hint,
                keywords=keywords if isinstance(keywords, list) else [],
            )
            groups.setdefault(category, []).append(leaf)

        root = SourceNode(id="r", title=f"{sid} skills", hint="Reusable skills for performing tasks.")
        if len(groups) <= 1:
            # A single group adds no navigational value; attach leaves directly.
            for leaves in groups.values():
                root.children.extend(leaves)
        else:
            for category, leaves in sorted(groups.items()):
                if not category:
                    # Uncategorised skills stay at the top level.
                    root.children.extend(leaves)
                    continue
                root.children.append(
                    SourceNode(id=slugify(category), title=category, hint=f"{category} skills.", children=leaves)
                )
        return SourceTree(source_id=sid, kind=self.kind, root=root)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
SkillsAdapter = register_adapter(_SkillsAdapter())
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""tools adapter — a JSON file of tool/function schemas.
|
|
2
|
+
|
|
3
|
+
Accepts an OpenAI-style list (`[{name, description, parameters}, ...]`), a
|
|
4
|
+
`{"tools": [...]}` wrapper, or a `{name: schema}` mapping. Each tool becomes a
|
|
5
|
+
leaf whose verbatim content is its full schema; the existing `description`
|
|
6
|
+
becomes a descriptor hint. Tools are grouped into tier-1 branches by namespace
|
|
7
|
+
(the part before the first `.`/`/`/`:`/`__` in the name) when present.
|
|
8
|
+
|
|
9
|
+
This is the proven progressive-tool-disclosure pattern: a tiny name+purpose
|
|
10
|
+
manifest up front, the full schema fetched on expand.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import re
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
from ..ir import SourceNode, SourceTree
|
|
20
|
+
from .base import register_adapter, slugify
|
|
21
|
+
|
|
22
|
+
_NS = re.compile(r"[./:]|__")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _normalize(raw) -> list[dict]:
|
|
26
|
+
if isinstance(raw, dict) and "tools" in raw:
|
|
27
|
+
raw = raw["tools"]
|
|
28
|
+
if isinstance(raw, dict):
|
|
29
|
+
out = []
|
|
30
|
+
for name, schema in raw.items():
|
|
31
|
+
entry = dict(schema) if isinstance(schema, dict) else {"schema": schema}
|
|
32
|
+
entry.setdefault("name", name)
|
|
33
|
+
out.append(entry)
|
|
34
|
+
return out
|
|
35
|
+
return list(raw)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _name_of(tool: dict) -> str:
|
|
39
|
+
return tool.get("name") or tool.get("function", {}).get("name") or "tool"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _description_of(tool: dict) -> str | None:
|
|
43
|
+
return tool.get("description") or tool.get("function", {}).get("description")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class _ToolsAdapter:
    """Adapter for a JSON file of tool/function schemas.

    Each tool becomes a leaf whose content is its schema re-serialised as
    JSON. Tools are grouped into branches by the part of the name before the
    first `_NS` separator, when the name has one.
    """

    kind = "tools"

    def load(self, path: Path, *, source_id: str | None = None) -> SourceTree:
        """Load the tool file at `path`; `source_id` defaults to the file stem."""
        # Decode as UTF-8 explicitly rather than with the platform locale,
        # which can mis-decode non-ASCII text in the schemas.
        tools = _normalize(json.loads(Path(path).read_text(encoding="utf-8")))
        sid = source_id or Path(path).stem

        groups: dict[str, list[SourceNode]] = {}
        for tool in tools:
            name = _name_of(tool)
            # A split that yields more than one part means the name had a
            # separator; the first part is then the namespace.
            ns_match = _NS.split(name, 1)
            ns = ns_match[0] if len(ns_match) > 1 else ""
            leaf = SourceNode(
                id=slugify(name),
                title=name,
                content=json.dumps(tool, indent=2, sort_keys=True),
                format="json",
                hint=_description_of(tool),
                keywords=[t for t in _NS.split(name) if t],
            )
            groups.setdefault(ns, []).append(leaf)

        root = SourceNode(id="r", title=f"{sid} tools", hint="Callable tools and their schemas.")
        if len(groups) == 1 and "" in groups:
            # No tool has a namespace: attach the leaves directly to the root.
            root.children = groups[""]
        else:
            for ns, leaves in sorted(groups.items()):
                if ns == "":
                    # Tools without a namespace stay at the top level.
                    root.children.extend(leaves)
                    continue
                root.children.append(
                    SourceNode(id=slugify(ns), title=ns, hint=f"Tools in the {ns} namespace.", children=leaves)
                )
        return SourceTree(source_id=sid, kind=self.kind, root=root)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
ToolsAdapter = register_adapter(_ToolsAdapter())
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Compiler orchestration: SourceTree -> descriptors -> costs -> manifests."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from ..tokenizer import Tokenizer, heuristic_tokenizer
|
|
9
|
+
from .cost import annotate
|
|
10
|
+
from .descriptors import DescriptorModel, cache_from_manifest, generate_descriptors
|
|
11
|
+
from .emit import build_manifest, to_llms_txt
|
|
12
|
+
from .ir import SourceTree
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class BuildResult:
    """Outputs of one `compile_source` run."""

    source_id: str  # taken from the compiled tree's `source_id`
    manifests: dict[int, dict[str, Any]]  # budget -> manifest dict
    llms_txt: str  # text returned by `to_llms_txt` for the same tree
    stats: dict[str, Any] = field(default_factory=dict)  # build counters (node and token totals)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def compile_source(
    tree: SourceTree,
    model: DescriptorModel,
    budgets: list[int],
    *,
    tokenizer: Tokenizer | None = None,
    contrastive: bool = True,
    contrast_chunk: int = 8,
    grade_threshold: float = 0.7,
    max_repairs: int = 1,
    max_workers: int = 1,
    prior_manifest: dict | None = None,
    generated_at: str | None = None,
) -> BuildResult:
    """Compile `tree` into one manifest per budget plus an llms.txt export.

    Args:
        tree: The source tree to compile.
        model: Descriptor model handed to `generate_descriptors`.
        budgets: Budget values; duplicates are dropped and the rest sorted.
            Must contain at least one value.
        tokenizer: Token counter for cost annotation; defaults to
            `heuristic_tokenizer()`.
        contrastive, contrast_chunk, grade_threshold, max_repairs,
            max_workers: Forwarded unchanged to `generate_descriptors`.
        prior_manifest: Earlier manifest; when given, a descriptor cache is
            built from it with `cache_from_manifest`.
        generated_at: Forwarded unchanged to `build_manifest`.

    Returns:
        A `BuildResult` holding the manifests keyed by budget, the llms.txt
        text, and build statistics. `manifest_tokens` in the stats is read
        from the smallest budget's manifest.

    Raises:
        ValueError: If `budgets` is empty.
    """
    budgets = sorted(set(budgets))
    if not budgets:
        # Fail before descriptor generation starts. Previously an empty list
        # only surfaced as an IndexError once all the work had been done.
        raise ValueError("budgets must contain at least one token budget")

    tokenizer = tokenizer or heuristic_tokenizer()

    cache = cache_from_manifest(prior_manifest) if prior_manifest else None
    gen_stats: dict = {}  # filled in by generate_descriptors via `_stats`
    descriptors = generate_descriptors(
        tree,
        model,
        contrastive=contrastive,
        contrast_chunk=contrast_chunk,
        grade_threshold=grade_threshold,
        max_repairs=max_repairs,
        max_workers=max_workers,
        cache=cache,
        _stats=gen_stats,
    )
    costs = annotate(tree.root, descriptors, tokenizer)

    manifests = {
        b: build_manifest(tree, descriptors, costs, b, siblings=budgets, generated_at=generated_at)
        for b in budgets
    }

    nodes = list(tree.root.walk())
    leaves = [n for n in nodes if n.is_leaf]
    stats = {
        "nodes": len(nodes),
        "leaves": len(leaves),
        "branches": len(nodes) - len(leaves),
        "full_tokens": costs[tree.root.id].subtree_tokens,
        "manifest_tokens": manifests[budgets[0]]["variant"]["manifest_tokens"],
        # When the generator reports no counts, treat every node as regenerated.
        "regenerated": gen_stats.get("regenerated", len(nodes)),
        "reused": gen_stats.get("reused", 0),
    }
    return BuildResult(
        source_id=tree.source_id,
        manifests=manifests,
        llms_txt=to_llms_txt(tree, descriptors),
        stats=stats,
    )
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Cost annotation — tokenize leaves, roll up subtree costs, hash for caching.
|
|
2
|
+
|
|
3
|
+
`tokens` is the cost to expand a leaf's full content (0 for branches).
|
|
4
|
+
`desc_tokens` is the cost of showing the node's descriptor in a frontier.
|
|
5
|
+
`subtree_tokens` is the cost to expand everything beneath a node, rolled up
|
|
6
|
+
bottom-up. `content_hash` lets a later build reuse descriptors for unchanged
|
|
7
|
+
nodes (leaf = hash of content; branch = hash of child hashes).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
|
|
14
|
+
from ..tokenizer import Tokenizer
|
|
15
|
+
from .descriptors import Descriptor
|
|
16
|
+
from .ir import SourceNode, content_hashes
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class Cost:
    """Token costs and content hash recorded for one node by `annotate`."""

    tokens: int  # tokens in the node's own content; 0 for branches
    desc_tokens: int  # tokens in the node's title + descriptor text
    subtree_tokens: int  # leaf: same as `tokens`; branch: sum over its children
    content_hash: str  # hash for this node as returned by `content_hashes`
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def annotate(root: SourceNode, descriptors: dict[str, Descriptor], tokenizer: Tokenizer) -> dict[str, Cost]:
    """Compute a `Cost` for every node under `root`, keyed by node id.

    Nodes are visited in post-order, so each child's cost is already
    available when its parent's subtree total is summed.
    """
    digest_by_id = content_hashes(root)
    result: dict[str, Cost] = {}

    for current in root.post_order():
        desc = descriptors[current.id]
        # Descriptor text is title + what + when + keywords, skipping empty parts.
        pieces = (current.title, desc.what, desc.when, " ".join(desc.keywords))
        shown = tokenizer(" ".join(piece for piece in pieces if piece))

        if not current.is_leaf:
            rolled_up = 0
            for kid in current.children:
                rolled_up += result[kid.id].subtree_tokens
            result[current.id] = Cost(
                tokens=0,
                desc_tokens=shown,
                subtree_tokens=rolled_up,
                content_hash=digest_by_id[current.id],
            )
            continue

        # A leaf's subtree cost is just its own content.
        own = tokenizer(current.content or "")
        result[current.id] = Cost(own, shown, own, digest_by_id[current.id])

    return result
|