askfaro-progressive-context 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. askfaro_progressive_context/__init__.py +57 -0
  2. askfaro_progressive_context/build/__init__.py +33 -0
  3. askfaro_progressive_context/build/_frontmatter.py +62 -0
  4. askfaro_progressive_context/build/adapters/__init__.py +21 -0
  5. askfaro_progressive_context/build/adapters/base.py +33 -0
  6. askfaro_progressive_context/build/adapters/docs.py +61 -0
  7. askfaro_progressive_context/build/adapters/memory.py +56 -0
  8. askfaro_progressive_context/build/adapters/skills.py +62 -0
  9. askfaro_progressive_context/build/adapters/tools.py +82 -0
  10. askfaro_progressive_context/build/compiler.py +71 -0
  11. askfaro_progressive_context/build/cost.py +47 -0
  12. askfaro_progressive_context/build/descriptors.py +329 -0
  13. askfaro_progressive_context/build/emit.py +120 -0
  14. askfaro_progressive_context/build/ir.py +69 -0
  15. askfaro_progressive_context/cli.py +175 -0
  16. askfaro_progressive_context/eval.py +141 -0
  17. askfaro_progressive_context/llm.py +132 -0
  18. askfaro_progressive_context/navigator.py +98 -0
  19. askfaro_progressive_context/py.typed +0 -0
  20. askfaro_progressive_context/runtime.py +288 -0
  21. askfaro_progressive_context/schema/pcx-0.1.schema.json +123 -0
  22. askfaro_progressive_context/session.py +100 -0
  23. askfaro_progressive_context/tokenizer.py +36 -0
  24. askfaro_progressive_context/types.py +167 -0
  25. askfaro_progressive_context/validate.py +112 -0
  26. askfaro_progressive_context-0.1.0.dist-info/METADATA +144 -0
  27. askfaro_progressive_context-0.1.0.dist-info/RECORD +30 -0
  28. askfaro_progressive_context-0.1.0.dist-info/WHEEL +4 -0
  29. askfaro_progressive_context-0.1.0.dist-info/entry_points.txt +2 -0
  30. askfaro_progressive_context-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,57 @@
1
+ """askfaro-progressive-context: compile any content into a tiered, budget-aware,
2
+ agent-navigable progressive-disclosure manifest, plus an expansion protocol."""
3
+
4
+ from .eval import CaseResult, EvalReport, NavCase, run_case, run_eval
5
+ from .llm import LLMClient, OpenAICompatibleClient
6
+ from .navigator import KeywordNavigator, LLMNavigator, Navigator
7
+ from .runtime import (
8
+ VIEW_LEVELS,
9
+ BudgetExceeded,
10
+ FrontierEntry,
11
+ LeafResolver,
12
+ Runtime,
13
+ SearchBackend,
14
+ dict_resolver,
15
+ render_descriptor,
16
+ )
17
+ from .session import LOCAL, REMOTE, ModeConfig, NavSession
18
+ from .tokenizer import make_tokenizer
19
+ from .types import PROTOCOL_USAGE, Manifest, Node, Payload, Variant, estimate_tokens
20
+ from .validate import schema_errors, structural_errors, validate
21
+
22
+ __version__ = "0.0.7"
23
+
24
+ __all__ = [
25
+ "BudgetExceeded",
26
+ "CaseResult",
27
+ "EvalReport",
28
+ "FrontierEntry",
29
+ "KeywordNavigator",
30
+ "LLMClient",
31
+ "LLMNavigator",
32
+ "LOCAL",
33
+ "REMOTE",
34
+ "LeafResolver",
35
+ "Manifest",
36
+ "ModeConfig",
37
+ "NavCase",
38
+ "NavSession",
39
+ "Navigator",
40
+ "Node",
41
+ "OpenAICompatibleClient",
42
+ "PROTOCOL_USAGE",
43
+ "Payload",
44
+ "Runtime",
45
+ "SearchBackend",
46
+ "VIEW_LEVELS",
47
+ "Variant",
48
+ "dict_resolver",
49
+ "estimate_tokens",
50
+ "make_tokenizer",
51
+ "render_descriptor",
52
+ "run_case",
53
+ "run_eval",
54
+ "schema_errors",
55
+ "structural_errors",
56
+ "validate",
57
+ ]
@@ -0,0 +1,33 @@
1
+ """The compiler: source content -> annotated tree -> pcx manifest variants.
2
+
3
+ Pipeline: an Adapter yields a SourceTree (native structure, verbatim leaves),
4
+ the descriptor engine generates what/when/keywords, cost annotation tokenizes
5
+ and rolls up subtree costs, and emit writes one manifest per budget variant
6
+ plus an llms.txt export.
7
+ """
8
+
9
+ from .compiler import BuildResult, compile_source
10
+ from .descriptors import (
11
+ Descriptor,
12
+ DescriptorModel,
13
+ FakeDescriptorModel,
14
+ Grade,
15
+ LLMDescriptorModel,
16
+ cache_from_manifest,
17
+ generate_descriptors,
18
+ )
19
+ from .ir import SourceNode, SourceTree
20
+
21
+ __all__ = [
22
+ "BuildResult",
23
+ "Descriptor",
24
+ "DescriptorModel",
25
+ "FakeDescriptorModel",
26
+ "Grade",
27
+ "LLMDescriptorModel",
28
+ "SourceNode",
29
+ "SourceTree",
30
+ "cache_from_manifest",
31
+ "compile_source",
32
+ "generate_descriptors",
33
+ ]
@@ -0,0 +1,62 @@
1
+ """Tiny frontmatter reader for markdown sources.
2
+
3
+ Uses PyYAML if available; otherwise a minimal parser that handles the shapes
4
+ our adapters need: top-level `key: value`, inline lists `[a, b]`, and a single
5
+ nested mapping block (e.g. a one-fact memory store's `metadata:`). Keeps the core
6
+ dependency-free.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any
12
+
13
+
14
def split_frontmatter(text: str) -> tuple[dict[str, Any], str]:
    """Separate a leading ``---`` frontmatter block from the rest of *text*.

    Returns ``(metadata, body)``. When *text* does not start with ``---`` or
    no later line begins with ``---``, the metadata is empty and *text* is
    returned untouched. Otherwise the text between the fences is handed to
    ``_parse_yaml`` and the body is everything after the closing fence with
    leading newlines removed.

    Note: the closing fence is matched by prefix, so any line that merely
    starts with ``---`` terminates the block.
    """
    fence = "---"
    closing = text.find("\n" + fence, len(fence)) if text.startswith(fence) else -1
    if closing < 0:
        return {}, text
    header = text[len(fence):closing].strip("\n")
    # Skip the newline plus the three fence characters.
    remainder = text[closing + len(fence) + 1:].lstrip("\n")
    return _parse_yaml(header), remainder
23
+
24
+
25
def _parse_yaml(block: str) -> dict[str, Any]:
    """Parse a frontmatter block into a dict.

    PyYAML's ``safe_load`` is used when the package can be imported;
    otherwise the dependency-free ``_minimal_parse`` takes over. A document
    that does not load as a mapping yields an empty dict.
    """
    try:
        import yaml  # type: ignore

        loaded = yaml.safe_load(block)
    except ImportError:
        return _minimal_parse(block)
    if isinstance(loaded, dict):
        return loaded
    return {}
33
+
34
+
35
+ def _scalar(v: str) -> Any:
36
+ v = v.strip()
37
+ if v.startswith("[") and v.endswith("]"):
38
+ inner = v[1:-1].strip()
39
+ return [x.strip().strip("\"'") for x in inner.split(",")] if inner else []
40
+ return v.strip("\"'")
41
+
42
+
43
def _minimal_parse(block: str) -> dict[str, Any]:
    """Dependency-free parser for the small YAML subset the adapters use.

    Handles top-level ``key: value`` pairs and one level of nesting: a key
    with an empty value opens a mapping, and the indented lines that follow
    are stored inside it. Blank lines and ``#`` comment lines are ignored.
    Values are converted with ``_scalar``.
    """
    result: dict[str, Any] = {}
    open_block: str | None = None  # key of the mapping currently collecting indented lines
    for raw in block.splitlines():
        stripped = raw.strip()
        if stripped == "" or stripped[0] == "#":
            continue
        name, _, value = stripped.partition(":")
        name = name.strip()
        if raw[:1] in (" ", "\t") and open_block is not None:
            # Indented line under an open mapping: store it as a nested entry.
            if not isinstance(result.get(open_block), dict):
                result[open_block] = {}
            result[open_block][name] = _scalar(value)
            continue
        if value.strip():
            result[name] = _scalar(value)
            open_block = None
        else:
            # "key:" with nothing after it starts a nested mapping.
            result[name] = {}
            open_block = name
    return result
@@ -0,0 +1,21 @@
1
+ """Adapters turn a source on disk into a SourceTree.
2
+
3
+ Phase 1 ships the four already-hierarchical kinds (no structure inference):
4
+ docs, skills, tools, memory.
5
+ """
6
+
7
+ from .base import Adapter, get_adapter, register_adapter
8
+ from .docs import DocsAdapter
9
+ from .memory import MemoryAdapter
10
+ from .skills import SkillsAdapter
11
+ from .tools import ToolsAdapter
12
+
13
+ __all__ = [
14
+ "Adapter",
15
+ "DocsAdapter",
16
+ "MemoryAdapter",
17
+ "SkillsAdapter",
18
+ "ToolsAdapter",
19
+ "get_adapter",
20
+ "register_adapter",
21
+ ]
@@ -0,0 +1,33 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Protocol
5
+
6
+ from ..ir import SourceTree
7
+
8
+ _REGISTRY: dict[str, "Adapter"] = {}
9
+
10
+
11
class Adapter(Protocol):
    """Structural interface every source adapter satisfies."""

    # Name the adapter is registered and looked up under.
    kind: str

    def load(self, path: Path, *, source_id: str | None = None) -> SourceTree:
        """Read the source at *path* and return it as a ``SourceTree``."""
        ...
16
+
17
+
18
def register_adapter(adapter: "Adapter") -> "Adapter":
    """Store *adapter* in the registry under its ``kind`` and return it.

    An adapter already registered under the same kind is replaced.
    """
    _REGISTRY.update({adapter.kind: adapter})
    return adapter
21
+
22
+
23
def get_adapter(kind: str) -> "Adapter":
    """Return the adapter registered for *kind*.

    Raises:
        KeyError: if no adapter is registered under *kind*; the message lists
            the known kinds.
    """
    if kind in _REGISTRY:
        return _REGISTRY[kind]
    raise KeyError(f"unknown adapter kind {kind!r}; known: {sorted(_REGISTRY)}")
27
+
28
+
29
def slugify(text: str) -> str:
    """Turn *text* into a lowercase, hyphen-separated identifier.

    Every run of non-alphanumeric characters becomes a single ``-``, leading
    and trailing separators are dropped, and ``"node"`` is returned when
    nothing alphanumeric remains.
    """
    # Map separators to spaces so str.split() both trims and collapses runs.
    spaced = "".join(ch if ch.isalnum() else " " for ch in text.lower())
    return "-".join(spaced.split()) or "node"
@@ -0,0 +1,61 @@
1
+ """docs adapter — a directory tree of markdown.
2
+
3
+ Mirrors the filesystem: directories become branches, `.md`/`.mdx` files become
4
+ leaves. Titles come from the first H1 if present, else the filename. The native
5
+ hierarchy is used as-is (no clustering).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ from pathlib import Path
12
+
13
+ from .._frontmatter import split_frontmatter
14
+ from ..ir import SourceNode, SourceTree
15
+ from .base import register_adapter, slugify
16
+
17
+ _H1 = re.compile(r"^#\s+(.+)$", re.MULTILINE)
18
+ _DOC_EXT = {".md", ".mdx", ".markdown"}
19
+
20
+
21
def _title_of(body: str, fallback: str) -> str:
    """Return the text of the first ``# `` heading line in *body*, else *fallback*."""
    heading = _H1.search(body)
    if heading is None:
        return fallback
    return heading.group(1).strip()
24
+
25
+
26
+ def _humanize(name: str) -> str:
27
+ return name.replace("-", " ").replace("_", " ").strip().title()
28
+
29
+
30
class _DocsAdapter:
    """Adapter that mirrors a directory tree of markdown into a SourceTree.

    Directories become branch nodes and files whose suffix is in ``_DOC_EXT``
    become leaves; directories containing no documents are left out.
    """

    kind = "docs"

    def _file_node(self, file: Path) -> SourceNode:
        """Build a leaf from one markdown file.

        The title is the frontmatter ``title`` if set, else the first H1 in
        the body, else a humanized filename. The frontmatter ``description``
        (if any) becomes the hint.
        """
        # Decode explicitly instead of relying on the platform's locale
        # encoding; "utf-8-sig" also drops a leading BOM, which would
        # otherwise hide the opening "---" fence from split_frontmatter.
        fm, body = split_frontmatter(file.read_text(encoding="utf-8-sig"))
        title = fm.get("title") or _title_of(body, _humanize(file.stem))
        return SourceNode(
            id=slugify(file.stem),
            # A YAML parser may hand back a non-string (e.g. `title: 2024`).
            title=str(title),
            content=body.strip(),
            hint=fm.get("description"),
        )

    def _dir_node(self, directory: Path, node_id: str, title: str) -> SourceNode:
        """Recursively build the branch node for *directory*."""
        node = SourceNode(id=node_id, title=title, hint=f"Docs under {directory.name}/.")
        # Sort key puts sub-directories before files, each group by name.
        for child in sorted(directory.iterdir(), key=lambda p: (p.is_file(), p.name)):
            if child.is_dir():
                sub = self._dir_node(child, slugify(child.name), _humanize(child.name))
                if sub.children:  # skip directories with no documents beneath them
                    node.children.append(sub)
            elif child.suffix.lower() in _DOC_EXT:
                node.children.append(self._file_node(child))
        return node

    def load(self, path: Path, *, source_id: str | None = None) -> SourceTree:
        """Load the docs tree rooted at *path*.

        *source_id* defaults to the directory name and is also used as the
        root node's title; the root node id is always ``"r"``.
        """
        root_dir = Path(path)
        sid = source_id or root_dir.name
        root = self._dir_node(root_dir, "r", sid)
        return SourceTree(source_id=sid, kind=self.kind, root=root)
59
+
60
+
61
+ DocsAdapter = register_adapter(_DocsAdapter())
@@ -0,0 +1,56 @@
1
+ """memory adapter — a directory of one-fact markdown files with frontmatter.
2
+
3
+ Each file is a leaf; `name`/`description` frontmatter become the title/hint and
4
+ the body is the verbatim content. Files are grouped into tier-1 branches by
5
+ `metadata.type` (e.g. user / feedback / project / reference) when present —
6
+ matching a one-fact-per-file memory layout; generic to any such collection.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from pathlib import Path
12
+
13
+ from .._frontmatter import split_frontmatter
14
+ from ..ir import SourceNode, SourceTree
15
+ from .base import register_adapter, slugify
16
+
17
+
18
class _MemoryAdapter:
    """Adapter for a flat directory of one-fact markdown files.

    Every ``*.md`` file directly under the directory (except the
    ``MEMORY.md`` index, matched case-insensitively) becomes a leaf. Leaves
    are grouped under branches named after ``metadata.type`` when more than
    one distinct type is present.
    """

    kind = "memory"

    def load(self, path: Path, *, source_id: str | None = None) -> SourceTree:
        """Load the memory store at *path*.

        *source_id* defaults to the directory name and is used as the root
        title; the root node id is always ``"r"``.
        """
        root_dir = Path(path)
        sid = source_id or root_dir.name

        groups: dict[str, list[SourceNode]] = {}
        for md in sorted(root_dir.glob("*.md")):
            if md.name.upper() == "MEMORY.MD":
                continue  # the index, not a fact
            # Decode explicitly rather than with the locale default;
            # "utf-8-sig" also strips a BOM that would hide the "---" fence.
            fm, body = split_frontmatter(md.read_text(encoding="utf-8-sig"))
            meta = fm.get("metadata")
            if not isinstance(meta, dict):
                meta = {}
            # YAML may yield None or non-string scalars here; normalise to
            # str so group keys sort together and slugify() can lower-case.
            mtype = str(meta.get("type") or "")
            name = str(fm.get("name") or md.stem)
            leaf = SourceNode(
                id=slugify(name),
                title=name,
                content=body.strip(),
                hint=fm.get("description"),
            )
            groups.setdefault(mtype, []).append(leaf)

        root = SourceNode(id="r", title=sid, hint="A memory store of discrete facts.")
        if len(groups) <= 1:
            # Zero or one group: a branch level would add nothing, keep it flat.
            for leaves in groups.values():
                root.children.extend(leaves)
        else:
            for mtype, leaves in sorted(groups.items()):
                if not mtype:
                    # Untyped facts hang directly off the root.
                    root.children.extend(leaves)
                    continue
                root.children.append(
                    SourceNode(id=slugify(mtype), title=mtype, hint=f"{mtype} memories.", children=leaves)
                )
        return SourceTree(source_id=sid, kind=self.kind, root=root)
54
+
55
+
56
+ MemoryAdapter = register_adapter(_MemoryAdapter())
@@ -0,0 +1,62 @@
1
+ """skills adapter — a directory of skills, one markdown file per skill.
2
+
3
+ Each skill file carries frontmatter (`name`, `description`, and optionally
4
+ `when`/`when_to_use` and `category`); the body is the verbatim skill content.
5
+ Skills are grouped into tier-1 branches by `category` when present. This is a
6
+ generic skills layout — a host-side shim maps your skill source
7
+ into it without this package importing the host app.
8
+
9
+ Skill *selection* is exactly the agent-navigated case: a name+purpose+when
10
+ manifest up front, the full skill body fetched on expand.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from pathlib import Path
16
+
17
+ from .._frontmatter import split_frontmatter
18
+ from ..ir import SourceNode, SourceTree
19
+ from .base import register_adapter, slugify
20
+
21
+
22
class _SkillsAdapter:
    """Adapter for a directory of skill markdown files.

    Every ``*.md`` file found recursively under the directory becomes a leaf.
    Leaves are grouped under branches named after the frontmatter
    ``category`` when more than one distinct category is present.
    """

    kind = "skills"

    def load(self, path: Path, *, source_id: str | None = None) -> SourceTree:
        """Load the skills directory at *path*.

        *source_id* defaults to the directory name; the root node id is
        always ``"r"`` and its title is ``"<source_id> skills"``.
        """
        root_dir = Path(path)
        sid = source_id or root_dir.name

        groups: dict[str, list[SourceNode]] = {}
        for md in sorted(root_dir.rglob("*.md")):
            # Decode explicitly rather than with the locale default;
            # "utf-8-sig" also strips a BOM that would hide the "---" fence.
            fm, body = split_frontmatter(md.read_text(encoding="utf-8-sig"))
            # YAML may yield non-string scalars; slugify() needs a str.
            name = str(fm.get("name") or md.stem)
            when_hint = fm.get("when") or fm.get("when_to_use")
            hint = fm.get("description")
            if when_hint:
                hint = f"{hint or ''}\nWhen to use: {when_hint}".strip()
            # A bare `category:` loads as None under YAML; normalise so the
            # group keys are all strings and can be sorted together.
            category = str(fm.get("category") or "")
            declared_keywords = fm.get("keywords")
            leaf = SourceNode(
                id=slugify(name),
                title=name,
                content=body.strip(),
                hint=hint,
                keywords=declared_keywords if isinstance(declared_keywords, list) else [],
            )
            groups.setdefault(category, []).append(leaf)

        root = SourceNode(id="r", title=f"{sid} skills", hint="Reusable skills for performing tasks.")
        if len(groups) <= 1:
            # Zero or one group: a branch level would add nothing, keep it flat.
            for leaves in groups.values():
                root.children.extend(leaves)
        else:
            for category, leaves in sorted(groups.items()):
                if not category:
                    # Uncategorised skills hang directly off the root.
                    root.children.extend(leaves)
                    continue
                root.children.append(
                    SourceNode(id=slugify(category), title=category, hint=f"{category} skills.", children=leaves)
                )
        return SourceTree(source_id=sid, kind=self.kind, root=root)
60
+
61
+
62
+ SkillsAdapter = register_adapter(_SkillsAdapter())
@@ -0,0 +1,82 @@
1
+ """tools adapter — a JSON file of tool/function schemas.
2
+
3
+ Accepts an OpenAI-style list (`[{name, description, parameters}, ...]`), a
4
+ `{"tools": [...]}` wrapper, or a `{name: schema}` mapping. Each tool becomes a
5
+ leaf whose verbatim content is its full schema; the existing `description`
6
+ becomes a descriptor hint. Tools are grouped into tier-1 branches by namespace
7
+ (the part before the first `.`/`/`/`:`/`__` in the name) when present.
8
+
9
+ This is the proven progressive-tool-disclosure pattern: a tiny name+purpose
10
+ manifest up front, the full schema fetched on expand.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import re
17
+ from pathlib import Path
18
+
19
+ from ..ir import SourceNode, SourceTree
20
+ from .base import register_adapter, slugify
21
+
22
+ _NS = re.compile(r"[./:]|__")
23
+
24
+
25
+ def _normalize(raw) -> list[dict]:
26
+ if isinstance(raw, dict) and "tools" in raw:
27
+ raw = raw["tools"]
28
+ if isinstance(raw, dict):
29
+ out = []
30
+ for name, schema in raw.items():
31
+ entry = dict(schema) if isinstance(schema, dict) else {"schema": schema}
32
+ entry.setdefault("name", name)
33
+ out.append(entry)
34
+ return out
35
+ return list(raw)
36
+
37
+
38
+ def _name_of(tool: dict) -> str:
39
+ return tool.get("name") or tool.get("function", {}).get("name") or "tool"
40
+
41
+
42
+ def _description_of(tool: dict) -> str | None:
43
+ return tool.get("description") or tool.get("function", {}).get("description")
44
+
45
+
46
class _ToolsAdapter:
    """Adapter for a JSON file of tool/function schemas.

    Each tool becomes a leaf whose content is its schema re-serialised as
    sorted, indented JSON. Tools whose name contains a namespace separator
    (``.``, ``/``, ``:`` or ``__``) are grouped under a branch named after
    the part before the first separator.
    """

    kind = "tools"

    def load(self, path: Path, *, source_id: str | None = None) -> SourceTree:
        """Load the tool schemas stored at *path*.

        *source_id* defaults to the file stem; the root node id is always
        ``"r"`` and its title is ``"<source_id> tools"``.
        """
        source = Path(path)
        # JSON text is UTF-8; decode explicitly instead of using the locale
        # default. "utf-8-sig" also drops a BOM, which json.loads rejects.
        tools = _normalize(json.loads(source.read_text(encoding="utf-8-sig")))
        sid = source_id or source.stem

        groups: dict[str, list[SourceNode]] = {}
        for tool in tools:
            name = _name_of(tool)
            head, *tail = _NS.split(name, 1)
            ns = head if tail else ""  # no separator in the name -> no namespace
            leaf = SourceNode(
                id=slugify(name),
                title=name,
                content=json.dumps(tool, indent=2, sort_keys=True),
                format="json",
                hint=_description_of(tool),
                keywords=[part for part in _NS.split(name) if part],
            )
            groups.setdefault(ns, []).append(leaf)

        root = SourceNode(id="r", title=f"{sid} tools", hint="Callable tools and their schemas.")
        for ns, leaves in sorted(groups.items()):
            if ns == "":
                # Un-namespaced tools hang directly off the root.
                root.children.extend(leaves)
            else:
                root.children.append(
                    SourceNode(id=slugify(ns), title=ns, hint=f"Tools in the {ns} namespace.", children=leaves)
                )
        return SourceTree(source_id=sid, kind=self.kind, root=root)
80
+
81
+
82
+ ToolsAdapter = register_adapter(_ToolsAdapter())
@@ -0,0 +1,71 @@
1
+ """Compiler orchestration: SourceTree -> descriptors -> costs -> manifests."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+ from ..tokenizer import Tokenizer, heuristic_tokenizer
9
+ from .cost import annotate
10
+ from .descriptors import DescriptorModel, cache_from_manifest, generate_descriptors
11
+ from .emit import build_manifest, to_llms_txt
12
+ from .ir import SourceTree
13
+
14
+
15
@dataclass
class BuildResult:
    """Everything produced by one ``compile_source`` run."""

    # Identifier of the compiled source tree.
    source_id: str
    # One manifest dict per requested budget, keyed by that budget.
    manifests: dict[int, dict[str, Any]]
    # The llms.txt export text.
    llms_txt: str
    # Summary counters for the build; empty unless provided.
    stats: dict[str, Any] = field(default_factory=dict)
21
+
22
+
23
def compile_source(
    tree: SourceTree,
    model: DescriptorModel,
    budgets: list[int],
    *,
    tokenizer: Tokenizer | None = None,
    contrastive: bool = True,
    contrast_chunk: int = 8,
    grade_threshold: float = 0.7,
    max_repairs: int = 1,
    max_workers: int = 1,
    prior_manifest: dict | None = None,
    generated_at: str | None = None,
) -> BuildResult:
    """Compile *tree* into one manifest per budget plus an llms.txt export.

    Steps: generate descriptors for the tree, annotate token costs, then
    build a manifest for every distinct budget.

    Args:
        tree: The source tree to compile.
        model: Descriptor model passed to ``generate_descriptors``.
        budgets: Budgets to emit manifests for; duplicates are dropped and
            the rest are processed in ascending order. Must not be empty.
        tokenizer: Token counter; defaults to ``heuristic_tokenizer()``.
        contrastive, contrast_chunk, grade_threshold, max_repairs,
            max_workers: Forwarded unchanged to ``generate_descriptors``.
        prior_manifest: When truthy, converted with ``cache_from_manifest``
            and passed to ``generate_descriptors`` as its cache.
        generated_at: Forwarded unchanged to ``build_manifest``.

    Returns:
        A ``BuildResult`` whose ``stats`` hold node/leaf/branch counts, the
        root's ``subtree_tokens``, the ``manifest_tokens`` of the smallest
        budget's manifest, and the regenerated/reused descriptor counts.

    Raises:
        ValueError: if *budgets* is empty.
    """
    budgets = sorted(set(budgets))
    if not budgets:
        # Fail before descriptor generation runs; previously an empty list
        # only surfaced as an IndexError after all of that work was done.
        raise ValueError("compile_source requires at least one budget")

    tokenizer = tokenizer or heuristic_tokenizer()

    cache = cache_from_manifest(prior_manifest) if prior_manifest else None
    gen_stats: dict = {}
    descriptors = generate_descriptors(
        tree,
        model,
        contrastive=contrastive,
        contrast_chunk=contrast_chunk,
        grade_threshold=grade_threshold,
        max_repairs=max_repairs,
        max_workers=max_workers,
        cache=cache,
        _stats=gen_stats,
    )
    costs = annotate(tree.root, descriptors, tokenizer)

    manifests = {
        b: build_manifest(tree, descriptors, costs, b, siblings=budgets, generated_at=generated_at)
        for b in budgets
    }

    nodes = list(tree.root.walk())
    leaf_count = sum(1 for n in nodes if n.is_leaf)
    stats = {
        "nodes": len(nodes),
        "leaves": leaf_count,
        "branches": len(nodes) - leaf_count,
        "full_tokens": costs[tree.root.id].subtree_tokens,
        # Reported for the smallest budget (budgets is sorted ascending).
        "manifest_tokens": manifests[budgets[0]]["variant"]["manifest_tokens"],
        # If generation recorded no count, every node is reported as regenerated.
        "regenerated": gen_stats.get("regenerated", len(nodes)),
        "reused": gen_stats.get("reused", 0),
    }
    return BuildResult(
        source_id=tree.source_id,
        manifests=manifests,
        llms_txt=to_llms_txt(tree, descriptors),
        stats=stats,
    )
@@ -0,0 +1,47 @@
1
+ """Cost annotation — tokenize leaves, roll up subtree costs, hash for caching.
2
+
3
+ `tokens` is the cost to expand a leaf's full content (0 for branches).
4
+ `desc_tokens` is the cost of showing the node's descriptor in a frontier.
5
+ `subtree_tokens` is the cost to expand everything beneath a node, rolled up
6
+ bottom-up. `content_hash` lets a later build reuse descriptors for unchanged
7
+ nodes (leaf = hash of content; branch = hash of child hashes).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+
14
+ from ..tokenizer import Tokenizer
15
+ from .descriptors import Descriptor
16
+ from .ir import SourceNode, content_hashes
17
+
18
+
19
@dataclass
class Cost:
    """Token costs and content hash recorded for one node."""

    # Tokens in the node's own content (set to 0 for branches by annotate()).
    tokens: int
    # Tokens in the node's descriptor text.
    desc_tokens: int
    # For a leaf, its own tokens; for a branch, the sum over its children.
    subtree_tokens: int
    # Hash used to detect unchanged nodes between builds.
    content_hash: str
25
+
26
+
27
def annotate(root: SourceNode, descriptors: dict[str, Descriptor], tokenizer: Tokenizer) -> dict[str, Cost]:
    """Compute a ``Cost`` for every node under *root*, keyed by node id.

    For each node the descriptor text (title, what, when and keywords joined
    by spaces, empty parts skipped) is tokenized into ``desc_tokens``. A leaf
    has its content tokenized into both ``tokens`` and ``subtree_tokens``; a
    branch gets ``tokens=0`` and the sum of its children's
    ``subtree_tokens``.

    Relies on ``root.post_order()`` yielding children before their parent,
    since a branch reads its children's already-computed costs.

    Raises:
        KeyError: if a node id has no entry in *descriptors*.
    """
    digests = content_hashes(root)
    table: dict[str, Cost] = {}

    for node in root.post_order():
        desc = descriptors[node.id]
        pieces = [node.title, desc.what, desc.when, " ".join(desc.keywords)]
        frontier_cost = tokenizer(" ".join(piece for piece in pieces if piece))

        if node.is_leaf:
            own = tokenizer(node.content or "")
            below = own
        else:
            own = 0
            below = sum(table[child.id].subtree_tokens for child in node.children)

        table[node.id] = Cost(
            tokens=own,
            desc_tokens=frontier_cost,
            subtree_tokens=below,
            content_hash=digests[node.id],
        )
    return table