contextweaver 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextweaver/__init__.py +151 -0
- contextweaver/__main__.py +491 -0
- contextweaver/_utils.py +287 -0
- contextweaver/adapters/__init__.py +27 -0
- contextweaver/adapters/_common.py +91 -0
- contextweaver/adapters/a2a.py +201 -0
- contextweaver/adapters/mcp.py +193 -0
- contextweaver/config.py +120 -0
- contextweaver/context/__init__.py +30 -0
- contextweaver/context/candidates.py +76 -0
- contextweaver/context/dedup.py +53 -0
- contextweaver/context/firewall.py +122 -0
- contextweaver/context/manager.py +543 -0
- contextweaver/context/prompt.py +66 -0
- contextweaver/context/scoring.py +99 -0
- contextweaver/context/selection.py +84 -0
- contextweaver/envelope.py +184 -0
- contextweaver/exceptions.py +39 -0
- contextweaver/protocols.py +303 -0
- contextweaver/py.typed +1 -0
- contextweaver/routing/__init__.py +38 -0
- contextweaver/routing/cards.py +195 -0
- contextweaver/routing/catalog.py +331 -0
- contextweaver/routing/graph.py +377 -0
- contextweaver/routing/graph_io.py +61 -0
- contextweaver/routing/graph_node.py +54 -0
- contextweaver/routing/labeler.py +148 -0
- contextweaver/routing/router.py +337 -0
- contextweaver/routing/tree.py +322 -0
- contextweaver/serde.py +83 -0
- contextweaver/store/__init__.py +47 -0
- contextweaver/store/artifacts.py +188 -0
- contextweaver/store/episodic.py +151 -0
- contextweaver/store/event_log.py +163 -0
- contextweaver/store/facts.py +139 -0
- contextweaver/summarize/__init__.py +27 -0
- contextweaver/summarize/extract.py +238 -0
- contextweaver/summarize/rules.py +181 -0
- contextweaver/types.py +240 -0
- contextweaver-0.1.0.dist-info/METADATA +153 -0
- contextweaver-0.1.0.dist-info/RECORD +45 -0
- contextweaver-0.1.0.dist-info/WHEEL +5 -0
- contextweaver-0.1.0.dist-info/entry_points.txt +2 -0
- contextweaver-0.1.0.dist-info/licenses/LICENSE +201 -0
- contextweaver-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""contextweaver — dynamic context management for tool-using AI agents.
|
|
2
|
+
|
|
3
|
+
Two integrated engines:
|
|
4
|
+
|
|
5
|
+
* **Context Engine** — phase-specific budgeted context compilation with a
|
|
6
|
+
context firewall (raw tool outputs stored out-of-band; LLM sees summaries,
|
|
7
|
+
handles, and structured extractions).
|
|
8
|
+
|
|
9
|
+
* **Routing Engine** — bounded-choice navigation over large tool catalogs via
|
|
10
|
+
a DAG + beam search + LLM-friendly choice cards.
|
|
11
|
+
|
|
12
|
+
Quick start::
|
|
13
|
+
|
|
14
|
+
from contextweaver.types import Phase, ContextItem, ItemKind
|
|
15
|
+
from contextweaver.config import ContextBudget
|
|
16
|
+
|
|
17
|
+
budget = ContextBudget()
|
|
18
|
+
print(budget.for_phase(Phase.answer)) # 6000
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
from contextweaver import config, envelope, exceptions, protocols, types
|
|
24
|
+
from contextweaver._utils import TfIdfScorer, jaccard
|
|
25
|
+
from contextweaver.config import ContextBudget, ContextPolicy, ScoringConfig
|
|
26
|
+
from contextweaver.context.manager import ContextManager
|
|
27
|
+
from contextweaver.envelope import (
|
|
28
|
+
BuildStats,
|
|
29
|
+
ChoiceCard,
|
|
30
|
+
ContextPack,
|
|
31
|
+
ResultEnvelope,
|
|
32
|
+
)
|
|
33
|
+
from contextweaver.exceptions import (
|
|
34
|
+
ArtifactNotFoundError,
|
|
35
|
+
BudgetExceededError,
|
|
36
|
+
CatalogError,
|
|
37
|
+
ContextWeaverError,
|
|
38
|
+
GraphBuildError,
|
|
39
|
+
ItemNotFoundError,
|
|
40
|
+
PolicyViolationError,
|
|
41
|
+
RouteError,
|
|
42
|
+
)
|
|
43
|
+
from contextweaver.protocols import (
|
|
44
|
+
EventHook,
|
|
45
|
+
Extractor,
|
|
46
|
+
Labeler,
|
|
47
|
+
RedactionHook,
|
|
48
|
+
Summarizer,
|
|
49
|
+
TokenEstimator,
|
|
50
|
+
)
|
|
51
|
+
from contextweaver.routing.cards import make_choice_cards, render_cards_text
|
|
52
|
+
from contextweaver.routing.catalog import (
|
|
53
|
+
Catalog,
|
|
54
|
+
generate_sample_catalog,
|
|
55
|
+
load_catalog_dicts,
|
|
56
|
+
load_catalog_json,
|
|
57
|
+
)
|
|
58
|
+
from contextweaver.routing.graph import ChoiceGraph
|
|
59
|
+
from contextweaver.routing.graph_node import ChoiceNode
|
|
60
|
+
from contextweaver.routing.labeler import KeywordLabeler
|
|
61
|
+
from contextweaver.routing.router import Router, RouteResult
|
|
62
|
+
from contextweaver.routing.tree import TreeBuilder
|
|
63
|
+
from contextweaver.store import (
|
|
64
|
+
InMemoryArtifactStore,
|
|
65
|
+
InMemoryEpisodicStore,
|
|
66
|
+
InMemoryEventLog,
|
|
67
|
+
InMemoryFactStore,
|
|
68
|
+
StoreBundle,
|
|
69
|
+
)
|
|
70
|
+
from contextweaver.summarize.extract import StructuredExtractor
|
|
71
|
+
from contextweaver.summarize.rules import RuleBasedSummarizer
|
|
72
|
+
from contextweaver.types import (
|
|
73
|
+
ArtifactRef,
|
|
74
|
+
ContextItem,
|
|
75
|
+
ItemKind,
|
|
76
|
+
Phase,
|
|
77
|
+
SelectableItem,
|
|
78
|
+
Sensitivity,
|
|
79
|
+
ToolCard,
|
|
80
|
+
ViewSpec,
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
__version__ = "0.1.0"
|
|
84
|
+
__all__ = [
|
|
85
|
+
# sub-modules
|
|
86
|
+
"config",
|
|
87
|
+
"envelope",
|
|
88
|
+
"exceptions",
|
|
89
|
+
"protocols",
|
|
90
|
+
"types",
|
|
91
|
+
# utilities
|
|
92
|
+
"TfIdfScorer",
|
|
93
|
+
"jaccard",
|
|
94
|
+
# types / enums
|
|
95
|
+
"ArtifactRef",
|
|
96
|
+
"BuildStats",
|
|
97
|
+
"ChoiceCard",
|
|
98
|
+
"ContextItem",
|
|
99
|
+
"ContextPack",
|
|
100
|
+
"ItemKind",
|
|
101
|
+
"Phase",
|
|
102
|
+
"ResultEnvelope",
|
|
103
|
+
"SelectableItem",
|
|
104
|
+
"Sensitivity",
|
|
105
|
+
"ToolCard",
|
|
106
|
+
"ViewSpec",
|
|
107
|
+
# config
|
|
108
|
+
"ContextBudget",
|
|
109
|
+
"ContextPolicy",
|
|
110
|
+
"ScoringConfig",
|
|
111
|
+
# protocols
|
|
112
|
+
"EventHook",
|
|
113
|
+
"Extractor",
|
|
114
|
+
"Labeler",
|
|
115
|
+
"RedactionHook",
|
|
116
|
+
"Summarizer",
|
|
117
|
+
"TokenEstimator",
|
|
118
|
+
# exceptions
|
|
119
|
+
"ArtifactNotFoundError",
|
|
120
|
+
"BudgetExceededError",
|
|
121
|
+
"CatalogError",
|
|
122
|
+
"ContextWeaverError",
|
|
123
|
+
"GraphBuildError",
|
|
124
|
+
"ItemNotFoundError",
|
|
125
|
+
"PolicyViolationError",
|
|
126
|
+
"RouteError",
|
|
127
|
+
# stores
|
|
128
|
+
"InMemoryArtifactStore",
|
|
129
|
+
"InMemoryEpisodicStore",
|
|
130
|
+
"InMemoryEventLog",
|
|
131
|
+
"InMemoryFactStore",
|
|
132
|
+
"StoreBundle",
|
|
133
|
+
# context engine
|
|
134
|
+
"ContextManager",
|
|
135
|
+
# routing engine
|
|
136
|
+
"Catalog",
|
|
137
|
+
"ChoiceGraph",
|
|
138
|
+
"ChoiceNode",
|
|
139
|
+
"KeywordLabeler",
|
|
140
|
+
"RouteResult",
|
|
141
|
+
"Router",
|
|
142
|
+
"TreeBuilder",
|
|
143
|
+
"generate_sample_catalog",
|
|
144
|
+
"load_catalog_dicts",
|
|
145
|
+
"load_catalog_json",
|
|
146
|
+
"make_choice_cards",
|
|
147
|
+
"render_cards_text",
|
|
148
|
+
# summarize
|
|
149
|
+
"RuleBasedSummarizer",
|
|
150
|
+
"StructuredExtractor",
|
|
151
|
+
]
|
|
@@ -0,0 +1,491 @@
|
|
|
1
|
+
"""Command-line interface for contextweaver.
|
|
2
|
+
|
|
3
|
+
Provides seven sub-commands:
|
|
4
|
+
|
|
5
|
+
demo Run a built-in demonstration of both engines.
|
|
6
|
+
build Build a routing graph from a catalog JSON file.
|
|
7
|
+
route Route a query over a pre-built routing graph.
|
|
8
|
+
print-tree Pretty-print the routing tree for a graph.
|
|
9
|
+
init Scaffold contextweaver config + sample catalog in cwd.
|
|
10
|
+
ingest Ingest a JSONL session into a serialised session file.
|
|
11
|
+
replay Replay a session and build context for a given phase.
|
|
12
|
+
|
|
13
|
+
Invocable as ``python -m contextweaver`` or ``contextweaver`` (via
|
|
14
|
+
``[project.scripts]``). Exempt from 300-line module limit.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import json
|
|
21
|
+
import sys
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
from contextweaver.config import ContextBudget
|
|
26
|
+
from contextweaver.context.manager import ContextManager
|
|
27
|
+
from contextweaver.routing.cards import make_choice_cards, render_cards_text
|
|
28
|
+
from contextweaver.routing.catalog import Catalog, generate_sample_catalog, load_catalog_json
|
|
29
|
+
from contextweaver.routing.graph_io import load_graph, save_graph
|
|
30
|
+
from contextweaver.routing.router import Router
|
|
31
|
+
from contextweaver.routing.tree import TreeBuilder
|
|
32
|
+
from contextweaver.types import ContextItem, ItemKind, Phase, SelectableItem
|
|
33
|
+
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
# JSON-L session helpers
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
# Lookup table from the ``type`` string of a JSONL event to the ItemKind
# member of the same name.  Built by name so that each key and its enum
# member cannot drift apart.
_KIND_MAP: dict[str, ItemKind] = {
    kind_name: getattr(ItemKind, kind_name)
    for kind_name in (
        "user_turn",
        "agent_msg",
        "tool_call",
        "tool_result",
        "doc_snippet",
        "memory_fact",
        "plan_state",
        "policy",
    )
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _load_jsonl(path: str) -> list[ContextItem]:
    """Read a JSONL file and convert each non-blank line into a ContextItem.

    Each line must hold one JSON object.  Recognised keys are ``id``,
    ``type``, ``text`` (falling back to ``content``), ``parent_id`` and
    ``token_estimate``; every other key is copied into the item's metadata.
    A missing ``id`` defaults to ``line-<lineno>`` and a missing or unknown
    ``type`` is treated as a user turn.

    Args:
        path: Location of the UTF-8 encoded JSONL file.

    Returns:
        The parsed items, in file order.

    Raises:
        ValueError: If a non-blank line is not valid JSON or does not decode
            to a JSON object.  The message carries ``path:lineno``.
    """
    # Keys that map onto dedicated ContextItem fields; built once instead of
    # once per line.
    reserved_keys = frozenset({"id", "type", "text", "content", "parent_id", "token_estimate"})

    items: list[ContextItem] = []
    raw_text = Path(path).read_text(encoding="utf-8")
    # Enumerate the unstripped text so that reported line numbers match the
    # file even when it starts with blank lines.
    for lineno, line in enumerate(raw_text.splitlines(), 1):
        if not line.strip():
            # Blank separator lines are harmless in JSONL; skip them.
            continue
        try:
            obj: Any = json.loads(line)
        except json.JSONDecodeError as exc:
            raise ValueError(f"{path}:{lineno}: invalid JSON — {exc}") from exc
        if not isinstance(obj, dict):
            # Without this check a list or scalar would fail on ``.get`` with
            # an AttributeError that the CLI entry point does not handle.
            raise ValueError(
                f"{path}:{lineno}: expected a JSON object, got {type(obj).__name__}"
            )

        kind = _KIND_MAP.get(obj.get("type", "user_turn"), ItemKind.user_turn)
        text = obj.get("text") or obj.get("content", "")
        items.append(
            ContextItem(
                id=obj.get("id", f"line-{lineno}"),
                kind=kind,
                text=str(text),
                metadata={k: v for k, v in obj.items() if k not in reserved_keys},
                parent_id=obj.get("parent_id"),
                token_estimate=int(obj.get("token_estimate", 0)),
            )
        )
    return items
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# ---------------------------------------------------------------------------
|
|
78
|
+
# Command handlers
|
|
79
|
+
# ---------------------------------------------------------------------------
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _cmd_demo(args: argparse.Namespace) -> int:  # noqa: ARG001
    """Run a built-in demonstration of both engines.

    Prints five numbered steps: load a generated sample catalog, build a
    routing graph over it, route a fixed query and render choice cards,
    ingest a short scripted conversation, and preview the compiled prompt.

    Args:
        args: Parsed CLI namespace; not read by this command.

    Returns:
        Always ``0``.
    """
    rule = "=" * 60
    print(rule)
    print("contextweaver demo — end-to-end demonstration")
    print(rule)

    # Step 1: register every generated sample entry in a fresh catalog.
    sample_entries = generate_sample_catalog(n=40, seed=42)
    catalog = Catalog()
    for entry in sample_entries:
        catalog.register(SelectableItem.from_dict(entry))
    items = catalog.all()
    namespaces = {it.namespace for it in items}
    print(f"\n[1/5] Loaded catalog: {len(items)} items across {len(namespaces)} namespaces")

    # Step 2: build the routing graph.
    graph = TreeBuilder(max_children=10).build(items)
    graph_stats = graph.stats()
    print(
        f"[2/5] Built routing graph: {graph_stats['total_nodes']} nodes,"
        f" depth={graph_stats['max_depth']}"
    )

    # Step 3: route a fixed query and render the candidates as choice cards.
    query = "find unpaid invoices and send a reminder email"
    result = Router(graph, items=items, beam_width=3, top_k=5).route(query)
    print(f"[3/5] Routed query: {query!r}")
    print(f" Top candidates: {result.candidate_ids}")
    cards = make_choice_cards(
        result.candidate_items,
        scores=dict(zip(result.candidate_ids, result.scores, strict=False)),
    )
    print(f" Choice cards ({len(cards)}):")
    print(render_cards_text(cards))

    # Step 4: ingest a scripted conversation, then compile an answer-phase pack.
    mgr = ContextManager()
    scripted_events = (
        ContextItem(id="u1", kind=ItemKind.user_turn, text="How many open invoices do we have?"),
        ContextItem(id="a1", kind=ItemKind.agent_msg, text="Let me check the billing system."),
        ContextItem(
            id="tc1",
            kind=ItemKind.tool_call,
            text="invoices.search(status='open')",
            parent_id="u1",
        ),
        ContextItem(
            id="tr1",
            kind=ItemKind.tool_result,
            text=(
                "invoice_id: INV-001\nstatus: open\namount: 5000\n\n"
                "invoice_id: INV-002\nstatus: open\namount: 3200\n\n"
                "summary: 2 open invoices, total $8,200"
            ),
            parent_id="tc1",
        ),
    )
    for event in scripted_events:
        mgr.ingest(event)
    mgr.add_fact("customer_tier", "enterprise")
    mgr.add_episode("ep-prev", "Previously discussed payment terms with client")

    pack = mgr.build_sync(phase=Phase.answer, query="open invoices")
    print(f"\n[4/5] Built context pack: phase={pack.phase.value}")
    print(f" Candidates: {pack.stats.total_candidates}, Included: {pack.stats.included_count}")
    print(
        f" Dedup removed: {pack.stats.dedup_removed},"
        f" Closures: {pack.stats.dependency_closures}"
    )
    print(f" Token breakdown: {pack.stats.tokens_per_section}")

    # Step 5: show at most the first 400 characters of the rendered prompt.
    preview_limit = 400
    head = pack.prompt[:preview_limit]
    print(f"\n[5/5] Prompt preview ({len(pack.prompt)} chars total):")
    print(head)
    if len(pack.prompt) > preview_limit:
        print(" ...")

    print("\n" + rule)
    print("Demo complete.")
    return 0
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _cmd_build(args: argparse.Namespace) -> int:
    """Build a routing graph from a catalog JSON file and save it.

    Args:
        args: Parsed CLI namespace providing ``catalog`` (input path),
            ``out`` (output path) and ``max_children``.

    Returns:
        Always ``0``.
    """
    source, destination, fan_out = args.catalog, args.out, args.max_children

    items = load_catalog_json(source)
    print(f"Loaded {len(items)} items from {source}")

    graph = TreeBuilder(max_children=fan_out).build(items)
    save_graph(graph, destination)

    # Gather the stats before reporting success, as the output order is
    # "saved" line first, stats dump second.
    summary = graph.stats()
    print(f"Graph saved to {destination}")
    print(f"Stats: {json.dumps(summary, indent=2)}")
    return 0
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _cmd_route(args: argparse.Namespace) -> int:
    """Route a query over a pre-built graph and print the choice cards.

    Args:
        args: Parsed CLI namespace providing ``graph``, ``catalog``,
            ``query``, ``top_k`` and ``beam_width``.

    Returns:
        Always ``0``.
    """
    graph_file, catalog_file = args.graph, args.catalog
    query, top_k, beam_width = args.query, args.top_k, args.beam_width

    graph = load_graph(graph_file)
    catalog_items = load_catalog_json(catalog_file)

    # Catalog entries whose id is absent from the graph are not handed to the router.
    known_ids = set(graph.items())
    routable = [entry for entry in catalog_items if entry.id in known_ids]

    result = Router(graph, items=routable, beam_width=beam_width, top_k=top_k).route(query)

    print(f"Query: {query!r}")
    print(f"Results ({len(result.candidate_ids)}):")
    cards = make_choice_cards(
        result.candidate_items,
        scores=dict(zip(result.candidate_ids, result.scores, strict=False)),
    )
    print(render_cards_text(cards))
    return 0
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _cmd_print_tree(args: argparse.Namespace) -> int:
    """Pretty-print the routing tree for a graph.

    Walks the graph depth-first from its root and prints one line per node:
    ``*`` marks an item, ``>`` marks any other node, and non-item nodes also
    show their routing hint when they have one.  A stats summary follows.

    Args:
        args: Parsed CLI namespace providing ``graph`` (path to the graph
            JSON file) and ``depth`` (deepest level to print; root is 0).

    Returns:
        Always ``0``.
    """
    graph_path: str = args.graph
    max_depth: int = args.depth

    graph = load_graph(graph_path)
    # Build the item-id set once; rebuilding it for every printed node made
    # the walk quadratic in the size of the graph.
    item_ids = frozenset(graph.items())

    def _print_node(node_id: str, depth: int, prefix: str = "") -> None:
        """Print *node_id* and recurse into its successors."""
        if depth > max_depth:
            return
        node = graph.get_node(node_id)
        is_item = node_id in item_ids
        marker = "*" if is_item else ">"
        label = node.label or node_id
        hint = f" - {node.routing_hint}" if node.routing_hint and not is_item else ""
        print(f"{prefix}{marker} {label}{hint}")
        children = graph.successors(node_id)
        for i, child in enumerate(children):
            # The last child gets a blank continuation instead of a bar.
            last = i == len(children) - 1
            child_prefix = prefix + (" " if last else "| ")
            _print_node(child, depth + 1, child_prefix)

    print(f"Routing tree (depth={max_depth}):")
    _print_node(graph.root_id, 0)
    stats = graph.stats()
    print(
        f"\nStats: {stats['total_nodes']} nodes,"
        f" {stats['total_items']} items,"
        f" depth={stats['max_depth']}"
    )
    return 0
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _cmd_init(args: argparse.Namespace) -> int:
    """Scaffold contextweaver config + sample catalog in cwd.

    Writes ``contextweaver.json`` and ``sample_catalog.json`` into the
    current working directory.  If either already exists the command
    refuses to write anything unless ``--force`` was given.

    Args:
        args: Parsed CLI namespace providing the ``force`` flag.

    Returns:
        ``0`` on success, ``1`` when existing files block the scaffold.
    """
    overwrite: bool = args.force
    config_path = Path("contextweaver.json")
    catalog_path = Path("sample_catalog.json")

    # Guard: never clobber existing files silently.
    clashes = [target for target in (config_path, catalog_path) if target.exists()]
    if clashes and not overwrite:
        names = ", ".join(map(str, clashes))
        print(f"Error: {names} already exist. Use --force to overwrite.", file=sys.stderr)
        return 1

    config = {
        "version": "0.1.0",
        "budget": {"route": 2000, "call": 3000, "interpret": 4000, "answer": 6000},
        "scoring": {
            "recency_weight": 0.3,
            "tag_match_weight": 0.25,
            "kind_priority_weight": 0.35,
            "token_cost_penalty": 0.1,
        },
        "policy": {"ttl_behavior": "drop", "sensitivity_floor": "confidential"},
        "routing": {"max_children": 20, "beam_width": 2, "top_k": 20, "confidence_gap": 0.15},
    }
    config_text = json.dumps(config, indent=2) + "\n"
    config_path.write_text(config_text, encoding="utf-8")
    print(f"Created {config_path}")

    sample_entries = generate_sample_catalog(n=40, seed=42)
    catalog_text = json.dumps(sample_entries, indent=2) + "\n"
    catalog_path.write_text(catalog_text, encoding="utf-8")
    print(f"Created {catalog_path} ({len(sample_entries)} items)")
    return 0
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _cmd_ingest(args: argparse.Namespace) -> int:
    """Ingest a JSONL session into a serialised session file.

    Every event from the JSONL file is fed into a fresh ContextManager.
    Tool results whose text exceeds 2000 characters go through
    ``ingest_tool_result`` instead of plain ``ingest``, and each fact on the
    returned envelope is recorded under ``<item id>:fact:<index>``.  The
    event log, artifact refs, facts and episodes are then written out as JSON.

    Args:
        args: Parsed CLI namespace providing ``events`` (input JSONL path)
            and ``out`` (output session JSON path).

    Returns:
        Always ``0``.
    """
    events_path: str = args.events
    out_path: str = args.out

    items = _load_jsonl(events_path)
    mgr = ContextManager()

    firewall_count = 0
    kind_counts: dict[str, int] = {}
    for item in items:
        kind_name = item.kind.value
        kind_counts[kind_name] = kind_counts.get(kind_name, 0) + 1

        oversized_tool_result = item.kind == ItemKind.tool_result and len(item.text) > 2000
        if not oversized_tool_result:
            mgr.ingest(item)
            continue

        _, envelope = mgr.ingest_tool_result(
            tool_call_id=item.parent_id or item.id,
            raw_output=item.text,
            tool_name=str(item.metadata.get("tool_name", "")),
        )
        for idx, fact in enumerate(envelope.facts):
            mgr.add_fact(f"{item.id}:fact:{idx}", fact)
        firewall_count += 1

    # Assemble the serialised session piece by piece, in output key order.
    events_out = [logged.to_dict() for logged in mgr.event_log.all()]
    artifacts_out = {
        ref.handle: {
            "media_type": ref.media_type,
            "size_bytes": ref.size_bytes,
            "label": ref.label,
        }
        for ref in mgr.artifact_store.list_refs()
    }
    facts_out = {fact.key: fact.value for fact in mgr.fact_store.all()}
    episodes_out = [
        {"episode_id": ep.episode_id, "summary": ep.summary} for ep in mgr.episodic_store.all()
    ]
    session: dict[str, Any] = {
        "event_count": len(items),
        "events": events_out,
        "artifacts": artifacts_out,
        "facts": facts_out,
        "episodes": episodes_out,
    }
    Path(out_path).write_text(json.dumps(session, indent=2) + "\n", encoding="utf-8")

    print(f"Ingested {len(items)} events from {events_path}")
    print(f"Event counts: {json.dumps(kind_counts)}")
    print(f"Firewall triggers: {firewall_count}")
    print(f"Artifacts stored: {len(artifacts_out)}")
    print(f"Session saved to {out_path}")
    return 0
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _cmd_replay(args: argparse.Namespace) -> int:
    """Replay a session and build context for a given phase.

    Loads a session JSON file, re-ingests its events, facts and episodes
    into a ContextManager whose four phase budgets are all set to the
    requested token budget, builds a context pack for the chosen phase and
    prints the build statistics and the rendered prompt.

    Args:
        args: Parsed CLI namespace providing ``session`` (path), ``phase``
            (phase name), ``budget`` (token budget) and ``full`` (print the
            whole prompt instead of a 500-character preview).

    Returns:
        Always ``0``.
    """
    session_path: str = args.session
    phase_name: str = args.phase
    budget_tokens: int = args.budget
    show_full: bool = args.full

    session: dict[str, Any] = json.loads(Path(session_path).read_text(encoding="utf-8"))

    # Every phase gets the same budget for a replay.
    uniform_budget = dict.fromkeys(("route", "call", "interpret", "answer"), budget_tokens)
    mgr = ContextManager(budget=ContextBudget(**uniform_budget))

    # Restore state: events first, then facts, then episodes.
    for raw_event in session.get("events", []):
        mgr.ingest(ContextItem.from_dict(raw_event))
    for key, value in session.get("facts", {}).items():
        mgr.add_fact(key, value)
    for episode in session.get("episodes", []):
        mgr.add_episode(episode["episode_id"], episode["summary"])

    phase = Phase(phase_name)
    pack = mgr.build_sync(phase=phase, query="replay", budget_tokens=budget_tokens)
    stats = pack.stats

    print(f"=== Context Build: phase={phase.value}, budget={budget_tokens} ===")
    stat_fields = [
        f"total_candidates={stats.total_candidates}",
        f"included={stats.included_count}",
        f"dropped={stats.dropped_count} ({stats.dropped_reasons})",
        f"dedup={stats.dedup_removed}",
        f"closures={stats.dependency_closures}",
    ]
    print("Stats: " + ", ".join(stat_fields))
    print(f"Token breakdown: {stats.tokens_per_section}")
    total_tokens = sum(stats.tokens_per_section.values()) + stats.header_footer_tokens
    print(f"Total tokens: {total_tokens} / {budget_tokens}")

    artifacts = session.get("artifacts", {})
    if artifacts:
        print(f"Artifacts available: {list(artifacts.keys())}")

    facts = session.get("facts", {})
    if facts:
        fact_strs = [f"{k}={v}" for k, v in facts.items()]
        print(f"Facts: {fact_strs}")

    print("--- Rendered prompt ---")
    if show_full:
        print(pack.prompt)
        return 0

    preview_limit = 500
    print(pack.prompt[:preview_limit])
    if len(pack.prompt) > preview_limit:
        print(f"... ({len(pack.prompt) - 500} more chars, use --full to see all)")
    return 0
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
# ---------------------------------------------------------------------------
|
|
394
|
+
# Parser
|
|
395
|
+
# ---------------------------------------------------------------------------
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _build_parser() -> argparse.ArgumentParser:
|
|
399
|
+
parser = argparse.ArgumentParser(
|
|
400
|
+
prog="contextweaver",
|
|
401
|
+
description="Dynamic context management for tool-using AI agents.",
|
|
402
|
+
)
|
|
403
|
+
sub = parser.add_subparsers(dest="command", metavar="COMMAND")
|
|
404
|
+
|
|
405
|
+
# demo
|
|
406
|
+
sub.add_parser("demo", help="Run a built-in demonstration of both engines.")
|
|
407
|
+
|
|
408
|
+
# build
|
|
409
|
+
p_build = sub.add_parser("build", help="Build a routing graph from a catalog.")
|
|
410
|
+
p_build.add_argument("--catalog", required=True, help="Path to the tool catalog JSON file.")
|
|
411
|
+
p_build.add_argument("--out", required=True, help="Output path for the graph JSON file.")
|
|
412
|
+
p_build.add_argument(
|
|
413
|
+
"--max-children", type=int, default=20, help="Max children per node (default: 20)."
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
# route
|
|
417
|
+
p_route = sub.add_parser("route", help="Route a query over a pre-built graph.")
|
|
418
|
+
p_route.add_argument("--graph", required=True, help="Path to the graph JSON file.")
|
|
419
|
+
p_route.add_argument("--catalog", required=True, help="Path to the catalog JSON file.")
|
|
420
|
+
p_route.add_argument("--query", required=True, help="The user query to route.")
|
|
421
|
+
p_route.add_argument("--top-k", type=int, default=10, help="Max results (default: 10).")
|
|
422
|
+
p_route.add_argument("--beam-width", type=int, default=3, help="Beam width (default: 3).")
|
|
423
|
+
|
|
424
|
+
# print-tree
|
|
425
|
+
p_tree = sub.add_parser("print-tree", help="Pretty-print the routing tree.")
|
|
426
|
+
p_tree.add_argument("--graph", required=True, help="Path to the graph JSON file.")
|
|
427
|
+
p_tree.add_argument("--depth", type=int, default=3, help="Max depth to display (default: 3).")
|
|
428
|
+
|
|
429
|
+
# init
|
|
430
|
+
p_init = sub.add_parser("init", help="Scaffold contextweaver config + sample catalog in cwd.")
|
|
431
|
+
p_init.add_argument(
|
|
432
|
+
"--force", action="store_true", default=False, help="Overwrite existing files."
|
|
433
|
+
)
|
|
434
|
+
|
|
435
|
+
# ingest
|
|
436
|
+
p_ingest = sub.add_parser("ingest", help="Ingest a JSONL session file.")
|
|
437
|
+
p_ingest.add_argument("--events", required=True, help="Path to the JSONL session file.")
|
|
438
|
+
p_ingest.add_argument("--out", required=True, help="Output path for the session JSON file.")
|
|
439
|
+
|
|
440
|
+
# replay
|
|
441
|
+
p_replay = sub.add_parser("replay", help="Replay a session and build context.")
|
|
442
|
+
p_replay.add_argument("--session", required=True, help="Path to the session JSON file.")
|
|
443
|
+
p_replay.add_argument(
|
|
444
|
+
"--phase",
|
|
445
|
+
default="answer",
|
|
446
|
+
choices=["route", "call", "interpret", "answer"],
|
|
447
|
+
help="Phase (default: answer).",
|
|
448
|
+
)
|
|
449
|
+
p_replay.add_argument("--budget", type=int, default=4000, help="Token budget (default: 4000).")
|
|
450
|
+
p_replay.add_argument("--full", action="store_true", default=False, help="Show full prompt.")
|
|
451
|
+
|
|
452
|
+
return parser
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
# Dispatch table used by ``main``: sub-command name (as registered in
# ``_build_parser``) -> handler taking the parsed namespace and returning
# the process exit code.
_HANDLERS = {
    "demo": _cmd_demo,
    "build": _cmd_build,
    "route": _cmd_route,
    "print-tree": _cmd_print_tree,
    "init": _cmd_init,
    "ingest": _cmd_ingest,
    "replay": _cmd_replay,
}
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def main() -> None:
    """Entry point for the ``contextweaver`` CLI.

    Parses ``sys.argv``, dispatches to the matching sub-command handler and
    exits the process with the handler's return code.  With no sub-command
    the help text is printed and the exit code is ``0``.

    Expected operational failures (any ``OSError`` such as a missing,
    unreadable or directory path, malformed JSON, and ``ValueError`` from
    input validation) are reported as a one-line message on stderr with
    exit code ``1`` instead of a traceback.

    Raises:
        SystemExit: Always; this function never returns normally.
    """
    parser = _build_parser()
    args = parser.parse_args()
    if args.command is None:
        parser.print_help()
        sys.exit(0)

    handler = _HANDLERS.get(args.command)
    if handler is None:
        # Defensive: only reachable if a sub-command is registered on the
        # parser without a matching entry in the dispatch table.
        parser.print_help()
        sys.exit(1)

    try:
        exit_code = handler(args)
    except json.JSONDecodeError as exc:
        # Must come before the ValueError clause: JSONDecodeError subclasses it.
        print(f"Error: invalid JSON — {exc}", file=sys.stderr)
        sys.exit(1)
    except (OSError, ValueError) as exc:
        # OSError covers FileNotFoundError and PermissionError as well as
        # IsADirectoryError, NotADirectoryError and similar I/O failures.
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)
    sys.exit(exit_code)
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
# Run the CLI only when this module is executed as a script (for example
# via ``python -m contextweaver``), never on import.
if __name__ == "__main__":
    main()
|