biblicus 0.15.1__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biblicus/__init__.py +21 -1
- biblicus/analysis/markov.py +35 -3
- biblicus/backends/__init__.py +6 -2
- biblicus/backends/embedding_index_common.py +334 -0
- biblicus/backends/embedding_index_file.py +272 -0
- biblicus/backends/embedding_index_inmemory.py +270 -0
- biblicus/backends/hybrid.py +8 -5
- biblicus/backends/scan.py +1 -0
- biblicus/backends/sqlite_full_text_search.py +1 -1
- biblicus/backends/{vector.py → tf_vector.py} +28 -35
- biblicus/chunking.py +396 -0
- biblicus/cli.py +75 -25
- biblicus/context.py +27 -12
- biblicus/context_engine/__init__.py +53 -0
- biblicus/context_engine/assembler.py +1060 -0
- biblicus/context_engine/compaction.py +110 -0
- biblicus/context_engine/models.py +423 -0
- biblicus/context_engine/retrieval.py +129 -0
- biblicus/corpus.py +117 -16
- biblicus/embedding_providers.py +122 -0
- biblicus/errors.py +24 -0
- biblicus/frontmatter.py +2 -0
- biblicus/knowledge_base.py +1 -1
- biblicus/models.py +15 -3
- biblicus/retrieval.py +7 -2
- biblicus/sources.py +46 -11
- biblicus/text/link.py +6 -0
- biblicus/text/prompts.py +2 -0
- {biblicus-0.15.1.dist-info → biblicus-1.0.0.dist-info}/METADATA +4 -3
- {biblicus-0.15.1.dist-info → biblicus-1.0.0.dist-info}/RECORD +34 -24
- {biblicus-0.15.1.dist-info → biblicus-1.0.0.dist-info}/WHEEL +0 -0
- {biblicus-0.15.1.dist-info → biblicus-1.0.0.dist-info}/entry_points.txt +0 -0
- {biblicus-0.15.1.dist-info → biblicus-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {biblicus-0.15.1.dist-info → biblicus-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1060 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Context assembly utilities for the Biblicus Context Engine.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from string import Formatter
|
|
9
|
+
from typing import Any, Iterable, Optional
|
|
10
|
+
|
|
11
|
+
from biblicus.context import ContextPack, ContextPackBlock
|
|
12
|
+
from biblicus.context_engine.compaction import CompactionRequest, TruncateCompactor, build_compactor
|
|
13
|
+
from biblicus.context_engine.models import (
|
|
14
|
+
AssistantMessageSpec,
|
|
15
|
+
ContextDeclaration,
|
|
16
|
+
ContextInsertSpec,
|
|
17
|
+
ContextMessageSpec,
|
|
18
|
+
ContextPolicySpec,
|
|
19
|
+
ContextRetrieverRequest,
|
|
20
|
+
HistoryInsertSpec,
|
|
21
|
+
SystemMessageSpec,
|
|
22
|
+
UserMessageSpec,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class ContextAssemblyResult:
    """
    Assembled prompt context for a single agent turn.

    :ivar system_prompt: System prompt content.
    :vartype system_prompt: str
    :ivar history: Message history for the turn.
    :vartype history: list[dict[str, Any]]
    :ivar user_message: User message content.
    :vartype user_message: str
    :ivar token_count: Estimated token count for assembled content.
    :vartype token_count: int
    """

    # Final system prompt after pack rendering and any compaction.
    system_prompt: str
    # Chat-style message dicts (possibly trimmed to fit the input budget).
    history: list[dict[str, Any]]
    # The user turn content (empty string when the caller passed None).
    user_message: str
    # Whitespace-split token estimate of the whole assembled context.
    token_count: int = 0
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class ContextAssembler:
|
|
48
|
+
"""
|
|
49
|
+
Assemble Context declarations into system prompts, history, and user messages.
|
|
50
|
+
|
|
51
|
+
:param context_registry: Context declarations indexed by name.
|
|
52
|
+
:type context_registry: dict[str, ContextDeclaration]
|
|
53
|
+
:param retriever_registry: Retriever declarations indexed by name.
|
|
54
|
+
:type retriever_registry: dict[str, Any] or None
|
|
55
|
+
:param corpus_registry: Corpus declarations indexed by name.
|
|
56
|
+
:type corpus_registry: dict[str, Any] or None
|
|
57
|
+
:param compactor_registry: Compactor declarations indexed by name.
|
|
58
|
+
:type compactor_registry: dict[str, Any] or None
|
|
59
|
+
:param default_retriever: Default retriever callable when no override is supplied.
|
|
60
|
+
:type default_retriever: callable or None
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
def __init__(
|
|
64
|
+
self,
|
|
65
|
+
context_registry: dict[str, ContextDeclaration],
|
|
66
|
+
retriever_registry: Optional[dict[str, Any]] = None,
|
|
67
|
+
corpus_registry: Optional[dict[str, Any]] = None,
|
|
68
|
+
compactor_registry: Optional[dict[str, Any]] = None,
|
|
69
|
+
default_retriever: Optional[Any] = None,
|
|
70
|
+
):
|
|
71
|
+
self._context_registry = context_registry
|
|
72
|
+
self._retriever_registry = retriever_registry or {}
|
|
73
|
+
self._corpus_registry = corpus_registry or {}
|
|
74
|
+
self._compactor_registry = compactor_registry or {}
|
|
75
|
+
self._default_retriever = default_retriever
|
|
76
|
+
|
|
77
|
+
def assemble(
|
|
78
|
+
self,
|
|
79
|
+
context_name: str,
|
|
80
|
+
base_system_prompt: str,
|
|
81
|
+
history_messages: list[dict[str, Any]],
|
|
82
|
+
user_message: Optional[str],
|
|
83
|
+
template_context: dict[str, Any],
|
|
84
|
+
retriever_override: Optional[Any] = None,
|
|
85
|
+
) -> ContextAssemblyResult:
|
|
86
|
+
"""
|
|
87
|
+
Assemble a Context declaration into prompt components.
|
|
88
|
+
|
|
89
|
+
:param context_name: Name of the Context declaration.
|
|
90
|
+
:type context_name: str
|
|
91
|
+
:param base_system_prompt: Default system prompt from agent config.
|
|
92
|
+
:type base_system_prompt: str
|
|
93
|
+
:param history_messages: Current agent history messages.
|
|
94
|
+
:type history_messages: list[dict[str, Any]]
|
|
95
|
+
:param user_message: Current user message for this turn.
|
|
96
|
+
:type user_message: str or None
|
|
97
|
+
:param template_context: Template variables for resolution.
|
|
98
|
+
:type template_context: dict[str, Any]
|
|
99
|
+
:param retriever_override: Optional retriever override callable.
|
|
100
|
+
:type retriever_override: callable or None
|
|
101
|
+
:return: Assembled prompt components.
|
|
102
|
+
:rtype: ContextAssemblyResult
|
|
103
|
+
:raises ValueError: If the context declaration is not found.
|
|
104
|
+
"""
|
|
105
|
+
if context_name not in self._context_registry:
|
|
106
|
+
raise ValueError(f"Context '{context_name}' not defined")
|
|
107
|
+
|
|
108
|
+
context_spec = self._context_registry[context_name]
|
|
109
|
+
if context_spec.messages is None:
|
|
110
|
+
return self._assemble_default_with_regeneration(
|
|
111
|
+
context_spec,
|
|
112
|
+
base_system_prompt,
|
|
113
|
+
history_messages,
|
|
114
|
+
user_message or "",
|
|
115
|
+
template_context,
|
|
116
|
+
retriever_override,
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
return self._assemble_explicit_with_regeneration(
|
|
120
|
+
context_spec,
|
|
121
|
+
history_messages,
|
|
122
|
+
user_message or "",
|
|
123
|
+
template_context,
|
|
124
|
+
retriever_override,
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
def _assemble_default(
    self,
    context_spec: ContextDeclaration,
    base_system_prompt: str,
    history_messages: list[dict[str, Any]],
    user_message: str,
    template_context: dict[str, Any],
    total_budget_override: Optional[int] = None,
) -> ContextAssemblyResult:
    """Assemble the default Context plan when messages are omitted."""
    entries = context_spec.packs or []
    budgets = self._allocate_default_pack_budgets(
        entries, context_spec.policy, total_budget_override
    )
    # Render every declared pack; the default (non-regenerating) plan never
    # tightens pack budgets and never applies a retriever override.
    rendered = [
        self._render_pack(
            entry.name,
            template_context,
            retriever_override=None,
            pack_budget=budgets.get(entry.name),
            policy=context_spec.policy,
            weight=entry.weight,
        )
        for entry in entries
    ]

    prompt = base_system_prompt or ""
    if rendered:
        prompt = self._join_nonempty([prompt, *rendered])

    # Enforce the overall input budget; history may be trimmed and the
    # prompt compacted as a side effect.
    final_prompt, trimmed_history, final_user_message, token_count, _compacted = (
        self._apply_context_budget(
            prompt,
            history_messages,
            user_message,
            context_spec.policy,
        )
    )
    return ContextAssemblyResult(
        system_prompt=final_prompt,
        history=trimmed_history,
        user_message=final_user_message,
        token_count=token_count,
    )
|
|
172
|
+
|
|
173
|
+
def _assemble_default_with_regeneration(
    self,
    context_spec: ContextDeclaration,
    base_system_prompt: str,
    history_messages: list[dict[str, Any]],
    user_message: str,
    template_context: dict[str, Any],
    retriever_override: Optional[Any],
    total_budget_override: Optional[int] = None,
) -> ContextAssemblyResult:
    """Assemble the default plan, re-rendering with tighter pack budgets on overflow.

    Each iteration renders the declared packs, applies the input budget, and
    stops unless compaction occurred AND the policy's overflow mode is
    "compact" — in which case pack budgets are halved and assembly retries.
    """
    # Default to at most two passes unless the policy raises the cap.
    max_iterations = 2
    if context_spec.policy and getattr(context_spec.policy, "max_iterations", None):
        max_iterations = max(1, int(context_spec.policy.max_iterations))

    pack_scale = 1.0
    last_result: Optional[ContextAssemblyResult] = None
    for _iteration in range(max_iterations):
        system_prompt = base_system_prompt or ""
        pack_outputs = []
        pack_entries = context_spec.packs or []
        pack_budgets = self._allocate_default_pack_budgets(
            pack_entries, context_spec.policy, total_budget_override
        )
        for pack_entry in pack_entries:
            pack_outputs.append(
                self._render_pack(
                    pack_entry.name,
                    template_context,
                    retriever_override,
                    pack_budget=pack_budgets.get(pack_entry.name),
                    policy=context_spec.policy,
                    # After the first overflow, ask packs to halve themselves.
                    tighten_pack_budget=pack_scale < 1.0,
                    weight=pack_entry.weight,
                )
            )

        if pack_outputs:
            system_prompt = self._join_nonempty([system_prompt, *pack_outputs])

        # NOTE: history_messages and user_message are rebound here, so a
        # second iteration starts from the already-trimmed state.
        compacted_prompt, history_messages, user_message, token_count, compacted = (
            self._apply_context_budget(
                system_prompt,
                history_messages,
                user_message,
                context_spec.policy,
            )
        )

        last_result = ContextAssemblyResult(
            system_prompt=compacted_prompt,
            history=history_messages,
            user_message=user_message,
            token_count=token_count,
        )

        # Only retry when compaction fired and the policy opts into it.
        if not compacted or not context_spec.policy:
            break
        if getattr(context_spec.policy, "overflow", None) != "compact":
            break
        pack_scale *= 0.5

    # max_iterations >= 1, so last_result is normally set; the fallback is
    # purely defensive.
    return last_result or ContextAssemblyResult(
        system_prompt="",
        history=history_messages,
        user_message=user_message,
        token_count=0,
    )
|
|
240
|
+
|
|
241
|
+
def _assemble_explicit_with_regeneration(
    self,
    context_spec: ContextDeclaration,
    history_messages: list[dict[str, Any]],
    user_message: str,
    template_context: dict[str, Any],
    retriever_override: Optional[Any],
) -> ContextAssemblyResult:
    """Assemble explicit Context message directives with regeneration loop."""
    # Default to at most two passes unless the policy raises the cap.
    max_iterations = 2
    if context_spec.policy and getattr(context_spec.policy, "max_iterations", None):
        max_iterations = max(1, int(context_spec.policy.max_iterations))

    pack_scale = 1.0
    last_result: Optional[ContextAssemblyResult] = None
    for _iteration in range(max_iterations):
        # Total pack budget is resolved per pass and halved on each retry.
        total_pack_budget = self._resolve_default_pack_total_budget(context_spec.policy)
        if total_pack_budget is not None and pack_scale < 1.0:
            total_pack_budget = max(1, int(total_pack_budget * pack_scale))

        assembled_messages = self._build_messages(
            context_spec,
            history_messages,
            template_context,
            retriever_override,
            context_spec.policy,
            tighten_pack_budget=pack_scale < 1.0,
            total_pack_budget_override=total_pack_budget,
        )

        # Leading system messages become the system prompt; a trailing user
        # directive (if any) becomes the turn's user message.
        system_messages, remaining_messages = self._split_leading_system(assembled_messages)
        system_prompt = self._join_nonempty([m["content"] for m in system_messages])
        resolved_user_message, remaining_messages = self._extract_user_message(
            remaining_messages, user_message
        )

        compacted_prompt, remaining_messages, resolved_user_message, token_count, compacted = (
            self._apply_context_budget(
                system_prompt,
                remaining_messages,
                resolved_user_message,
                context_spec.policy,
            )
        )

        last_result = ContextAssemblyResult(
            system_prompt=compacted_prompt,
            history=remaining_messages,
            user_message=resolved_user_message,
            token_count=token_count,
        )

        # Only retry when compaction fired and the policy opts into it.
        if not compacted or not context_spec.policy:
            break
        if getattr(context_spec.policy, "overflow", None) != "compact":
            break
        pack_scale *= 0.5

    # Defensive fallback; the loop always runs at least once.
    return last_result or ContextAssemblyResult(
        system_prompt="",
        history=[],
        user_message=user_message,
        token_count=0,
    )
|
|
305
|
+
|
|
306
|
+
def _resolve_message_content(
|
|
307
|
+
self,
|
|
308
|
+
directive: SystemMessageSpec | UserMessageSpec | AssistantMessageSpec,
|
|
309
|
+
template_context,
|
|
310
|
+
) -> str:
|
|
311
|
+
"""Resolve message content or templates."""
|
|
312
|
+
if directive.content is not None:
|
|
313
|
+
return directive.content
|
|
314
|
+
return self._resolve_template(directive.template or "", directive.vars, template_context)
|
|
315
|
+
|
|
316
|
+
def _resolve_template(
|
|
317
|
+
self, template_text: str, vars_dict: dict[str, Any], template_context: dict[str, Any]
|
|
318
|
+
) -> str:
|
|
319
|
+
"""Resolve dot-notation templates with context variables."""
|
|
320
|
+
if not template_text:
|
|
321
|
+
return template_text
|
|
322
|
+
|
|
323
|
+
class DotFormatter(Formatter):
|
|
324
|
+
def get_field(self, field_name, args, kwargs):
|
|
325
|
+
path_parts = field_name.split(".")
|
|
326
|
+
current_value = kwargs
|
|
327
|
+
for part in path_parts:
|
|
328
|
+
if isinstance(current_value, dict):
|
|
329
|
+
current_value = current_value.get(part, "")
|
|
330
|
+
else:
|
|
331
|
+
current_value = getattr(current_value, part, "")
|
|
332
|
+
return current_value, field_name
|
|
333
|
+
|
|
334
|
+
merged_context = dict(template_context)
|
|
335
|
+
for key, value in (vars_dict or {}).items():
|
|
336
|
+
merged_context[key] = value
|
|
337
|
+
|
|
338
|
+
formatter = DotFormatter()
|
|
339
|
+
return formatter.format(template_text, **merged_context)
|
|
340
|
+
|
|
341
|
+
def _render_pack(
|
|
342
|
+
self,
|
|
343
|
+
pack_name: str,
|
|
344
|
+
template_context: dict[str, Any],
|
|
345
|
+
retriever_override: Optional[Any],
|
|
346
|
+
pack_budget: Optional[Any],
|
|
347
|
+
policy: Optional[ContextPolicySpec],
|
|
348
|
+
tighten_pack_budget: bool = False,
|
|
349
|
+
weight: Optional[float] = None,
|
|
350
|
+
) -> str:
|
|
351
|
+
"""Render a context pack by name."""
|
|
352
|
+
if pack_name in self._context_registry:
|
|
353
|
+
nested_context = self._context_registry[pack_name]
|
|
354
|
+
return self._render_nested_context_pack(
|
|
355
|
+
nested_context,
|
|
356
|
+
template_context,
|
|
357
|
+
pack_budget,
|
|
358
|
+
policy,
|
|
359
|
+
tighten_pack_budget,
|
|
360
|
+
retriever_override,
|
|
361
|
+
)
|
|
362
|
+
if pack_name in self._retriever_registry:
|
|
363
|
+
return self._render_retriever_pack(
|
|
364
|
+
pack_name,
|
|
365
|
+
template_context,
|
|
366
|
+
retriever_override,
|
|
367
|
+
pack_budget,
|
|
368
|
+
policy,
|
|
369
|
+
tighten_pack_budget,
|
|
370
|
+
weight,
|
|
371
|
+
)
|
|
372
|
+
raise NotImplementedError(
|
|
373
|
+
f"Context pack '{pack_name}' is not available. Only Context or retriever packs are supported."
|
|
374
|
+
)
|
|
375
|
+
|
|
376
|
+
def _render_retriever_pack(
    self,
    retriever_name: str,
    template_context: dict[str, Any],
    retriever_override: Optional[Any],
    pack_budget: Optional[Any],
    policy: Optional[ContextPolicySpec],
    tighten_pack_budget: bool,
    weight: Optional[float] = None,
) -> str:
    """Render a retriever pack for the given retriever.

    Settings are layered in order: built-in defaults, then the retriever's
    own config, then (if the retriever names a known corpus) the corpus
    config; later layers win. A pack token budget, when allocated, is
    converted to a character cap (~4 chars/token) and intersected with any
    configured character cap.

    :raises ValueError: If neither an override nor a default retriever exists.
    """
    retriever_spec = self._retriever_registry[retriever_name]
    config = retriever_spec.config if hasattr(retriever_spec, "config") else {}
    # Resolve the query from the configured template, falling back to the
    # current input's query/message fields.
    query_template = config.get("query") if isinstance(config, dict) else None
    query = ""
    if isinstance(query_template, str):
        query = self._resolve_template(query_template, {}, template_context)
    if not query:
        # assumes template_context["input"] is a dict — TODO confirm callers
        input_context = template_context.get("input", {})
        query = input_context.get("query", "") or input_context.get("message", "")

    # Built-in defaults (lowest-priority layer).
    split = "train"
    maximum_cache_total_items = None
    maximum_cache_total_characters = None
    limit = 3
    offset = 0
    maximum_total_characters = None
    maximum_items_per_source = None
    include_metadata = False
    metadata_fields = None
    backend_id = None
    corpus_root = None
    run_id = None
    recipe_name = None
    recipe_config = None
    corpus_name = getattr(retriever_spec, "corpus", None)
    join_with = "\n\n"

    # Layer 2: retriever config overrides the defaults.
    if isinstance(config, dict):
        split = config.get("split", split)
        limit = config.get("limit", limit)
        offset = config.get("offset", offset)
        maximum_total_characters = config.get(
            "maximum_total_characters", maximum_total_characters
        )
        # Accept both the long and the legacy short key spelling.
        maximum_items_per_source = config.get(
            "maximum_items_per_source",
            config.get("max_items_per_source", maximum_items_per_source),
        )
        include_metadata = config.get("include_metadata", include_metadata)
        metadata_fields = config.get("metadata_fields", metadata_fields)
        backend_id = config.get("backend_id", backend_id)
        run_id = config.get("run_id", run_id)
        recipe_name = config.get("recipe_name", recipe_name)
        recipe_config = config.get("recipe_config", config.get("recipe", recipe_config))
        corpus_name = config.get("corpus", corpus_name)
        join_with = config.get("join_with", join_with)

    # Layer 3: a registered corpus config overrides retriever-level values.
    if corpus_name and corpus_name in self._corpus_registry:
        corpus_spec = self._corpus_registry[corpus_name]
        corpus_config = corpus_spec.config if hasattr(corpus_spec, "config") else {}
        if isinstance(corpus_config, dict):
            split = corpus_config.get("split", split)
            maximum_cache_total_items = corpus_config.get(
                "maximum_cache_total_items", maximum_cache_total_items
            )
            maximum_cache_total_characters = corpus_config.get(
                "maximum_cache_total_characters", maximum_cache_total_characters
            )
            backend_id = corpus_config.get("backend_id", backend_id)
            corpus_root = corpus_config.get(
                "corpus_root",
                corpus_config.get("root", corpus_root),
            )
            run_id = corpus_config.get("run_id", run_id)
            recipe_name = corpus_config.get("recipe_name", recipe_name)
            recipe_config = corpus_config.get(
                "recipe_config",
                corpus_config.get("recipe", recipe_config),
            )

    # Convert the allocated token budget to characters (~4 chars per token)
    # and take the tighter of the two character caps.
    allocated_tokens = self._allocate_pack_budget(pack_budget, policy, weight)
    if allocated_tokens is not None:
        derived_chars = int(allocated_tokens) * 4
        if maximum_total_characters is None:
            maximum_total_characters = derived_chars
        else:
            maximum_total_characters = min(maximum_total_characters, derived_chars)

    # On regeneration passes, halve both the character cap and the item limit.
    if tighten_pack_budget:
        if maximum_total_characters is not None:
            maximum_total_characters = max(1, int(maximum_total_characters * 0.5))
        limit = max(1, int(limit * 0.5))

    retriever_fn = retriever_override or self._default_retriever
    if retriever_fn is None:
        raise ValueError("No retriever override or default retriever configured")

    request = ContextRetrieverRequest(
        query=query,
        offset=offset,
        limit=limit,
        maximum_total_characters=maximum_total_characters,
        max_tokens=allocated_tokens,
        metadata={
            "retriever": retriever_name,
            "corpus": corpus_name,
            "split": split,
            "maximum_cache_total_items": maximum_cache_total_items,
            "maximum_cache_total_characters": maximum_cache_total_characters,
            "maximum_items_per_source": maximum_items_per_source,
            "include_metadata": include_metadata,
            "metadata_fields": metadata_fields,
            "backend_id": backend_id,
            "corpus_root": corpus_root,
            "run_id": run_id,
            "recipe_name": recipe_name,
            "recipe_config": recipe_config,
        },
    )
    context_pack = self._retrieve_with_expansion(
        retriever_fn,
        request,
        policy,
        join_with,
        allocated_tokens,
    )
    return context_pack.text
|
|
504
|
+
|
|
505
|
+
def _retrieve_with_expansion(
    self,
    retriever_fn: Any,
    request: ContextRetrieverRequest,
    policy: Optional[ContextPolicySpec],
    join_with: str,
    target_tokens: Optional[int],
) -> ContextPack:
    """Call the retriever, paging forward until the fill target is reached."""
    expansion = policy.expansion if policy else None
    page_limit = 1 if expansion is None else max(1, expansion.max_pages)
    fill_ratio = None if expansion is None else expansion.min_fill_ratio

    collected: list[ContextPack] = []
    active_request = request
    for _page in range(page_limit):
        pack = retriever_fn(active_request)
        if pack is None:
            break  # retriever produced nothing for this page
        collected.append(pack)

        # Single-page mode, or nothing to fill toward: stop after one page.
        if page_limit <= 1 or target_tokens is None:
            break
        # A short page means the underlying source is exhausted.
        if pack.evidence_count < active_request.limit:
            break

        # Stop once the merged text meets the (possibly ratio-scaled) target.
        merged = self._merge_context_packs(collected, join_with=join_with)
        ratio = 1.0 if fill_ratio is None else float(fill_ratio)
        if self._estimate_tokens(merged.text) >= int(target_tokens * ratio):
            break

        active_request = active_request.model_copy(
            update={"offset": active_request.offset + active_request.limit}
        )

    return self._merge_context_packs(collected, join_with=join_with)
|
|
544
|
+
|
|
545
|
+
def _merge_context_packs(self, packs: Iterable[ContextPack], join_with: str) -> ContextPack:
    """Flatten retrieved packs into one pack, synthesizing blocks for bare text."""
    merged_blocks: list[ContextPackBlock] = []
    for page_number, pack in enumerate(packs, start=1):
        if pack.blocks:
            merged_blocks.extend(pack.blocks)
        elif pack.text:
            # A text-only pack contributes one synthetic block per page.
            merged_blocks.append(
                ContextPackBlock(
                    evidence_item_id=f"page-{page_number}",
                    text=pack.text,
                    metadata=None,
                )
            )
    if not merged_blocks:
        return ContextPack(text="", evidence_count=0, blocks=[])
    combined = join_with.join(block.text for block in merged_blocks)
    return ContextPack(text=combined, evidence_count=len(merged_blocks), blocks=merged_blocks)
|
|
563
|
+
|
|
564
|
+
def _apply_compaction_policy(
|
|
565
|
+
self, text: str, policy: Any, max_tokens_override: Optional[int] = None
|
|
566
|
+
) -> str:
|
|
567
|
+
if not policy or not getattr(policy, "input_budget", None):
|
|
568
|
+
return text
|
|
569
|
+
max_tokens = max_tokens_override
|
|
570
|
+
if max_tokens is None:
|
|
571
|
+
budget = policy.input_budget
|
|
572
|
+
max_tokens = getattr(budget, "max_tokens", None)
|
|
573
|
+
if max_tokens is None:
|
|
574
|
+
return text
|
|
575
|
+
tokens = text.split()
|
|
576
|
+
if len(tokens) <= max_tokens:
|
|
577
|
+
return text
|
|
578
|
+
if getattr(policy, "overflow", None) != "compact":
|
|
579
|
+
return text
|
|
580
|
+
|
|
581
|
+
compactor = self._resolve_compactor(policy)
|
|
582
|
+
return compactor.compact(CompactionRequest(text=text, max_tokens=max_tokens))
|
|
583
|
+
|
|
584
|
+
def _estimate_tokens(self, text: str) -> int:
|
|
585
|
+
return len(text.split())
|
|
586
|
+
|
|
587
|
+
def _apply_context_budget(
    self,
    system_prompt: str,
    history: list[dict[str, Any]],
    user_message: str,
    policy: Any,
) -> tuple[str, list[dict[str, Any]], str, int, bool]:
    """Fit the assembled context into the policy's input budget.

    Returns ``(system_prompt, history, user_message, token_count, compacted)``.
    On overflow with overflow mode "compact", oldest history messages are
    dropped first; if that is not enough, the system prompt is compacted to
    the budget left after history and the user message.
    """
    # No policy or no budget configured: report the estimate, change nothing.
    if not policy or not getattr(policy, "input_budget", None):
        token_count = self._estimate_total_tokens(system_prompt, history, user_message)
        return system_prompt, history, user_message, token_count, False

    budget = policy.input_budget
    max_tokens = getattr(budget, "max_tokens", None)
    if max_tokens is None:
        token_count = self._estimate_total_tokens(system_prompt, history, user_message)
        return system_prompt, history, user_message, token_count, False

    token_count = self._estimate_total_tokens(system_prompt, history, user_message)
    if token_count <= max_tokens:
        return system_prompt, history, user_message, token_count, False

    # Over budget, but the policy did not opt into compaction.
    if getattr(policy, "overflow", None) != "compact":
        return system_prompt, history, user_message, token_count, False

    # Stage 1: drop oldest history entries until the total fits (or history
    # is empty). Work on a copy so the caller's list is untouched.
    trimmed_history = list(history)
    compacted = False
    while trimmed_history and token_count > max_tokens:
        trimmed_history.pop(0)
        token_count = self._estimate_total_tokens(system_prompt, trimmed_history, user_message)
        compacted = True

    if token_count <= max_tokens:
        return system_prompt, trimmed_history, user_message, token_count, compacted

    # Stage 2: compact the system prompt into whatever budget remains after
    # the (now-trimmed) history and user message.
    remaining_budget = max_tokens - self._estimate_total_tokens(
        "", trimmed_history, user_message
    )
    if remaining_budget < 0:
        remaining_budget = 0
    compacted_prompt = self._apply_compaction_policy(
        system_prompt,
        policy,
        max_tokens_override=remaining_budget,
    )
    if compacted_prompt != system_prompt:
        compacted = True
    token_count = self._estimate_total_tokens(compacted_prompt, trimmed_history, user_message)
    return compacted_prompt, trimmed_history, user_message, token_count, compacted
|
|
635
|
+
|
|
636
|
+
def _allocate_pack_budget(
    self,
    pack_budget: Optional[Any],
    policy: Optional[ContextPolicySpec],
    weight: Optional[float],
) -> Optional[int]:
    """Resolve a single pack's token budget.

    Resolution order: an explicit pack budget object (max_tokens, then
    ratio of the policy input budget), an explicit pack budget dict (same
    keys), then the policy-level pack_budget defaults scaled by *weight*.
    Returns None when no budget can be determined.
    """
    # Object-style budget: max_tokens wins outright, ratio is relative to
    # the policy's input budget.
    if pack_budget is not None and hasattr(pack_budget, "max_tokens"):
        max_tokens = getattr(pack_budget, "max_tokens", None)
        if max_tokens is not None:
            return int(max_tokens)
        ratio = getattr(pack_budget, "ratio", None)
        if ratio is not None:
            return self._resolve_ratio_budget(ratio, policy)
    # Dict-style budget (e.g. produced by the default allocators).
    if pack_budget and isinstance(pack_budget, dict):
        max_tokens = pack_budget.get("max_tokens")
        if max_tokens is not None:
            return int(max_tokens)
        ratio = pack_budget.get("ratio")
        if ratio is not None:
            return self._resolve_ratio_budget(ratio, policy)

    # Fallback: policy-level defaults, optionally weighted per pack.
    if policy and getattr(policy, "pack_budget", None):
        pack_budget_spec = policy.pack_budget
        base_tokens = getattr(pack_budget_spec, "default_max_tokens", None)
        if base_tokens is None:
            # Derive the base from default_ratio x input budget, if both exist.
            base_ratio = getattr(pack_budget_spec, "default_ratio", None)
            if base_ratio is None:
                return None
            if not getattr(policy, "input_budget", None):
                return None
            input_budget = policy.input_budget
            max_tokens = getattr(input_budget, "max_tokens", None)
            if max_tokens is None:
                return None
            base_tokens = int(max_tokens * base_ratio)
        if weight is None:
            return int(base_tokens)
        return int(base_tokens * weight)

    return None
|
|
676
|
+
|
|
677
|
+
def _resolve_ratio_budget(
|
|
678
|
+
self, ratio: float, policy: Optional[ContextPolicySpec]
|
|
679
|
+
) -> Optional[int]:
|
|
680
|
+
if ratio is None or not policy or not getattr(policy, "input_budget", None):
|
|
681
|
+
return None
|
|
682
|
+
input_budget = policy.input_budget
|
|
683
|
+
max_tokens = getattr(input_budget, "max_tokens", None)
|
|
684
|
+
if max_tokens is None:
|
|
685
|
+
return None
|
|
686
|
+
return int(max_tokens * float(ratio))
|
|
687
|
+
|
|
688
|
+
def _allocate_directive_budgets(
    self,
    directives: list[ContextMessageSpec],
    policy: Optional[ContextPolicySpec],
    total_budget_override: Optional[int],
) -> dict[int, dict[str, Any]]:
    """Split the total pack budget across ContextInsertSpec directives.

    Allocation is proportional to each directive's weight, visiting
    directives in descending (priority, weight) order; any leftover tokens
    go to the highest-priority directive. Keys are ``id(directive)`` —
    valid only for the lifetime of this call's directive objects.
    """
    if not policy or not getattr(policy, "pack_budget", None):
        return {}
    pack_budget_spec = policy.pack_budget
    # Resolve the total: override, then default_max_tokens, then
    # default_ratio x input budget.
    total_budget = total_budget_override
    if total_budget is None:
        total_budget = getattr(pack_budget_spec, "default_max_tokens", None)
    if total_budget is None:
        base_ratio = getattr(pack_budget_spec, "default_ratio", None)
        if base_ratio is None:
            return {}
        if not getattr(policy, "input_budget", None):
            return {}
        input_budget = policy.input_budget
        max_tokens = getattr(input_budget, "max_tokens", None)
        if max_tokens is None:
            return {}
        total_budget = int(max_tokens * base_ratio)

    # Only pack-insert directives consume pack budget.
    pack_directives = [
        directive for directive in directives if isinstance(directive, ContextInsertSpec)
    ]
    if not pack_directives:
        return {}

    # Higher priority, then higher weight, is served first.
    sorted_directives = sorted(
        pack_directives,
        key=lambda directive: (
            -(directive.priority or 0),
            -(directive.weight or 1.0),
        ),
    )
    total_weight = sum(directive.weight or 1.0 for directive in sorted_directives)
    allocations = {}
    remaining_budget = int(total_budget)

    for directive in sorted_directives:
        weight = directive.weight or 1.0
        allocation = int((total_budget * weight) / total_weight)
        # Guarantee at least one token, but never exceed what is left.
        if allocation <= 0:
            allocation = 1
        if allocation > remaining_budget:
            allocation = remaining_budget
        allocations[id(directive)] = {"max_tokens": allocation}
        remaining_budget -= allocation
        if remaining_budget <= 0:
            break

    # Rounding leftovers go to the first (highest-priority) directive.
    if remaining_budget > 0 and sorted_directives:
        allocations[id(sorted_directives[0])]["max_tokens"] += remaining_budget

    return allocations
|
|
745
|
+
|
|
746
|
+
def _allocate_default_pack_budgets(
    self,
    pack_entries: list[Any],
    policy: Optional[ContextPolicySpec],
    total_budget_override: Optional[int],
) -> dict[str, dict[str, Any]]:
    """Split the default pack token budget across *pack_entries* by weight.

    Entries are visited in descending (priority, weight) order. Each entry
    receives a weight-proportional share of the total budget — at least one
    token, and never more than the tokens still remaining. Rounding
    leftovers are granted to the highest-priority entry.

    :param pack_entries: Objects exposing ``name`` plus optional
        ``priority`` / ``weight`` attributes.
    :param policy: Policy whose ``pack_budget`` (and possibly
        ``input_budget``) determines the total budget.
    :param total_budget_override: Explicit total budget that bypasses
        policy-based resolution when not ``None``.
    :return: Mapping of entry name to ``{"max_tokens": allocation}``, or an
        empty dict when no budget can be resolved or there are no entries.
    """
    if not policy or not getattr(policy, "pack_budget", None):
        return {}
    # Delegate budget resolution to the shared helper instead of
    # duplicating the default_max_tokens / default_ratio fallback inline.
    total_budget = total_budget_override
    if total_budget is None:
        total_budget = self._resolve_default_pack_total_budget(policy)
    if total_budget is None or not pack_entries:
        return {}

    entries = sorted(
        pack_entries,
        key=lambda entry: (
            -(getattr(entry, "priority", None) or 0),
            -(getattr(entry, "weight", None) or 1.0),
        ),
    )
    total_weight = sum(getattr(entry, "weight", None) or 1.0 for entry in entries)
    allocations: dict[str, dict[str, Any]] = {}
    remaining_budget = int(total_budget)

    for entry in entries:
        weight = getattr(entry, "weight", None) or 1.0
        # Proportional share, clamped to [1, remaining_budget].
        allocation = int((total_budget * weight) / total_weight)
        if allocation <= 0:
            allocation = 1
        if allocation > remaining_budget:
            allocation = remaining_budget
        allocations[entry.name] = {"max_tokens": allocation}
        remaining_budget -= allocation
        if remaining_budget <= 0:
            break

    if remaining_budget > 0 and entries:
        # Hand integer-division leftovers to the top-priority entry.
        allocations[entries[0].name]["max_tokens"] += remaining_budget

    return allocations
|
|
800
|
+
|
|
801
|
+
def _resolve_default_pack_total_budget(
    self, policy: Optional[ContextPolicySpec]
) -> Optional[int]:
    """Resolve the policy-wide default token budget for context packs.

    An explicit ``default_max_tokens`` on the policy's pack budget wins;
    otherwise ``default_ratio`` is applied against the input budget's
    ``max_tokens``. Returns ``None`` when no budget can be derived.
    """
    spec = getattr(policy, "pack_budget", None) if policy else None
    if not spec:
        return None
    explicit = getattr(spec, "default_max_tokens", None)
    if explicit is not None:
        return int(explicit)
    ratio = getattr(spec, "default_ratio", None)
    if ratio is None:
        return None
    input_budget = getattr(policy, "input_budget", None)
    if not input_budget:
        return None
    ceiling = getattr(input_budget, "max_tokens", None)
    if ceiling is None:
        return None
    return int(ceiling * ratio)
|
|
820
|
+
|
|
821
|
+
def _extract_pack_budget_max_tokens(
    self, pack_budget: Optional[Any], policy: Optional[ContextPolicySpec]
) -> Optional[int]:
    """Resolve an absolute per-pack token cap from a budget specification.

    *pack_budget* may be a dict or an object exposing ``max_tokens`` /
    ``ratio``. An explicit ``max_tokens`` takes precedence; a ``ratio`` is
    converted to tokens via ``_resolve_ratio_budget`` against *policy*.
    Returns ``None`` when no cap can be derived.
    """
    if pack_budget is None:
        return None
    if isinstance(pack_budget, dict):
        max_tokens = pack_budget.get("max_tokens")
        if max_tokens is not None:
            return int(max_tokens)
        ratio = pack_budget.get("ratio")
        if ratio is not None:
            # Fractional budget: resolved against the policy's input budget.
            return self._resolve_ratio_budget(ratio, policy)
    if hasattr(pack_budget, "max_tokens"):
        max_tokens = getattr(pack_budget, "max_tokens", None)
        if max_tokens is not None:
            return int(max_tokens)
    # Attribute-style fallback mirrors the dict path: try ratio last.
    ratio = getattr(pack_budget, "ratio", None)
    if ratio is not None:
        return self._resolve_ratio_budget(ratio, policy)
    return None
|
|
841
|
+
|
|
842
|
+
def _estimate_total_tokens(
    self, system_prompt: str, history: list[dict[str, Any]], user_message: str
) -> int:
    """Estimate the combined token count of prompt, history, and user turn."""
    estimate = self._estimate_tokens
    history_tokens = sum(
        estimate(message.get("content", "")) for message in history
    )
    return estimate(system_prompt) + history_tokens + estimate(user_message)
|
|
850
|
+
|
|
851
|
+
def _resolve_compactor(self, policy: ContextPolicySpec):
    """Build the compactor configured on *policy*, defaulting to truncation.

    A string config is looked up in the compactor registry (raising
    ``ValueError`` when undefined); a dict config is built directly;
    anything else yields a plain ``TruncateCompactor``.
    """
    compactor_config = getattr(policy, "compactor", None)

    if isinstance(compactor_config, dict):
        return build_compactor(compactor_config)

    if isinstance(compactor_config, str):
        compactor_spec = self._compactor_registry.get(compactor_config)
        if compactor_spec is None:
            raise ValueError(f"Compactor '{compactor_config}' not defined")
        config = getattr(compactor_spec, "config", {})
        if not isinstance(config, dict):
            config = {}
        return build_compactor(config)

    return TruncateCompactor()
|
|
869
|
+
|
|
870
|
+
def _build_messages(
    self,
    context_spec: ContextDeclaration,
    history_messages: list[dict[str, Any]],
    template_context: dict[str, Any],
    retriever_override: Optional[Any],
    policy: Optional[ContextPolicySpec],
    tighten_pack_budget: bool = False,
    total_pack_budget_override: Optional[int] = None,
) -> list[dict[str, Any]]:
    """Assemble chat messages from the declaration's message directives.

    History directives splice in *history_messages*; context-insert
    directives render their pack (under a per-directive budget) as a system
    message and expose the rendered text under ``context[<name>]`` for later
    template renders; system/user/assistant directives render their content
    templates into messages with the matching role.

    :return: The assembled list of ``{"role": ..., "content": ...}`` dicts.
    """
    assembled_messages: list[dict[str, Any]] = []
    # Copy so rendered pack contents can be exposed to subsequent template
    # renders without mutating the caller's context.
    resolved_context = dict(template_context)
    context_values = dict(resolved_context.get("context", {}))
    resolved_context["context"] = context_values
    directive_budgets = self._allocate_directive_budgets(
        context_spec.messages or [], policy, total_pack_budget_override
    )
    # Spec type -> chat role; checked in this fixed order.
    role_for_spec = (
        (SystemMessageSpec, "system"),
        (UserMessageSpec, "user"),
        (AssistantMessageSpec, "assistant"),
    )
    for directive in context_spec.messages or []:
        if isinstance(directive, HistoryInsertSpec):
            assembled_messages.extend(history_messages)
            continue
        if isinstance(directive, ContextInsertSpec):
            override_budget = directive_budgets.get(id(directive))
            pack_content = self._render_pack(
                directive.name,
                resolved_context,
                retriever_override,
                override_budget or directive.budget,
                policy,
                tighten_pack_budget,
                directive.weight,
            )
            # Always expose the rendered pack (or "") to later renders.
            # (The former else-branch re-assigning "" was redundant.)
            context_values[directive.name] = pack_content or ""
            if pack_content:
                assembled_messages.append({"role": "system", "content": pack_content})
            continue
        for spec_type, role in role_for_spec:
            if isinstance(directive, spec_type):
                assembled_messages.append(
                    {
                        "role": role,
                        "content": self._resolve_message_content(directive, resolved_context),
                    }
                )
                break
    return assembled_messages
|
|
933
|
+
|
|
934
|
+
def _render_nested_context_pack(
    self,
    context_spec: ContextDeclaration,
    template_context: dict[str, Any],
    pack_budget: Optional[Any],
    policy: Optional[ContextPolicySpec],
    tighten_pack_budget: bool,
    retriever_override: Optional[Any],
) -> str:
    """Render another Context declaration as a pack string.

    With no explicit messages, the nested context is assembled via the
    default pipeline and its system prompt is compacted to the pack budget.
    Otherwise the declaration's directives are rendered (history() is
    forbidden inside nested packs) and, when the policy's overflow mode is
    "compact", the per-pack budget is repeatedly halved — up to the
    policy's max_iterations — until the raw render fits the token cap.
    """
    # The nested declaration's own policy wins for assembly; the caller's
    # policy wins for compaction decisions when present.
    nested_policy = context_spec.policy or policy
    compaction_policy = policy or nested_policy
    total_budget_override = self._extract_pack_budget_max_tokens(pack_budget, policy)

    if context_spec.messages is None:
        # No directive list: assemble via the default pipeline with empty
        # prompt/history/user inputs and use its system prompt as the pack.
        nested_result = self._assemble_default_with_regeneration(
            context_spec,
            base_system_prompt="",
            history_messages=[],
            user_message="",
            template_context=template_context,
            retriever_override=retriever_override,
            total_budget_override=total_budget_override,
        )
        pack_text = nested_result.system_prompt
        return self._compact_pack_text(
            pack_text, pack_budget, compaction_policy, tighten_pack_budget
        )

    if any(isinstance(directive, HistoryInsertSpec) for directive in context_spec.messages):
        raise ValueError("Nested context packs cannot include history()")

    max_iterations = 1
    if compaction_policy and getattr(compaction_policy, "max_iterations", None):
        max_iterations = max(1, int(compaction_policy.max_iterations))

    # pack_scale shrinks the nested budget on each retry; last_text always
    # holds the most recent compacted render so we return something even
    # when the loop never fits under the cap.
    pack_scale = 1.0
    last_text = ""
    max_tokens = self._extract_pack_budget_max_tokens(pack_budget, compaction_policy)
    for _iteration in range(max_iterations):
        scaled_override = total_budget_override
        if scaled_override is not None and pack_scale < 1.0:
            scaled_override = max(1, int(scaled_override * pack_scale))

        assembled_messages = self._build_messages(
            context_spec,
            history_messages=[],
            template_context=template_context,
            retriever_override=retriever_override,
            policy=nested_policy,
            # Once we start shrinking, also tighten per-pack budgets.
            tighten_pack_budget=tighten_pack_budget or pack_scale < 1.0,
            total_pack_budget_override=scaled_override,
        )
        rendered_segments = [message.get("content", "") for message in assembled_messages]
        pack_text = self._join_nonempty(rendered_segments)
        # Overflow is judged on the raw (pre-compaction) render.
        raw_token_count = self._estimate_tokens(pack_text)
        last_text = self._compact_pack_text(
            pack_text, pack_budget, compaction_policy, tighten_pack_budget
        )

        if max_tokens is None or raw_token_count <= max_tokens:
            break
        if not compaction_policy or getattr(compaction_policy, "overflow", None) != "compact":
            break
        # Still over budget and compaction requested: halve and retry.
        pack_scale *= 0.5

    return last_text
|
|
1001
|
+
|
|
1002
|
+
def _compact_pack_text(
    self,
    text: str,
    pack_budget: Optional[Any],
    policy: Optional[ContextPolicySpec],
    tighten_pack_budget: bool,
) -> str:
    """Compact *text* down to the applicable pack token budget.

    The cap comes from *pack_budget* (dict or object ``max_tokens``) or,
    failing that, the policy's default pack budget. Without a cap the text
    is returned untouched; ``tighten_pack_budget`` halves the cap first.
    """
    limit = None
    if isinstance(pack_budget, dict):
        limit = pack_budget.get("max_tokens")
    elif pack_budget is not None and hasattr(pack_budget, "max_tokens"):
        limit = pack_budget.max_tokens

    if limit is None and policy:
        spec = getattr(policy, "pack_budget", None)
        if spec:
            limit = getattr(spec, "default_max_tokens", None)

    if limit is None:
        return text

    if tighten_pack_budget:
        limit = max(1, int(limit * 0.5))

    compactor = self._resolve_compactor(policy) if policy else TruncateCompactor()
    return compactor.compact(CompactionRequest(text=text, max_tokens=int(limit)))
|
|
1028
|
+
|
|
1029
|
+
def _split_leading_system(
    self, messages: list[dict[str, Any]]
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Partition *messages* into the leading run of system messages and the rest."""
    boundary = 0
    for message in messages:
        if message.get("role") != "system":
            break
        boundary += 1
    return messages[:boundary], messages[boundary:]
|
|
1043
|
+
|
|
1044
|
+
def _extract_user_message(
    self, messages: list[dict[str, Any]], fallback_message: str
) -> tuple[str, list[dict[str, Any]]]:
    """Pop the final user message, returning it plus the remaining history.

    When no user message exists, *fallback_message* is returned with the
    original list untouched.
    """
    for index in range(len(messages) - 1, -1, -1):
        if messages[index].get("role") == "user":
            content = messages[index].get("content", "")
            return content, messages[:index] + messages[index + 1 :]
    return fallback_message, messages
|
|
1058
|
+
|
|
1059
|
+
def _join_nonempty(self, parts: Iterable[str]) -> str:
    """Join truthy segments with blank-line separators, skipping empties."""
    return "\n\n".join(filter(None, parts))
|