biblicus 0.16.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1060 @@
1
+ """
2
+ Context assembly utilities for the Biblicus Context Engine.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass
8
+ from string import Formatter
9
+ from typing import Any, Iterable, Optional
10
+
11
+ from biblicus.context import ContextPack, ContextPackBlock
12
+ from biblicus.context_engine.compaction import CompactionRequest, TruncateCompactor, build_compactor
13
+ from biblicus.context_engine.models import (
14
+ AssistantMessageSpec,
15
+ ContextDeclaration,
16
+ ContextInsertSpec,
17
+ ContextMessageSpec,
18
+ ContextPolicySpec,
19
+ ContextRetrieverRequest,
20
+ HistoryInsertSpec,
21
+ SystemMessageSpec,
22
+ UserMessageSpec,
23
+ )
24
+
25
+
26
@dataclass
class ContextAssemblyResult:
    """
    Result of assembling a Context declaration for one agent turn.

    :ivar system_prompt: Final system prompt text.
    :vartype system_prompt: str
    :ivar history: Chat history messages to send with the turn.
    :vartype history: list[dict[str, Any]]
    :ivar user_message: Resolved user message text.
    :vartype user_message: str
    :ivar token_count: Rough token estimate for the assembled content.
    :vartype token_count: int
    """

    system_prompt: str
    history: list[dict[str, Any]]
    user_message: str
    token_count: int = 0
45
+
46
+
47
+ class ContextAssembler:
48
+ """
49
+ Assemble Context declarations into system prompts, history, and user messages.
50
+
51
+ :param context_registry: Context declarations indexed by name.
52
+ :type context_registry: dict[str, ContextDeclaration]
53
+ :param retriever_registry: Retriever declarations indexed by name.
54
+ :type retriever_registry: dict[str, Any] or None
55
+ :param corpus_registry: Corpus declarations indexed by name.
56
+ :type corpus_registry: dict[str, Any] or None
57
+ :param compactor_registry: Compactor declarations indexed by name.
58
+ :type compactor_registry: dict[str, Any] or None
59
+ :param default_retriever: Default retriever callable when no override is supplied.
60
+ :type default_retriever: callable or None
61
+ """
62
+
63
+ def __init__(
64
+ self,
65
+ context_registry: dict[str, ContextDeclaration],
66
+ retriever_registry: Optional[dict[str, Any]] = None,
67
+ corpus_registry: Optional[dict[str, Any]] = None,
68
+ compactor_registry: Optional[dict[str, Any]] = None,
69
+ default_retriever: Optional[Any] = None,
70
+ ):
71
+ self._context_registry = context_registry
72
+ self._retriever_registry = retriever_registry or {}
73
+ self._corpus_registry = corpus_registry or {}
74
+ self._compactor_registry = compactor_registry or {}
75
+ self._default_retriever = default_retriever
76
+
77
+ def assemble(
78
+ self,
79
+ context_name: str,
80
+ base_system_prompt: str,
81
+ history_messages: list[dict[str, Any]],
82
+ user_message: Optional[str],
83
+ template_context: dict[str, Any],
84
+ retriever_override: Optional[Any] = None,
85
+ ) -> ContextAssemblyResult:
86
+ """
87
+ Assemble a Context declaration into prompt components.
88
+
89
+ :param context_name: Name of the Context declaration.
90
+ :type context_name: str
91
+ :param base_system_prompt: Default system prompt from agent config.
92
+ :type base_system_prompt: str
93
+ :param history_messages: Current agent history messages.
94
+ :type history_messages: list[dict[str, Any]]
95
+ :param user_message: Current user message for this turn.
96
+ :type user_message: str or None
97
+ :param template_context: Template variables for resolution.
98
+ :type template_context: dict[str, Any]
99
+ :param retriever_override: Optional retriever override callable.
100
+ :type retriever_override: callable or None
101
+ :return: Assembled prompt components.
102
+ :rtype: ContextAssemblyResult
103
+ :raises ValueError: If the context declaration is not found.
104
+ """
105
+ if context_name not in self._context_registry:
106
+ raise ValueError(f"Context '{context_name}' not defined")
107
+
108
+ context_spec = self._context_registry[context_name]
109
+ if context_spec.messages is None:
110
+ return self._assemble_default_with_regeneration(
111
+ context_spec,
112
+ base_system_prompt,
113
+ history_messages,
114
+ user_message or "",
115
+ template_context,
116
+ retriever_override,
117
+ )
118
+
119
+ return self._assemble_explicit_with_regeneration(
120
+ context_spec,
121
+ history_messages,
122
+ user_message or "",
123
+ template_context,
124
+ retriever_override,
125
+ )
126
+
127
+ def _assemble_default(
128
+ self,
129
+ context_spec: ContextDeclaration,
130
+ base_system_prompt: str,
131
+ history_messages: list[dict[str, Any]],
132
+ user_message: str,
133
+ template_context: dict[str, Any],
134
+ total_budget_override: Optional[int] = None,
135
+ ) -> ContextAssemblyResult:
136
+ """Assemble the default Context plan when messages are omitted."""
137
+ system_prompt = base_system_prompt or ""
138
+ pack_outputs = []
139
+ pack_entries = context_spec.packs or []
140
+ pack_budgets = self._allocate_default_pack_budgets(
141
+ pack_entries, context_spec.policy, total_budget_override
142
+ )
143
+ for pack_entry in pack_entries:
144
+ pack_outputs.append(
145
+ self._render_pack(
146
+ pack_entry.name,
147
+ template_context,
148
+ retriever_override=None,
149
+ pack_budget=pack_budgets.get(pack_entry.name),
150
+ policy=context_spec.policy,
151
+ weight=pack_entry.weight,
152
+ )
153
+ )
154
+
155
+ if pack_outputs:
156
+ system_prompt = self._join_nonempty([system_prompt, *pack_outputs])
157
+
158
+ compacted_prompt, history_messages, user_message, token_count, compacted = (
159
+ self._apply_context_budget(
160
+ system_prompt,
161
+ history_messages,
162
+ user_message,
163
+ context_spec.policy,
164
+ )
165
+ )
166
+ return ContextAssemblyResult(
167
+ system_prompt=compacted_prompt,
168
+ history=history_messages,
169
+ user_message=user_message,
170
+ token_count=token_count,
171
+ )
172
+
173
+ def _assemble_default_with_regeneration(
174
+ self,
175
+ context_spec: ContextDeclaration,
176
+ base_system_prompt: str,
177
+ history_messages: list[dict[str, Any]],
178
+ user_message: str,
179
+ template_context: dict[str, Any],
180
+ retriever_override: Optional[Any],
181
+ total_budget_override: Optional[int] = None,
182
+ ) -> ContextAssemblyResult:
183
+ max_iterations = 2
184
+ if context_spec.policy and getattr(context_spec.policy, "max_iterations", None):
185
+ max_iterations = max(1, int(context_spec.policy.max_iterations))
186
+
187
+ pack_scale = 1.0
188
+ last_result: Optional[ContextAssemblyResult] = None
189
+ for _iteration in range(max_iterations):
190
+ system_prompt = base_system_prompt or ""
191
+ pack_outputs = []
192
+ pack_entries = context_spec.packs or []
193
+ pack_budgets = self._allocate_default_pack_budgets(
194
+ pack_entries, context_spec.policy, total_budget_override
195
+ )
196
+ for pack_entry in pack_entries:
197
+ pack_outputs.append(
198
+ self._render_pack(
199
+ pack_entry.name,
200
+ template_context,
201
+ retriever_override,
202
+ pack_budget=pack_budgets.get(pack_entry.name),
203
+ policy=context_spec.policy,
204
+ tighten_pack_budget=pack_scale < 1.0,
205
+ weight=pack_entry.weight,
206
+ )
207
+ )
208
+
209
+ if pack_outputs:
210
+ system_prompt = self._join_nonempty([system_prompt, *pack_outputs])
211
+
212
+ compacted_prompt, history_messages, user_message, token_count, compacted = (
213
+ self._apply_context_budget(
214
+ system_prompt,
215
+ history_messages,
216
+ user_message,
217
+ context_spec.policy,
218
+ )
219
+ )
220
+
221
+ last_result = ContextAssemblyResult(
222
+ system_prompt=compacted_prompt,
223
+ history=history_messages,
224
+ user_message=user_message,
225
+ token_count=token_count,
226
+ )
227
+
228
+ if not compacted or not context_spec.policy:
229
+ break
230
+ if getattr(context_spec.policy, "overflow", None) != "compact":
231
+ break
232
+ pack_scale *= 0.5
233
+
234
+ return last_result or ContextAssemblyResult(
235
+ system_prompt="",
236
+ history=history_messages,
237
+ user_message=user_message,
238
+ token_count=0,
239
+ )
240
+
241
+ def _assemble_explicit_with_regeneration(
242
+ self,
243
+ context_spec: ContextDeclaration,
244
+ history_messages: list[dict[str, Any]],
245
+ user_message: str,
246
+ template_context: dict[str, Any],
247
+ retriever_override: Optional[Any],
248
+ ) -> ContextAssemblyResult:
249
+ """Assemble explicit Context message directives with regeneration loop."""
250
+ max_iterations = 2
251
+ if context_spec.policy and getattr(context_spec.policy, "max_iterations", None):
252
+ max_iterations = max(1, int(context_spec.policy.max_iterations))
253
+
254
+ pack_scale = 1.0
255
+ last_result: Optional[ContextAssemblyResult] = None
256
+ for _iteration in range(max_iterations):
257
+ total_pack_budget = self._resolve_default_pack_total_budget(context_spec.policy)
258
+ if total_pack_budget is not None and pack_scale < 1.0:
259
+ total_pack_budget = max(1, int(total_pack_budget * pack_scale))
260
+
261
+ assembled_messages = self._build_messages(
262
+ context_spec,
263
+ history_messages,
264
+ template_context,
265
+ retriever_override,
266
+ context_spec.policy,
267
+ tighten_pack_budget=pack_scale < 1.0,
268
+ total_pack_budget_override=total_pack_budget,
269
+ )
270
+
271
+ system_messages, remaining_messages = self._split_leading_system(assembled_messages)
272
+ system_prompt = self._join_nonempty([m["content"] for m in system_messages])
273
+ resolved_user_message, remaining_messages = self._extract_user_message(
274
+ remaining_messages, user_message
275
+ )
276
+
277
+ compacted_prompt, remaining_messages, resolved_user_message, token_count, compacted = (
278
+ self._apply_context_budget(
279
+ system_prompt,
280
+ remaining_messages,
281
+ resolved_user_message,
282
+ context_spec.policy,
283
+ )
284
+ )
285
+
286
+ last_result = ContextAssemblyResult(
287
+ system_prompt=compacted_prompt,
288
+ history=remaining_messages,
289
+ user_message=resolved_user_message,
290
+ token_count=token_count,
291
+ )
292
+
293
+ if not compacted or not context_spec.policy:
294
+ break
295
+ if getattr(context_spec.policy, "overflow", None) != "compact":
296
+ break
297
+ pack_scale *= 0.5
298
+
299
+ return last_result or ContextAssemblyResult(
300
+ system_prompt="",
301
+ history=[],
302
+ user_message=user_message,
303
+ token_count=0,
304
+ )
305
+
306
+ def _resolve_message_content(
307
+ self,
308
+ directive: SystemMessageSpec | UserMessageSpec | AssistantMessageSpec,
309
+ template_context,
310
+ ) -> str:
311
+ """Resolve message content or templates."""
312
+ if directive.content is not None:
313
+ return directive.content
314
+ return self._resolve_template(directive.template or "", directive.vars, template_context)
315
+
316
+ def _resolve_template(
317
+ self, template_text: str, vars_dict: dict[str, Any], template_context: dict[str, Any]
318
+ ) -> str:
319
+ """Resolve dot-notation templates with context variables."""
320
+ if not template_text:
321
+ return template_text
322
+
323
+ class DotFormatter(Formatter):
324
+ def get_field(self, field_name, args, kwargs):
325
+ path_parts = field_name.split(".")
326
+ current_value = kwargs
327
+ for part in path_parts:
328
+ if isinstance(current_value, dict):
329
+ current_value = current_value.get(part, "")
330
+ else:
331
+ current_value = getattr(current_value, part, "")
332
+ return current_value, field_name
333
+
334
+ merged_context = dict(template_context)
335
+ for key, value in (vars_dict or {}).items():
336
+ merged_context[key] = value
337
+
338
+ formatter = DotFormatter()
339
+ return formatter.format(template_text, **merged_context)
340
+
341
+ def _render_pack(
342
+ self,
343
+ pack_name: str,
344
+ template_context: dict[str, Any],
345
+ retriever_override: Optional[Any],
346
+ pack_budget: Optional[Any],
347
+ policy: Optional[ContextPolicySpec],
348
+ tighten_pack_budget: bool = False,
349
+ weight: Optional[float] = None,
350
+ ) -> str:
351
+ """Render a context pack by name."""
352
+ if pack_name in self._context_registry:
353
+ nested_context = self._context_registry[pack_name]
354
+ return self._render_nested_context_pack(
355
+ nested_context,
356
+ template_context,
357
+ pack_budget,
358
+ policy,
359
+ tighten_pack_budget,
360
+ retriever_override,
361
+ )
362
+ if pack_name in self._retriever_registry:
363
+ return self._render_retriever_pack(
364
+ pack_name,
365
+ template_context,
366
+ retriever_override,
367
+ pack_budget,
368
+ policy,
369
+ tighten_pack_budget,
370
+ weight,
371
+ )
372
+ raise NotImplementedError(
373
+ f"Context pack '{pack_name}' is not available. Only Context or retriever packs are supported."
374
+ )
375
+
376
+ def _render_retriever_pack(
377
+ self,
378
+ retriever_name: str,
379
+ template_context: dict[str, Any],
380
+ retriever_override: Optional[Any],
381
+ pack_budget: Optional[Any],
382
+ policy: Optional[ContextPolicySpec],
383
+ tighten_pack_budget: bool,
384
+ weight: Optional[float] = None,
385
+ ) -> str:
386
+ """Render a retriever pack for the given retriever."""
387
+ retriever_spec = self._retriever_registry[retriever_name]
388
+ config = retriever_spec.config if hasattr(retriever_spec, "config") else {}
389
+ query_template = config.get("query") if isinstance(config, dict) else None
390
+ query = ""
391
+ if isinstance(query_template, str):
392
+ query = self._resolve_template(query_template, {}, template_context)
393
+ if not query:
394
+ input_context = template_context.get("input", {})
395
+ query = input_context.get("query", "") or input_context.get("message", "")
396
+
397
+ split = "train"
398
+ maximum_cache_total_items = None
399
+ maximum_cache_total_characters = None
400
+ limit = 3
401
+ offset = 0
402
+ maximum_total_characters = None
403
+ maximum_items_per_source = None
404
+ include_metadata = False
405
+ metadata_fields = None
406
+ backend_id = None
407
+ corpus_root = None
408
+ run_id = None
409
+ recipe_name = None
410
+ recipe_config = None
411
+ corpus_name = getattr(retriever_spec, "corpus", None)
412
+ join_with = "\n\n"
413
+
414
+ if isinstance(config, dict):
415
+ split = config.get("split", split)
416
+ limit = config.get("limit", limit)
417
+ offset = config.get("offset", offset)
418
+ maximum_total_characters = config.get(
419
+ "maximum_total_characters", maximum_total_characters
420
+ )
421
+ maximum_items_per_source = config.get(
422
+ "maximum_items_per_source",
423
+ config.get("max_items_per_source", maximum_items_per_source),
424
+ )
425
+ include_metadata = config.get("include_metadata", include_metadata)
426
+ metadata_fields = config.get("metadata_fields", metadata_fields)
427
+ backend_id = config.get("backend_id", backend_id)
428
+ run_id = config.get("run_id", run_id)
429
+ recipe_name = config.get("recipe_name", recipe_name)
430
+ recipe_config = config.get("recipe_config", config.get("recipe", recipe_config))
431
+ corpus_name = config.get("corpus", corpus_name)
432
+ join_with = config.get("join_with", join_with)
433
+
434
+ if corpus_name and corpus_name in self._corpus_registry:
435
+ corpus_spec = self._corpus_registry[corpus_name]
436
+ corpus_config = corpus_spec.config if hasattr(corpus_spec, "config") else {}
437
+ if isinstance(corpus_config, dict):
438
+ split = corpus_config.get("split", split)
439
+ maximum_cache_total_items = corpus_config.get(
440
+ "maximum_cache_total_items", maximum_cache_total_items
441
+ )
442
+ maximum_cache_total_characters = corpus_config.get(
443
+ "maximum_cache_total_characters", maximum_cache_total_characters
444
+ )
445
+ backend_id = corpus_config.get("backend_id", backend_id)
446
+ corpus_root = corpus_config.get(
447
+ "corpus_root",
448
+ corpus_config.get("root", corpus_root),
449
+ )
450
+ run_id = corpus_config.get("run_id", run_id)
451
+ recipe_name = corpus_config.get("recipe_name", recipe_name)
452
+ recipe_config = corpus_config.get(
453
+ "recipe_config",
454
+ corpus_config.get("recipe", recipe_config),
455
+ )
456
+
457
+ allocated_tokens = self._allocate_pack_budget(pack_budget, policy, weight)
458
+ if allocated_tokens is not None:
459
+ derived_chars = int(allocated_tokens) * 4
460
+ if maximum_total_characters is None:
461
+ maximum_total_characters = derived_chars
462
+ else:
463
+ maximum_total_characters = min(maximum_total_characters, derived_chars)
464
+
465
+ if tighten_pack_budget:
466
+ if maximum_total_characters is not None:
467
+ maximum_total_characters = max(1, int(maximum_total_characters * 0.5))
468
+ limit = max(1, int(limit * 0.5))
469
+
470
+ retriever_fn = retriever_override or self._default_retriever
471
+ if retriever_fn is None:
472
+ raise ValueError("No retriever override or default retriever configured")
473
+
474
+ request = ContextRetrieverRequest(
475
+ query=query,
476
+ offset=offset,
477
+ limit=limit,
478
+ maximum_total_characters=maximum_total_characters,
479
+ max_tokens=allocated_tokens,
480
+ metadata={
481
+ "retriever": retriever_name,
482
+ "corpus": corpus_name,
483
+ "split": split,
484
+ "maximum_cache_total_items": maximum_cache_total_items,
485
+ "maximum_cache_total_characters": maximum_cache_total_characters,
486
+ "maximum_items_per_source": maximum_items_per_source,
487
+ "include_metadata": include_metadata,
488
+ "metadata_fields": metadata_fields,
489
+ "backend_id": backend_id,
490
+ "corpus_root": corpus_root,
491
+ "run_id": run_id,
492
+ "recipe_name": recipe_name,
493
+ "recipe_config": recipe_config,
494
+ },
495
+ )
496
+ context_pack = self._retrieve_with_expansion(
497
+ retriever_fn,
498
+ request,
499
+ policy,
500
+ join_with,
501
+ allocated_tokens,
502
+ )
503
+ return context_pack.text
504
+
505
+ def _retrieve_with_expansion(
506
+ self,
507
+ retriever_fn: Any,
508
+ request: ContextRetrieverRequest,
509
+ policy: Optional[ContextPolicySpec],
510
+ join_with: str,
511
+ target_tokens: Optional[int],
512
+ ) -> ContextPack:
513
+ packs: list[ContextPack] = []
514
+ expansion = policy.expansion if policy else None
515
+ max_pages = 1
516
+ min_fill_ratio = None
517
+ if expansion is not None:
518
+ max_pages = max(1, expansion.max_pages)
519
+ min_fill_ratio = expansion.min_fill_ratio
520
+
521
+ current_request = request
522
+ for page_index in range(max_pages):
523
+ response_pack = retriever_fn(current_request)
524
+ if response_pack is None:
525
+ break
526
+ packs.append(response_pack)
527
+
528
+ if max_pages <= 1 or target_tokens is None:
529
+ break
530
+ if response_pack.evidence_count < current_request.limit:
531
+ break
532
+
533
+ merged_pack = self._merge_context_packs(packs, join_with=join_with)
534
+ token_count = self._estimate_tokens(merged_pack.text)
535
+ threshold_ratio = 1.0 if min_fill_ratio is None else float(min_fill_ratio)
536
+ if token_count >= int(target_tokens * threshold_ratio):
537
+ break
538
+
539
+ current_request = current_request.model_copy(
540
+ update={"offset": current_request.offset + current_request.limit}
541
+ )
542
+
543
+ return self._merge_context_packs(packs, join_with=join_with)
544
+
545
+ def _merge_context_packs(self, packs: Iterable[ContextPack], join_with: str) -> ContextPack:
546
+ blocks: list[ContextPackBlock] = []
547
+ for index, pack in enumerate(packs, start=1):
548
+ if pack.blocks:
549
+ blocks.extend(pack.blocks)
550
+ continue
551
+ if pack.text:
552
+ blocks.append(
553
+ ContextPackBlock(
554
+ evidence_item_id=f"page-{index}",
555
+ text=pack.text,
556
+ metadata=None,
557
+ )
558
+ )
559
+ if not blocks:
560
+ return ContextPack(text="", evidence_count=0, blocks=[])
561
+ text = join_with.join([block.text for block in blocks])
562
+ return ContextPack(text=text, evidence_count=len(blocks), blocks=blocks)
563
+
564
+ def _apply_compaction_policy(
565
+ self, text: str, policy: Any, max_tokens_override: Optional[int] = None
566
+ ) -> str:
567
+ if not policy or not getattr(policy, "input_budget", None):
568
+ return text
569
+ max_tokens = max_tokens_override
570
+ if max_tokens is None:
571
+ budget = policy.input_budget
572
+ max_tokens = getattr(budget, "max_tokens", None)
573
+ if max_tokens is None:
574
+ return text
575
+ tokens = text.split()
576
+ if len(tokens) <= max_tokens:
577
+ return text
578
+ if getattr(policy, "overflow", None) != "compact":
579
+ return text
580
+
581
+ compactor = self._resolve_compactor(policy)
582
+ return compactor.compact(CompactionRequest(text=text, max_tokens=max_tokens))
583
+
584
+ def _estimate_tokens(self, text: str) -> int:
585
+ return len(text.split())
586
+
587
+ def _apply_context_budget(
588
+ self,
589
+ system_prompt: str,
590
+ history: list[dict[str, Any]],
591
+ user_message: str,
592
+ policy: Any,
593
+ ) -> tuple[str, list[dict[str, Any]], str, int, bool]:
594
+ if not policy or not getattr(policy, "input_budget", None):
595
+ token_count = self._estimate_total_tokens(system_prompt, history, user_message)
596
+ return system_prompt, history, user_message, token_count, False
597
+
598
+ budget = policy.input_budget
599
+ max_tokens = getattr(budget, "max_tokens", None)
600
+ if max_tokens is None:
601
+ token_count = self._estimate_total_tokens(system_prompt, history, user_message)
602
+ return system_prompt, history, user_message, token_count, False
603
+
604
+ token_count = self._estimate_total_tokens(system_prompt, history, user_message)
605
+ if token_count <= max_tokens:
606
+ return system_prompt, history, user_message, token_count, False
607
+
608
+ if getattr(policy, "overflow", None) != "compact":
609
+ return system_prompt, history, user_message, token_count, False
610
+
611
+ trimmed_history = list(history)
612
+ compacted = False
613
+ while trimmed_history and token_count > max_tokens:
614
+ trimmed_history.pop(0)
615
+ token_count = self._estimate_total_tokens(system_prompt, trimmed_history, user_message)
616
+ compacted = True
617
+
618
+ if token_count <= max_tokens:
619
+ return system_prompt, trimmed_history, user_message, token_count, compacted
620
+
621
+ remaining_budget = max_tokens - self._estimate_total_tokens(
622
+ "", trimmed_history, user_message
623
+ )
624
+ if remaining_budget < 0:
625
+ remaining_budget = 0
626
+ compacted_prompt = self._apply_compaction_policy(
627
+ system_prompt,
628
+ policy,
629
+ max_tokens_override=remaining_budget,
630
+ )
631
+ if compacted_prompt != system_prompt:
632
+ compacted = True
633
+ token_count = self._estimate_total_tokens(compacted_prompt, trimmed_history, user_message)
634
+ return compacted_prompt, trimmed_history, user_message, token_count, compacted
635
+
636
+ def _allocate_pack_budget(
637
+ self,
638
+ pack_budget: Optional[Any],
639
+ policy: Optional[ContextPolicySpec],
640
+ weight: Optional[float],
641
+ ) -> Optional[int]:
642
+ if pack_budget is not None and hasattr(pack_budget, "max_tokens"):
643
+ max_tokens = getattr(pack_budget, "max_tokens", None)
644
+ if max_tokens is not None:
645
+ return int(max_tokens)
646
+ ratio = getattr(pack_budget, "ratio", None)
647
+ if ratio is not None:
648
+ return self._resolve_ratio_budget(ratio, policy)
649
+ if pack_budget and isinstance(pack_budget, dict):
650
+ max_tokens = pack_budget.get("max_tokens")
651
+ if max_tokens is not None:
652
+ return int(max_tokens)
653
+ ratio = pack_budget.get("ratio")
654
+ if ratio is not None:
655
+ return self._resolve_ratio_budget(ratio, policy)
656
+
657
+ if policy and getattr(policy, "pack_budget", None):
658
+ pack_budget_spec = policy.pack_budget
659
+ base_tokens = getattr(pack_budget_spec, "default_max_tokens", None)
660
+ if base_tokens is None:
661
+ base_ratio = getattr(pack_budget_spec, "default_ratio", None)
662
+ if base_ratio is None:
663
+ return None
664
+ if not getattr(policy, "input_budget", None):
665
+ return None
666
+ input_budget = policy.input_budget
667
+ max_tokens = getattr(input_budget, "max_tokens", None)
668
+ if max_tokens is None:
669
+ return None
670
+ base_tokens = int(max_tokens * base_ratio)
671
+ if weight is None:
672
+ return int(base_tokens)
673
+ return int(base_tokens * weight)
674
+
675
+ return None
676
+
677
+ def _resolve_ratio_budget(
678
+ self, ratio: float, policy: Optional[ContextPolicySpec]
679
+ ) -> Optional[int]:
680
+ if ratio is None or not policy or not getattr(policy, "input_budget", None):
681
+ return None
682
+ input_budget = policy.input_budget
683
+ max_tokens = getattr(input_budget, "max_tokens", None)
684
+ if max_tokens is None:
685
+ return None
686
+ return int(max_tokens * float(ratio))
687
+
688
+ def _allocate_directive_budgets(
689
+ self,
690
+ directives: list[ContextMessageSpec],
691
+ policy: Optional[ContextPolicySpec],
692
+ total_budget_override: Optional[int],
693
+ ) -> dict[int, dict[str, Any]]:
694
+ if not policy or not getattr(policy, "pack_budget", None):
695
+ return {}
696
+ pack_budget_spec = policy.pack_budget
697
+ total_budget = total_budget_override
698
+ if total_budget is None:
699
+ total_budget = getattr(pack_budget_spec, "default_max_tokens", None)
700
+ if total_budget is None:
701
+ base_ratio = getattr(pack_budget_spec, "default_ratio", None)
702
+ if base_ratio is None:
703
+ return {}
704
+ if not getattr(policy, "input_budget", None):
705
+ return {}
706
+ input_budget = policy.input_budget
707
+ max_tokens = getattr(input_budget, "max_tokens", None)
708
+ if max_tokens is None:
709
+ return {}
710
+ total_budget = int(max_tokens * base_ratio)
711
+
712
+ pack_directives = [
713
+ directive for directive in directives if isinstance(directive, ContextInsertSpec)
714
+ ]
715
+ if not pack_directives:
716
+ return {}
717
+
718
+ sorted_directives = sorted(
719
+ pack_directives,
720
+ key=lambda directive: (
721
+ -(directive.priority or 0),
722
+ -(directive.weight or 1.0),
723
+ ),
724
+ )
725
+ total_weight = sum(directive.weight or 1.0 for directive in sorted_directives)
726
+ allocations = {}
727
+ remaining_budget = int(total_budget)
728
+
729
+ for directive in sorted_directives:
730
+ weight = directive.weight or 1.0
731
+ allocation = int((total_budget * weight) / total_weight)
732
+ if allocation <= 0:
733
+ allocation = 1
734
+ if allocation > remaining_budget:
735
+ allocation = remaining_budget
736
+ allocations[id(directive)] = {"max_tokens": allocation}
737
+ remaining_budget -= allocation
738
+ if remaining_budget <= 0:
739
+ break
740
+
741
+ if remaining_budget > 0 and sorted_directives:
742
+ allocations[id(sorted_directives[0])]["max_tokens"] += remaining_budget
743
+
744
+ return allocations
745
+
746
+ def _allocate_default_pack_budgets(
747
+ self,
748
+ pack_entries: list[Any],
749
+ policy: Optional[ContextPolicySpec],
750
+ total_budget_override: Optional[int],
751
+ ) -> dict[str, dict[str, Any]]:
752
+ if not policy or not getattr(policy, "pack_budget", None):
753
+ return {}
754
+ pack_budget_spec = policy.pack_budget
755
+ total_budget = total_budget_override
756
+ if total_budget is None:
757
+ total_budget = getattr(pack_budget_spec, "default_max_tokens", None)
758
+ if total_budget is None:
759
+ base_ratio = getattr(pack_budget_spec, "default_ratio", None)
760
+ if base_ratio is None:
761
+ return {}
762
+ if not getattr(policy, "input_budget", None):
763
+ return {}
764
+ input_budget = policy.input_budget
765
+ max_tokens = getattr(input_budget, "max_tokens", None)
766
+ if max_tokens is None:
767
+ return {}
768
+ total_budget = int(max_tokens * base_ratio)
769
+
770
+ if not pack_entries:
771
+ return {}
772
+
773
+ entries = sorted(
774
+ pack_entries,
775
+ key=lambda entry: (
776
+ -(getattr(entry, "priority", None) or 0),
777
+ -(getattr(entry, "weight", None) or 1.0),
778
+ ),
779
+ )
780
+ total_weight = sum(getattr(entry, "weight", None) or 1.0 for entry in entries)
781
+ allocations = {}
782
+ remaining_budget = int(total_budget)
783
+
784
+ for entry in entries:
785
+ weight = getattr(entry, "weight", None) or 1.0
786
+ allocation = int((total_budget * weight) / total_weight)
787
+ if allocation <= 0:
788
+ allocation = 1
789
+ if allocation > remaining_budget:
790
+ allocation = remaining_budget
791
+ allocations[entry.name] = {"max_tokens": allocation}
792
+ remaining_budget -= allocation
793
+ if remaining_budget <= 0:
794
+ break
795
+
796
+ if remaining_budget > 0 and entries:
797
+ allocations[entries[0].name]["max_tokens"] += remaining_budget
798
+
799
+ return allocations
800
+
801
+ def _resolve_default_pack_total_budget(
802
+ self, policy: Optional[ContextPolicySpec]
803
+ ) -> Optional[int]:
804
+ if not policy or not getattr(policy, "pack_budget", None):
805
+ return None
806
+ pack_budget_spec = policy.pack_budget
807
+ total_budget = getattr(pack_budget_spec, "default_max_tokens", None)
808
+ if total_budget is not None:
809
+ return int(total_budget)
810
+ base_ratio = getattr(pack_budget_spec, "default_ratio", None)
811
+ if base_ratio is None:
812
+ return None
813
+ if not getattr(policy, "input_budget", None):
814
+ return None
815
+ input_budget = policy.input_budget
816
+ max_tokens = getattr(input_budget, "max_tokens", None)
817
+ if max_tokens is None:
818
+ return None
819
+ return int(max_tokens * base_ratio)
820
+
821
+ def _extract_pack_budget_max_tokens(
822
+ self, pack_budget: Optional[Any], policy: Optional[ContextPolicySpec]
823
+ ) -> Optional[int]:
824
+ if pack_budget is None:
825
+ return None
826
+ if isinstance(pack_budget, dict):
827
+ max_tokens = pack_budget.get("max_tokens")
828
+ if max_tokens is not None:
829
+ return int(max_tokens)
830
+ ratio = pack_budget.get("ratio")
831
+ if ratio is not None:
832
+ return self._resolve_ratio_budget(ratio, policy)
833
+ if hasattr(pack_budget, "max_tokens"):
834
+ max_tokens = getattr(pack_budget, "max_tokens", None)
835
+ if max_tokens is not None:
836
+ return int(max_tokens)
837
+ ratio = getattr(pack_budget, "ratio", None)
838
+ if ratio is not None:
839
+ return self._resolve_ratio_budget(ratio, policy)
840
+ return None
841
+
842
+ def _estimate_total_tokens(
843
+ self, system_prompt: str, history: list[dict[str, Any]], user_message: str
844
+ ) -> int:
845
+ total = self._estimate_tokens(system_prompt) + self._estimate_tokens(user_message)
846
+ for message in history:
847
+ content = message.get("content", "")
848
+ total += self._estimate_tokens(content)
849
+ return total
850
+
851
+ def _resolve_compactor(self, policy: ContextPolicySpec):
852
+ compactor_config = None
853
+ if hasattr(policy, "compactor"):
854
+ compactor_config = policy.compactor
855
+
856
+ if isinstance(compactor_config, str):
857
+ compactor_spec = self._compactor_registry.get(compactor_config)
858
+ if compactor_spec is None:
859
+ raise ValueError(f"Compactor '{compactor_config}' not defined")
860
+ config = compactor_spec.config if hasattr(compactor_spec, "config") else {}
861
+ if not isinstance(config, dict):
862
+ config = {}
863
+ return build_compactor(config)
864
+
865
+ if isinstance(compactor_config, dict):
866
+ return build_compactor(compactor_config)
867
+
868
+ return TruncateCompactor()
869
+
870
+ def _build_messages(
871
+ self,
872
+ context_spec: ContextDeclaration,
873
+ history_messages: list[dict[str, Any]],
874
+ template_context: dict[str, Any],
875
+ retriever_override: Optional[Any],
876
+ policy: Optional[ContextPolicySpec],
877
+ tighten_pack_budget: bool = False,
878
+ total_pack_budget_override: Optional[int] = None,
879
+ ) -> list[dict[str, Any]]:
880
+ assembled_messages: list[dict[str, Any]] = []
881
+ resolved_context = dict(template_context)
882
+ context_values = dict(resolved_context.get("context", {}))
883
+ resolved_context["context"] = context_values
884
+ directive_budgets = self._allocate_directive_budgets(
885
+ context_spec.messages or [], policy, total_pack_budget_override
886
+ )
887
+ for directive in context_spec.messages or []:
888
+ if isinstance(directive, HistoryInsertSpec):
889
+ assembled_messages.extend(history_messages)
890
+ continue
891
+ if isinstance(directive, ContextInsertSpec):
892
+ override_budget = directive_budgets.get(id(directive))
893
+ pack_content = self._render_pack(
894
+ directive.name,
895
+ resolved_context,
896
+ retriever_override,
897
+ override_budget or directive.budget,
898
+ policy,
899
+ tighten_pack_budget,
900
+ directive.weight,
901
+ )
902
+ context_values[directive.name] = pack_content or ""
903
+ if pack_content:
904
+ assembled_messages.append({"role": "system", "content": pack_content})
905
+ else:
906
+ context_values[directive.name] = ""
907
+ continue
908
+ if isinstance(directive, SystemMessageSpec):
909
+ assembled_messages.append(
910
+ {
911
+ "role": "system",
912
+ "content": self._resolve_message_content(directive, resolved_context),
913
+ }
914
+ )
915
+ continue
916
+ if isinstance(directive, UserMessageSpec):
917
+ assembled_messages.append(
918
+ {
919
+ "role": "user",
920
+ "content": self._resolve_message_content(directive, resolved_context),
921
+ }
922
+ )
923
+ continue
924
+ if isinstance(directive, AssistantMessageSpec):
925
+ assembled_messages.append(
926
+ {
927
+ "role": "assistant",
928
+ "content": self._resolve_message_content(directive, resolved_context),
929
+ }
930
+ )
931
+ continue
932
+ return assembled_messages
933
+
934
    def _render_nested_context_pack(
        self,
        context_spec: ContextDeclaration,
        template_context: dict[str, Any],
        pack_budget: Optional[Any],
        policy: Optional[ContextPolicySpec],
        tighten_pack_budget: bool,
        retriever_override: Optional[Any],
    ) -> str:
        """
        Render another Context declaration as a pack string.

        :param context_spec: Nested Context declaration to render.
        :param template_context: Template values for the nested assembly.
        :param pack_budget: Optional budget attached to the insert directive.
        :param policy: Caller's policy; used for compaction when present.
        :param tighten_pack_budget: Whether to halve the effective pack budget.
        :param retriever_override: Optional retriever passed through to assembly.
        :return: Rendered (and possibly compacted) pack text.
        :raises ValueError: If the nested declaration includes ``history()``.
        """
        # The nested declaration's own policy drives assembly; the caller's
        # policy, when present, drives compaction decisions.
        nested_policy = context_spec.policy or policy
        compaction_policy = policy or nested_policy
        total_budget_override = self._extract_pack_budget_max_tokens(pack_budget, policy)

        if context_spec.messages is None:
            # No explicit directives: assemble the nested declaration's default
            # layout with empty history/user message, then compact the result.
            nested_result = self._assemble_default_with_regeneration(
                context_spec,
                base_system_prompt="",
                history_messages=[],
                user_message="",
                template_context=template_context,
                retriever_override=retriever_override,
                total_budget_override=total_budget_override,
            )
            pack_text = nested_result.system_prompt
            return self._compact_pack_text(
                pack_text, pack_budget, compaction_policy, tighten_pack_budget
            )

        if any(isinstance(directive, HistoryInsertSpec) for directive in context_spec.messages):
            raise ValueError("Nested context packs cannot include history()")

        # Re-assemble up to max_iterations times, halving the budget scale on
        # each retry, until the raw text fits the token budget.
        max_iterations = 1
        if compaction_policy and getattr(compaction_policy, "max_iterations", None):
            max_iterations = max(1, int(compaction_policy.max_iterations))

        pack_scale = 1.0
        last_text = ""
        max_tokens = self._extract_pack_budget_max_tokens(pack_budget, compaction_policy)
        for _iteration in range(max_iterations):
            scaled_override = total_budget_override
            if scaled_override is not None and pack_scale < 1.0:
                scaled_override = max(1, int(scaled_override * pack_scale))

            assembled_messages = self._build_messages(
                context_spec,
                history_messages=[],
                template_context=template_context,
                retriever_override=retriever_override,
                policy=nested_policy,
                # Tighten pack budgets once we are retrying at a reduced scale.
                tighten_pack_budget=tighten_pack_budget or pack_scale < 1.0,
                total_pack_budget_override=scaled_override,
            )
            rendered_segments = [message.get("content", "") for message in assembled_messages]
            pack_text = self._join_nonempty(rendered_segments)
            raw_token_count = self._estimate_tokens(pack_text)
            # Keep the compacted form of the latest attempt as the candidate.
            last_text = self._compact_pack_text(
                pack_text, pack_budget, compaction_policy, tighten_pack_budget
            )

            # NOTE(review): the fit check uses the pre-compaction token count,
            # so an over-budget render triggers a retry even though last_text
            # was already compacted — confirm this is the intended loop exit.
            if max_tokens is None or raw_token_count <= max_tokens:
                break
            # Only retry at a smaller scale when the policy opts into "compact".
            if not compaction_policy or getattr(compaction_policy, "overflow", None) != "compact":
                break
            pack_scale *= 0.5

        return last_text
1001
+
1002
+ def _compact_pack_text(
1003
+ self,
1004
+ text: str,
1005
+ pack_budget: Optional[Any],
1006
+ policy: Optional[ContextPolicySpec],
1007
+ tighten_pack_budget: bool,
1008
+ ) -> str:
1009
+ max_tokens = None
1010
+ if pack_budget is not None:
1011
+ if isinstance(pack_budget, dict):
1012
+ max_tokens = pack_budget.get("max_tokens")
1013
+ elif hasattr(pack_budget, "max_tokens"):
1014
+ max_tokens = getattr(pack_budget, "max_tokens", None)
1015
+
1016
+ if max_tokens is None and policy and getattr(policy, "pack_budget", None):
1017
+ pack_budget_spec = policy.pack_budget
1018
+ max_tokens = getattr(pack_budget_spec, "default_max_tokens", None)
1019
+
1020
+ if max_tokens is None:
1021
+ return text
1022
+
1023
+ if tighten_pack_budget:
1024
+ max_tokens = max(1, int(max_tokens * 0.5))
1025
+
1026
+ compactor = self._resolve_compactor(policy) if policy else TruncateCompactor()
1027
+ return compactor.compact(CompactionRequest(text=text, max_tokens=int(max_tokens)))
1028
+
1029
+ def _split_leading_system(
1030
+ self, messages: list[dict[str, Any]]
1031
+ ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
1032
+ """Split leading system messages from the rest."""
1033
+ leading_system: list[dict[str, Any]] = []
1034
+ remaining: list[dict[str, Any]] = []
1035
+ seen_non_system = False
1036
+ for message in messages:
1037
+ if message.get("role") == "system" and not seen_non_system:
1038
+ leading_system.append(message)
1039
+ else:
1040
+ seen_non_system = True
1041
+ remaining.append(message)
1042
+ return leading_system, remaining
1043
+
1044
+ def _extract_user_message(
1045
+ self, messages: list[dict[str, Any]], fallback_message: str
1046
+ ) -> tuple[str, list[dict[str, Any]]]:
1047
+ """Extract the final user message from a list, leaving the rest as history."""
1048
+ last_user_index = None
1049
+ for idx, message in enumerate(messages):
1050
+ if message.get("role") == "user":
1051
+ last_user_index = idx
1052
+ if last_user_index is None:
1053
+ return fallback_message, messages
1054
+
1055
+ user_message = messages[last_user_index].get("content", "")
1056
+ remaining = messages[:last_user_index] + messages[last_user_index + 1 :]
1057
+ return user_message, remaining
1058
+
1059
+ def _join_nonempty(self, parts: Iterable[str]) -> str:
1060
+ return "\n\n".join([part for part in parts if part])