biblicus 0.16.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. biblicus/__init__.py +25 -5
  2. biblicus/analysis/__init__.py +1 -1
  3. biblicus/analysis/base.py +10 -10
  4. biblicus/analysis/markov.py +78 -68
  5. biblicus/analysis/models.py +47 -47
  6. biblicus/analysis/profiling.py +58 -48
  7. biblicus/analysis/topic_modeling.py +56 -51
  8. biblicus/cli.py +248 -191
  9. biblicus/{recipes.py → configuration.py} +14 -14
  10. biblicus/constants.py +2 -2
  11. biblicus/context.py +27 -12
  12. biblicus/context_engine/__init__.py +53 -0
  13. biblicus/context_engine/assembler.py +1090 -0
  14. biblicus/context_engine/compaction.py +110 -0
  15. biblicus/context_engine/models.py +423 -0
  16. biblicus/context_engine/retrieval.py +133 -0
  17. biblicus/corpus.py +233 -124
  18. biblicus/errors.py +27 -3
  19. biblicus/evaluation.py +27 -25
  20. biblicus/extraction.py +103 -98
  21. biblicus/extraction_evaluation.py +26 -26
  22. biblicus/extractors/deepgram_stt.py +7 -7
  23. biblicus/extractors/docling_granite_text.py +11 -11
  24. biblicus/extractors/docling_smol_text.py +11 -11
  25. biblicus/extractors/markitdown_text.py +4 -4
  26. biblicus/extractors/openai_stt.py +7 -7
  27. biblicus/extractors/paddleocr_vl_text.py +20 -18
  28. biblicus/extractors/pipeline.py +8 -8
  29. biblicus/extractors/rapidocr_text.py +3 -3
  30. biblicus/extractors/unstructured_text.py +3 -3
  31. biblicus/hooks.py +4 -4
  32. biblicus/knowledge_base.py +34 -32
  33. biblicus/models.py +84 -81
  34. biblicus/retrieval.py +49 -42
  35. biblicus/retrievers/__init__.py +50 -0
  36. biblicus/retrievers/base.py +65 -0
  37. biblicus/{backends → retrievers}/embedding_index_common.py +80 -44
  38. biblicus/{backends → retrievers}/embedding_index_file.py +96 -61
  39. biblicus/{backends → retrievers}/embedding_index_inmemory.py +100 -69
  40. biblicus/retrievers/hybrid.py +301 -0
  41. biblicus/{backends → retrievers}/scan.py +84 -73
  42. biblicus/{backends → retrievers}/sqlite_full_text_search.py +115 -101
  43. biblicus/{backends → retrievers}/tf_vector.py +103 -100
  44. biblicus/sources.py +46 -11
  45. biblicus/text/link.py +6 -0
  46. biblicus/text/prompts.py +18 -8
  47. biblicus/text/tool_loop.py +63 -5
  48. {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/METADATA +32 -23
  49. biblicus-1.1.0.dist-info/RECORD +91 -0
  50. biblicus/backends/__init__.py +0 -50
  51. biblicus/backends/base.py +0 -65
  52. biblicus/backends/hybrid.py +0 -291
  53. biblicus-0.16.0.dist-info/RECORD +0 -86
  54. {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/WHEEL +0 -0
  55. {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/entry_points.txt +0 -0
  56. {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/licenses/LICENSE +0 -0
  57. {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1090 @@
1
+ """
2
+ Context assembly utilities for the Biblicus Context Engine.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass
8
+ from string import Formatter
9
+ from typing import Any, Iterable, Optional
10
+
11
+ from biblicus.context import ContextPack, ContextPackBlock
12
+ from biblicus.context_engine.compaction import CompactionRequest, TruncateCompactor, build_compactor
13
+ from biblicus.context_engine.models import (
14
+ AssistantMessageSpec,
15
+ ContextDeclaration,
16
+ ContextInsertSpec,
17
+ ContextMessageSpec,
18
+ ContextPolicySpec,
19
+ ContextRetrieverRequest,
20
+ HistoryInsertSpec,
21
+ SystemMessageSpec,
22
+ UserMessageSpec,
23
+ )
24
+
25
+
26
@dataclass
class ContextAssemblyResult:
    """
    Prompt components produced by assembling a Context declaration for one turn.

    :ivar system_prompt: Final system prompt text (base prompt plus rendered packs).
    :vartype system_prompt: str
    :ivar history: Message-history dictionaries retained for the turn.
    :vartype history: list[dict[str, Any]]
    :ivar user_message: Resolved user message text for the turn.
    :vartype user_message: str
    :ivar token_count: Estimated token total of the assembled components.
    :vartype token_count: int
    """

    system_prompt: str
    history: list[dict[str, Any]]
    user_message: str
    # Defaults to 0 when no budget estimation was performed.
    token_count: int = 0
45
+
46
+
47
+ class ContextAssembler:
48
+ """
49
+ Assemble Context declarations into system prompts, history, and user messages.
50
+
51
+ :param context_registry: Context declarations indexed by name.
52
+ :type context_registry: dict[str, ContextDeclaration]
53
+ :param retriever_registry: Retriever declarations indexed by name.
54
+ :type retriever_registry: dict[str, Any] or None
55
+ :param corpus_registry: Corpus declarations indexed by name.
56
+ :type corpus_registry: dict[str, Any] or None
57
+ :param compactor_registry: Compactor declarations indexed by name.
58
+ :type compactor_registry: dict[str, Any] or None
59
+ :param default_retriever: Default retriever callable when no override is supplied.
60
+ :type default_retriever: callable or None
61
+ """
62
+
63
+ def __init__(
64
+ self,
65
+ context_registry: dict[str, ContextDeclaration],
66
+ retriever_registry: Optional[dict[str, Any]] = None,
67
+ corpus_registry: Optional[dict[str, Any]] = None,
68
+ compactor_registry: Optional[dict[str, Any]] = None,
69
+ default_retriever: Optional[Any] = None,
70
+ ):
71
+ self._context_registry = context_registry
72
+ self._retriever_registry = retriever_registry or {}
73
+ self._corpus_registry = corpus_registry or {}
74
+ self._compactor_registry = compactor_registry or {}
75
+ self._default_retriever = default_retriever
76
+
77
+ def assemble(
78
+ self,
79
+ context_name: str,
80
+ base_system_prompt: str,
81
+ history_messages: list[dict[str, Any]],
82
+ user_message: Optional[str],
83
+ template_context: dict[str, Any],
84
+ retriever_override: Optional[Any] = None,
85
+ ) -> ContextAssemblyResult:
86
+ """
87
+ Assemble a Context declaration into prompt components.
88
+
89
+ :param context_name: Name of the Context declaration.
90
+ :type context_name: str
91
+ :param base_system_prompt: Default system prompt from agent config.
92
+ :type base_system_prompt: str
93
+ :param history_messages: Current agent history messages.
94
+ :type history_messages: list[dict[str, Any]]
95
+ :param user_message: Current user message for this turn.
96
+ :type user_message: str or None
97
+ :param template_context: Template variables for resolution.
98
+ :type template_context: dict[str, Any]
99
+ :param retriever_override: Optional retriever override callable.
100
+ :type retriever_override: callable or None
101
+ :return: Assembled prompt components.
102
+ :rtype: ContextAssemblyResult
103
+ :raises ValueError: If the context declaration is not found.
104
+ """
105
+ if context_name not in self._context_registry:
106
+ raise ValueError(f"Context '{context_name}' not defined")
107
+
108
+ context_spec = self._context_registry[context_name]
109
+ if context_spec.messages is None:
110
+ return self._assemble_default_with_regeneration(
111
+ context_spec,
112
+ base_system_prompt,
113
+ history_messages,
114
+ user_message or "",
115
+ template_context,
116
+ retriever_override,
117
+ )
118
+
119
+ return self._assemble_explicit_with_regeneration(
120
+ context_spec,
121
+ history_messages,
122
+ user_message or "",
123
+ template_context,
124
+ retriever_override,
125
+ )
126
+
127
    def _assemble_default(
        self,
        context_spec: ContextDeclaration,
        base_system_prompt: str,
        history_messages: list[dict[str, Any]],
        user_message: str,
        template_context: dict[str, Any],
        total_budget_override: Optional[int] = None,
    ) -> ContextAssemblyResult:
        """
        Assemble the default Context plan when messages are omitted.

        Renders each pack entry, appends the outputs to the base system prompt,
        then applies the policy input budget.

        :param context_spec: Context declaration whose packs drive assembly.
        :param base_system_prompt: Default system prompt from agent config.
        :param history_messages: Current agent history messages.
        :param user_message: Current user message for this turn.
        :param template_context: Template variables for resolution.
        :param total_budget_override: Optional override for the total pack budget.
        :return: Assembled prompt components.
        """
        system_prompt = base_system_prompt or ""
        pack_outputs = []
        pack_entries = context_spec.packs or []
        # Pre-split the pack token budget across entries by priority/weight.
        pack_budgets = self._allocate_default_pack_budgets(
            pack_entries, context_spec.policy, total_budget_override
        )
        for pack_entry in pack_entries:
            pack_outputs.append(
                self._render_pack(
                    pack_entry.name,
                    template_context,
                    retriever_override=None,
                    pack_budget=pack_budgets.get(pack_entry.name),
                    policy=context_spec.policy,
                    weight=pack_entry.weight,
                )
            )

        if pack_outputs:
            # Rendered packs are appended after the base prompt; empty strings
            # are dropped by the join helper.
            system_prompt = self._join_nonempty([system_prompt, *pack_outputs])

        # NOTE(review): the trailing `compacted` flag is intentionally unused here;
        # the regeneration wrapper variant consumes it to decide whether to retry.
        compacted_prompt, history_messages, user_message, token_count, compacted = (
            self._apply_context_budget(
                system_prompt,
                history_messages,
                user_message,
                context_spec.policy,
            )
        )
        return ContextAssemblyResult(
            system_prompt=compacted_prompt,
            history=history_messages,
            user_message=user_message,
            token_count=token_count,
        )
172
+
173
    def _assemble_default_with_regeneration(
        self,
        context_spec: ContextDeclaration,
        base_system_prompt: str,
        history_messages: list[dict[str, Any]],
        user_message: str,
        template_context: dict[str, Any],
        retriever_override: Optional[Any],
        total_budget_override: Optional[int] = None,
    ) -> ContextAssemblyResult:
        """
        Assemble the default plan, regenerating with tighter pack budgets on overflow.

        Runs up to ``policy.max_iterations`` passes (default 2). When the budget
        application reports compaction and the policy overflow mode is
        ``"compact"``, packs are re-rendered with halved budgets each retry.

        :param context_spec: Context declaration whose packs drive assembly.
        :param base_system_prompt: Default system prompt from agent config.
        :param history_messages: Current agent history messages.
        :param user_message: Current user message for this turn.
        :param template_context: Template variables for resolution.
        :param retriever_override: Optional retriever override callable.
        :param total_budget_override: Optional override for the total pack budget.
        :return: Last assembled result (the one produced on the final pass).
        """
        max_iterations = 2
        if context_spec.policy and getattr(context_spec.policy, "max_iterations", None):
            max_iterations = max(1, int(context_spec.policy.max_iterations))

        # pack_scale < 1.0 signals _render_pack to tighten per-pack limits.
        pack_scale = 1.0
        last_result: Optional[ContextAssemblyResult] = None
        for _iteration in range(max_iterations):
            system_prompt = base_system_prompt or ""
            pack_outputs = []
            pack_entries = context_spec.packs or []
            pack_budgets = self._allocate_default_pack_budgets(
                pack_entries, context_spec.policy, total_budget_override
            )
            for pack_entry in pack_entries:
                pack_outputs.append(
                    self._render_pack(
                        pack_entry.name,
                        template_context,
                        retriever_override,
                        pack_budget=pack_budgets.get(pack_entry.name),
                        policy=context_spec.policy,
                        tighten_pack_budget=pack_scale < 1.0,
                        weight=pack_entry.weight,
                    )
                )

            if pack_outputs:
                system_prompt = self._join_nonempty([system_prompt, *pack_outputs])

            # NOTE(review): history_messages/user_message are rebound here, so a
            # retry iteration starts from the already-trimmed history of the
            # previous pass — presumably intentional; confirm against tests.
            compacted_prompt, history_messages, user_message, token_count, compacted = (
                self._apply_context_budget(
                    system_prompt,
                    history_messages,
                    user_message,
                    context_spec.policy,
                )
            )

            last_result = ContextAssemblyResult(
                system_prompt=compacted_prompt,
                history=history_messages,
                user_message=user_message,
                token_count=token_count,
            )

            # Stop unless compaction occurred under a "compact" overflow policy.
            if not compacted or not context_spec.policy:
                break
            if getattr(context_spec.policy, "overflow", None) != "compact":
                break
            pack_scale *= 0.5

        # max_iterations >= 1 guarantees last_result is set; the fallback is defensive.
        return last_result or ContextAssemblyResult(
            system_prompt="",
            history=history_messages,
            user_message=user_message,
            token_count=0,
        )
240
+
241
    def _assemble_explicit_with_regeneration(
        self,
        context_spec: ContextDeclaration,
        history_messages: list[dict[str, Any]],
        user_message: str,
        template_context: dict[str, Any],
        retriever_override: Optional[Any],
    ) -> ContextAssemblyResult:
        """
        Assemble explicit Context message directives with regeneration loop.

        Builds the declared message list, splits leading system messages into the
        system prompt, extracts the user message, applies the input budget, and
        retries with a halved total pack budget when compaction occurred under a
        ``"compact"`` overflow policy.

        :param context_spec: Context declaration carrying explicit messages.
        :param history_messages: Current agent history messages.
        :param user_message: Current user message for this turn.
        :param template_context: Template variables for resolution.
        :param retriever_override: Optional retriever override callable.
        :return: Last assembled result (the one produced on the final pass).
        """
        max_iterations = 2
        if context_spec.policy and getattr(context_spec.policy, "max_iterations", None):
            max_iterations = max(1, int(context_spec.policy.max_iterations))

        pack_scale = 1.0
        last_result: Optional[ContextAssemblyResult] = None
        for _iteration in range(max_iterations):
            # Shrink the total pack budget on retries (pack_scale halves each pass).
            total_pack_budget = self._resolve_default_pack_total_budget(context_spec.policy)
            if total_pack_budget is not None and pack_scale < 1.0:
                total_pack_budget = max(1, int(total_pack_budget * pack_scale))

            assembled_messages = self._build_messages(
                context_spec,
                history_messages,
                template_context,
                retriever_override,
                context_spec.policy,
                tighten_pack_budget=pack_scale < 1.0,
                total_pack_budget_override=total_pack_budget,
            )

            # Leading system messages become the system prompt; the (last) user
            # message is pulled out of the remainder.
            system_messages, remaining_messages = self._split_leading_system(assembled_messages)
            system_prompt = self._join_nonempty([m["content"] for m in system_messages])
            resolved_user_message, remaining_messages = self._extract_user_message(
                remaining_messages, user_message
            )

            compacted_prompt, remaining_messages, resolved_user_message, token_count, compacted = (
                self._apply_context_budget(
                    system_prompt,
                    remaining_messages,
                    resolved_user_message,
                    context_spec.policy,
                )
            )

            last_result = ContextAssemblyResult(
                system_prompt=compacted_prompt,
                history=remaining_messages,
                user_message=resolved_user_message,
                token_count=token_count,
            )

            # Stop unless compaction occurred under a "compact" overflow policy.
            if not compacted or not context_spec.policy:
                break
            if getattr(context_spec.policy, "overflow", None) != "compact":
                break
            pack_scale *= 0.5

        # max_iterations >= 1 guarantees last_result is set; the fallback is defensive.
        return last_result or ContextAssemblyResult(
            system_prompt="",
            history=[],
            user_message=user_message,
            token_count=0,
        )
305
+
306
+ def _resolve_message_content(
307
+ self,
308
+ directive: SystemMessageSpec | UserMessageSpec | AssistantMessageSpec,
309
+ template_context,
310
+ ) -> str:
311
+ """Resolve message content or templates."""
312
+ if directive.content is not None:
313
+ return directive.content
314
+ return self._resolve_template(directive.template or "", directive.vars, template_context)
315
+
316
+ def _resolve_template(
317
+ self, template_text: str, vars_dict: dict[str, Any], template_context: dict[str, Any]
318
+ ) -> str:
319
+ """Resolve dot-notation templates with context variables."""
320
+ if not template_text:
321
+ return template_text
322
+
323
+ class DotFormatter(Formatter):
324
+ def get_field(self, field_name, args, kwargs):
325
+ path_parts = field_name.split(".")
326
+ current_value = kwargs
327
+ for part in path_parts:
328
+ if isinstance(current_value, dict):
329
+ current_value = current_value.get(part, "")
330
+ else:
331
+ current_value = getattr(current_value, part, "")
332
+ return current_value, field_name
333
+
334
+ merged_context = dict(template_context)
335
+ for key, value in (vars_dict or {}).items():
336
+ merged_context[key] = value
337
+
338
+ formatter = DotFormatter()
339
+ return formatter.format(template_text, **merged_context)
340
+
341
+ def _render_pack(
342
+ self,
343
+ pack_name: str,
344
+ template_context: dict[str, Any],
345
+ retriever_override: Optional[Any],
346
+ pack_budget: Optional[Any],
347
+ policy: Optional[ContextPolicySpec],
348
+ tighten_pack_budget: bool = False,
349
+ weight: Optional[float] = None,
350
+ ) -> str:
351
+ """Render a context pack by name."""
352
+ if pack_name in self._context_registry:
353
+ nested_context = self._context_registry[pack_name]
354
+ return self._render_nested_context_pack(
355
+ nested_context,
356
+ template_context,
357
+ pack_budget,
358
+ policy,
359
+ tighten_pack_budget,
360
+ retriever_override,
361
+ )
362
+ if pack_name in self._retriever_registry:
363
+ return self._render_retriever_pack(
364
+ pack_name,
365
+ template_context,
366
+ retriever_override,
367
+ pack_budget,
368
+ policy,
369
+ tighten_pack_budget,
370
+ weight,
371
+ )
372
+ raise NotImplementedError(
373
+ f"Context pack '{pack_name}' is not available. Only Context or retriever packs are supported."
374
+ )
375
+
376
    def _render_retriever_pack(
        self,
        retriever_name: str,
        template_context: dict[str, Any],
        retriever_override: Optional[Any],
        pack_budget: Optional[Any],
        policy: Optional[ContextPolicySpec],
        tighten_pack_budget: bool,
        weight: Optional[float] = None,
    ) -> str:
        """
        Render a retriever pack for the given retriever.

        Resolves the query and retrieval options from a layered configuration
        cascade (retriever config, then corpus config, then pipeline query
        config), converts any allocated token budget into a character cap,
        invokes the retriever (with optional pagination/expansion), and returns
        the merged pack text.

        :param retriever_name: Name of the retriever declaration to render.
        :param template_context: Template variables for query resolution.
        :param retriever_override: Optional retriever callable overriding the default.
        :param pack_budget: Per-pack budget spec (object or dict), or ``None``.
        :param policy: Context policy supplying defaults and expansion settings.
        :param tighten_pack_budget: When ``True``, halves character cap and limit.
        :param weight: Pack weight used to scale the policy default budget.
        :return: Merged context pack text.
        :raises ValueError: If no retriever callable is available.
        """
        retriever_spec = self._retriever_registry[retriever_name]
        config = retriever_spec.config if hasattr(retriever_spec, "config") else {}
        # Query: explicit template in config wins; otherwise fall back to the
        # turn input's "query" or "message" field.
        query_template = config.get("query") if isinstance(config, dict) else None
        query = ""
        if isinstance(query_template, str):
            query = self._resolve_template(query_template, {}, template_context)
        if not query:
            # NOTE(review): assumes template_context["input"] is a dict — confirm callers.
            input_context = template_context.get("input", {})
            query = input_context.get("query", "") or input_context.get("message", "")

        # Baseline defaults, overridden progressively by the config cascade below.
        split = "train"
        maximum_cache_total_items = None
        maximum_cache_total_characters = None
        limit = 3
        offset = 0
        maximum_total_characters = None
        maximum_items_per_source = None
        include_metadata = False
        metadata_fields = None
        retriever_id = None
        corpus_root = None
        snapshot_id = None
        configuration_name = None
        configuration = None
        corpus_name = getattr(retriever_spec, "corpus", None)
        join_with = "\n\n"
        pipeline_config = None
        query_config = None

        # Layer 1: retriever-level config.
        if isinstance(config, dict):
            split = config.get("split", split)
            limit = config.get("limit", limit)
            offset = config.get("offset", offset)
            maximum_total_characters = config.get(
                "maximum_total_characters", maximum_total_characters
            )
            # Accept both the long and short key spellings.
            maximum_items_per_source = config.get(
                "maximum_items_per_source",
                config.get("max_items_per_source", maximum_items_per_source),
            )
            include_metadata = config.get("include_metadata", include_metadata)
            metadata_fields = config.get("metadata_fields", metadata_fields)
            retriever_id = config.get("retriever_id", retriever_id)
            snapshot_id = config.get("snapshot_id", snapshot_id)
            configuration_name = config.get("configuration_name", configuration_name)
            if isinstance(config.get("configuration"), dict):
                configuration = config.get("configuration")
            corpus_name = config.get("corpus", corpus_name)
            join_with = config.get("join_with", join_with)
            # Pipeline config may live under "configuration.pipeline" or "pipeline".
            if isinstance(configuration, dict):
                pipeline_config = configuration.get("pipeline")
            if not isinstance(pipeline_config, dict) and isinstance(config.get("pipeline"), dict):
                pipeline_config = config.get("pipeline")
            if isinstance(pipeline_config, dict):
                if isinstance(pipeline_config.get("query"), dict):
                    query_config = pipeline_config.get("query") or {}
                if configuration is None and isinstance(pipeline_config.get("index"), dict):
                    configuration = pipeline_config.get("index") or {}
            # Index configuration fallbacks, in precedence order.
            if configuration is None and isinstance(config.get("index"), dict):
                configuration = config.get("index") or {}
            if configuration is None and isinstance(pipeline_config, dict):
                configuration = pipeline_config.get("index") or {}
        # Layer 2: corpus-level config overrides split/cache/root settings.
        if corpus_name and corpus_name in self._corpus_registry:
            corpus_spec = self._corpus_registry[corpus_name]
            corpus_config = corpus_spec.config if hasattr(corpus_spec, "config") else {}
            if isinstance(corpus_config, dict):
                split = corpus_config.get("split", split)
                maximum_cache_total_items = corpus_config.get(
                    "maximum_cache_total_items", maximum_cache_total_items
                )
                maximum_cache_total_characters = corpus_config.get(
                    "maximum_cache_total_characters", maximum_cache_total_characters
                )
                corpus_root = corpus_config.get(
                    "corpus_root",
                    corpus_config.get("root", corpus_root),
                )
        # Layer 3: pipeline query config has the last word on query-time options.
        if query_config:
            if "limit" in query_config:
                limit = query_config.get("limit", limit)
            if "offset" in query_config:
                offset = query_config.get("offset", offset)
            if "maximum_total_characters" in query_config:
                maximum_total_characters = query_config.get(
                    "maximum_total_characters", maximum_total_characters
                )
            if "maximum_items_per_source" in query_config:
                maximum_items_per_source = query_config.get(
                    "maximum_items_per_source",
                    maximum_items_per_source,
                )
            if "max_items_per_source" in query_config and maximum_items_per_source is None:
                maximum_items_per_source = query_config.get("max_items_per_source")
            if "include_metadata" in query_config:
                include_metadata = query_config.get("include_metadata", include_metadata)
            if "metadata_fields" in query_config:
                metadata_fields = query_config.get("metadata_fields", metadata_fields)
            if "join_with" in query_config:
                join_with = query_config.get("join_with", join_with)

        # Convert the token allocation to a character cap (~4 chars per token).
        allocated_tokens = self._allocate_pack_budget(pack_budget, policy, weight)
        if allocated_tokens is not None:
            derived_chars = int(allocated_tokens) * 4
            if maximum_total_characters is None:
                maximum_total_characters = derived_chars
            else:
                maximum_total_characters = min(maximum_total_characters, derived_chars)

        # Regeneration retries halve both the character cap and the item limit.
        if tighten_pack_budget:
            if maximum_total_characters is not None:
                maximum_total_characters = max(1, int(maximum_total_characters * 0.5))
            limit = max(1, int(limit * 0.5))

        retriever_fn = retriever_override or self._default_retriever
        if retriever_fn is None:
            raise ValueError("No retriever override or default retriever configured")

        request = ContextRetrieverRequest(
            query=query,
            offset=offset,
            limit=limit,
            maximum_total_characters=maximum_total_characters,
            max_tokens=allocated_tokens,
            metadata={
                "retriever": retriever_name,
                "corpus": corpus_name,
                "split": split,
                "maximum_cache_total_items": maximum_cache_total_items,
                "maximum_cache_total_characters": maximum_cache_total_characters,
                "maximum_items_per_source": maximum_items_per_source,
                "include_metadata": include_metadata,
                "metadata_fields": metadata_fields,
                "retriever_id": retriever_id,
                "corpus_root": corpus_root,
                "snapshot_id": snapshot_id,
                "configuration_name": configuration_name,
                "configuration": configuration,
            },
        )
        context_pack = self._retrieve_with_expansion(
            retriever_fn,
            request,
            policy,
            join_with,
            allocated_tokens,
        )
        return context_pack.text
534
+
535
    def _retrieve_with_expansion(
        self,
        retriever_fn: Any,
        request: ContextRetrieverRequest,
        policy: Optional[ContextPolicySpec],
        join_with: str,
        target_tokens: Optional[int],
    ) -> ContextPack:
        """
        Invoke the retriever, paginating until the fill target or page cap is hit.

        Without an expansion policy (or without a token target) a single page is
        fetched. Otherwise further pages are requested — advancing ``offset`` by
        ``limit`` — until the merged pack reaches
        ``target_tokens * min_fill_ratio`` tokens, the retriever returns a short
        page, or ``max_pages`` is exhausted.

        :param retriever_fn: Callable taking a request and returning a pack or ``None``.
        :param request: Initial retrieval request.
        :param policy: Policy carrying the optional ``expansion`` settings.
        :param join_with: Separator used when merging page texts.
        :param target_tokens: Token target for the merged pack, or ``None``.
        :return: Merged context pack across all fetched pages.
        """
        packs: list[ContextPack] = []
        expansion = policy.expansion if policy else None
        max_pages = 1
        min_fill_ratio = None
        if expansion is not None:
            max_pages = max(1, expansion.max_pages)
            min_fill_ratio = expansion.min_fill_ratio

        current_request = request
        # page_index is unused; the loop only bounds the number of pages.
        for page_index in range(max_pages):
            response_pack = retriever_fn(current_request)
            if response_pack is None:
                break
            packs.append(response_pack)

            # Single-page mode or no fill target: stop after the first page.
            if max_pages <= 1 or target_tokens is None:
                break
            # A short page means the source is exhausted.
            if response_pack.evidence_count < current_request.limit:
                break

            merged_pack = self._merge_context_packs(packs, join_with=join_with)
            token_count = self._estimate_tokens(merged_pack.text)
            # min_fill_ratio defaults to 1.0 (require the full target).
            threshold_ratio = 1.0 if min_fill_ratio is None else float(min_fill_ratio)
            if token_count >= int(target_tokens * threshold_ratio):
                break

            # Advance to the next page without mutating the original request.
            current_request = current_request.model_copy(
                update={"offset": current_request.offset + current_request.limit}
            )

        return self._merge_context_packs(packs, join_with=join_with)
574
+
575
+ def _merge_context_packs(self, packs: Iterable[ContextPack], join_with: str) -> ContextPack:
576
+ blocks: list[ContextPackBlock] = []
577
+ for index, pack in enumerate(packs, start=1):
578
+ if pack.blocks:
579
+ blocks.extend(pack.blocks)
580
+ continue
581
+ if pack.text:
582
+ blocks.append(
583
+ ContextPackBlock(
584
+ evidence_item_id=f"page-{index}",
585
+ text=pack.text,
586
+ metadata=None,
587
+ )
588
+ )
589
+ if not blocks:
590
+ return ContextPack(text="", evidence_count=0, blocks=[])
591
+ text = join_with.join([block.text for block in blocks])
592
+ return ContextPack(text=text, evidence_count=len(blocks), blocks=blocks)
593
+
594
+ def _apply_compaction_policy(
595
+ self, text: str, policy: Any, max_tokens_override: Optional[int] = None
596
+ ) -> str:
597
+ if not policy or not getattr(policy, "input_budget", None):
598
+ return text
599
+ max_tokens = max_tokens_override
600
+ if max_tokens is None:
601
+ budget = policy.input_budget
602
+ max_tokens = getattr(budget, "max_tokens", None)
603
+ if max_tokens is None:
604
+ return text
605
+ tokens = text.split()
606
+ if len(tokens) <= max_tokens:
607
+ return text
608
+ if getattr(policy, "overflow", None) != "compact":
609
+ return text
610
+
611
+ compactor = self._resolve_compactor(policy)
612
+ return compactor.compact(CompactionRequest(text=text, max_tokens=max_tokens))
613
+
614
+ def _estimate_tokens(self, text: str) -> int:
615
+ return len(text.split())
616
+
617
    def _apply_context_budget(
        self,
        system_prompt: str,
        history: list[dict[str, Any]],
        user_message: str,
        policy: Any,
    ) -> tuple[str, list[dict[str, Any]], str, int, bool]:
        """
        Enforce the policy input budget over prompt, history, and user message.

        Under "compact" overflow, oldest history messages are dropped first;
        if the total still exceeds the budget, the system prompt is compacted
        into whatever token budget the remaining history and user message leave.

        :param system_prompt: Assembled system prompt text.
        :param history: Message history (not mutated; a copy is trimmed).
        :param user_message: Current user message text.
        :param policy: Context policy carrying ``input_budget`` and ``overflow``.
        :return: Tuple of (system prompt, history, user message, token count,
            compacted-flag). The flag is ``True`` only when something was
            trimmed or compacted.
        """
        # No budget configured: report the estimate and pass everything through.
        if not policy or not getattr(policy, "input_budget", None):
            token_count = self._estimate_total_tokens(system_prompt, history, user_message)
            return system_prompt, history, user_message, token_count, False

        budget = policy.input_budget
        max_tokens = getattr(budget, "max_tokens", None)
        if max_tokens is None:
            token_count = self._estimate_total_tokens(system_prompt, history, user_message)
            return system_prompt, history, user_message, token_count, False

        token_count = self._estimate_total_tokens(system_prompt, history, user_message)
        if token_count <= max_tokens:
            return system_prompt, history, user_message, token_count, False

        # Over budget but overflow mode is not "compact": leave content intact.
        if getattr(policy, "overflow", None) != "compact":
            return system_prompt, history, user_message, token_count, False

        # Phase 1: drop the oldest history entries until within budget.
        trimmed_history = list(history)
        compacted = False
        while trimmed_history and token_count > max_tokens:
            trimmed_history.pop(0)
            token_count = self._estimate_total_tokens(system_prompt, trimmed_history, user_message)
            compacted = True

        if token_count <= max_tokens:
            return system_prompt, trimmed_history, user_message, token_count, compacted

        # Phase 2: compact the system prompt into the leftover token budget.
        remaining_budget = max_tokens - self._estimate_total_tokens(
            "", trimmed_history, user_message
        )
        if remaining_budget < 0:
            remaining_budget = 0
        compacted_prompt = self._apply_compaction_policy(
            system_prompt,
            policy,
            max_tokens_override=remaining_budget,
        )
        if compacted_prompt != system_prompt:
            compacted = True
        token_count = self._estimate_total_tokens(compacted_prompt, trimmed_history, user_message)
        return compacted_prompt, trimmed_history, user_message, token_count, compacted
665
+
666
+ def _allocate_pack_budget(
667
+ self,
668
+ pack_budget: Optional[Any],
669
+ policy: Optional[ContextPolicySpec],
670
+ weight: Optional[float],
671
+ ) -> Optional[int]:
672
+ if pack_budget is not None and hasattr(pack_budget, "max_tokens"):
673
+ max_tokens = getattr(pack_budget, "max_tokens", None)
674
+ if max_tokens is not None:
675
+ return int(max_tokens)
676
+ ratio = getattr(pack_budget, "ratio", None)
677
+ if ratio is not None:
678
+ return self._resolve_ratio_budget(ratio, policy)
679
+ if pack_budget and isinstance(pack_budget, dict):
680
+ max_tokens = pack_budget.get("max_tokens")
681
+ if max_tokens is not None:
682
+ return int(max_tokens)
683
+ ratio = pack_budget.get("ratio")
684
+ if ratio is not None:
685
+ return self._resolve_ratio_budget(ratio, policy)
686
+
687
+ if policy and getattr(policy, "pack_budget", None):
688
+ pack_budget_spec = policy.pack_budget
689
+ base_tokens = getattr(pack_budget_spec, "default_max_tokens", None)
690
+ if base_tokens is None:
691
+ base_ratio = getattr(pack_budget_spec, "default_ratio", None)
692
+ if base_ratio is None:
693
+ return None
694
+ if not getattr(policy, "input_budget", None):
695
+ return None
696
+ input_budget = policy.input_budget
697
+ max_tokens = getattr(input_budget, "max_tokens", None)
698
+ if max_tokens is None:
699
+ return None
700
+ base_tokens = int(max_tokens * base_ratio)
701
+ if weight is None:
702
+ return int(base_tokens)
703
+ return int(base_tokens * weight)
704
+
705
+ return None
706
+
707
+ def _resolve_ratio_budget(
708
+ self, ratio: float, policy: Optional[ContextPolicySpec]
709
+ ) -> Optional[int]:
710
+ if ratio is None or not policy or not getattr(policy, "input_budget", None):
711
+ return None
712
+ input_budget = policy.input_budget
713
+ max_tokens = getattr(input_budget, "max_tokens", None)
714
+ if max_tokens is None:
715
+ return None
716
+ return int(max_tokens * float(ratio))
717
+
718
    def _allocate_directive_budgets(
        self,
        directives: list[ContextMessageSpec],
        policy: Optional[ContextPolicySpec],
        total_budget_override: Optional[int],
    ) -> dict[int, dict[str, Any]]:
        """
        Split the total pack budget across explicit pack-insert directives.

        Only :class:`ContextInsertSpec` directives participate. Directives are
        ordered by descending priority then descending weight, and each receives
        a weight-proportional integer share (minimum 1, clamped to what remains).
        Any leftover tokens are added to the highest-ranked directive.

        :param directives: Explicit Context message directives.
        :param policy: Policy supplying ``pack_budget`` defaults.
        :param total_budget_override: Optional override for the total budget.
        :return: Mapping keyed by ``id(directive)`` to ``{"max_tokens": int}``.
            Empty when no budget or no pack directives exist.
        """
        if not policy or not getattr(policy, "pack_budget", None):
            return {}
        pack_budget_spec = policy.pack_budget
        # Resolve the total budget: override, then fixed default, then ratio of
        # the input budget.
        total_budget = total_budget_override
        if total_budget is None:
            total_budget = getattr(pack_budget_spec, "default_max_tokens", None)
        if total_budget is None:
            base_ratio = getattr(pack_budget_spec, "default_ratio", None)
            if base_ratio is None:
                return {}
            if not getattr(policy, "input_budget", None):
                return {}
            input_budget = policy.input_budget
            max_tokens = getattr(input_budget, "max_tokens", None)
            if max_tokens is None:
                return {}
            total_budget = int(max_tokens * base_ratio)

        pack_directives = [
            directive for directive in directives if isinstance(directive, ContextInsertSpec)
        ]
        if not pack_directives:
            return {}

        # Higher priority first; within equal priority, higher weight first.
        sorted_directives = sorted(
            pack_directives,
            key=lambda directive: (
                -(directive.priority or 0),
                -(directive.weight or 1.0),
            ),
        )
        total_weight = sum(directive.weight or 1.0 for directive in sorted_directives)
        allocations = {}
        remaining_budget = int(total_budget)

        for directive in sorted_directives:
            weight = directive.weight or 1.0
            allocation = int((total_budget * weight) / total_weight)
            # Every directive gets at least one token, but never more than remains.
            if allocation <= 0:
                allocation = 1
            if allocation > remaining_budget:
                allocation = remaining_budget
            allocations[id(directive)] = {"max_tokens": allocation}
            remaining_budget -= allocation
            # NOTE(review): once the budget is exhausted, lower-ranked
            # directives get no allocation entry at all.
            if remaining_budget <= 0:
                break

        # Rounding leftovers go to the top-ranked directive.
        if remaining_budget > 0 and sorted_directives:
            allocations[id(sorted_directives[0])]["max_tokens"] += remaining_budget

        return allocations
775
+
776
+ def _allocate_default_pack_budgets(
777
+ self,
778
+ pack_entries: list[Any],
779
+ policy: Optional[ContextPolicySpec],
780
+ total_budget_override: Optional[int],
781
+ ) -> dict[str, dict[str, Any]]:
782
+ if not policy or not getattr(policy, "pack_budget", None):
783
+ return {}
784
+ pack_budget_spec = policy.pack_budget
785
+ total_budget = total_budget_override
786
+ if total_budget is None:
787
+ total_budget = getattr(pack_budget_spec, "default_max_tokens", None)
788
+ if total_budget is None:
789
+ base_ratio = getattr(pack_budget_spec, "default_ratio", None)
790
+ if base_ratio is None:
791
+ return {}
792
+ if not getattr(policy, "input_budget", None):
793
+ return {}
794
+ input_budget = policy.input_budget
795
+ max_tokens = getattr(input_budget, "max_tokens", None)
796
+ if max_tokens is None:
797
+ return {}
798
+ total_budget = int(max_tokens * base_ratio)
799
+
800
+ if not pack_entries:
801
+ return {}
802
+
803
+ entries = sorted(
804
+ pack_entries,
805
+ key=lambda entry: (
806
+ -(getattr(entry, "priority", None) or 0),
807
+ -(getattr(entry, "weight", None) or 1.0),
808
+ ),
809
+ )
810
+ total_weight = sum(getattr(entry, "weight", None) or 1.0 for entry in entries)
811
+ allocations = {}
812
+ remaining_budget = int(total_budget)
813
+
814
+ for entry in entries:
815
+ weight = getattr(entry, "weight", None) or 1.0
816
+ allocation = int((total_budget * weight) / total_weight)
817
+ if allocation <= 0:
818
+ allocation = 1
819
+ if allocation > remaining_budget:
820
+ allocation = remaining_budget
821
+ allocations[entry.name] = {"max_tokens": allocation}
822
+ remaining_budget -= allocation
823
+ if remaining_budget <= 0:
824
+ break
825
+
826
+ if remaining_budget > 0 and entries:
827
+ allocations[entries[0].name]["max_tokens"] += remaining_budget
828
+
829
+ return allocations
830
+
831
+ def _resolve_default_pack_total_budget(
832
+ self, policy: Optional[ContextPolicySpec]
833
+ ) -> Optional[int]:
834
+ if not policy or not getattr(policy, "pack_budget", None):
835
+ return None
836
+ pack_budget_spec = policy.pack_budget
837
+ total_budget = getattr(pack_budget_spec, "default_max_tokens", None)
838
+ if total_budget is not None:
839
+ return int(total_budget)
840
+ base_ratio = getattr(pack_budget_spec, "default_ratio", None)
841
+ if base_ratio is None:
842
+ return None
843
+ if not getattr(policy, "input_budget", None):
844
+ return None
845
+ input_budget = policy.input_budget
846
+ max_tokens = getattr(input_budget, "max_tokens", None)
847
+ if max_tokens is None:
848
+ return None
849
+ return int(max_tokens * base_ratio)
850
+
851
    def _extract_pack_budget_max_tokens(
        self, pack_budget: Optional[Any], policy: Optional[ContextPolicySpec]
    ) -> Optional[int]:
        """Extract a concrete ``max_tokens`` limit from a pack budget spec.

        Accepts either a dict form (``{"max_tokens": ...}`` / ``{"ratio": ...}``)
        or an object form with the same fields. An explicit ``max_tokens``
        always wins; otherwise a ``ratio`` is resolved against the policy via
        ``_resolve_ratio_budget``. Returns ``None`` when nothing applies.
        """
        if pack_budget is None:
            return None
        if isinstance(pack_budget, dict):
            max_tokens = pack_budget.get("max_tokens")
            if max_tokens is not None:
                return int(max_tokens)
            ratio = pack_budget.get("ratio")
            if ratio is not None:
                return self._resolve_ratio_budget(ratio, policy)
        # Object form: mirror the dict handling via attribute access.
        if hasattr(pack_budget, "max_tokens"):
            max_tokens = getattr(pack_budget, "max_tokens", None)
            if max_tokens is not None:
                return int(max_tokens)
        ratio = getattr(pack_budget, "ratio", None)
        if ratio is not None:
            return self._resolve_ratio_budget(ratio, policy)
        return None
871
+
872
+ def _estimate_total_tokens(
873
+ self, system_prompt: str, history: list[dict[str, Any]], user_message: str
874
+ ) -> int:
875
+ total = self._estimate_tokens(system_prompt) + self._estimate_tokens(user_message)
876
+ for message in history:
877
+ content = message.get("content", "")
878
+ total += self._estimate_tokens(content)
879
+ return total
880
+
881
+ def _resolve_compactor(self, policy: ContextPolicySpec):
882
+ compactor_config = None
883
+ if hasattr(policy, "compactor"):
884
+ compactor_config = policy.compactor
885
+
886
+ if isinstance(compactor_config, str):
887
+ compactor_spec = self._compactor_registry.get(compactor_config)
888
+ if compactor_spec is None:
889
+ raise ValueError(f"Compactor '{compactor_config}' not defined")
890
+ config = compactor_spec.config if hasattr(compactor_spec, "config") else {}
891
+ if not isinstance(config, dict):
892
+ config = {}
893
+ return build_compactor(config)
894
+
895
+ if isinstance(compactor_config, dict):
896
+ return build_compactor(compactor_config)
897
+
898
+ return TruncateCompactor()
899
+
900
+ def _build_messages(
901
+ self,
902
+ context_spec: ContextDeclaration,
903
+ history_messages: list[dict[str, Any]],
904
+ template_context: dict[str, Any],
905
+ retriever_override: Optional[Any],
906
+ policy: Optional[ContextPolicySpec],
907
+ tighten_pack_budget: bool = False,
908
+ total_pack_budget_override: Optional[int] = None,
909
+ ) -> list[dict[str, Any]]:
910
+ assembled_messages: list[dict[str, Any]] = []
911
+ resolved_context = dict(template_context)
912
+ context_values = dict(resolved_context.get("context", {}))
913
+ resolved_context["context"] = context_values
914
+ directive_budgets = self._allocate_directive_budgets(
915
+ context_spec.messages or [], policy, total_pack_budget_override
916
+ )
917
+ for directive in context_spec.messages or []:
918
+ if isinstance(directive, HistoryInsertSpec):
919
+ assembled_messages.extend(history_messages)
920
+ continue
921
+ if isinstance(directive, ContextInsertSpec):
922
+ override_budget = directive_budgets.get(id(directive))
923
+ pack_content = self._render_pack(
924
+ directive.name,
925
+ resolved_context,
926
+ retriever_override,
927
+ override_budget or directive.budget,
928
+ policy,
929
+ tighten_pack_budget,
930
+ directive.weight,
931
+ )
932
+ context_values[directive.name] = pack_content or ""
933
+ if pack_content:
934
+ assembled_messages.append({"role": "system", "content": pack_content})
935
+ else:
936
+ context_values[directive.name] = ""
937
+ continue
938
+ if isinstance(directive, SystemMessageSpec):
939
+ assembled_messages.append(
940
+ {
941
+ "role": "system",
942
+ "content": self._resolve_message_content(directive, resolved_context),
943
+ }
944
+ )
945
+ continue
946
+ if isinstance(directive, UserMessageSpec):
947
+ assembled_messages.append(
948
+ {
949
+ "role": "user",
950
+ "content": self._resolve_message_content(directive, resolved_context),
951
+ }
952
+ )
953
+ continue
954
+ if isinstance(directive, AssistantMessageSpec):
955
+ assembled_messages.append(
956
+ {
957
+ "role": "assistant",
958
+ "content": self._resolve_message_content(directive, resolved_context),
959
+ }
960
+ )
961
+ continue
962
+ return assembled_messages
963
+
964
    def _render_nested_context_pack(
        self,
        context_spec: ContextDeclaration,
        template_context: dict[str, Any],
        pack_budget: Optional[Any],
        policy: Optional[ContextPolicySpec],
        tighten_pack_budget: bool,
        retriever_override: Optional[Any],
    ) -> str:
        """Render another Context declaration as a pack string.

        Declarations without explicit messages go through the default
        assembly path and the resulting system prompt becomes the pack text.
        Declarations with messages are rendered directly; if the rendered
        text exceeds the resolved token budget and the compaction policy's
        overflow mode is "compact", rendering is retried with the per-pack
        budget halved each iteration (up to the policy's max_iterations).
        The last compacted text is returned either way.

        :raises ValueError: If the nested declaration contains a history()
            directive, which is not meaningful inside a pack.
        """
        # The nested declaration may carry its own policy; the outer policy
        # (when present) still governs compaction of the final pack text.
        nested_policy = context_spec.policy or policy
        compaction_policy = policy or nested_policy
        total_budget_override = self._extract_pack_budget_max_tokens(pack_budget, policy)

        if context_spec.messages is None:
            # No explicit messages: delegate to default assembly and compact
            # whatever system prompt it produced.
            nested_result = self._assemble_default_with_regeneration(
                context_spec,
                base_system_prompt="",
                history_messages=[],
                user_message="",
                template_context=template_context,
                retriever_override=retriever_override,
                total_budget_override=total_budget_override,
            )
            pack_text = nested_result.system_prompt
            return self._compact_pack_text(
                pack_text, pack_budget, compaction_policy, tighten_pack_budget
            )

        if any(isinstance(directive, HistoryInsertSpec) for directive in context_spec.messages):
            raise ValueError("Nested context packs cannot include history()")

        max_iterations = 1
        if compaction_policy and getattr(compaction_policy, "max_iterations", None):
            max_iterations = max(1, int(compaction_policy.max_iterations))

        # pack_scale shrinks the per-pack budget on each retry; last_text
        # always holds the most recent compacted rendering.
        pack_scale = 1.0
        last_text = ""
        max_tokens = self._extract_pack_budget_max_tokens(pack_budget, compaction_policy)
        for _iteration in range(max_iterations):
            scaled_override = total_budget_override
            if scaled_override is not None and pack_scale < 1.0:
                scaled_override = max(1, int(scaled_override * pack_scale))

            assembled_messages = self._build_messages(
                context_spec,
                history_messages=[],
                template_context=template_context,
                retriever_override=retriever_override,
                policy=nested_policy,
                # Once we are retrying at a reduced scale, also tighten the
                # individual pack budgets inside the nested render.
                tighten_pack_budget=tighten_pack_budget or pack_scale < 1.0,
                total_pack_budget_override=scaled_override,
            )
            rendered_segments = [message.get("content", "") for message in assembled_messages]
            pack_text = self._join_nonempty(rendered_segments)
            # Overflow is judged on the raw (pre-compaction) text so that
            # regeneration kicks in even when compaction would mask it.
            raw_token_count = self._estimate_tokens(pack_text)
            last_text = self._compact_pack_text(
                pack_text, pack_budget, compaction_policy, tighten_pack_budget
            )

            if max_tokens is None or raw_token_count <= max_tokens:
                break
            if not compaction_policy or getattr(compaction_policy, "overflow", None) != "compact":
                break
            pack_scale *= 0.5

        return last_text
1031
+
1032
+ def _compact_pack_text(
1033
+ self,
1034
+ text: str,
1035
+ pack_budget: Optional[Any],
1036
+ policy: Optional[ContextPolicySpec],
1037
+ tighten_pack_budget: bool,
1038
+ ) -> str:
1039
+ max_tokens = None
1040
+ if pack_budget is not None:
1041
+ if isinstance(pack_budget, dict):
1042
+ max_tokens = pack_budget.get("max_tokens")
1043
+ elif hasattr(pack_budget, "max_tokens"):
1044
+ max_tokens = getattr(pack_budget, "max_tokens", None)
1045
+
1046
+ if max_tokens is None and policy and getattr(policy, "pack_budget", None):
1047
+ pack_budget_spec = policy.pack_budget
1048
+ max_tokens = getattr(pack_budget_spec, "default_max_tokens", None)
1049
+
1050
+ if max_tokens is None:
1051
+ return text
1052
+
1053
+ if tighten_pack_budget:
1054
+ max_tokens = max(1, int(max_tokens * 0.5))
1055
+
1056
+ compactor = self._resolve_compactor(policy) if policy else TruncateCompactor()
1057
+ return compactor.compact(CompactionRequest(text=text, max_tokens=int(max_tokens)))
1058
+
1059
+ def _split_leading_system(
1060
+ self, messages: list[dict[str, Any]]
1061
+ ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
1062
+ """Split leading system messages from the rest."""
1063
+ leading_system: list[dict[str, Any]] = []
1064
+ remaining: list[dict[str, Any]] = []
1065
+ seen_non_system = False
1066
+ for message in messages:
1067
+ if message.get("role") == "system" and not seen_non_system:
1068
+ leading_system.append(message)
1069
+ else:
1070
+ seen_non_system = True
1071
+ remaining.append(message)
1072
+ return leading_system, remaining
1073
+
1074
+ def _extract_user_message(
1075
+ self, messages: list[dict[str, Any]], fallback_message: str
1076
+ ) -> tuple[str, list[dict[str, Any]]]:
1077
+ """Extract the final user message from a list, leaving the rest as history."""
1078
+ last_user_index = None
1079
+ for idx, message in enumerate(messages):
1080
+ if message.get("role") == "user":
1081
+ last_user_index = idx
1082
+ if last_user_index is None:
1083
+ return fallback_message, messages
1084
+
1085
+ user_message = messages[last_user_index].get("content", "")
1086
+ remaining = messages[:last_user_index] + messages[last_user_index + 1 :]
1087
+ return user_message, remaining
1088
+
1089
+ def _join_nonempty(self, parts: Iterable[str]) -> str:
1090
+ return "\n\n".join([part for part in parts if part])