biblicus 0.16.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,110 @@
1
+ """
2
+ Compaction utilities for Context Engine assembly.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass
8
+ from typing import Any
9
+
10
+
11
@dataclass
class CompactionRequest:
    """
    Input handed to a compactor.

    :ivar text: Source text that should be shortened.
    :vartype text: str
    :ivar max_tokens: Upper bound on the number of tokens in the result.
    :vartype max_tokens: int
    """

    # Field order is part of the positional constructor signature.
    text: str
    max_tokens: int
24
+
25
+
26
class BaseCompactor:
    """
    Common interface shared by every compaction strategy.

    A concrete strategy overrides ``compact`` and returns a shorter string
    that respects the budget carried by the request.
    """

    def compact(self, request: CompactionRequest) -> str:
        """
        Shorten the request text so it fits the requested token budget.

        The base implementation is a placeholder and always raises.

        :param request: Compaction request carrying the text and its budget.
        :type request: CompactionRequest
        :return: Compacted text.
        :rtype: str
        :raises NotImplementedError: Always, until a subclass overrides it.
        """
        raise NotImplementedError()
44
+
45
+
46
class TruncateCompactor(BaseCompactor):
    """
    Simple truncation compactor (token-based).

    Tokens are the whitespace-separated words produced by ``str.split()``.
    """

    def compact(self, request: CompactionRequest) -> str:
        """
        Compact by truncating to the maximum token count.

        Text that already fits is returned unchanged (original whitespace
        preserved). Otherwise the leading tokens are kept and re-joined with
        single spaces. A zero or negative budget yields an empty string for
        any non-empty text.

        :param request: Compaction request with text and budget.
        :type request: CompactionRequest
        :return: Truncated text.
        :rtype: str
        """
        tokens = request.text.split()
        # Clamp the budget: a negative value would otherwise turn the slice
        # below into "drop the last N tokens" instead of "keep nothing".
        limit = max(request.max_tokens, 0)
        if len(tokens) <= limit:
            return request.text
        return " ".join(tokens[:limit])
64
+
65
+
66
class SummaryCompactor(BaseCompactor):
    """
    Simple sentence-first compactor (deterministic).

    Keeps only the first segment returned by ``_split_sentences`` and, when
    that segment is still too long, truncates it by whitespace tokens.
    """

    def compact(self, request: CompactionRequest) -> str:
        """
        Compact by selecting the first sentence within the budget.

        When no sentence segment can be extracted the original text is
        returned unchanged. A zero or negative budget yields an empty string
        whenever a first sentence exists.

        :param request: Compaction request with text and budget.
        :type request: CompactionRequest
        :return: Compacted text.
        :rtype: str
        """
        sentences = _split_sentences(request.text)
        if not sentences:
            return request.text

        first_sentence = sentences[0].strip()
        tokens = first_sentence.split()
        # Clamp the budget: a negative value would otherwise slice tokens off
        # the end of the sentence rather than returning nothing.
        limit = max(request.max_tokens, 0)
        if len(tokens) > limit:
            return " ".join(tokens[:limit])
        return first_sentence
89
+
90
+
91
def build_compactor(config: dict[str, Any]) -> BaseCompactor:
    """
    Create the compactor selected by a configuration mapping.

    The strategy is read from the ``"type"`` key and defaults to
    ``"truncate"`` when the key is absent.

    :param config: Compactor configuration payload.
    :type config: dict[str, Any]
    :return: Compactor instance.
    :rtype: BaseCompactor
    :raises ValueError: If the compactor type is unknown.
    """
    strategy = config.get("type", "truncate")
    # Ordered (label, class) pairs; compared with ``==`` one after another.
    known_strategies = (
        ("truncate", TruncateCompactor),
        ("summary", SummaryCompactor),
    )
    for label, compactor_class in known_strategies:
        if strategy == label:
            return compactor_class()
    raise ValueError(f"Unknown compactor type: {strategy}")
107
+
108
+
109
+ def _split_sentences(text: str) -> list[str]:
110
+ return [segment for segment in text.split(". ") if segment]
@@ -0,0 +1,423 @@
1
+ """
2
+ Pydantic models for Biblicus Context Engine configuration.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, Literal, Optional, Union
8
+
9
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
10
+
11
+
12
class ContextBudgetSpec(BaseModel):
    """
    Token budget for Context assembly.

    At least one of the two controls must be set; unknown fields are rejected.

    :ivar ratio: Optional share of the input budget to allocate (>= 0).
    :vartype ratio: float or None
    :ivar max_tokens: Optional absolute token cap (>= 1).
    :vartype max_tokens: int or None
    """

    model_config = ConfigDict(extra="forbid")

    ratio: Optional[float] = Field(default=None, ge=0.0)
    max_tokens: Optional[int] = Field(default=None, ge=1)

    @model_validator(mode="after")
    def _validate_budget(self) -> "ContextBudgetSpec":
        """
        Reject a budget that sets neither control.

        :return: Validated budget spec.
        :rtype: ContextBudgetSpec
        :raises ValueError: If neither ratio nor max_tokens is provided.
        """
        has_control = self.ratio is not None or self.max_tokens is not None
        if not has_control:
            raise ValueError("Budget must specify ratio or max_tokens")
        return self
39
+
40
+
41
class ContextPackBudgetSpec(BaseModel):
    """
    Fallback budget policy applied to Context packs.

    At least one of the two defaults must be set; unknown fields are rejected.

    :ivar default_ratio: Optional per-pack share of the input budget (>= 0).
    :vartype default_ratio: float or None
    :ivar default_max_tokens: Optional per-pack absolute token cap (>= 1).
    :vartype default_max_tokens: int or None
    """

    model_config = ConfigDict(extra="forbid")

    default_ratio: Optional[float] = Field(default=None, ge=0.0)
    default_max_tokens: Optional[int] = Field(default=None, ge=1)

    @model_validator(mode="after")
    def _validate_pack_budget(self) -> "ContextPackBudgetSpec":
        """
        Reject a pack budget that sets neither default.

        :return: Validated pack budget spec.
        :rtype: ContextPackBudgetSpec
        :raises ValueError: If neither default_ratio nor default_max_tokens is provided.
        """
        has_default = (
            self.default_ratio is not None or self.default_max_tokens is not None
        )
        if not has_default:
            raise ValueError("Pack budget must specify default_ratio or default_max_tokens")
        return self
68
+
69
+
70
class ContextExpansionSpec(BaseModel):
    """
    Paging policy used when expanding retriever packs.

    Unknown fields are rejected.

    :ivar max_pages: Upper bound on retrieval pages to request (>= 1, default 1).
    :vartype max_pages: int
    :ivar min_fill_ratio: Optional minimum fill ratio before stopping
        expansion; constrained to the range 0.0 to 1.0.
    :vartype min_fill_ratio: float or None
    """

    model_config = ConfigDict(extra="forbid")

    max_pages: int = Field(default=1, ge=1)
    min_fill_ratio: Optional[float] = Field(default=None, ge=0.0, le=1.0)
84
+
85
+
86
class ContextPolicySpec(BaseModel):
    """
    Policy settings covering Context assembly, compaction, and expansion.

    Every field is optional and defaults to ``None``; unknown fields are
    rejected.

    :ivar input_budget: Budget for the full assembled context.
    :vartype input_budget: ContextBudgetSpec or None
    :ivar pack_budget: Default budget for individual packs.
    :vartype pack_budget: ContextPackBudgetSpec or None
    :ivar overflow: Overflow behavior (for example, "compact").
    :vartype overflow: str or None
    :ivar compactor: Compactor registry key or inline configuration.
    :vartype compactor: str or dict[str, Any] or None
    :ivar max_iterations: Maximum compaction regeneration iterations.
    :vartype max_iterations: int or None
    :ivar expansion: Expansion policy for retriever pagination.
    :vartype expansion: ContextExpansionSpec or None
    """

    model_config = ConfigDict(extra="forbid")

    input_budget: Optional[ContextBudgetSpec] = None
    pack_budget: Optional[ContextPackBudgetSpec] = None
    overflow: Optional[str] = None
    compactor: Optional[Union[str, dict[str, Any]]] = None
    max_iterations: Optional[int] = None
    expansion: Optional[ContextExpansionSpec] = None
112
+
113
+
114
class ContextTemplateSpec(BaseModel):
    """
    Template used to produce message content.

    Unknown fields are rejected.

    :ivar template: Template string with dot-notation placeholders.
    :vartype template: str
    :ivar vars: Template variable overrides; defaults to an empty dict.
    :vartype vars: dict[str, Any]
    """

    model_config = ConfigDict(extra="forbid")

    template: str
    vars: dict[str, Any] = Field(default_factory=dict)
128
+
129
+
130
class ContextMessageBase(BaseModel):
    """
    Shared parent of all Context message directives.

    Unknown fields are rejected. Subclasses narrow ``type`` to a literal.

    :ivar type: Directive type identifier.
    :vartype type: str
    """

    model_config = ConfigDict(extra="forbid")

    type: str
141
+
142
+
143
class SystemMessageSpec(ContextMessageBase):
    """
    Directive that emits a system message.

    Exactly one of ``content`` and ``template`` must be set.

    :ivar content: Literal message content.
    :vartype content: str or None
    :ivar template: Template string for message content.
    :vartype template: str or None
    :ivar vars: Template variable overrides.
    :vartype vars: dict[str, Any]
    """

    type: Literal["system"]
    content: Optional[str] = None
    template: Optional[str] = None
    vars: dict[str, Any] = Field(default_factory=dict)

    @model_validator(mode="after")
    def _validate_content(self) -> "SystemMessageSpec":
        """
        Require exactly one message source (content or template).

        :return: Validated message spec.
        :rtype: SystemMessageSpec
        :raises ValueError: If both or neither of content/template are set.
        """
        provided = [
            source for source in (self.content, self.template) if source is not None
        ]
        if len(provided) != 1:
            raise ValueError("System message must define either content or template")
        return self
172
+
173
+
174
class UserMessageSpec(ContextMessageBase):
    """
    Directive that emits a user message.

    Exactly one of ``content`` and ``template`` must be set.

    :ivar content: Literal message content.
    :vartype content: str or None
    :ivar template: Template string for message content.
    :vartype template: str or None
    :ivar vars: Template variable overrides.
    :vartype vars: dict[str, Any]
    """

    type: Literal["user"]
    content: Optional[str] = None
    template: Optional[str] = None
    vars: dict[str, Any] = Field(default_factory=dict)

    @model_validator(mode="after")
    def _validate_content(self) -> "UserMessageSpec":
        """
        Require exactly one message source (content or template).

        :return: Validated message spec.
        :rtype: UserMessageSpec
        :raises ValueError: If both or neither of content/template are set.
        """
        has_content = self.content is not None
        has_template = self.template is not None
        # Equal flags mean "both set" or "both missing"; either is invalid.
        if has_content == has_template:
            raise ValueError("User message must define either content or template")
        return self
203
+
204
+
205
class AssistantMessageSpec(ContextMessageBase):
    """
    Directive that emits an assistant message.

    Exactly one of ``content`` and ``template`` must be set.

    :ivar content: Literal message content.
    :vartype content: str or None
    :ivar template: Template string for message content.
    :vartype template: str or None
    :ivar vars: Template variable overrides.
    :vartype vars: dict[str, Any]
    """

    type: Literal["assistant"]
    content: Optional[str] = None
    template: Optional[str] = None
    vars: dict[str, Any] = Field(default_factory=dict)

    @model_validator(mode="after")
    def _validate_content(self) -> "AssistantMessageSpec":
        """
        Require exactly one message source (content or template).

        :return: Validated message spec.
        :rtype: AssistantMessageSpec
        :raises ValueError: If both or neither of content/template are set.
        """
        missing_content = self.content is None
        missing_template = self.template is None
        # Equal flags mean "both set" or "both missing"; either is invalid.
        if missing_content == missing_template:
            raise ValueError("Assistant message must define either content or template")
        return self
234
+
235
+
236
class ContextInsertSpec(ContextMessageBase):
    """
    Directive that inserts a named Context pack.

    :ivar name: Name of the Context pack to insert.
    :vartype name: str
    :ivar budget: Optional budget override for this pack.
    :vartype budget: ContextBudgetSpec or None
    :ivar weight: Optional weight to bias pack budget allocation.
    :vartype weight: float or None
    :ivar priority: Optional priority for pack budget allocation.
    :vartype priority: int or None
    """

    type: Literal["context"]
    name: str
    budget: Optional[ContextBudgetSpec] = None
    weight: Optional[float] = None
    priority: Optional[int] = None
255
+
256
+
257
class HistoryInsertSpec(ContextMessageBase):
    """
    Directive that inserts history.

    It carries no fields beyond the discriminating ``type``.

    :ivar type: Always "history".
    :vartype type: str
    """

    type: Literal["history"]
266
+
267
+
268
# Any directive allowed in a Context message plan. Member order is kept as
# declared because union validation may depend on it.
ContextMessageSpec = Union[
    SystemMessageSpec,
    UserMessageSpec,
    AssistantMessageSpec,
    ContextInsertSpec,
    HistoryInsertSpec,
]
275
+
276
+
277
class ContextPackSpec(BaseModel):
    """
    Reference to a Context pack used by default Context assembly.

    Unknown fields are rejected.

    :ivar name: Context pack name.
    :vartype name: str
    :ivar weight: Optional weight for budget allocation.
    :vartype weight: float or None
    :ivar priority: Optional priority for budget allocation.
    :vartype priority: int or None
    :ivar budget: Optional budget override for this pack.
    :vartype budget: ContextBudgetSpec or None
    """

    model_config = ConfigDict(extra="forbid")

    name: str
    weight: Optional[float] = None
    priority: Optional[int] = None
    budget: Optional[ContextBudgetSpec] = None
297
+
298
+
299
class ContextDeclaration(BaseModel):
    """
    Context declaration configuration.

    Unknown fields are rejected. ``packs`` may be supplied as a single pack
    name or as a list mixing pack names and pack mappings; bare names are
    expanded to ``{"name": <value>}`` before field validation.

    :ivar name: Context name.
    :vartype name: str
    :ivar policy: Optional context policy.
    :vartype policy: ContextPolicySpec or None
    :ivar messages: Optional explicit message plan.
    :vartype messages: list[ContextMessageSpec] or None
    :ivar packs: Optional default pack list.
    :vartype packs: list[ContextPackSpec] or None
    """

    model_config = ConfigDict(extra="forbid")

    name: str
    policy: Optional[ContextPolicySpec] = None
    messages: Optional[list[ContextMessageSpec]] = None
    packs: Optional[list[ContextPackSpec]] = None

    @model_validator(mode="before")
    @classmethod
    def _coerce_pack_entries(cls, data: Any) -> Any:
        """
        Normalize pack entries to dicts with name fields.

        Runs before field validation, so ``data`` is the raw input rather
        than a model instance. Non-dict input and ``packs`` values that are
        neither a string nor a list are passed through untouched.

        :param data: Raw input handed to the model.
        :type data: Any
        :return: Input with ``packs`` normalized; a shallow copy when changed.
        :rtype: Any
        """
        if not isinstance(data, dict):
            return data

        packs = data.get("packs")
        if isinstance(packs, str):
            normalized = [{"name": packs}]
        elif isinstance(packs, list):
            normalized = [
                {"name": entry} if isinstance(entry, str) else entry
                for entry in packs
            ]
        else:
            # None or an unsupported shape: leave it for field validation.
            return data

        # Build a new mapping so the caller's configuration dict is not
        # rewritten as a side effect of validation.
        return {**data, "packs": normalized}
345
+
346
+
347
class CorpusDeclaration(BaseModel):
    """
    Declaration of a corpus.

    Extra fields are allowed on this model.

    :ivar name: Corpus name.
    :vartype name: str
    :ivar config: Corpus configuration payload; defaults to an empty dict.
    :vartype config: dict[str, Any]
    """

    model_config = ConfigDict(extra="allow")

    name: str
    config: dict[str, Any] = Field(default_factory=dict)
361
+
362
+
363
class RetrieverDeclaration(BaseModel):
    """
    Declaration of a retriever.

    Extra fields are allowed on this model.

    :ivar name: Retriever name.
    :vartype name: str
    :ivar corpus: Optional corpus identifier.
    :vartype corpus: str or None
    :ivar config: Retriever configuration payload; defaults to an empty dict.
    :vartype config: dict[str, Any]
    """

    model_config = ConfigDict(extra="allow")

    name: str
    corpus: Optional[str] = None
    config: dict[str, Any] = Field(default_factory=dict)
380
+
381
+
382
class CompactorDeclaration(BaseModel):
    """
    Declaration of a compactor.

    Extra fields are allowed on this model.

    :ivar name: Compactor name.
    :vartype name: str
    :ivar config: Compactor configuration payload; defaults to an empty dict.
    :vartype config: dict[str, Any]
    """

    model_config = ConfigDict(extra="allow")

    name: str
    config: dict[str, Any] = Field(default_factory=dict)
396
+
397
+
398
class ContextRetrieverRequest(BaseModel):
    """
    Request describing one retrieval for a Context pack.

    Unknown fields are rejected.

    :ivar query: Query text issued against the retriever.
    :vartype query: str
    :ivar offset: Offset into the ranked candidate list (>= 0, default 0).
    :vartype offset: int
    :ivar limit: Maximum number of items to return (>= 1, default 3).
    :vartype limit: int
    :ivar maximum_total_characters: Optional character cap for the pack (>= 1).
    :vartype maximum_total_characters: int or None
    :ivar max_tokens: Optional token cap for the pack (>= 1).
    :vartype max_tokens: int or None
    :ivar metadata: Optional metadata for retriever implementations.
    :vartype metadata: dict[str, Any]
    """

    model_config = ConfigDict(extra="forbid")

    query: str
    offset: int = Field(default=0, ge=0)
    limit: int = Field(default=3, ge=1)
    maximum_total_characters: Optional[int] = Field(default=None, ge=1)
    max_tokens: Optional[int] = Field(default=None, ge=1)
    metadata: dict[str, Any] = Field(default_factory=dict)
@@ -0,0 +1,129 @@
1
+ """
2
+ Context retrieval helpers for the Biblicus Context Engine.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, Optional
8
+
9
+ from biblicus.backends import get_backend
10
+ from biblicus.context import (
11
+ ContextPack,
12
+ ContextPackPolicy,
13
+ TokenBudget,
14
+ build_context_pack,
15
+ fit_context_pack_to_token_budget,
16
+ )
17
+ from biblicus.corpus import Corpus
18
+ from biblicus.models import QueryBudget, RetrievalRun
19
+
20
+ from .models import ContextRetrieverRequest
21
+
22
+
23
+ def _resolve_run(
24
+ corpus: Corpus,
25
+ *,
26
+ backend_id: str,
27
+ run_id: Optional[str],
28
+ recipe_name: Optional[str],
29
+ recipe_config: Optional[dict[str, Any]],
30
+ ) -> RetrievalRun:
31
+ if run_id:
32
+ return corpus.load_run(run_id)
33
+
34
+ latest_run_id = corpus.latest_run_id
35
+ if latest_run_id:
36
+ candidate = corpus.load_run(latest_run_id)
37
+ if candidate.recipe.backend_id == backend_id:
38
+ return candidate
39
+
40
+ if recipe_config is None:
41
+ raise ValueError(
42
+ "No retrieval run available for the requested backend. "
43
+ "Provide run_id or recipe_config to build one."
44
+ )
45
+
46
+ backend = get_backend(backend_id)
47
+ resolved_name = recipe_name or f"Context pack ({backend_id})"
48
+ return backend.build_run(corpus, recipe_name=resolved_name, config=recipe_config)
49
+
50
+
51
def retrieve_context_pack(
    *,
    request: ContextRetrieverRequest,
    corpus: Corpus,
    backend_id: str,
    run_id: Optional[str] = None,
    recipe_name: Optional[str] = None,
    recipe_config: Optional[dict[str, Any]] = None,
    join_with: str = "\n\n",
    max_items_per_source: Optional[int] = None,
    include_metadata: bool = False,
    metadata_fields: Optional[list[str]] = None,
) -> ContextPack:
    """
    Query a Biblicus backend and turn the result into a context pack.

    When the request sets ``max_tokens`` but no ``maximum_total_characters``,
    the character cap passed to the backend is derived as ``max_tokens * 4``.
    When ``max_tokens`` is set, the built pack is additionally fitted to that
    token budget before being returned.

    :param request: Context retrieval request.
    :type request: biblicus.context_engine.ContextRetrieverRequest
    :param corpus: Corpus instance to query.
    :type corpus: biblicus.corpus.Corpus
    :param backend_id: Retrieval backend identifier.
    :type backend_id: str
    :param run_id: Optional retrieval run identifier.
    :type run_id: str or None
    :param recipe_name: Optional recipe name for run builds.
    :type recipe_name: str or None
    :param recipe_config: Optional backend recipe configuration.
    :type recipe_config: dict[str, Any] or None
    :param join_with: Separator between context pack blocks.
    :type join_with: str
    :param max_items_per_source: Optional cap per source.
    :type max_items_per_source: int or None
    :param include_metadata: Whether to include metadata in context blocks.
    :type include_metadata: bool
    :param metadata_fields: Optional metadata fields to include in context blocks.
    :type metadata_fields: list[str] or None
    :return: Context pack derived from retrieval results.
    :rtype: biblicus.context.ContextPack
    :raises ValueError: If no compatible retrieval run is available.
    """
    retrieval_run = _resolve_run(
        corpus,
        backend_id=backend_id,
        run_id=run_id,
        recipe_name=recipe_name,
        recipe_config=recipe_config,
    )

    token_cap = request.max_tokens
    character_cap = request.maximum_total_characters
    if character_cap is None and token_cap is not None:
        # Derive a character cap from the token cap (4 characters per token).
        character_cap = int(token_cap * 4)

    query_budget = QueryBudget(
        max_total_items=request.limit,
        offset=request.offset,
        maximum_total_characters=character_cap,
        max_items_per_source=max_items_per_source,
    )
    query_result = get_backend(backend_id).query(
        corpus,
        run=retrieval_run,
        query_text=request.query,
        budget=query_budget,
    )

    pack_policy = ContextPackPolicy(
        join_with=join_with,
        include_metadata=include_metadata,
        metadata_fields=metadata_fields,
    )
    pack = build_context_pack(query_result, policy=pack_policy)
    if token_cap is not None:
        return fit_context_pack_to_token_budget(
            pack,
            policy=pack_policy,
            token_budget=TokenBudget(max_tokens=int(token_cap)),
        )
    return pack