biblicus 0.16.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biblicus/__init__.py +21 -1
- biblicus/backends/embedding_index_common.py +36 -3
- biblicus/backends/embedding_index_file.py +11 -5
- biblicus/backends/embedding_index_inmemory.py +14 -12
- biblicus/backends/hybrid.py +4 -3
- biblicus/backends/scan.py +1 -0
- biblicus/backends/tf_vector.py +17 -24
- biblicus/cli.py +25 -15
- biblicus/context.py +27 -12
- biblicus/context_engine/__init__.py +53 -0
- biblicus/context_engine/assembler.py +1060 -0
- biblicus/context_engine/compaction.py +110 -0
- biblicus/context_engine/models.py +423 -0
- biblicus/context_engine/retrieval.py +129 -0
- biblicus/corpus.py +117 -16
- biblicus/errors.py +24 -0
- biblicus/knowledge_base.py +1 -1
- biblicus/models.py +6 -3
- biblicus/retrieval.py +2 -2
- biblicus/sources.py +46 -11
- biblicus/text/link.py +6 -0
- biblicus/text/prompts.py +2 -0
- {biblicus-0.16.0.dist-info → biblicus-1.0.0.dist-info}/METADATA +3 -3
- {biblicus-0.16.0.dist-info → biblicus-1.0.0.dist-info}/RECORD +28 -23
- {biblicus-0.16.0.dist-info → biblicus-1.0.0.dist-info}/WHEEL +0 -0
- {biblicus-0.16.0.dist-info → biblicus-1.0.0.dist-info}/entry_points.txt +0 -0
- {biblicus-0.16.0.dist-info → biblicus-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {biblicus-0.16.0.dist-info → biblicus-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Compaction utilities for Context Engine assembly.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class CompactionRequest:
    """
    Input handed to a compactor: the text plus the budget it must respect.

    :ivar text: Source text that should be shortened.
    :vartype text: str
    :ivar max_tokens: Upper bound on the number of tokens to keep.
    :vartype max_tokens: int
    """

    text: str
    max_tokens: int
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class BaseCompactor:
    """
    Common parent of all compaction strategies.

    A concrete strategy overrides ``compact`` and returns a shortened string
    that respects the budget carried by the request.
    """

    def compact(self, request: CompactionRequest) -> str:
        """
        Shrink the request text so it fits the token budget.

        :param request: Text and budget to compact.
        :type request: CompactionRequest
        :return: Compacted text.
        :rtype: str
        :raises NotImplementedError: Always; the base class has no strategy.
        """
        raise NotImplementedError()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class TruncateCompactor(BaseCompactor):
    """
    Simple truncation compactor (whitespace-token based).
    """

    def compact(self, request: CompactionRequest) -> str:
        """
        Compact by truncating to the maximum token count.

        Tokens are whitespace-separated words. Text that already fits is
        returned unchanged (original whitespace preserved); otherwise the
        first ``max_tokens`` tokens are re-joined with single spaces.

        :param request: Compaction request with text and budget.
        :type request: CompactionRequest
        :return: Truncated text.
        :rtype: str
        """
        # A negative budget would be read by slicing as "drop the last N
        # tokens" and return text that exceeds the budget; treat it as zero.
        budget = max(request.max_tokens, 0)
        tokens = request.text.split()
        if len(tokens) <= budget:
            return request.text
        return " ".join(tokens[:budget])
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class SummaryCompactor(BaseCompactor):
    """
    Simple sentence-first compactor (deterministic).
    """

    def compact(self, request: CompactionRequest) -> str:
        """
        Compact by selecting the first sentence within the budget.

        The text is split with ``_split_sentences``; the first segment is
        stripped and, if it still exceeds the budget, cut down to the first
        ``max_tokens`` whitespace-separated tokens. When no segment is found
        the original text is returned unchanged.

        :param request: Compaction request with text and budget.
        :type request: CompactionRequest
        :return: Compacted text.
        :rtype: str
        """
        sentences = _split_sentences(request.text)
        if not sentences:
            return request.text

        # A negative budget would be read by slicing as "drop the last N
        # tokens" rather than "keep nothing"; treat it as zero.
        budget = max(request.max_tokens, 0)
        compacted = sentences[0].strip()
        tokens = compacted.split()
        if len(tokens) > budget:
            return " ".join(tokens[:budget])
        return compacted
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def build_compactor(config: dict[str, Any]) -> BaseCompactor:
    """
    Instantiate the compactor selected by a configuration mapping.

    The ``type`` key picks the strategy and falls back to ``"truncate"`` when
    it is missing.

    :param config: Compactor configuration payload.
    :type config: dict[str, Any]
    :return: Compactor instance.
    :rtype: BaseCompactor
    :raises ValueError: If the compactor type is unknown.
    """
    kind = config.get("type", "truncate")
    if kind == "truncate":
        compactor = TruncateCompactor()
    elif kind == "summary":
        compactor = SummaryCompactor()
    else:
        raise ValueError(f"Unknown compactor type: {kind}")
    return compactor
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _split_sentences(text: str) -> list[str]:
    """
    Split text on the literal ``". "`` separator and drop empty segments.

    The separator is consumed, so only the final segment can keep a trailing
    period.

    :param text: Text to split.
    :type text: str
    :return: Non-empty segments in their original order.
    :rtype: list[str]
    """
    pieces = text.split(". ")
    return list(filter(None, pieces))
|
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic models for Biblicus Context Engine configuration.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any, Literal, Optional, Union
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ContextBudgetSpec(BaseModel):
    """
    Token budget for Context assembly, given as a ratio, an absolute cap, or both.

    :ivar ratio: Optional ratio of the input budget to allocate (non-negative).
    :vartype ratio: float or None
    :ivar max_tokens: Optional absolute token cap (at least 1).
    :vartype max_tokens: int or None
    """

    model_config = ConfigDict(extra="forbid")

    ratio: Optional[float] = Field(default=None, ge=0.0)
    max_tokens: Optional[int] = Field(default=None, ge=1)

    @model_validator(mode="after")
    def _validate_budget(self) -> "ContextBudgetSpec":
        """
        Reject a budget that sets neither control.

        :return: Validated budget spec.
        :rtype: ContextBudgetSpec
        :raises ValueError: If neither ratio nor max_tokens is provided.
        """
        has_ratio = self.ratio is not None
        has_cap = self.max_tokens is not None
        if not (has_ratio or has_cap):
            raise ValueError("Budget must specify ratio or max_tokens")
        return self
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ContextPackBudgetSpec(BaseModel):
    """
    Fallback budget applied to Context packs.

    :ivar default_ratio: Optional ratio of the input budget to allocate per pack.
    :vartype default_ratio: float or None
    :ivar default_max_tokens: Optional absolute token cap per pack.
    :vartype default_max_tokens: int or None
    """

    model_config = ConfigDict(extra="forbid")

    default_ratio: Optional[float] = Field(default=None, ge=0.0)
    default_max_tokens: Optional[int] = Field(default=None, ge=1)

    @model_validator(mode="after")
    def _validate_pack_budget(self) -> "ContextPackBudgetSpec":
        """
        Require at least one of the two default controls.

        :return: Validated pack budget spec.
        :rtype: ContextPackBudgetSpec
        :raises ValueError: If neither default_ratio nor default_max_tokens is provided.
        """
        controls = (self.default_ratio, self.default_max_tokens)
        if any(value is not None for value in controls):
            return self
        raise ValueError("Pack budget must specify default_ratio or default_max_tokens")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class ContextExpansionSpec(BaseModel):
    """
    Pagination policy for expanding retriever packs.

    :ivar max_pages: Maximum number of retrieval pages to request
        (defaults to 1, must be at least 1).
    :vartype max_pages: int
    :ivar min_fill_ratio: Optional minimum fill ratio before stopping expansion,
        constrained to the range 0.0 to 1.0.
    :vartype min_fill_ratio: float or None
    """

    # Unknown keys are a validation error rather than being silently ignored.
    model_config = ConfigDict(extra="forbid")

    max_pages: int = Field(default=1, ge=1)
    min_fill_ratio: Optional[float] = Field(default=None, ge=0.0, le=1.0)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class ContextPolicySpec(BaseModel):
    """
    Policy configuration for Context assembly, compaction, and expansion.

    Every field is optional and defaults to ``None``.

    :ivar input_budget: Optional input budget for the full assembled context.
    :vartype input_budget: ContextBudgetSpec or None
    :ivar pack_budget: Optional default budget for individual packs.
    :vartype pack_budget: ContextPackBudgetSpec or None
    :ivar overflow: Overflow behavior (for example, "compact").
    :vartype overflow: str or None
    :ivar compactor: Compactor configuration or registry key.
    :vartype compactor: str or dict[str, Any] or None
    :ivar max_iterations: Maximum compaction regeneration iterations.
    :vartype max_iterations: int or None
    :ivar expansion: Optional expansion policy for retriever pagination.
    :vartype expansion: ContextExpansionSpec or None
    """

    # Unknown keys are a validation error rather than being silently ignored.
    model_config = ConfigDict(extra="forbid")

    input_budget: Optional[ContextBudgetSpec] = None
    pack_budget: Optional[ContextPackBudgetSpec] = None
    # Free-form string; this model does not restrict the accepted values.
    overflow: Optional[str] = None
    # Either a string key or an inline configuration mapping.
    compactor: Optional[Union[str, dict[str, Any]]] = None
    # No lower bound is enforced here.
    max_iterations: Optional[int] = None
    expansion: Optional[ContextExpansionSpec] = None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class ContextTemplateSpec(BaseModel):
    """
    Template definition for message content.

    :ivar template: Template string with dot-notation placeholders (required).
    :vartype template: str
    :ivar vars: Template variable overrides; defaults to an empty dict.
    :vartype vars: dict[str, Any]
    """

    # Unknown keys are a validation error rather than being silently ignored.
    model_config = ConfigDict(extra="forbid")

    template: str
    # default_factory gives every instance its own dict.
    vars: dict[str, Any] = Field(default_factory=dict)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class ContextMessageBase(BaseModel):
    """
    Base class for Context message directives.

    Subclasses in this module narrow ``type`` to a single literal value.

    :ivar type: Directive type identifier.
    :vartype type: str
    """

    # Unknown keys are a validation error rather than being silently ignored.
    model_config = ConfigDict(extra="forbid")

    type: str
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class SystemMessageSpec(ContextMessageBase):
    """
    Directive that emits a system message.

    :ivar content: Literal message content.
    :vartype content: str or None
    :ivar template: Template string for message content.
    :vartype template: str or None
    :ivar vars: Template variable overrides.
    :vartype vars: dict[str, Any]
    """

    type: Literal["system"]
    content: Optional[str] = None
    template: Optional[str] = None
    vars: dict[str, Any] = Field(default_factory=dict)

    @model_validator(mode="after")
    def _validate_content(self) -> "SystemMessageSpec":
        """
        Require exactly one of ``content`` and ``template``.

        :return: Validated message spec.
        :rtype: SystemMessageSpec
        :raises ValueError: If both or neither of content/template are set.
        """
        content_given = self.content is not None
        template_given = self.template is not None
        if content_given == template_given:
            raise ValueError("System message must define either content or template")
        return self
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class UserMessageSpec(ContextMessageBase):
    """
    Directive that emits a user message.

    :ivar content: Literal message content.
    :vartype content: str or None
    :ivar template: Template string for message content.
    :vartype template: str or None
    :ivar vars: Template variable overrides.
    :vartype vars: dict[str, Any]
    """

    type: Literal["user"]
    content: Optional[str] = None
    template: Optional[str] = None
    vars: dict[str, Any] = Field(default_factory=dict)

    @model_validator(mode="after")
    def _validate_content(self) -> "UserMessageSpec":
        """
        Require exactly one of ``content`` and ``template``.

        :return: Validated message spec.
        :rtype: UserMessageSpec
        :raises ValueError: If both or neither of content/template are set.
        """
        supplied = [value for value in (self.content, self.template) if value is not None]
        if len(supplied) != 1:
            raise ValueError("User message must define either content or template")
        return self
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class AssistantMessageSpec(ContextMessageBase):
    """
    Directive that emits an assistant message.

    :ivar content: Literal message content.
    :vartype content: str or None
    :ivar template: Template string for message content.
    :vartype template: str or None
    :ivar vars: Template variable overrides.
    :vartype vars: dict[str, Any]
    """

    type: Literal["assistant"]
    content: Optional[str] = None
    template: Optional[str] = None
    vars: dict[str, Any] = Field(default_factory=dict)

    @model_validator(mode="after")
    def _validate_content(self) -> "AssistantMessageSpec":
        """
        Require exactly one of ``content`` and ``template``.

        :return: Validated message spec.
        :rtype: AssistantMessageSpec
        :raises ValueError: If both or neither of content/template are set.
        """
        missing_content = self.content is None
        missing_template = self.template is None
        if missing_content != missing_template:
            return self
        raise ValueError("Assistant message must define either content or template")
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class ContextInsertSpec(ContextMessageBase):
    """
    Context pack insertion directive.

    :ivar type: Always "context".
    :vartype type: str
    :ivar name: Context pack name to insert.
    :vartype name: str
    :ivar budget: Optional pack budget override.
    :vartype budget: ContextBudgetSpec or None
    :ivar weight: Optional weight to bias pack budget allocation.
    :vartype weight: float or None
    :ivar priority: Optional priority for pack budget allocation.
    :vartype priority: int or None
    """

    type: Literal["context"]
    name: str
    budget: Optional[ContextBudgetSpec] = None
    # weight and priority carry no range constraints in this model.
    weight: Optional[float] = None
    priority: Optional[int] = None
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
class HistoryInsertSpec(ContextMessageBase):
    """
    History insertion directive.

    Declares no fields beyond the ``type`` discriminator.

    :ivar type: Always "history".
    :vartype type: str
    """

    type: Literal["history"]
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# Union of every directive model that may appear in a context's message plan.
# Each member pins its ``type`` field to a distinct literal: "system", "user",
# "assistant", "context", or "history".
ContextMessageSpec = Union[
    SystemMessageSpec,
    UserMessageSpec,
    AssistantMessageSpec,
    ContextInsertSpec,
    HistoryInsertSpec,
]
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
class ContextPackSpec(BaseModel):
    """
    Context pack reference for default Context assembly.

    Only ``name`` is required; the remaining fields default to ``None``.

    :ivar name: Context pack name.
    :vartype name: str
    :ivar weight: Optional weight for budget allocation.
    :vartype weight: float or None
    :ivar priority: Optional priority for budget allocation.
    :vartype priority: int or None
    :ivar budget: Optional pack budget override.
    :vartype budget: ContextBudgetSpec or None
    """

    # Unknown keys are a validation error rather than being silently ignored.
    model_config = ConfigDict(extra="forbid")

    name: str
    # weight and priority carry no range constraints in this model.
    weight: Optional[float] = None
    priority: Optional[int] = None
    budget: Optional[ContextBudgetSpec] = None
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class ContextDeclaration(BaseModel):
    """
    Context declaration configuration.

    ``packs`` accepts shorthand input: a single pack name, or a list that mixes
    pack names with full pack mappings. Each bare name is expanded to
    ``{"name": <value>}`` before field validation runs.

    :ivar name: Context name.
    :vartype name: str
    :ivar policy: Optional context policy.
    :vartype policy: ContextPolicySpec or None
    :ivar messages: Optional explicit message plan.
    :vartype messages: list[ContextMessageSpec] or None
    :ivar packs: Optional default pack list.
    :vartype packs: list[ContextPackSpec] or None
    """

    # Unknown keys are a validation error rather than being silently ignored.
    model_config = ConfigDict(extra="forbid")

    name: str
    policy: Optional[ContextPolicySpec] = None
    messages: Optional[list[ContextMessageSpec]] = None
    packs: Optional[list[ContextPackSpec]] = None

    @model_validator(mode="before")
    @classmethod
    def _coerce_pack_entries(cls, data: Any) -> Any:
        """
        Normalize pack entries to dicts with name fields.

        Runs on the raw input before field validation. Input that is not a
        dict, or whose ``packs`` value is neither a string nor a list, is
        returned untouched so regular validation can report on it.

        :param data: Raw value passed to the model for validation.
        :type data: Any
        :return: Input with shorthand pack names expanded; the caller's
            mapping is never modified in place.
        :rtype: Any
        """
        if not isinstance(data, dict):
            return data

        packs = data.get("packs")
        if isinstance(packs, str):
            normalized = [{"name": packs}]
        elif isinstance(packs, list):
            normalized = [
                {"name": entry} if isinstance(entry, str) else entry for entry in packs
            ]
        else:
            return data

        # Build a shallow copy so the caller's configuration dict keeps the
        # shorthand form it was written with.
        return {**data, "packs": normalized}
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
class CorpusDeclaration(BaseModel):
    """
    Corpus declaration configuration.

    :ivar name: Corpus name.
    :vartype name: str
    :ivar config: Corpus configuration payload; defaults to an empty dict.
    :vartype config: dict[str, Any]
    """

    # Unlike the context specs in this module, undeclared keys are accepted.
    model_config = ConfigDict(extra="allow")

    name: str
    # default_factory gives every instance its own dict.
    config: dict[str, Any] = Field(default_factory=dict)
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
class RetrieverDeclaration(BaseModel):
    """
    Retriever declaration configuration.

    :ivar name: Retriever name.
    :vartype name: str
    :ivar corpus: Optional corpus identifier.
    :vartype corpus: str or None
    :ivar config: Retriever configuration payload; defaults to an empty dict.
    :vartype config: dict[str, Any]
    """

    # Unlike the context specs in this module, undeclared keys are accepted.
    model_config = ConfigDict(extra="allow")

    name: str
    corpus: Optional[str] = None
    # default_factory gives every instance its own dict.
    config: dict[str, Any] = Field(default_factory=dict)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
class CompactorDeclaration(BaseModel):
    """
    Compactor declaration configuration.

    :ivar name: Compactor name.
    :vartype name: str
    :ivar config: Compactor configuration payload; defaults to an empty dict.
    :vartype config: dict[str, Any]
    """

    # Unlike the context specs in this module, undeclared keys are accepted.
    model_config = ConfigDict(extra="allow")

    name: str
    # default_factory gives every instance its own dict.
    config: dict[str, Any] = Field(default_factory=dict)
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
class ContextRetrieverRequest(BaseModel):
    """
    Retrieval request for Context packs.

    :ivar query: Query text issued against the retriever.
    :vartype query: str
    :ivar offset: Offset into the ranked candidate list (default 0, non-negative).
    :vartype offset: int
    :ivar limit: Maximum number of items to return (default 3, at least 1).
    :vartype limit: int
    :ivar maximum_total_characters: Optional maximum total characters for the pack
        (at least 1 when set).
    :vartype maximum_total_characters: int or None
    :ivar max_tokens: Optional maximum token budget for the pack (at least 1 when set).
    :vartype max_tokens: int or None
    :ivar metadata: Optional metadata for retriever implementations.
    :vartype metadata: dict[str, Any]
    """

    # Unknown keys are a validation error rather than being silently ignored.
    model_config = ConfigDict(extra="forbid")

    query: str
    offset: int = Field(default=0, ge=0)
    limit: int = Field(default=3, ge=1)
    maximum_total_characters: Optional[int] = Field(default=None, ge=1)
    max_tokens: Optional[int] = Field(default=None, ge=1)
    # default_factory gives every instance its own dict.
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Context retrieval helpers for the Biblicus Context Engine.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any, Optional
|
|
8
|
+
|
|
9
|
+
from biblicus.backends import get_backend
|
|
10
|
+
from biblicus.context import (
|
|
11
|
+
ContextPack,
|
|
12
|
+
ContextPackPolicy,
|
|
13
|
+
TokenBudget,
|
|
14
|
+
build_context_pack,
|
|
15
|
+
fit_context_pack_to_token_budget,
|
|
16
|
+
)
|
|
17
|
+
from biblicus.corpus import Corpus
|
|
18
|
+
from biblicus.models import QueryBudget, RetrievalRun
|
|
19
|
+
|
|
20
|
+
from .models import ContextRetrieverRequest
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _resolve_run(
    corpus: Corpus,
    *,
    backend_id: str,
    run_id: Optional[str],
    recipe_name: Optional[str],
    recipe_config: Optional[dict[str, Any]],
) -> RetrievalRun:
    """
    Pick the retrieval run to query, building one only as a last resort.

    Resolution order: an explicit ``run_id``; then the corpus's latest run if
    it was produced by ``backend_id``; then a fresh run built from
    ``recipe_config``.

    :param corpus: Corpus that owns the runs.
    :type corpus: biblicus.corpus.Corpus
    :param backend_id: Retrieval backend identifier the run must belong to.
    :type backend_id: str
    :param run_id: Optional explicit run identifier.
    :type run_id: str or None
    :param recipe_name: Optional name for a newly built run.
    :type recipe_name: str or None
    :param recipe_config: Optional backend recipe configuration for a new run.
    :type recipe_config: dict[str, Any] or None
    :return: Retrieval run to query.
    :rtype: biblicus.models.RetrievalRun
    :raises ValueError: If no run can be reused and no recipe_config is given.
    """
    if run_id:
        return corpus.load_run(run_id)

    newest_id = corpus.latest_run_id
    newest = corpus.load_run(newest_id) if newest_id else None
    if newest_id and newest.recipe.backend_id == backend_id:
        return newest

    if recipe_config is not None:
        builder = get_backend(backend_id)
        run_name = recipe_name or f"Context pack ({backend_id})"
        return builder.build_run(corpus, recipe_name=run_name, config=recipe_config)

    raise ValueError(
        "No retrieval run available for the requested backend. "
        "Provide run_id or recipe_config to build one."
    )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def retrieve_context_pack(
    *,
    request: ContextRetrieverRequest,
    corpus: Corpus,
    backend_id: str,
    run_id: Optional[str] = None,
    recipe_name: Optional[str] = None,
    recipe_config: Optional[dict[str, Any]] = None,
    join_with: str = "\n\n",
    max_items_per_source: Optional[int] = None,
    include_metadata: bool = False,
    metadata_fields: Optional[list[str]] = None,
) -> ContextPack:
    """
    Query a Biblicus backend and turn the result into a context pack.

    When the request sets ``max_tokens`` but no character cap, the character
    cap passed to the backend is derived as ``max_tokens * 4``. When
    ``max_tokens`` is set, the built pack is additionally fitted to that token
    budget before being returned.

    :param request: Context retrieval request.
    :type request: biblicus.context_engine.ContextRetrieverRequest
    :param corpus: Corpus instance to query.
    :type corpus: biblicus.corpus.Corpus
    :param backend_id: Retrieval backend identifier.
    :type backend_id: str
    :param run_id: Optional retrieval run identifier.
    :type run_id: str or None
    :param recipe_name: Optional recipe name for run builds.
    :type recipe_name: str or None
    :param recipe_config: Optional backend recipe configuration.
    :type recipe_config: dict[str, Any] or None
    :param join_with: Separator between context pack blocks.
    :type join_with: str
    :param max_items_per_source: Optional cap per source.
    :type max_items_per_source: int or None
    :param include_metadata: Whether to include metadata in context blocks.
    :type include_metadata: bool
    :param metadata_fields: Optional metadata fields to include in context blocks.
    :type metadata_fields: list[str] or None
    :return: Context pack derived from retrieval results.
    :rtype: biblicus.context.ContextPack
    :raises ValueError: If no compatible retrieval run is available.
    """
    run = _resolve_run(
        corpus,
        backend_id=backend_id,
        run_id=run_id,
        recipe_name=recipe_name,
        recipe_config=recipe_config,
    )

    token_cap = request.max_tokens
    char_cap = request.maximum_total_characters
    if char_cap is None:
        # No explicit character cap: derive one from the token budget, if any.
        char_cap = None if token_cap is None else int(token_cap * 4)

    query_budget = QueryBudget(
        max_total_items=request.limit,
        offset=request.offset,
        maximum_total_characters=char_cap,
        max_items_per_source=max_items_per_source,
    )
    retrieval_result = get_backend(backend_id).query(
        corpus,
        run=run,
        query_text=request.query,
        budget=query_budget,
    )

    pack_policy = ContextPackPolicy(
        join_with=join_with,
        include_metadata=include_metadata,
        metadata_fields=metadata_fields,
    )
    pack = build_context_pack(retrieval_result, policy=pack_policy)

    if token_cap is not None:
        return fit_context_pack_to_token_budget(
            pack,
            policy=pack_policy,
            token_budget=TokenBudget(max_tokens=int(token_cap)),
        )
    return pack
|