biblicus 0.16.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
biblicus/__init__.py CHANGED
@@ -2,6 +2,17 @@
 Biblicus public package interface.
 """
 
+from .context_engine import (
+    ContextAssembler,
+    ContextBudgetSpec,
+    ContextDeclaration,
+    ContextExpansionSpec,
+    ContextPackBudgetSpec,
+    ContextPackSpec,
+    ContextPolicySpec,
+    ContextRetrieverRequest,
+    retrieve_context_pack,
+)
 from .corpus import Corpus
 from .knowledge_base import KnowledgeBase
 from .models import (
@@ -16,6 +27,15 @@ from .models import (
 
 __all__ = [
     "__version__",
+    "ContextAssembler",
+    "ContextBudgetSpec",
+    "ContextDeclaration",
+    "ContextExpansionSpec",
+    "ContextPackBudgetSpec",
+    "ContextPackSpec",
+    "ContextPolicySpec",
+    "ContextRetrieverRequest",
+    "retrieve_context_pack",
     "Corpus",
     "CorpusConfig",
     "Evidence",
@@ -27,4 +47,4 @@ __all__ = [
     "RetrievalRun",
 ]
 
-__version__ = "0.16.0"
+__version__ = "1.0.0"
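The headline change behind the major version bump is that the context engine's public surface is re-exported at the package root. An import-only sketch (names taken from the `__all__` additions above):

```python
# New top-level exports in 1.0.0, per the __all__ additions above.
from biblicus import (
    ContextAssembler,
    ContextBudgetSpec,
    ContextPackSpec,
    ContextRetrieverRequest,
    retrieve_context_pack,
)
```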
@@ -47,8 +47,6 @@ class EmbeddingIndexRecipeConfig(BaseModel):
     """
     Configuration for embedding-index retrieval backends.
 
-    :ivar snippet_characters: Maximum characters to include in evidence snippets.
-    :vartype snippet_characters: int
     :ivar extraction_run: Optional extraction run reference in the form extractor_id:run_id.
     :vartype extraction_run: str or None
     :ivar chunker: Chunker configuration.
@@ -57,17 +55,52 @@
     :vartype tokenizer: biblicus.chunking.TokenizerConfig or None
     :ivar embedding_provider: Embedding provider configuration.
     :vartype embedding_provider: biblicus.embedding_providers.EmbeddingProviderConfig
+    :ivar snippet_characters: Optional maximum character count for returned evidence text.
+    :vartype snippet_characters: int or None
+    :ivar maximum_cache_total_items: Optional maximum number of vectors cached per scan batch.
+    :vartype maximum_cache_total_items: int or None
+    :ivar maximum_cache_total_characters: Optional maximum characters cached per scan batch.
+    :vartype maximum_cache_total_characters: int or None
     """
 
     model_config = ConfigDict(extra="forbid")
 
-    snippet_characters: int = Field(default=400, ge=1)
+    snippet_characters: Optional[int] = Field(default=None, ge=1)
+    maximum_cache_total_items: Optional[int] = Field(default=None, ge=1)
+    maximum_cache_total_characters: Optional[int] = Field(default=None, ge=1)
     extraction_run: Optional[str] = None
     chunker: ChunkerConfig = Field(default_factory=lambda: ChunkerConfig(chunker_id="paragraph"))
     tokenizer: Optional[TokenizerConfig] = None
     embedding_provider: EmbeddingProviderConfig
 
 
+def _extract_span_text(text: Optional[str], span: Tuple[int, int]) -> Optional[str]:
+    if not isinstance(text, str):
+        return None
+    span_start, span_end = span
+    if span_start < 0 or span_end <= span_start:
+        return text
+    return text[span_start:span_end]
+
+
+def _build_snippet(
+    text: Optional[str], span: Tuple[int, int], max_chars: Optional[int]
+) -> Optional[str]:
+    if not isinstance(text, str):
+        return None
+    if max_chars is None:
+        return _extract_span_text(text, span)
+    if max_chars <= 0:
+        return ""
+    span_start, span_end = span
+    if span_start < 0 or span_end <= span_start:
+        return text[:max_chars]
+    half_window = max_chars // 2
+    snippet_start = max(span_start - half_window, 0)
+    snippet_end = min(span_end + half_window, len(text))
+    return text[snippet_start:snippet_end]
+
+
 def resolve_extraction_reference(
     corpus: Corpus, recipe_config: EmbeddingIndexRecipeConfig
 ) -> Optional[ExtractionRunReference]:
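The two new helpers centralize span and snippet handling: a `snippet_characters` of `None` now means "return exactly the chunk span," while a positive budget widens the span by half the budget on each side (so the result can exceed the budget by the span's own length). A self-contained demo, with the helper bodies copied verbatim from the hunk above:

```python
from typing import Optional, Tuple

# Helper bodies copied verbatim from the hunk above so the demo runs on its own.
def _extract_span_text(text: Optional[str], span: Tuple[int, int]) -> Optional[str]:
    if not isinstance(text, str):
        return None
    span_start, span_end = span
    if span_start < 0 or span_end <= span_start:
        return text
    return text[span_start:span_end]


def _build_snippet(
    text: Optional[str], span: Tuple[int, int], max_chars: Optional[int]
) -> Optional[str]:
    if not isinstance(text, str):
        return None
    if max_chars is None:
        return _extract_span_text(text, span)
    if max_chars <= 0:
        return ""
    span_start, span_end = span
    if span_start < 0 or span_end <= span_start:
        return text[:max_chars]
    half_window = max_chars // 2
    snippet_start = max(span_start - half_window, 0)
    snippet_end = min(span_end + half_window, len(text))
    return text[snippet_start:snippet_end]


text = "abcdefghij"
assert _build_snippet(text, (4, 6), None) == "ef"     # no budget: exact span text
assert _build_snippet(text, (4, 6), 4) == "cdefgh"    # span widened by max_chars // 2 per side
assert _build_snippet(text, (-1, 0), 4) == "abcd"     # invalid span: plain prefix
assert _build_snippet(None, (0, 1), 4) is None        # no text: None (callers then retry _extract_span_text)
```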
@@ -16,6 +16,8 @@ from ..time import utc_now_iso
 from .embedding_index_common import (
     ChunkRecord,
     EmbeddingIndexRecipeConfig,
+    _build_snippet,
+    _extract_span_text,
     artifact_paths_for_run,
     chunks_to_records,
     collect_chunks,
@@ -26,7 +28,6 @@ from .embedding_index_common import (
     write_chunks_jsonl,
     write_embeddings,
 )
-from .scan import _build_snippet
 
 
 class EmbeddingIndexFileBackend:
@@ -132,10 +133,12 @@ class EmbeddingIndexFileBackend:
         if query_embedding.shape[0] != 1:
             raise ValueError("Embedding provider returned an invalid query embedding shape")
 
+        batch_rows = recipe_config.maximum_cache_total_items or 4096
         candidates = _top_indices_batched(
             embeddings=embeddings,
             query_vector=query_embedding[0],
             limit=_candidate_limit(budget.max_total_items + budget.offset),
+            batch_rows=batch_rows,
         )
         evidence_items = _build_evidence(
             corpus,
@@ -222,9 +225,11 @@ def _build_evidence(
             media_type=str(getattr(catalog_item, "media_type")),
             extraction_reference=extraction_reference,
         )
-        snippet = _build_snippet(
-            text, (record.span_start, record.span_end), max_chars=recipe_config.snippet_characters
+        span_text = _build_snippet(
+            text, (record.span_start, record.span_end), recipe_config.snippet_characters
         )
+        if span_text is None:
+            span_text = _extract_span_text(text, (record.span_start, record.span_end))
         score = float(cosine_similarity_scores(embeddings[idx : idx + 1], query_vector)[0])
         evidence_items.append(
             Evidence(
@@ -233,7 +238,7 @@
                 media_type=str(getattr(catalog_item, "media_type")),
                 score=score,
                 rank=1,
-                text=snippet,
+                text=span_text,
                 content_ref=None,
                 span_start=record.span_start,
                 span_end=record.span_end,
@@ -241,7 +246,8 @@
                 stage_scores=None,
                 recipe_id=run.recipe.recipe_id,
                 run_id=run.run_id,
-                hash=hash_text(snippet),
+                metadata=getattr(catalog_item, "metadata", {}) or {},
+                hash=hash_text(span_text or ""),
             )
         )
     return evidence_items
@@ -16,6 +16,8 @@ from ..time import utc_now_iso
 from .embedding_index_common import (
     ChunkRecord,
     EmbeddingIndexRecipeConfig,
+    _build_snippet,
+    _extract_span_text,
     artifact_paths_for_run,
     chunks_to_records,
     collect_chunks,
@@ -26,20 +28,19 @@ from .embedding_index_common import (
     write_chunks_jsonl,
     write_embeddings,
 )
-from .scan import _build_snippet
 
 
 class EmbeddingIndexInMemoryRecipeConfig(EmbeddingIndexRecipeConfig):
     """
     Configuration for embedding-index-inmemory retrieval.
 
-    :ivar max_chunks: Maximum chunks allowed for in-memory query loading.
-    :vartype max_chunks: int
+    :ivar maximum_cache_total_items: Maximum chunks allowed for in-memory query loading.
+    :vartype maximum_cache_total_items: int
     """
 
     model_config = ConfigDict(extra="forbid")
 
-    max_chunks: int = Field(default=25000, ge=1)
+    maximum_cache_total_items: int = Field(default=25000, ge=1)
 
 
 class EmbeddingIndexInMemoryBackend:
@@ -66,10 +67,10 @@ class EmbeddingIndexInMemoryBackend:
         """
         recipe_config = EmbeddingIndexInMemoryRecipeConfig.model_validate(config)
         chunks, text_items = collect_chunks(corpus, recipe_config=recipe_config)
-        if len(chunks) > recipe_config.max_chunks:
+        if len(chunks) > recipe_config.maximum_cache_total_items:
             raise ValueError(
-                "embedding-index-inmemory exceeded max_chunks. "
-                "Use embedding-index-file or increase max_chunks."
+                "embedding-index-inmemory exceeded maximum_cache_total_items. "
+                "Use embedding-index-file or increase maximum_cache_total_items."
             )
 
         provider = recipe_config.embedding_provider.build_provider()
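Recipes that previously set `max_chunks` must be updated, and because the model declares `extra="forbid"`, the old key now fails validation instead of being silently ignored. A minimal sketch of that behavior with a stand-in model (the real config also requires an `embedding_provider`):

```python
from pydantic import BaseModel, ConfigDict, Field, ValidationError

# Stand-in mirroring the renamed field and the extra="forbid" behavior above;
# not the real EmbeddingIndexInMemoryRecipeConfig.
class InMemoryRecipeSketch(BaseModel):
    model_config = ConfigDict(extra="forbid")
    maximum_cache_total_items: int = Field(default=25000, ge=1)

InMemoryRecipeSketch.model_validate({"maximum_cache_total_items": 50000})  # accepted

try:
    InMemoryRecipeSketch.model_validate({"max_chunks": 50000})  # pre-1.0 key name
except ValidationError as error:
    print(error)  # extra="forbid" rejects the removed key outright
```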
@@ -225,9 +226,9 @@ def _build_evidence(
             media_type=media_type,
             extraction_reference=extraction_reference,
         )
-        snippet = _build_snippet(
-            text, (span_start, span_end), max_chars=recipe_config.snippet_characters
-        )
+        span_text = _build_snippet(text, (span_start, span_end), recipe_config.snippet_characters)
+        if span_text is None:
+            span_text = _extract_span_text(text, (span_start, span_end))
         evidence_items.append(
             Evidence(
                 item_id=item_id,
@@ -235,7 +236,7 @@
                 media_type=media_type,
                 score=float(scores[idx]),
                 rank=1,
-                text=snippet,
+                text=span_text,
                 content_ref=None,
                 span_start=span_start,
                 span_end=span_end,
@@ -243,7 +244,8 @@
                 stage_scores=None,
                 recipe_id=run.recipe.recipe_id,
                 run_id=run.run_id,
-                hash=hash_text(snippet),
+                metadata=getattr(catalog_item, "metadata", {}) or {},
+                hash=hash_text(span_text or ""),
             )
         )
     return evidence_items
@@ -217,9 +217,9 @@ def _expand_component_budget(budget: QueryBudget, *, multiplier: int = 5) -> Que
     :return: Expanded budget for component backends.
     :rtype: QueryBudget
     """
-    max_total_characters = budget.max_total_characters
+    maximum_total_characters = budget.maximum_total_characters
     expanded_characters = (
-        max_total_characters * multiplier if max_total_characters is not None else None
+        maximum_total_characters * multiplier if maximum_total_characters is not None else None
     )
     expanded_max_items_per_source = (
         budget.max_items_per_source * multiplier
@@ -230,7 +230,7 @@ def _expand_component_budget(budget: QueryBudget, *, multiplier: int = 5) -> Que
     return QueryBudget(
         max_total_items=requested_items * multiplier,
         offset=0,
-        max_total_characters=expanded_characters,
+        maximum_total_characters=expanded_characters,
         max_items_per_source=expanded_max_items_per_source,
     )
 
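The budget field rename (`max_total_characters` → `maximum_total_characters`) leaves the expansion arithmetic untouched: each component backend still receives `multiplier` times the requested characters, with `None` passing through as "unlimited." Isolated for illustration (the real code builds a full `QueryBudget` model):

```python
from typing import Optional

# Same arithmetic as _expand_component_budget above, with the default multiplier of 5.
def expand_characters(
    maximum_total_characters: Optional[int], multiplier: int = 5
) -> Optional[int]:
    return maximum_total_characters * multiplier if maximum_total_characters is not None else None

assert expand_characters(2000) == 10_000  # component backends get a 5x character budget
assert expand_characters(None) is None    # an unlimited budget stays unlimited
```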
@@ -285,6 +285,7 @@ def _fuse_evidence(
                 stage_scores={"lexical": lexical_score, "embedding": embedding_score},
                 recipe_id="",
                 run_id="",
+                metadata=base_evidence.metadata,
                 hash=base_evidence.hash,
             )
         )
biblicus/backends/scan.py CHANGED
@@ -368,6 +368,7 @@ def _score_items(
                 stage="scan",
                 recipe_id="",
                 run_id="",
+                metadata=getattr(catalog_item, "metadata", {}) or {},
                 hash=hash_text(snippet),
             )
         )
@@ -8,7 +8,7 @@ import math
 import re
 from typing import Dict, Iterable, List, Optional, Tuple
 
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict
 
 from ..corpus import Corpus
 from ..frontmatter import parse_front_matter
@@ -28,16 +28,16 @@ class TfVectorRecipeConfig(BaseModel):
     """
     Configuration for the term-frequency vector retrieval backend.
 
-    :ivar snippet_characters: Maximum characters to include in evidence snippets.
-    :vartype snippet_characters: int
     :ivar extraction_run: Optional extraction run reference in the form extractor_id:run_id.
     :vartype extraction_run: str or None
+    :ivar snippet_characters: Optional maximum character count for returned evidence text.
+    :vartype snippet_characters: int or None
     """
 
     model_config = ConfigDict(extra="forbid")
 
-    snippet_characters: int = Field(default=400, ge=1)
     extraction_run: Optional[str] = None
+    snippet_characters: Optional[int] = None
 
 
 class TfVectorBackend:
@@ -125,8 +125,8 @@ class TfVectorBackend:
             query_tokens=query_tokens,
             query_vector=query_vector,
             query_norm=query_norm,
-            snippet_characters=recipe_config.snippet_characters,
             extraction_reference=extraction_reference,
+            snippet_characters=recipe_config.snippet_characters,
         )
         sorted_candidates = sorted(
             scored_candidates,
@@ -359,21 +359,13 @@ def _find_first_match(text: str, tokens: List[str]) -> Optional[Tuple[int, int]]
     return best_start, best_end
 
 
-def _build_snippet(text: str, span: Optional[Tuple[int, int]], *, max_chars: int) -> str:
-    """
-    Build a snippet around a match span, constrained by a character budget.
-
-    :param text: Source text to slice.
-    :type text: str
-    :param span: Match span to center on.
-    :type span: tuple[int, int] or None
-    :param max_chars: Maximum snippet length.
-    :type max_chars: int
-    :return: Snippet text.
-    :rtype: str
-    """
+def _build_snippet(text: str, span: Optional[Tuple[int, int]], *, max_chars: Optional[int]) -> str:
+    if max_chars is None:
+        return text
     if not text:
         return ""
+    if max_chars <= 0:
+        return ""
     if span is None:
         return text[:max_chars]
     span_start, span_end = span
@@ -390,8 +382,8 @@ def _score_items(
     query_tokens: List[str],
     query_vector: Dict[str, float],
     query_norm: float,
-    snippet_characters: int,
     extraction_reference: Optional[ExtractionRunReference],
+    snippet_characters: Optional[int],
 ) -> List[Evidence]:
     """
     Score catalog items and return evidence candidates.
@@ -406,10 +398,10 @@
     :type query_vector: dict[str, float]
     :param query_norm: Query vector norm.
     :type query_norm: float
-    :param snippet_characters: Snippet length budget.
-    :type snippet_characters: int
     :param extraction_reference: Optional extraction run reference.
     :type extraction_reference: ExtractionRunReference or None
+    :param snippet_characters: Optional maximum character count for returned evidence text.
+    :type snippet_characters: int or None
     :return: Evidence candidates with provisional ranks.
     :rtype: list[Evidence]
     """
@@ -437,9 +429,9 @@
         if similarity <= 0:
             continue
         span = _find_first_match(item_text, query_tokens)
-        snippet = _build_snippet(item_text, span, max_chars=snippet_characters)
         span_start = span[0] if span else None
         span_end = span[1] if span else None
+        evidence_text = _build_snippet(item_text, span, max_chars=snippet_characters)
         evidence_items.append(
             Evidence(
                 item_id=str(getattr(catalog_item, "id")),
@@ -447,14 +439,15 @@
                 media_type=str(media_type),
                 score=float(similarity),
                 rank=1,
-                text=snippet,
+                text=evidence_text,
                 content_ref=None,
                 span_start=span_start,
                 span_end=span_end,
                 stage="tf-vector",
                 recipe_id="",
                 run_id="",
-                hash=hash_text(snippet),
+                metadata=getattr(catalog_item, "metadata", {}) or {},
+                hash=hash_text(evidence_text or ""),
             )
         )
     return evidence_items
biblicus/cli.py CHANGED
@@ -24,7 +24,7 @@ from .context import (
 )
 from .corpus import Corpus
 from .crawl import CrawlRequest, crawl_into_corpus
-from .errors import ExtractionRunFatalError
+from .errors import ExtractionRunFatalError, IngestCollisionError
 from .evaluation import evaluate_run, load_dataset
 from .evidence_processing import apply_evidence_filter, apply_evidence_reranker
 from .extraction import build_extraction_run
@@ -117,18 +117,28 @@ def cmd_ingest(arguments: argparse.Namespace) -> int:
 
     results = []
 
-    if arguments.note is not None or arguments.stdin:
-        text = arguments.note if arguments.note is not None else sys.stdin.read()
-        ingest_result = corpus.ingest_note(
-            text,
-            title=arguments.title,
-            tags=tags,
-            source_uri="stdin" if arguments.stdin else "text",
+    try:
+        if arguments.note is not None or arguments.stdin:
+            text = arguments.note if arguments.note is not None else sys.stdin.read()
+            ingest_result = corpus.ingest_note(
+                text,
+                title=arguments.title,
+                tags=tags,
+                source_uri=None if arguments.stdin else None,
+            )
+            results.append(ingest_result)
+
+        for source_path in arguments.files or []:
+            results.append(corpus.ingest_source(source_path, tags=tags))
+    except IngestCollisionError as error:
+        print(
+            "Ingest failed: source already ingested\n"
+            f"source_uri: {error.source_uri}\n"
+            f"existing_item_id: {error.existing_item_id}\n"
+            f"existing_relpath: {error.existing_relpath}",
+            file=sys.stderr,
         )
-        results.append(ingest_result)
-
-    for source_path in arguments.files or []:
-        results.append(corpus.ingest_source(source_path, tags=tags))
+        return 3
 
     if not results:
         print("Nothing to ingest: provide file paths, --note, or --stdin", file=sys.stderr)
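Duplicate ingests now exit with code 3 and a structured message instead of an unhandled traceback. Library callers can catch the same exception; the error attributes below are the ones the CLI prints, while the `Corpus` constructor arguments and file path are assumptions for illustration:

```python
import sys

from biblicus.corpus import Corpus
from biblicus.errors import IngestCollisionError

corpus = Corpus("./my-corpus")  # constructor arguments are assumed for this sketch

try:
    corpus.ingest_source("notes/readme.md", tags=["docs"])
except IngestCollisionError as error:
    # The same fields cmd_ingest prints before returning exit code 3.
    print(
        f"already ingested as {error.existing_item_id} ({error.existing_relpath})",
        file=sys.stderr,
    )
```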
@@ -374,7 +384,7 @@ def _budget_from_args(arguments: argparse.Namespace) -> QueryBudget:
     return QueryBudget(
         max_total_items=arguments.max_total_items,
         offset=getattr(arguments, "offset", 0),
-        max_total_characters=arguments.max_total_characters,
+        maximum_total_characters=arguments.maximum_total_characters,
         max_items_per_source=arguments.max_items_per_source,
     )
 
@@ -1071,7 +1081,7 @@ def build_parser() -> argparse.ArgumentParser:
         help="Skip this many ranked candidates before selecting evidence (pagination).",
     )
     p_query.add_argument("--max-total-items", type=int, default=5)
-    p_query.add_argument("--max-total-characters", type=int, default=2000)
+    p_query.add_argument("--maximum-total-characters", type=int, default=2000)
     p_query.add_argument("--max-items-per-source", type=int, default=5)
     p_query.add_argument(
         "--reranker-id",
@@ -1131,7 +1141,7 @@
         help="Path to dataset JavaScript Object Notation file.",
     )
     p_eval.add_argument("--max-total-items", type=int, default=5)
-    p_eval.add_argument("--max-total-characters", type=int, default=2000)
+    p_eval.add_argument("--maximum-total-characters", type=int, default=2000)
     p_eval.add_argument("--max-items-per-source", type=int, default=5)
     p_eval.set_defaults(func=cmd_eval)
 
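The renamed flags follow argparse's usual dest derivation, which is how `--maximum-total-characters` surfaces as the `arguments.maximum_total_characters` attribute that `_budget_from_args` now reads. A standalone sketch of that mapping:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--maximum-total-characters", type=int, default=2000)

arguments = parser.parse_args(["--maximum-total-characters", "4000"])
# argparse turns the hyphenated flag into the attribute _budget_from_args reads.
assert arguments.maximum_total_characters == 4000
```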
biblicus/context.py CHANGED
@@ -25,6 +25,8 @@ class ContextPackPolicy(BaseModel):
     :vartype ordering: str
     :ivar include_metadata: Whether to include evidence metadata lines in each block.
     :vartype include_metadata: bool
+    :ivar metadata_fields: Optional evidence metadata fields to include.
+    :vartype metadata_fields: list[str] or None
     """
 
     model_config = ConfigDict(extra="forbid")
@@ -32,6 +34,7 @@
     join_with: str = Field(default="\n\n")
     ordering: str = Field(default="rank", min_length=1)
     include_metadata: bool = Field(default=False)
+    metadata_fields: Optional[List[str]] = None
 
 
 class ContextPack(BaseModel):
@@ -132,7 +135,9 @@ def build_context_pack(result: RetrievalResult, *, policy: ContextPackPolicy) ->
         trimmed_text = evidence.text.strip()
         if not trimmed_text:
             continue
-        metadata = _metadata_for_evidence(evidence) if policy.include_metadata else None
+        metadata = (
+            _metadata_for_evidence(evidence, policy=policy) if policy.include_metadata else None
+        )
         block_text = _format_block_text(trimmed_text, metadata=metadata)
         selected_blocks.append(
             ContextPackBlock(
@@ -276,7 +281,11 @@
     raise ValueError(f"Unknown context pack ordering: {policy.ordering}")
 
 
-def _metadata_for_evidence(evidence: Evidence) -> Dict[str, object]:
+def _metadata_for_evidence(
+    evidence: Evidence,
+    *,
+    policy: ContextPackPolicy,
+) -> Dict[str, object]:
     """
     Build metadata for a context pack block.
 
@@ -285,12 +294,19 @@ def _metadata_for_evidence(evidence: Evidence) -> Dict[str, object]:
     :return: Metadata mapping.
     :rtype: dict[str, object]
     """
-    return {
+    metadata = {
         "item_id": evidence.item_id,
         "source_uri": evidence.source_uri or "none",
         "score": evidence.score,
         "stage": evidence.stage,
     }
+    extra = evidence.metadata or {}
+    if policy.metadata_fields is not None:
+        extra = {key: extra.get(key) for key in policy.metadata_fields if key in extra}
+    for key, value in extra.items():
+        if key not in metadata:
+            metadata[key] = value
+    return metadata
 
 
 def _format_block_text(text: str, *, metadata: Optional[Dict[str, object]]) -> str:
@@ -306,12 +322,11 @@ def _format_block_text(text: str, *, metadata: Optional[Dict[str, object]]) -> s
     """
     if not metadata:
        return text
-    metadata_lines = "\n".join(
-        [
-            f"item_id: {metadata['item_id']}",
-            f"source_uri: {metadata['source_uri']}",
-            f"score: {metadata['score']}",
-            f"stage: {metadata['stage']}",
-        ]
-    )
-    return f"{metadata_lines}\n{text}"
+    ordered_keys = ["item_id", "source_uri", "score", "stage"]
+    metadata_lines = [f"{key}: {metadata[key]}" for key in ordered_keys if key in metadata]
+    for key in sorted(metadata.keys()):
+        if key in ordered_keys:
+            continue
+        metadata_lines.append(f"{key}: {metadata[key]}")
+    metadata_text = "\n".join(metadata_lines)
+    return f"{metadata_text}\n{text}"
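`metadata_fields` lets a policy whitelist which evidence metadata keys join the four fixed header keys; anything not listed is dropped, and extra keys never shadow the fixed ones. A minimal sketch of the selection step (the metadata dict here is invented for illustration):

```python
from biblicus.context import ContextPackPolicy

policy = ContextPackPolicy(include_metadata=True, metadata_fields=["author", "year"])

# The selection step from _metadata_for_evidence, applied to an invented
# evidence.metadata dict:
extra = {"author": "anon", "year": 1650, "internal_id": "x9"}
selected = {key: extra.get(key) for key in policy.metadata_fields if key in extra}
assert selected == {"author": "anon", "year": 1650}  # internal_id is filtered out
```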
@@ -0,0 +1,53 @@
+"""
+Public interface for the Biblicus Context Engine.
+"""
+
+from .assembler import ContextAssembler, ContextAssemblyResult
+from .compaction import BaseCompactor, CompactionRequest, SummaryCompactor, TruncateCompactor
+from .models import (
+    AssistantMessageSpec,
+    CompactorDeclaration,
+    ContextBudgetSpec,
+    ContextDeclaration,
+    ContextExpansionSpec,
+    ContextInsertSpec,
+    ContextMessageSpec,
+    ContextPackBudgetSpec,
+    ContextPackSpec,
+    ContextPolicySpec,
+    ContextRetrieverRequest,
+    ContextTemplateSpec,
+    CorpusDeclaration,
+    HistoryInsertSpec,
+    RetrieverDeclaration,
+    SystemMessageSpec,
+    UserMessageSpec,
+)
+from .retrieval import retrieve_context_pack
+
+__all__ = [
+    "ContextAssembler",
+    "ContextAssemblyResult",
+    "BaseCompactor",
+    "CompactionRequest",
+    "SummaryCompactor",
+    "TruncateCompactor",
+    "ContextBudgetSpec",
+    "ContextDeclaration",
+    "ContextExpansionSpec",
+    "ContextInsertSpec",
+    "ContextMessageSpec",
+    "ContextPackBudgetSpec",
+    "ContextPackSpec",
+    "ContextPolicySpec",
+    "ContextRetrieverRequest",
+    "ContextTemplateSpec",
+    "CorpusDeclaration",
+    "RetrieverDeclaration",
+    "CompactorDeclaration",
+    "HistoryInsertSpec",
+    "SystemMessageSpec",
+    "UserMessageSpec",
+    "AssistantMessageSpec",
+    "retrieve_context_pack",
+]
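Beyond the root re-exports, the new subpackage also exposes compactors, assembly results, and message specs directly. An import-only sketch (module path inferred from the `from .context_engine import ...` line in the top-level `__init__`):

```python
# Exports of the new subpackage, per its __all__ above.
from biblicus.context_engine import (
    ContextAssembler,
    ContextAssemblyResult,
    SummaryCompactor,
    SystemMessageSpec,
    TruncateCompactor,
    UserMessageSpec,
    retrieve_context_pack,
)
```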