biblicus 0.16.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. biblicus/__init__.py +25 -5
  2. biblicus/analysis/__init__.py +1 -1
  3. biblicus/analysis/base.py +10 -10
  4. biblicus/analysis/markov.py +78 -68
  5. biblicus/analysis/models.py +47 -47
  6. biblicus/analysis/profiling.py +58 -48
  7. biblicus/analysis/topic_modeling.py +56 -51
  8. biblicus/cli.py +248 -191
  9. biblicus/{recipes.py → configuration.py} +14 -14
  10. biblicus/constants.py +2 -2
  11. biblicus/context.py +27 -12
  12. biblicus/context_engine/__init__.py +53 -0
  13. biblicus/context_engine/assembler.py +1090 -0
  14. biblicus/context_engine/compaction.py +110 -0
  15. biblicus/context_engine/models.py +423 -0
  16. biblicus/context_engine/retrieval.py +133 -0
  17. biblicus/corpus.py +233 -124
  18. biblicus/errors.py +27 -3
  19. biblicus/evaluation.py +27 -25
  20. biblicus/extraction.py +103 -98
  21. biblicus/extraction_evaluation.py +26 -26
  22. biblicus/extractors/deepgram_stt.py +7 -7
  23. biblicus/extractors/docling_granite_text.py +11 -11
  24. biblicus/extractors/docling_smol_text.py +11 -11
  25. biblicus/extractors/markitdown_text.py +4 -4
  26. biblicus/extractors/openai_stt.py +7 -7
  27. biblicus/extractors/paddleocr_vl_text.py +20 -18
  28. biblicus/extractors/pipeline.py +8 -8
  29. biblicus/extractors/rapidocr_text.py +3 -3
  30. biblicus/extractors/unstructured_text.py +3 -3
  31. biblicus/hooks.py +4 -4
  32. biblicus/knowledge_base.py +34 -32
  33. biblicus/models.py +84 -81
  34. biblicus/retrieval.py +49 -42
  35. biblicus/retrievers/__init__.py +50 -0
  36. biblicus/retrievers/base.py +65 -0
  37. biblicus/{backends → retrievers}/embedding_index_common.py +80 -44
  38. biblicus/{backends → retrievers}/embedding_index_file.py +96 -61
  39. biblicus/{backends → retrievers}/embedding_index_inmemory.py +100 -69
  40. biblicus/retrievers/hybrid.py +301 -0
  41. biblicus/{backends → retrievers}/scan.py +84 -73
  42. biblicus/{backends → retrievers}/sqlite_full_text_search.py +115 -101
  43. biblicus/{backends → retrievers}/tf_vector.py +103 -100
  44. biblicus/sources.py +46 -11
  45. biblicus/text/link.py +6 -0
  46. biblicus/text/prompts.py +18 -8
  47. biblicus/text/tool_loop.py +63 -5
  48. {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/METADATA +32 -23
  49. biblicus-1.1.0.dist-info/RECORD +91 -0
  50. biblicus/backends/__init__.py +0 -50
  51. biblicus/backends/base.py +0 -65
  52. biblicus/backends/hybrid.py +0 -291
  53. biblicus-0.16.0.dist-info/RECORD +0 -86
  54. {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/WHEEL +0 -0
  55. {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/entry_points.txt +0 -0
  56. {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/licenses/LICENSE +0 -0
  57. {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  """
2
- Recipe loading utilities for Biblicus.
2
+ Configuration loading utilities for Biblicus.
3
3
  """
4
4
 
5
5
  from __future__ import annotations
@@ -103,34 +103,34 @@ def apply_dotted_overrides(
103
103
  return updated
104
104
 
105
105
 
106
- def load_recipe_view(
107
- recipe_paths: Iterable[str],
106
+ def load_configuration_view(
107
+ configuration_paths: Iterable[str],
108
108
  *,
109
- recipe_label: str = "Recipe",
109
+ configuration_label: str = "Configuration",
110
110
  mapping_error_message: Optional[str] = None,
111
111
  ) -> Dict[str, object]:
112
112
  """
113
- Load a composed recipe view from one or more YAML files.
113
+ Load a composed configuration view from one or more YAML files.
114
114
 
115
- :param recipe_paths: Iterable of recipe file paths in precedence order.
116
- :type recipe_paths: Iterable[str]
117
- :param recipe_label: Label used in error messages (for example: "Recipe file").
118
- :type recipe_label: str
115
+ :param configuration_paths: Iterable of configuration file paths in precedence order.
116
+ :type configuration_paths: Iterable[str]
117
+ :param configuration_label: Label used in error messages (for example: "Configuration file").
118
+ :type configuration_label: str
119
119
  :return: Composed configuration view.
120
120
  :rtype: dict[str, object]
121
- :raises FileNotFoundError: If any recipe file is missing.
122
- :raises ValueError: If any recipe file is not a mapping/object.
121
+ :raises FileNotFoundError: If any configuration file is missing.
122
+ :raises ValueError: If any configuration file is not a mapping/object.
123
123
  """
124
124
  from biblicus._vendor.dotyaml import load_yaml_view
125
125
 
126
- paths: List[str] = [str(path) for path in recipe_paths]
126
+ paths: List[str] = [str(path) for path in configuration_paths]
127
127
  for raw in paths:
128
128
  candidate = Path(raw)
129
129
  if not candidate.is_file():
130
- raise FileNotFoundError(f"{recipe_label} not found: {candidate}")
130
+ raise FileNotFoundError(f"{configuration_label} not found: {candidate}")
131
131
  try:
132
132
  view = load_yaml_view(paths)
133
133
  except ValueError as exc:
134
- message = mapping_error_message or f"{recipe_label} must be a mapping/object"
134
+ message = mapping_error_message or f"{configuration_label} must be a mapping/object"
135
135
  raise ValueError(message) from exc
136
136
  return view
biblicus/constants.py CHANGED
@@ -9,7 +9,7 @@ ANALYSIS_SCHEMA_VERSION = 1
9
9
  CORPUS_DIR_NAME = ".biblicus"
10
10
  DEFAULT_RAW_DIR = "raw"
11
11
  SIDECAR_SUFFIX = ".biblicus.yml"
12
- RUNS_DIR_NAME = "runs"
13
- EXTRACTION_RUNS_DIR_NAME = "extraction"
12
+ SNAPSHOTS_DIR_NAME = "snapshots"
13
+ EXTRACTION_SNAPSHOTS_DIR_NAME = "extraction"
14
14
  ANALYSIS_RUNS_DIR_NAME = "analysis"
15
15
  HOOK_LOGS_DIR_NAME = "hook_logs"
biblicus/context.py CHANGED
@@ -25,6 +25,8 @@ class ContextPackPolicy(BaseModel):
25
25
  :vartype ordering: str
26
26
  :ivar include_metadata: Whether to include evidence metadata lines in each block.
27
27
  :vartype include_metadata: bool
28
+ :ivar metadata_fields: Optional evidence metadata fields to include.
29
+ :vartype metadata_fields: list[str] or None
28
30
  """
29
31
 
30
32
  model_config = ConfigDict(extra="forbid")
@@ -32,6 +34,7 @@ class ContextPackPolicy(BaseModel):
32
34
  join_with: str = Field(default="\n\n")
33
35
  ordering: str = Field(default="rank", min_length=1)
34
36
  include_metadata: bool = Field(default=False)
37
+ metadata_fields: Optional[List[str]] = None
35
38
 
36
39
 
37
40
  class ContextPack(BaseModel):
@@ -132,7 +135,9 @@ def build_context_pack(result: RetrievalResult, *, policy: ContextPackPolicy) ->
132
135
  trimmed_text = evidence.text.strip()
133
136
  if not trimmed_text:
134
137
  continue
135
- metadata = _metadata_for_evidence(evidence) if policy.include_metadata else None
138
+ metadata = (
139
+ _metadata_for_evidence(evidence, policy=policy) if policy.include_metadata else None
140
+ )
136
141
  block_text = _format_block_text(trimmed_text, metadata=metadata)
137
142
  selected_blocks.append(
138
143
  ContextPackBlock(
@@ -276,7 +281,11 @@ def _order_evidence(
276
281
  raise ValueError(f"Unknown context pack ordering: {policy.ordering}")
277
282
 
278
283
 
279
- def _metadata_for_evidence(evidence: Evidence) -> Dict[str, object]:
284
+ def _metadata_for_evidence(
285
+ evidence: Evidence,
286
+ *,
287
+ policy: ContextPackPolicy,
288
+ ) -> Dict[str, object]:
280
289
  """
281
290
  Build metadata for a context pack block.
282
291
 
@@ -285,12 +294,19 @@ def _metadata_for_evidence(evidence: Evidence) -> Dict[str, object]:
285
294
  :return: Metadata mapping.
286
295
  :rtype: dict[str, object]
287
296
  """
288
- return {
297
+ metadata = {
289
298
  "item_id": evidence.item_id,
290
299
  "source_uri": evidence.source_uri or "none",
291
300
  "score": evidence.score,
292
301
  "stage": evidence.stage,
293
302
  }
303
+ extra = evidence.metadata or {}
304
+ if policy.metadata_fields is not None:
305
+ extra = {key: extra.get(key) for key in policy.metadata_fields if key in extra}
306
+ for key, value in extra.items():
307
+ if key not in metadata:
308
+ metadata[key] = value
309
+ return metadata
294
310
 
295
311
 
296
312
  def _format_block_text(text: str, *, metadata: Optional[Dict[str, object]]) -> str:
@@ -306,12 +322,11 @@ def _format_block_text(text: str, *, metadata: Optional[Dict[str, object]]) -> s
306
322
  """
307
323
  if not metadata:
308
324
  return text
309
- metadata_lines = "\n".join(
310
- [
311
- f"item_id: {metadata['item_id']}",
312
- f"source_uri: {metadata['source_uri']}",
313
- f"score: {metadata['score']}",
314
- f"stage: {metadata['stage']}",
315
- ]
316
- )
317
- return f"{metadata_lines}\n{text}"
325
+ ordered_keys = ["item_id", "source_uri", "score", "stage"]
326
+ metadata_lines = [f"{key}: {metadata[key]}" for key in ordered_keys if key in metadata]
327
+ for key in sorted(metadata.keys()):
328
+ if key in ordered_keys:
329
+ continue
330
+ metadata_lines.append(f"{key}: {metadata[key]}")
331
+ metadata_text = "\n".join(metadata_lines)
332
+ return f"{metadata_text}\n{text}"
@@ -0,0 +1,53 @@
1
+ """
2
+ Public interface for the Biblicus Context Engine.
3
+ """
4
+
5
+ from .assembler import ContextAssembler, ContextAssemblyResult
6
+ from .compaction import BaseCompactor, CompactionRequest, SummaryCompactor, TruncateCompactor
7
+ from .models import (
8
+ AssistantMessageSpec,
9
+ CompactorDeclaration,
10
+ ContextBudgetSpec,
11
+ ContextDeclaration,
12
+ ContextExpansionSpec,
13
+ ContextInsertSpec,
14
+ ContextMessageSpec,
15
+ ContextPackBudgetSpec,
16
+ ContextPackSpec,
17
+ ContextPolicySpec,
18
+ ContextRetrieverRequest,
19
+ ContextTemplateSpec,
20
+ CorpusDeclaration,
21
+ HistoryInsertSpec,
22
+ RetrieverDeclaration,
23
+ SystemMessageSpec,
24
+ UserMessageSpec,
25
+ )
26
+ from .retrieval import retrieve_context_pack
27
+
28
+ __all__ = [
29
+ "ContextAssembler",
30
+ "ContextAssemblyResult",
31
+ "BaseCompactor",
32
+ "CompactionRequest",
33
+ "SummaryCompactor",
34
+ "TruncateCompactor",
35
+ "ContextBudgetSpec",
36
+ "ContextDeclaration",
37
+ "ContextExpansionSpec",
38
+ "ContextInsertSpec",
39
+ "ContextMessageSpec",
40
+ "ContextPackBudgetSpec",
41
+ "ContextPackSpec",
42
+ "ContextPolicySpec",
43
+ "ContextRetrieverRequest",
44
+ "ContextTemplateSpec",
45
+ "CorpusDeclaration",
46
+ "RetrieverDeclaration",
47
+ "CompactorDeclaration",
48
+ "HistoryInsertSpec",
49
+ "SystemMessageSpec",
50
+ "UserMessageSpec",
51
+ "AssistantMessageSpec",
52
+ "retrieve_context_pack",
53
+ ]