biblicus 0.16.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biblicus/__init__.py +25 -5
- biblicus/analysis/__init__.py +1 -1
- biblicus/analysis/base.py +10 -10
- biblicus/analysis/markov.py +78 -68
- biblicus/analysis/models.py +47 -47
- biblicus/analysis/profiling.py +58 -48
- biblicus/analysis/topic_modeling.py +56 -51
- biblicus/cli.py +248 -191
- biblicus/{recipes.py → configuration.py} +14 -14
- biblicus/constants.py +2 -2
- biblicus/context.py +27 -12
- biblicus/context_engine/__init__.py +53 -0
- biblicus/context_engine/assembler.py +1090 -0
- biblicus/context_engine/compaction.py +110 -0
- biblicus/context_engine/models.py +423 -0
- biblicus/context_engine/retrieval.py +133 -0
- biblicus/corpus.py +233 -124
- biblicus/errors.py +27 -3
- biblicus/evaluation.py +27 -25
- biblicus/extraction.py +103 -98
- biblicus/extraction_evaluation.py +26 -26
- biblicus/extractors/deepgram_stt.py +7 -7
- biblicus/extractors/docling_granite_text.py +11 -11
- biblicus/extractors/docling_smol_text.py +11 -11
- biblicus/extractors/markitdown_text.py +4 -4
- biblicus/extractors/openai_stt.py +7 -7
- biblicus/extractors/paddleocr_vl_text.py +20 -18
- biblicus/extractors/pipeline.py +8 -8
- biblicus/extractors/rapidocr_text.py +3 -3
- biblicus/extractors/unstructured_text.py +3 -3
- biblicus/hooks.py +4 -4
- biblicus/knowledge_base.py +34 -32
- biblicus/models.py +84 -81
- biblicus/retrieval.py +49 -42
- biblicus/retrievers/__init__.py +50 -0
- biblicus/retrievers/base.py +65 -0
- biblicus/{backends → retrievers}/embedding_index_common.py +80 -44
- biblicus/{backends → retrievers}/embedding_index_file.py +96 -61
- biblicus/{backends → retrievers}/embedding_index_inmemory.py +100 -69
- biblicus/retrievers/hybrid.py +301 -0
- biblicus/{backends → retrievers}/scan.py +84 -73
- biblicus/{backends → retrievers}/sqlite_full_text_search.py +115 -101
- biblicus/{backends → retrievers}/tf_vector.py +103 -100
- biblicus/sources.py +46 -11
- biblicus/text/link.py +6 -0
- biblicus/text/prompts.py +18 -8
- biblicus/text/tool_loop.py +63 -5
- {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/METADATA +32 -23
- biblicus-1.1.0.dist-info/RECORD +91 -0
- biblicus/backends/__init__.py +0 -50
- biblicus/backends/base.py +0 -65
- biblicus/backends/hybrid.py +0 -291
- biblicus-0.16.0.dist-info/RECORD +0 -86
- {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/WHEEL +0 -0
- {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/entry_points.txt +0 -0
- {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/top_level.txt +0 -0
|
biblicus/{recipes.py → configuration.py}
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
2
|
+
Configuration loading utilities for Biblicus.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
@@ -103,34 +103,34 @@ def apply_dotted_overrides(
|
|
|
103
103
|
return updated
|
|
104
104
|
|
|
105
105
|
|
|
106
|
-
def
|
|
107
|
-
|
|
106
|
+
def load_configuration_view(
|
|
107
|
+
configuration_paths: Iterable[str],
|
|
108
108
|
*,
|
|
109
|
-
|
|
109
|
+
configuration_label: str = "Configuration",
|
|
110
110
|
mapping_error_message: Optional[str] = None,
|
|
111
111
|
) -> Dict[str, object]:
|
|
112
112
|
"""
|
|
113
|
-
Load a composed
|
|
113
|
+
Load a composed configuration view from one or more YAML files.
|
|
114
114
|
|
|
115
|
-
:param
|
|
116
|
-
:type
|
|
117
|
-
:param
|
|
118
|
-
:type
|
|
115
|
+
:param configuration_paths: Iterable of configuration file paths in precedence order.
|
|
116
|
+
:type configuration_paths: Iterable[str]
|
|
117
|
+
:param configuration_label: Label used in error messages (for example: "Configuration file").
|
|
118
|
+
:type configuration_label: str
|
|
119
119
|
:return: Composed configuration view.
|
|
120
120
|
:rtype: dict[str, object]
|
|
121
|
-
:raises FileNotFoundError: If any
|
|
122
|
-
:raises ValueError: If any
|
|
121
|
+
:raises FileNotFoundError: If any configuration file is missing.
|
|
122
|
+
:raises ValueError: If any configuration file is not a mapping/object.
|
|
123
123
|
"""
|
|
124
124
|
from biblicus._vendor.dotyaml import load_yaml_view
|
|
125
125
|
|
|
126
|
-
paths: List[str] = [str(path) for path in
|
|
126
|
+
paths: List[str] = [str(path) for path in configuration_paths]
|
|
127
127
|
for raw in paths:
|
|
128
128
|
candidate = Path(raw)
|
|
129
129
|
if not candidate.is_file():
|
|
130
|
-
raise FileNotFoundError(f"{
|
|
130
|
+
raise FileNotFoundError(f"{configuration_label} not found: {candidate}")
|
|
131
131
|
try:
|
|
132
132
|
view = load_yaml_view(paths)
|
|
133
133
|
except ValueError as exc:
|
|
134
|
-
message = mapping_error_message or f"{
|
|
134
|
+
message = mapping_error_message or f"{configuration_label} must be a mapping/object"
|
|
135
135
|
raise ValueError(message) from exc
|
|
136
136
|
return view
|
biblicus/constants.py
CHANGED
|
@@ -9,7 +9,7 @@ ANALYSIS_SCHEMA_VERSION = 1
|
|
|
9
9
|
CORPUS_DIR_NAME = ".biblicus"
|
|
10
10
|
DEFAULT_RAW_DIR = "raw"
|
|
11
11
|
SIDECAR_SUFFIX = ".biblicus.yml"
|
|
12
|
-
|
|
13
|
-
|
|
12
|
+
SNAPSHOTS_DIR_NAME = "snapshots"
|
|
13
|
+
EXTRACTION_SNAPSHOTS_DIR_NAME = "extraction"
|
|
14
14
|
ANALYSIS_RUNS_DIR_NAME = "analysis"
|
|
15
15
|
HOOK_LOGS_DIR_NAME = "hook_logs"
|
biblicus/context.py
CHANGED
|
@@ -25,6 +25,8 @@ class ContextPackPolicy(BaseModel):
|
|
|
25
25
|
:vartype ordering: str
|
|
26
26
|
:ivar include_metadata: Whether to include evidence metadata lines in each block.
|
|
27
27
|
:vartype include_metadata: bool
|
|
28
|
+
:ivar metadata_fields: Optional evidence metadata fields to include.
|
|
29
|
+
:vartype metadata_fields: list[str] or None
|
|
28
30
|
"""
|
|
29
31
|
|
|
30
32
|
model_config = ConfigDict(extra="forbid")
|
|
@@ -32,6 +34,7 @@ class ContextPackPolicy(BaseModel):
|
|
|
32
34
|
join_with: str = Field(default="\n\n")
|
|
33
35
|
ordering: str = Field(default="rank", min_length=1)
|
|
34
36
|
include_metadata: bool = Field(default=False)
|
|
37
|
+
metadata_fields: Optional[List[str]] = None
|
|
35
38
|
|
|
36
39
|
|
|
37
40
|
class ContextPack(BaseModel):
|
|
@@ -132,7 +135,9 @@ def build_context_pack(result: RetrievalResult, *, policy: ContextPackPolicy) ->
|
|
|
132
135
|
trimmed_text = evidence.text.strip()
|
|
133
136
|
if not trimmed_text:
|
|
134
137
|
continue
|
|
135
|
-
metadata = _metadata_for_evidence(evidence) if policy.include_metadata else None
|
|
138
|
+
metadata = (
|
|
139
|
+
_metadata_for_evidence(evidence, policy=policy) if policy.include_metadata else None
|
|
140
|
+
)
|
|
136
141
|
block_text = _format_block_text(trimmed_text, metadata=metadata)
|
|
137
142
|
selected_blocks.append(
|
|
138
143
|
ContextPackBlock(
|
|
@@ -276,7 +281,11 @@ def _order_evidence(
|
|
|
276
281
|
raise ValueError(f"Unknown context pack ordering: {policy.ordering}")
|
|
277
282
|
|
|
278
283
|
|
|
279
|
-
def _metadata_for_evidence(evidence: Evidence) -> Dict[str, object]:
|
|
284
|
+
def _metadata_for_evidence(
|
|
285
|
+
evidence: Evidence,
|
|
286
|
+
*,
|
|
287
|
+
policy: ContextPackPolicy,
|
|
288
|
+
) -> Dict[str, object]:
|
|
280
289
|
"""
|
|
281
290
|
Build metadata for a context pack block.
|
|
282
291
|
|
|
@@ -285,12 +294,19 @@ def _metadata_for_evidence(evidence: Evidence) -> Dict[str, object]:
|
|
|
285
294
|
:return: Metadata mapping.
|
|
286
295
|
:rtype: dict[str, object]
|
|
287
296
|
"""
|
|
288
|
-
return {
|
|
297
|
+
metadata = {
|
|
289
298
|
"item_id": evidence.item_id,
|
|
290
299
|
"source_uri": evidence.source_uri or "none",
|
|
291
300
|
"score": evidence.score,
|
|
292
301
|
"stage": evidence.stage,
|
|
293
302
|
}
|
|
303
|
+
extra = evidence.metadata or {}
|
|
304
|
+
if policy.metadata_fields is not None:
|
|
305
|
+
extra = {key: extra.get(key) for key in policy.metadata_fields if key in extra}
|
|
306
|
+
for key, value in extra.items():
|
|
307
|
+
if key not in metadata:
|
|
308
|
+
metadata[key] = value
|
|
309
|
+
return metadata
|
|
294
310
|
|
|
295
311
|
|
|
296
312
|
def _format_block_text(text: str, *, metadata: Optional[Dict[str, object]]) -> str:
|
|
@@ -306,12 +322,11 @@ def _format_block_text(text: str, *, metadata: Optional[Dict[str, object]]) -> s
|
|
|
306
322
|
"""
|
|
307
323
|
if not metadata:
|
|
308
324
|
return text
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
return f"{metadata_lines}\n{text}"
|
|
325
|
+
ordered_keys = ["item_id", "source_uri", "score", "stage"]
|
|
326
|
+
metadata_lines = [f"{key}: {metadata[key]}" for key in ordered_keys if key in metadata]
|
|
327
|
+
for key in sorted(metadata.keys()):
|
|
328
|
+
if key in ordered_keys:
|
|
329
|
+
continue
|
|
330
|
+
metadata_lines.append(f"{key}: {metadata[key]}")
|
|
331
|
+
metadata_text = "\n".join(metadata_lines)
|
|
332
|
+
return f"{metadata_text}\n{text}"
|
|
biblicus/context_engine/__init__.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Public interface for the Biblicus Context Engine.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from .assembler import ContextAssembler, ContextAssemblyResult
|
|
6
|
+
from .compaction import BaseCompactor, CompactionRequest, SummaryCompactor, TruncateCompactor
|
|
7
|
+
from .models import (
|
|
8
|
+
AssistantMessageSpec,
|
|
9
|
+
CompactorDeclaration,
|
|
10
|
+
ContextBudgetSpec,
|
|
11
|
+
ContextDeclaration,
|
|
12
|
+
ContextExpansionSpec,
|
|
13
|
+
ContextInsertSpec,
|
|
14
|
+
ContextMessageSpec,
|
|
15
|
+
ContextPackBudgetSpec,
|
|
16
|
+
ContextPackSpec,
|
|
17
|
+
ContextPolicySpec,
|
|
18
|
+
ContextRetrieverRequest,
|
|
19
|
+
ContextTemplateSpec,
|
|
20
|
+
CorpusDeclaration,
|
|
21
|
+
HistoryInsertSpec,
|
|
22
|
+
RetrieverDeclaration,
|
|
23
|
+
SystemMessageSpec,
|
|
24
|
+
UserMessageSpec,
|
|
25
|
+
)
|
|
26
|
+
from .retrieval import retrieve_context_pack
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
"ContextAssembler",
|
|
30
|
+
"ContextAssemblyResult",
|
|
31
|
+
"BaseCompactor",
|
|
32
|
+
"CompactionRequest",
|
|
33
|
+
"SummaryCompactor",
|
|
34
|
+
"TruncateCompactor",
|
|
35
|
+
"ContextBudgetSpec",
|
|
36
|
+
"ContextDeclaration",
|
|
37
|
+
"ContextExpansionSpec",
|
|
38
|
+
"ContextInsertSpec",
|
|
39
|
+
"ContextMessageSpec",
|
|
40
|
+
"ContextPackBudgetSpec",
|
|
41
|
+
"ContextPackSpec",
|
|
42
|
+
"ContextPolicySpec",
|
|
43
|
+
"ContextRetrieverRequest",
|
|
44
|
+
"ContextTemplateSpec",
|
|
45
|
+
"CorpusDeclaration",
|
|
46
|
+
"RetrieverDeclaration",
|
|
47
|
+
"CompactorDeclaration",
|
|
48
|
+
"HistoryInsertSpec",
|
|
49
|
+
"SystemMessageSpec",
|
|
50
|
+
"UserMessageSpec",
|
|
51
|
+
"AssistantMessageSpec",
|
|
52
|
+
"retrieve_context_pack",
|
|
53
|
+
]
|