PyPI - biblicus - Versions diffs - 0.16.0__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

biblicus 0.16.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57)

biblicus/__init__.py +25 -5
biblicus/analysis/__init__.py +1 -1
biblicus/analysis/base.py +10 -10
biblicus/analysis/markov.py +78 -68
biblicus/analysis/models.py +47 -47
biblicus/analysis/profiling.py +58 -48
biblicus/analysis/topic_modeling.py +56 -51
biblicus/cli.py +248 -191
biblicus/{recipes.py → configuration.py} +14 -14
biblicus/constants.py +2 -2
biblicus/context.py +27 -12
biblicus/context_engine/__init__.py +53 -0
biblicus/context_engine/assembler.py +1090 -0
biblicus/context_engine/compaction.py +110 -0
biblicus/context_engine/models.py +423 -0
biblicus/context_engine/retrieval.py +133 -0
biblicus/corpus.py +233 -124
biblicus/errors.py +27 -3
biblicus/evaluation.py +27 -25
biblicus/extraction.py +103 -98
biblicus/extraction_evaluation.py +26 -26
biblicus/extractors/deepgram_stt.py +7 -7
biblicus/extractors/docling_granite_text.py +11 -11
biblicus/extractors/docling_smol_text.py +11 -11
biblicus/extractors/markitdown_text.py +4 -4
biblicus/extractors/openai_stt.py +7 -7
biblicus/extractors/paddleocr_vl_text.py +20 -18
biblicus/extractors/pipeline.py +8 -8
biblicus/extractors/rapidocr_text.py +3 -3
biblicus/extractors/unstructured_text.py +3 -3
biblicus/hooks.py +4 -4
biblicus/knowledge_base.py +34 -32
biblicus/models.py +84 -81
biblicus/retrieval.py +49 -42
biblicus/retrievers/__init__.py +50 -0
biblicus/retrievers/base.py +65 -0
biblicus/{backends → retrievers}/embedding_index_common.py +80 -44
biblicus/{backends → retrievers}/embedding_index_file.py +96 -61
biblicus/{backends → retrievers}/embedding_index_inmemory.py +100 -69
biblicus/retrievers/hybrid.py +301 -0
biblicus/{backends → retrievers}/scan.py +84 -73
biblicus/{backends → retrievers}/sqlite_full_text_search.py +115 -101
biblicus/{backends → retrievers}/tf_vector.py +103 -100
biblicus/sources.py +46 -11
biblicus/text/link.py +6 -0
biblicus/text/prompts.py +18 -8
biblicus/text/tool_loop.py +63 -5
{biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/METADATA +32 -23
biblicus-1.1.0.dist-info/RECORD +91 -0
biblicus/backends/__init__.py +0 -50
biblicus/backends/base.py +0 -65
biblicus/backends/hybrid.py +0 -291
biblicus-0.16.0.dist-info/RECORD +0 -86
{biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/WHEEL +0 -0
{biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/entry_points.txt +0 -0
{biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/licenses/LICENSE +0 -0
{biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/top_level.txt +0 -0

biblicus/text/tool_loop.py CHANGED Viewed

@@ -5,6 +5,7 @@ Shared tool loop for virtual file edit workflows.
 from __future__ import annotations
 import json
+import re
 from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional, Sequence
@@ -182,6 +183,18 @@ def run_tool_loop(
                         last_error = "Tool loop requires non-empty old_str and new_str"
                         tool_result = f"Error: {last_error}"
                     else:
+                        if old_str == new_str:
+                            last_error = "Tool loop requires str_replace to make a change"
+                            tool_result = f"Error: {last_error}"
+                            had_tool_error = True
+                            messages.append(
+                                {
+                                    "role": "tool",
+                                    "tool_call_id": tool_call.get("id", ""),
+                                    "content": tool_result,
+                                }
+                            )
+                            continue
                         try:
                             current_text = apply_str_replace(current_text, old_str, new_str)
                             tool_result = (
@@ -214,6 +227,7 @@ def run_tool_loop(
                     "content": _build_tool_error_message(
                         error_message=last_error,
                         current_text=current_text,
+                        old_str=old_str if "old_str" in locals() else "",
                     ),
                 }
             )
@@ -260,19 +274,26 @@ def _build_retry_message(
     )
-def _build_tool_error_message(*, error_message: str, current_text: str) -> str:
-    if "not unique" in error_message:
+def _build_tool_error_message(*, error_message: str, current_text: str, old_str: str) -> str:
+    if "found 0 matches" in error_message or "not found" in error_message:
+        guidance = (
+            "Copy the exact old_str from the current text (including punctuation/case) "
+            "or call view to inspect the latest text."
+        )
+    elif "found " in error_message and "matches" in error_message:
         guidance = (
             "Use a longer unique old_str by including surrounding words or punctuation "
             "so it matches exactly once."
         )
-    elif "not found" in error_message:
+    elif "not unique" in error_message:
         guidance = (
-            "Copy the exact old_str from the current text (including punctuation/case) "
-            "or call view to inspect the latest text."
+            "Use a longer unique old_str by including surrounding words or punctuation "
+            "so it matches exactly once."
         )
     else:
         guidance = "Fix the tool call and try again."
+    if old_str and len(old_str) <= 3:
+        guidance = f"{guidance} If unsure, call view to pick a longer unique substring."
     return (
         "Your last tool call failed.\n"
         f"Error: {error_message}\n"
@@ -282,6 +303,43 @@ def _build_tool_error_message(*, error_message: str, current_text: str) -> str:
     )
+_SPAN_OPEN_PATTERN = re.compile(r"<span\b[^>]*>")
+_SPAN_CLOSE_PATTERN = re.compile(r"</span>")
+_SLICE_PATTERN = re.compile(r"<slice\s*/>")
+def _strip_markup(text: str) -> str:
+    without_spans = _SPAN_CLOSE_PATTERN.sub("", _SPAN_OPEN_PATTERN.sub("", text))
+    return _SLICE_PATTERN.sub("", without_spans)
+def apply_unique_str_replace(text: str, old_str: str, new_str: str) -> str:
+    """
+    Apply a single replacement only when old_str matches exactly once.
+    :param text: Current text content.
+    :type text: str
+    :param old_str: Substring to replace.
+    :type old_str: str
+    :param new_str: Replacement string.
+    :type new_str: str
+    :return: Updated text.
+    :rtype: str
+    :raises ValueError: If old_str matches zero or multiple times.
+    """
+    matches = text.count(old_str)
+    if matches != 1:
+        raise ValueError(
+            "Tool loop requires old_str to match exactly once " f"(found {matches} matches)"
+        )
+    if _strip_markup(old_str) != _strip_markup(new_str):
+        raise ValueError(
+            "Tool loop replacements may only insert markup tags; "
+            "the underlying text must stay the same"
+        )
+    return text.replace(old_str, new_str, 1)
 def _build_no_tool_calls_message(*, assistant_message: str, current_text: str) -> str:
     guidance = (
         "Use the tools to edit the text. "

{biblicus-0.16.0.dist-info → biblicus-1.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: biblicus
-Version: 0.16.0
+Version: 1.1.0
 Summary: Command line interface and Python library for corpus ingestion, retrieval, and evaluation.
 License: MIT
 Requires-Python: >=3.9
@@ -80,7 +80,7 @@ See [retrieval augmented generation overview] for a short introduction to the id
 ## Analysis highlights
 - `biblicus analyze markov` learns a directed, weighted state transition graph over segmented text.
-- YAML recipes support cascading composition plus dotted `--config key=value` overrides.
+- YAML configurations support cascading composition plus dotted `--config key=value` overrides.
 - Text extract splits long texts with an LLM by inserting XML tags in-place for structured spans.
 - See `docs/MARKOV_ANALYSIS.md` for Markov analysis details and runnable demos.
 - See `docs/TEXT_EXTRACT.md` for the text extract utility and examples.
@@ -167,7 +167,7 @@ sequenceDiagram
 - You can ingest raw material once, then try many retrieval approaches over time.
 - You can keep raw files readable and portable, without locking your data inside a database.
-- You can evaluate retrieval runs against shared datasets and compare backends using the same corpus.
+- You can evaluate retrieval snapshots against shared datasets and compare backends using the same corpus.
 ## Typical flow
@@ -176,7 +176,7 @@ sequenceDiagram
 - Crawl a website section into corpus items when you want a repeatable “import from the web” workflow.
 - Run extraction when you want derived text artifacts from non-text sources.
 - Reindex to refresh the catalog after edits.
-- Build a retrieval run with a backend.
+- Build a retrieval snapshot with a backend.
 - Query the run to collect evidence and evaluate it with datasets.
 ## Install
@@ -292,8 +292,8 @@ for note_title, note_text in notes:
     corpus.ingest_note(note_text, title=note_title, tags=["memory"])
 backend = get_backend("scan")
-run = backend.build_run(corpus, recipe_name="Story demo", config={})
-budget = QueryBudget(max_total_items=5, max_total_characters=2000, max_items_per_source=None)
+run = backend.build_run(corpus, configuration_name="Story demo", config={})
+budget = QueryBudget(max_total_items=5, maximum_total_characters=2000, max_items_per_source=None)
 result = backend.query(
     corpus,
     run=run,
@@ -333,11 +333,11 @@ Example output:
   "query_text": "Primary button style preference",
   "budget": {
     "max_total_items": 5,
-    "max_total_characters": 2000,
+    "maximum_total_characters": 2000,
     "max_items_per_source": null
   },
-  "run_id": "RUN_ID",
-  "recipe_id": "RECIPE_ID",
+  "snapshot_id": "RUN_ID",
+  "configuration_id": "RECIPE_ID",
   "backend_id": "scan",
   "generated_at": "2026-01-29T00:00:00.000000Z",
   "evidence": [
@@ -352,8 +352,8 @@ Example output:
       "span_start": null,
       "span_end": null,
       "stage": "scan",
-      "recipe_id": "RECIPE_ID",
-      "run_id": "RUN_ID",
+      "configuration_id": "RECIPE_ID",
+      "snapshot_id": "RUN_ID",
       "hash": null
     }
   ],
@@ -422,7 +422,7 @@ flowchart TB
       subgraph RowExtraction[Pluggable: extraction pipeline]
         direction TB
-        Catalog --> Extract[Extract pipeline] --> ExtractedText[Extracted text artifacts] --> ExtractionRun[Extraction run manifest]
+        Catalog --> Extract[Extract pipeline] --> ExtractedText[Extracted text artifacts] --> ExtractionRun[Extraction snapshot manifest]
       end
       subgraph RowRetrieval[Pluggable: retrieval backend]
@@ -484,7 +484,7 @@ From Python, the same flow is available through the Corpus class and backend int
 - Ingest notes with `Corpus.ingest_note`.
 - Ingest files or web addresses with `Corpus.ingest_source`.
 - List items with `Corpus.list_items`.
-- Build a retrieval run with `get_backend` and `backend.build_run`.
+- Build a retrieval snapshot with `get_backend` and `backend.build_run`.
 - Query a run with `backend.query`.
 - Evaluate with `evaluate_run`.
@@ -530,13 +530,13 @@ corpus/
     runs/
       extraction/
         pipeline/
-          <run id>/
+          <snapshot id>/
             manifest.json
             text/
               <item id>.txt
       retrieval/
         <backend id>/
-          <run id>/
+          <snapshot id>/
             manifest.json
 ```
@@ -552,7 +552,7 @@ For detailed documentation including configuration options, performance characte
 ## Retrieval documentation
-For the retrieval pipeline overview and run artifacts, see `docs/RETRIEVAL.md`. For retrieval quality upgrades
+For the retrieval pipeline overview and snapshot artifacts, see `docs/RETRIEVAL.md`. For retrieval quality upgrades
 (tuned lexical baseline, reranking, hybrid retrieval), see `docs/RETRIEVAL_QUALITY.md`. For evaluation workflows
 and dataset formats, see `docs/RETRIEVAL_EVALUATION.md`. For a runnable walkthrough, use the retrieval evaluation lab
 script (`scripts/retrieval_evaluation_lab.py`).
@@ -615,26 +615,26 @@ See `docs/TEXT_SLICE.md` for the utility API and examples.
 Biblicus can run analysis pipelines on extracted text without changing the raw corpus. Profiling and topic modeling
 are the first analysis backends. Profiling summarizes corpus composition and extraction coverage. Topic modeling reads
-an extraction run, optionally applies an LLM-driven extraction pass, applies lexical processing, runs BERTopic, and
+an extraction snapshot, optionally applies an LLM-driven extraction pass, applies lexical processing, runs BERTopic, and
 optionally applies an LLM fine-tuning pass to label topics. The output is structured JavaScript Object Notation.
 See `docs/ANALYSIS.md` for the analysis pipeline overview, `docs/PROFILING.md` for profiling, and
 `docs/TOPIC_MODELING.md` for topic modeling details.
-Run a topic analysis using a recipe file:
+Run a topic analysis using a configuration file:
 ```
-biblicus analyze topics --corpus corpora/example --recipe recipes/topic-modeling.yml --extraction-run pipeline:<run_id>
+biblicus analyze topics --corpus corpora/example --configuration configurations/topic-modeling.yml --extraction-run pipeline:<snapshot_id>
 ```
-If `--extraction-run` is omitted, Biblicus uses the most recent extraction run and emits a warning about
+If `--extraction-run` is omitted, Biblicus uses the most recent extraction snapshot and emits a warning about
 reproducibility. The analysis output is stored under:
 ```
-.biblicus/runs/analysis/topic-modeling/<run_id>/output.json
+.biblicus/runs/analysis/topic-modeling/<snapshot_id>/output.json
 ```
-Minimal recipe example:
+Minimal configuration example:
 ```yaml
 schema_version: 1
@@ -659,7 +659,7 @@ llm_fine_tuning:
 ```
 LLM extraction and fine-tuning require `biblicus[openai]` and a configured OpenAI API key.
-Recipe files are validated strictly against the topic modeling schema, so type mismatches or unknown fields are errors.
+Configuration files are validated strictly against the topic modeling schema, so type mismatches or unknown fields are errors.
 AG News integration runs require `biblicus[datasets]` in addition to `biblicus[topic-modeling]`.
 For a repeatable, real-world integration run that downloads AG News and executes topic modeling, use:
@@ -712,6 +712,15 @@ Build the documentation:
 python -m sphinx -b html docs docs/_build/html
 ```
+Preview the documentation locally:
+```
+cd docs/_build/html
+python -m http.server
+```
+Open `http://localhost:8000` in your browser.
 ## License
 License terms are in `LICENSE`.

biblicus-1.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,91 @@
+biblicus/__init__.py,sha256=O9FlaC1aaafCfDoI3sIsbtUsjNKJpBI6sP-RTp_kCaI,1013
+biblicus/__main__.py,sha256=ipfkUoTlocVnrQDM69C7TeBqQxmHVeiWMRaT3G9rtnk,117
+biblicus/chunking.py,sha256=GdJr0skAAI0Su99mr7dXqCgR7eJ0sJu8n2XesVGyddY,13206
+biblicus/cli.py,sha256=GN7L0-s0k9tAj_lthvBrJlfo_DG9y53vYc6k_IhSea0,45797
+biblicus/configuration.py,sha256=JzQU-2pzO4hY7pBw8J79Ci0Glc9cvh4KrRvzSMK2d5w,4329
+biblicus/constants.py,sha256=VVjfZvdmoiCNsiQv0JVI-cA6JKXWUsvGL_IjnTxlEI8,386
+biblicus/context.py,sha256=I7L86ag2AbNr_QgiP5YSt1uwwULGx1cH73eR2nE9T3g,10842
+biblicus/corpus.py,sha256=D9O1Z8lQ7yFNXQDkaKR9fSTRDMSwtrYTGavh_GM7Eww,60374
+biblicus/crawl.py,sha256=n8rXBMnziBK9vtKQQCXYOpBzqsPCswj2PzVJUb370KY,6250
+biblicus/embedding_providers.py,sha256=phWEsq1vryyTFRRs6uZ0sx9FhrqWIkDsS3I52I64zqM,3839
+biblicus/errors.py,sha256=7fAGJbe_pCD8ygnfbTn6bNRV6pam0Vx3xjIpLrxrucg,1382
+biblicus/evaluation.py,sha256=XnQKPbUcUBnELllh7cNEzvTK8EKU1Ub0q3u_sIhXB5E,8372
+biblicus/evidence_processing.py,sha256=sJe6T1nLxvU0xs9yMH8JZZS19zHXMR-Fpr5lWi5ndUM,6120
+biblicus/extraction.py,sha256=YiJqLWY3mglYokSJqA8-oIxpFBPW4Hz0TEeeNp0PtWA,20581
+biblicus/extraction_evaluation.py,sha256=kFbyKcHzZK_z0OgCmQ3Olj55zgGoxin0Ir3dUA50TLI,10641
+biblicus/frontmatter.py,sha256=uFC4iIrgpnTDiP1gvAnT_CbFYdNuUVtETX7tZ3a9g-Y,2517
+biblicus/hook_logging.py,sha256=IMvde-JhVWrx9tNz3eDJ1CY_rr5Sj7DZ2YNomYCZbz0,5366
+biblicus/hook_manager.py,sha256=ZCAkE5wLvn4lnQz8jho_o0HGEC9KdQd9qitkAEUQRcw,6997
+biblicus/hooks.py,sha256=-ZcKZ4scK9ctas_PcseOmJJOLCkwxpnIxrACcz1qUus,7907
+biblicus/ignore.py,sha256=fyjt34E6tWNNrm1FseOhgH2MgryyVBQVzxhKL5s4aio,1800
+biblicus/inference.py,sha256=_k00AIPoXD2lruiTB-JUagtY4f_WKcdzA3axwiq1tck,3512
+biblicus/knowledge_base.py,sha256=pDZQlihjMB7AF61LccVG21rWEAjisgRtkEcn-dymZTM,6915
+biblicus/models.py,sha256=UlaqdvdqPZHd2__4Gcd4pryA_DBVgSiv86uI2AYD8Ag,16990
+biblicus/retrieval.py,sha256=9RA3KGw43dBOD1EFZwt9sqcVf334UtXb1qNHUqYW6As,4646
+biblicus/sources.py,sha256=FNwW1FWts0jxWIL3AHon7D6c5ZatyG9AGFqzn1Id5mE,8504
+biblicus/time.py,sha256=3BSKOSo7R10K-0Dzrbdtl3fh5_yShTYqfdlKvvdkx7M,485
+biblicus/uris.py,sha256=xXD77lqsT9NxbyzI1spX9Y5a3-U6sLYMnpeSAV7g-nM,2013
+biblicus/user_config.py,sha256=UXUYBNUN4FR37ggZGJG1wv3K8XzsMR8pXW1T18lrivw,6495
+biblicus/_vendor/dotyaml/__init__.py,sha256=OVv6IsuCvsjaUznLzuit4UbSLVg4TiTVm9cOPY1Y2Cs,409
+biblicus/_vendor/dotyaml/interpolation.py,sha256=FVUkdQr_KbXjoFPvGTv6I5v0X5iZkJe5yhZtYKRbYzI,1991
+biblicus/_vendor/dotyaml/loader.py,sha256=vFfnhbvHYYyOKzl5iq2FH97GSHH2GvEHmGiPnE0g0kA,6954
+biblicus/_vendor/dotyaml/transformer.py,sha256=RWNrm_KAsanG409HEIWquTH9i_jz-ZFK9fM86emXeF4,3724
+biblicus/ai/__init__.py,sha256=HY8PKhqRLIDYJYlL9A2JjqKxQaujITNLYgIytNUhnrU,1161
+biblicus/ai/embeddings.py,sha256=n2xlonZOHcmDrP1XMhGcja5Hzr8r87PF-IecH-Yhu98,3703
+biblicus/ai/llm.py,sha256=g724_UAxmicB_W-Z7Uu9SRsI9-aVNZUlYIjvnlE17VE,4712
+biblicus/ai/models.py,sha256=6newnT0NJf3uf9FvWXVC-9Gkk5xRB-PjXDZpeBHA04Y,7857
+biblicus/analysis/__init__.py,sha256=d1q11tEx3JkrOPMaiGMNCHhN9tCOTr_QpQP-tI1J2Wk,1389
+biblicus/analysis/base.py,sha256=HErFLn3gv1qf9ckAUxbolHF2k9sJDNZjPjdboCMhyBE,1349
+biblicus/analysis/markov.py,sha256=pLtKvt4gtsqa1CASizh8bBJ4CQW2e0wGaQ-BgdP7Pfg,63766
+biblicus/analysis/models.py,sha256=dYnm5gwUzTk5HvrHZjQx4vug_TZLnXU9qN6CLIRyLng,56495
+biblicus/analysis/profiling.py,sha256=IynvrgcopqFj6lMUPHS1prwd0FxN8FzIa5p3JInDFCc,11185
+biblicus/analysis/schema.py,sha256=MCiAQJmijVk8iM8rOUYbzyaDwsMR-Oo86iZU5NCbDMM,435
+biblicus/analysis/topic_modeling.py,sha256=dsNHuqxcEoCKO_8aDAM9yEOa0kWCjPWS2NvcQayIyXQ,22623
+biblicus/context_engine/__init__.py,sha256=cIJWTUwOewW1x13a2n0YKfr4-XU0IwlVdAH_0pckfKk,1337
+biblicus/context_engine/assembler.py,sha256=E7VPdqUJ9peZUoonM0Ooa1wsaklFOuLCt2IH9nFxAfM,44260
+biblicus/context_engine/compaction.py,sha256=2bLaCpT48d1TL7vt9rrcRCgfdHeWWp9LX85Cgij12o0,2921
+biblicus/context_engine/models.py,sha256=jesVd83ZQcatO-7yNlzwKkactSQ-e1znYuWof4rxVFg,12762
+biblicus/context_engine/retrieval.py,sha256=A0w6C5uPrDY_aeGeirRkSGr6I-gU0U0cY6ElvrLhe0Q,4425
+biblicus/extractors/__init__.py,sha256=ci3oldbdQZ8meAfHccM48CqQtZsPSRg3HkPrBSZF15M,2673
+biblicus/extractors/base.py,sha256=ka-nz_1zHPr4TS9sU4JfOoY-PJh7lbHPBOEBrbQFGSc,2171
+biblicus/extractors/deepgram_stt.py,sha256=xx_zrROGRHotF5aht23Qey9dpCnU3KHZ0unwa933Pto,6358
+biblicus/extractors/docling_granite_text.py,sha256=iDHWZVgqZd86Q3Zu-fcdCq7ia00xTpbFbTU7JbDNZ38,6953
+biblicus/extractors/docling_smol_text.py,sha256=qI7m93Odrjmob0RW-Yvnt5Ck4AgFcgdVjwatLQA5krI,6885
+biblicus/extractors/markitdown_text.py,sha256=kYixZbVxaIyeWtpezocnrSxC3z_9KqWuBzeK8sI4s1o,4567
+biblicus/extractors/metadata_text.py,sha256=7FbEPp0K1mXc7FH1_c0KhPhPexF9U6eLd3TVY1vTp1s,3537
+biblicus/extractors/openai_stt.py,sha256=d2CaVhxapfkXaeI_QZcoXwdVm5Bj5YwGLdmTZDcqgTc,7197
+biblicus/extractors/paddleocr_vl_text.py,sha256=2xoHA1Jviw8zzeBvHBI74Lkx4SX_vSarCe3wxvYf6c4,11794
+biblicus/extractors/pass_through_text.py,sha256=DNxkCwpH2bbXjPGPEQwsx8kfqXi6rIxXNY_n3TU2-WI,2777
+biblicus/extractors/pdf_text.py,sha256=YtUphgLVxyWJXew6ZsJ8wBRh67Y5ri4ZTRlMmq3g1Bk,3255
+biblicus/extractors/pipeline.py,sha256=qdlBBSUVNdg2V4izHacv_8a2DikCGlVMAdpkZkzNvyY,3288
+biblicus/extractors/rapidocr_text.py,sha256=5adSCiOmyHiCgX3jBMcl1OiQlGzYLxmgJQzo9GHSecs,4791
+biblicus/extractors/select_longest_text.py,sha256=wRveXAfYLdj7CpGuo4RoD7zE6SIfylRCbv40z2azO0k,3702
+biblicus/extractors/select_override.py,sha256=gSpffFmn1ux9pGtFvHD5Uu_LO8TmmJC4L_mvjehiSec,4014
+biblicus/extractors/select_smart_override.py,sha256=-sLMnNoeXbCB3dO9zflQq324eHuLbd6hpveSwduXP-U,6763
+biblicus/extractors/select_text.py,sha256=w0ATmDy3tWWbOObzW87jGZuHbgXllUhotX5XyySLs-o,3395
+biblicus/extractors/unstructured_text.py,sha256=WXr_fu4KQ0NODkbb05e4HrAX-trOWRKiDOmznh9_pLI,3579
+biblicus/retrievers/__init__.py,sha256=LOxhUYgph1sPAeY6PmSmXH4Os4bIGGOtw88iOdI9S2k,1704
+biblicus/retrievers/base.py,sha256=DSf5Ve5IFeunIyV9zt7T1vEUvSkJWO4iBj96co5F0Qo,1891
+biblicus/retrievers/embedding_index_common.py,sha256=63_dUds-yIALhq2L9_0oDNpoh-_h7v2j1kexbdVw1-o,11756
+biblicus/retrievers/embedding_index_file.py,sha256=mvtXqRX-_eQpi9bRxQ2yqFxY26YhP8Vn2WGcoWVtMtc,10668
+biblicus/retrievers/embedding_index_inmemory.py,sha256=8csrdjCGkkR7DgvmLZ72oD1gm4duWWUnxJsjw6nLicU,10525
+biblicus/retrievers/hybrid.py,sha256=kaH-kIi4wxYyUWnKNFT7UNBbHFkRtcGlwjjiJpx-TJY,11789
+biblicus/retrievers/scan.py,sha256=ccDGVnqBS9a2ymKeBEXdfJz8XLahsBeYWYyYXQcg2KQ,13147
+biblicus/retrievers/sqlite_full_text_search.py,sha256=7rzYfzpRhPbsKuXjXi8x2-rmq8-z1em3amUF9UPAomI,25392
+biblicus/retrievers/tf_vector.py,sha256=rkcRG1GU5S_3t8GRbQTBThITj-eHT5fs1dyVzXPLg8w,15776
+biblicus/text/__init__.py,sha256=MiaGAY7xWlUCeBzDzNz6pJnSMiU_Ge5EmlSiEzhqTRo,947
+biblicus/text/annotate.py,sha256=asmpj3_s_t8hl6stEg99apmqxAhDTkoPzHhZNggYE3Y,8355
+biblicus/text/extract.py,sha256=pdnUiZWtfCUj7kZK5zhd-tjqokgmhYYheWhyN3iShRU,7669
+biblicus/text/link.py,sha256=2IdOi3WgyBKPFau0bpS1eToV1q2v_6wq5RK5_P_qUDg,20448
+biblicus/text/markup.py,sha256=8jj9aX03HiZTOWdPs_VC4JLpQ7TlPHgGuXj_QUQIHVw,6265
+biblicus/text/models.py,sha256=REp6RowUWFdV-6y437JENP7XtGKt57BOvVtF91KmUqI,10853
+biblicus/text/prompts.py,sha256=9dx1cWpJb6oBY4AhDHxlkRUYs7DfbySH0gb-uBTNvtk,7567
+biblicus/text/redact.py,sha256=tkDRmA0VvOZwMryEmBPLEHf3Z6VHJkkaWjBaNIMyGZ0,8415
+biblicus/text/slice.py,sha256=dlHxGO8c5P8BszXGwlNQoQ-cyWjJf6PfS1LUBJXXGEE,5762
+biblicus/text/tool_loop.py,sha256=dFeIEcCUA-yR8GMqsJ_n4007fHVmn9zK2hhlm6NlWyg,14161
+biblicus-1.1.0.dist-info/licenses/LICENSE,sha256=lw44GXFG_Q0fS8m5VoEvv_xtdBXK26pBcbSPUCXee_Q,1078
+biblicus-1.1.0.dist-info/METADATA,sha256=8hRnC6tlf8crtWxf6FPbGANZH9lxL6kiAtOtcxqJ3Ig,31202
+biblicus-1.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+biblicus-1.1.0.dist-info/entry_points.txt,sha256=BZmO4H8Uz00fyi1RAFryOCGfZgX7eHWkY2NE-G54U5A,47
+biblicus-1.1.0.dist-info/top_level.txt,sha256=sUD_XVZwDxZ29-FBv1MknTGh4mgDXznGuP28KJY_WKc,9
+biblicus-1.1.0.dist-info/RECORD,,

biblicus/backends/__init__.py DELETED Viewed

@@ -1,50 +0,0 @@
-"""
-Backend registry for Biblicus retrieval engines.
-"""
-from __future__ import annotations
-from typing import Dict, Type
-from .base import RetrievalBackend
-from .embedding_index_file import EmbeddingIndexFileBackend
-from .embedding_index_inmemory import EmbeddingIndexInMemoryBackend
-from .hybrid import HybridBackend
-from .scan import ScanBackend
-from .sqlite_full_text_search import SqliteFullTextSearchBackend
-from .tf_vector import TfVectorBackend
-def available_backends() -> Dict[str, Type[RetrievalBackend]]:
-    """
-    Return the registered retrieval backends.
-    :return: Mapping of backend identifiers to backend classes.
-    :rtype: dict[str, Type[RetrievalBackend]]
-    """
-    return {
-        EmbeddingIndexFileBackend.backend_id: EmbeddingIndexFileBackend,
-        EmbeddingIndexInMemoryBackend.backend_id: EmbeddingIndexInMemoryBackend,
-        HybridBackend.backend_id: HybridBackend,
-        ScanBackend.backend_id: ScanBackend,
-        SqliteFullTextSearchBackend.backend_id: SqliteFullTextSearchBackend,
-        TfVectorBackend.backend_id: TfVectorBackend,
-    }
-def get_backend(backend_id: str) -> RetrievalBackend:
-    """
-    Instantiate a retrieval backend by identifier.
-    :param backend_id: Backend identifier.
-    :type backend_id: str
-    :return: Backend instance.
-    :rtype: RetrievalBackend
-    :raises KeyError: If the backend identifier is unknown.
-    """
-    registry = available_backends()
-    backend_class = registry.get(backend_id)
-    if backend_class is None:
-        known = ", ".join(sorted(registry))
-        raise KeyError(f"Unknown backend '{backend_id}'. Known backends: {known}")
-    return backend_class()

biblicus/backends/base.py DELETED Viewed

@@ -1,65 +0,0 @@
-"""
-Backend interface for Biblicus retrieval engines.
-"""
-from __future__ import annotations
-from abc import ABC, abstractmethod
-from typing import Dict
-from ..corpus import Corpus
-from ..models import QueryBudget, RetrievalResult, RetrievalRun
-class RetrievalBackend(ABC):
-    """
-    Abstract interface for retrieval backends.
-    :ivar backend_id: Identifier string for the backend.
-    :vartype backend_id: str
-    """
-    backend_id: str
-    @abstractmethod
-    def build_run(
-        self, corpus: Corpus, *, recipe_name: str, config: Dict[str, object]
-    ) -> RetrievalRun:
-        """
-        Build or register a retrieval run for the backend.
-        :param corpus: Corpus to build against.
-        :type corpus: Corpus
-        :param recipe_name: Human name for the recipe.
-        :type recipe_name: str
-        :param config: Backend-specific configuration values.
-        :type config: dict[str, object]
-        :return: Run manifest describing the build.
-        :rtype: RetrievalRun
-        """
-        raise NotImplementedError
-    @abstractmethod
-    def query(
-        self,
-        corpus: Corpus,
-        *,
-        run: RetrievalRun,
-        query_text: str,
-        budget: QueryBudget,
-    ) -> RetrievalResult:
-        """
-        Run a retrieval query against a backend.
-        :param corpus: Corpus associated with the run.
-        :type corpus: Corpus
-        :param run: Run manifest to use for querying.
-        :type run: RetrievalRun
-        :param query_text: Query text to execute.
-        :type query_text: str
-        :param budget: Evidence selection budget.
-        :type budget: QueryBudget
-        :return: Retrieval results containing evidence.
-        :rtype: RetrievalResult
-        """
-        raise NotImplementedError

biblicus 0.16.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

biblicus 0.16.0__py3-none-any.whl → 1.1.0__py3-none-any.whl