biblicus 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biblicus/__init__.py +1 -1
- biblicus/_vendor/dotyaml/__init__.py +0 -1
- biblicus/_vendor/dotyaml/interpolation.py +0 -1
- biblicus/_vendor/dotyaml/loader.py +0 -1
- biblicus/_vendor/dotyaml/transformer.py +0 -1
- biblicus/analysis/__init__.py +2 -0
- biblicus/analysis/models.py +228 -5
- biblicus/analysis/profiling.py +337 -0
- biblicus/analysis/topic_modeling.py +3 -6
- biblicus/backends/__init__.py +4 -0
- biblicus/backends/hybrid.py +284 -0
- biblicus/backends/sqlite_full_text_search.py +266 -22
- biblicus/backends/vector.py +460 -0
- biblicus/cli.py +83 -4
- biblicus/corpus.py +9 -3
- biblicus/evidence_processing.py +4 -2
- biblicus/extraction.py +3 -1
- biblicus/extractors/markitdown_text.py +1 -0
- biblicus/extractors/paddleocr_vl_text.py +1 -3
- biblicus/models.py +3 -0
- biblicus/user_config.py +2 -6
- {biblicus-0.9.0.dist-info → biblicus-0.11.0.dist-info}/METADATA +13 -6
- {biblicus-0.9.0.dist-info → biblicus-0.11.0.dist-info}/RECORD +27 -24
- {biblicus-0.9.0.dist-info → biblicus-0.11.0.dist-info}/WHEEL +0 -0
- {biblicus-0.9.0.dist-info → biblicus-0.11.0.dist-info}/entry_points.txt +0 -0
- {biblicus-0.9.0.dist-info → biblicus-0.11.0.dist-info}/licenses/LICENSE +0 -0
- {biblicus-0.9.0.dist-info → biblicus-0.11.0.dist-info}/top_level.txt +0 -0
biblicus/user_config.py
CHANGED
|
@@ -170,9 +170,7 @@ def resolve_openai_api_key(*, config: Optional[BiblicusUserConfig] = None) -> Op
|
|
|
170
170
|
return loaded.openai.api_key
|
|
171
171
|
|
|
172
172
|
|
|
173
|
-
def resolve_huggingface_api_key(
|
|
174
|
-
*, config: Optional[BiblicusUserConfig] = None
|
|
175
|
-
) -> Optional[str]:
|
|
173
|
+
def resolve_huggingface_api_key(*, config: Optional[BiblicusUserConfig] = None) -> Optional[str]:
|
|
176
174
|
"""
|
|
177
175
|
Resolve a HuggingFace API key from environment or user configuration.
|
|
178
176
|
|
|
@@ -192,9 +190,7 @@ def resolve_huggingface_api_key(
|
|
|
192
190
|
return loaded.huggingface.api_key
|
|
193
191
|
|
|
194
192
|
|
|
195
|
-
def resolve_deepgram_api_key(
|
|
196
|
-
*, config: Optional[BiblicusUserConfig] = None
|
|
197
|
-
) -> Optional[str]:
|
|
193
|
+
def resolve_deepgram_api_key(*, config: Optional[BiblicusUserConfig] = None) -> Optional[str]:
|
|
198
194
|
"""
|
|
199
195
|
Resolve a Deepgram API key from environment or user configuration.
|
|
200
196
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: biblicus
|
|
3
|
-
Version: 0.9.0
|
|
3
|
+
Version: 0.11.0
|
|
4
4
|
Summary: Command line interface and Python library for corpus ingestion, retrieval, and evaluation.
|
|
5
5
|
License: MIT
|
|
6
6
|
Requires-Python: >=3.9
|
|
@@ -493,6 +493,12 @@ Two backends are included.
|
|
|
493
493
|
|
|
494
494
|
For detailed documentation including configuration options, performance characteristics, and usage examples, see the [Backend Reference][backend-reference].
|
|
495
495
|
|
|
496
|
+
## Retrieval documentation
|
|
497
|
+
|
|
498
|
+
For the retrieval pipeline overview and run artifacts, see `docs/RETRIEVAL.md`. For retrieval quality upgrades
|
|
499
|
+
(tuned lexical baseline, reranking, hybrid retrieval), see `docs/RETRIEVAL_QUALITY.md`. For evaluation workflows
|
|
500
|
+
and dataset formats, see `docs/RETRIEVAL_EVALUATION.md`.
|
|
501
|
+
|
|
496
502
|
## Extraction backends
|
|
497
503
|
|
|
498
504
|
These extractors are built in. Optional ones require extra dependencies. See [text extraction documentation][text-extraction] for details.
|
|
@@ -531,12 +537,13 @@ For detailed documentation on all extractors, see the [Extractor Reference][extr
|
|
|
531
537
|
|
|
532
538
|
## Topic modeling analysis
|
|
533
539
|
|
|
534
|
-
Biblicus can run analysis pipelines on extracted text without changing the raw corpus.
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
JavaScript Object Notation.
|
|
540
|
+
Biblicus can run analysis pipelines on extracted text without changing the raw corpus. Profiling and topic modeling
|
|
541
|
+
are the first analysis backends. Profiling summarizes corpus composition and extraction coverage. Topic modeling reads
|
|
542
|
+
an extraction run, optionally applies an LLM-driven extraction pass, applies lexical processing, runs BERTopic, and
|
|
543
|
+
optionally applies an LLM fine-tuning pass to label topics. The output is structured JavaScript Object Notation.
|
|
538
544
|
|
|
539
|
-
See `docs/ANALYSIS.md` for the analysis pipeline overview
|
|
545
|
+
See `docs/ANALYSIS.md` for the analysis pipeline overview, `docs/PROFILING.md` for profiling, and
|
|
546
|
+
`docs/TOPIC_MODELING.md` for topic modeling details.
|
|
540
547
|
|
|
541
548
|
Run a topic analysis using a recipe file:
|
|
542
549
|
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
biblicus/__init__.py,sha256=
|
|
1
|
+
biblicus/__init__.py,sha256=sT0PFc3DRGFRcN7Zx4Yooc8OzmLvaj1-ZjbvFHce8lU,496
|
|
2
2
|
biblicus/__main__.py,sha256=ipfkUoTlocVnrQDM69C7TeBqQxmHVeiWMRaT3G9rtnk,117
|
|
3
|
-
biblicus/cli.py,sha256=
|
|
3
|
+
biblicus/cli.py,sha256=aH3plnednnYgcPnSoYQf200nboKc6N-tuc3FuLPQEcU,35132
|
|
4
4
|
biblicus/constants.py,sha256=-JaHI3Dngte2drawx93cGWxFVobbgIuaVhmjUJpf4GI,333
|
|
5
5
|
biblicus/context.py,sha256=qnT9CH7_ldoPcg-rxnUOtRhheOmpDAbF8uqhf8OdjC4,5832
|
|
6
|
-
biblicus/corpus.py,sha256=
|
|
6
|
+
biblicus/corpus.py,sha256=qSDnYJXhWlF2p_BbFLl6xtI53lIIPxwyKLLGLC432Sg,55612
|
|
7
7
|
biblicus/crawl.py,sha256=n8rXBMnziBK9vtKQQCXYOpBzqsPCswj2PzVJUb370KY,6250
|
|
8
8
|
biblicus/errors.py,sha256=uMajd5DvgnJ_-jq5sbeom1GV8DPUc-kojBaECFi6CsY,467
|
|
9
9
|
biblicus/evaluation.py,sha256=5xWpb-8f49Osh9aHzo1ab3AXOmls3Imc5rdnEC0pN-8,8143
|
|
10
|
-
biblicus/evidence_processing.py,sha256=
|
|
11
|
-
biblicus/extraction.py,sha256=
|
|
10
|
+
biblicus/evidence_processing.py,sha256=sJe6T1nLxvU0xs9yMH8JZZS19zHXMR-Fpr5lWi5ndUM,6120
|
|
11
|
+
biblicus/extraction.py,sha256=qvrsq6zSz2Kg-cap-18HPHC9pQlqEGo7pyID2uKCyBo,19760
|
|
12
12
|
biblicus/frontmatter.py,sha256=JOGjIDzbbOkebQw2RzA-3WDVMAMtJta2INjS4e7-LMg,2463
|
|
13
13
|
biblicus/hook_logging.py,sha256=IMvde-JhVWrx9tNz3eDJ1CY_rr5Sj7DZ2YNomYCZbz0,5366
|
|
14
14
|
biblicus/hook_manager.py,sha256=ZCAkE5wLvn4lnQz8jho_o0HGEC9KdQd9qitkAEUQRcw,6997
|
|
@@ -16,35 +16,38 @@ biblicus/hooks.py,sha256=OHQOmOi7rUcQqYWVeod4oPe8nVLepD7F_SlN7O_-BsE,7863
|
|
|
16
16
|
biblicus/ignore.py,sha256=fyjt34E6tWNNrm1FseOhgH2MgryyVBQVzxhKL5s4aio,1800
|
|
17
17
|
biblicus/inference.py,sha256=_k00AIPoXD2lruiTB-JUagtY4f_WKcdzA3axwiq1tck,3512
|
|
18
18
|
biblicus/knowledge_base.py,sha256=JmlJw8WD_fgstuq1PyWVzU9kzvVzyv7_xOvhS70xwUw,6654
|
|
19
|
-
biblicus/models.py,sha256=
|
|
19
|
+
biblicus/models.py,sha256=r28O6cg3d1bjJnKqpLieVLTgtXTfzb_60wMORvVuDN0,15846
|
|
20
20
|
biblicus/retrieval.py,sha256=A1SI4WK5cX-WbtN6FJ0QQxqlEOtQhddLrL0LZIuoTC4,4180
|
|
21
21
|
biblicus/sources.py,sha256=EFy8-rQNLsyzz-98mH-z8gEHMYbqigcNFKLaR92KfDE,7241
|
|
22
22
|
biblicus/time.py,sha256=3BSKOSo7R10K-0Dzrbdtl3fh5_yShTYqfdlKvvdkx7M,485
|
|
23
23
|
biblicus/uris.py,sha256=xXD77lqsT9NxbyzI1spX9Y5a3-U6sLYMnpeSAV7g-nM,2013
|
|
24
|
-
biblicus/user_config.py,sha256=
|
|
25
|
-
biblicus/_vendor/dotyaml/__init__.py,sha256=
|
|
26
|
-
biblicus/_vendor/dotyaml/interpolation.py,sha256=
|
|
27
|
-
biblicus/_vendor/dotyaml/loader.py,sha256=
|
|
28
|
-
biblicus/_vendor/dotyaml/transformer.py,sha256=
|
|
29
|
-
biblicus/analysis/__init__.py,sha256=
|
|
24
|
+
biblicus/user_config.py,sha256=UXUYBNUN4FR37ggZGJG1wv3K8XzsMR8pXW1T18lrivw,6495
|
|
25
|
+
biblicus/_vendor/dotyaml/__init__.py,sha256=WAWdbFNFqO5cJPthxA8Kx-L76Bh07sKMosUxC_3o9qA,375
|
|
26
|
+
biblicus/_vendor/dotyaml/interpolation.py,sha256=FVUkdQr_KbXjoFPvGTv6I5v0X5iZkJe5yhZtYKRbYzI,1991
|
|
27
|
+
biblicus/_vendor/dotyaml/loader.py,sha256=zy_zinR5fiatmRyZSiELHv1vVz1Y2eRSboSf_x3kfi4,5623
|
|
28
|
+
biblicus/_vendor/dotyaml/transformer.py,sha256=RWNrm_KAsanG409HEIWquTH9i_jz-ZFK9fM86emXeF4,3724
|
|
29
|
+
biblicus/analysis/__init__.py,sha256=Z4Wb4d-EoUuGHkcfRm9ILuZ8vr9FBqRxC0u1i6Fp_0w,1288
|
|
30
30
|
biblicus/analysis/base.py,sha256=gB4ilvyMpiWU1m_ydy2dIHGP96ZFIFvVUL9iVDZKPJM,1265
|
|
31
31
|
biblicus/analysis/llm.py,sha256=VjkZDKauHCDfj-TP-bTbI6a9WAXEIDe8bEiwErPx9xc,3309
|
|
32
|
-
biblicus/analysis/models.py,sha256=
|
|
32
|
+
biblicus/analysis/models.py,sha256=LuR52w27JRzV-Mr-WAOduZrBOCTrp5uYkMc46QHTRrI,27300
|
|
33
|
+
biblicus/analysis/profiling.py,sha256=v2B4Tn9WiXRRP_wIADBPRQVKkMc92KXCas7OBa7n0LU,10670
|
|
33
34
|
biblicus/analysis/schema.py,sha256=MCiAQJmijVk8iM8rOUYbzyaDwsMR-Oo86iZU5NCbDMM,435
|
|
34
|
-
biblicus/analysis/topic_modeling.py,sha256=
|
|
35
|
-
biblicus/backends/__init__.py,sha256=
|
|
35
|
+
biblicus/analysis/topic_modeling.py,sha256=ZGXvm2MyU6plxz2FE1RQU-3bra6QZ-t8EJj8kG1TW0M,19438
|
|
36
|
+
biblicus/backends/__init__.py,sha256=3HJY0oMm8pFFVGC4Z-dlPRHhIPVDdUzsa4IMjKP_9dI,1378
|
|
36
37
|
biblicus/backends/base.py,sha256=Erfj9dXg0nkRKnEcNjHR9_0Ddb2B1NvbmRksVm_g1dU,1776
|
|
38
|
+
biblicus/backends/hybrid.py,sha256=CXh6QrlE0RsTJjSlZRdtomLlILfkglBDQG3YVa8RpFU,10589
|
|
37
39
|
biblicus/backends/scan.py,sha256=hdNnQWqi5IH6j95w30BZHxLJ0W9PTaOkqfWJuxCCEMI,12478
|
|
38
|
-
biblicus/backends/sqlite_full_text_search.py,sha256=
|
|
40
|
+
biblicus/backends/sqlite_full_text_search.py,sha256=VAn4fDdfiaS1Rn6zHlYz3E10_3vMU9P94QU8cL0l8Mk,24466
|
|
41
|
+
biblicus/backends/vector.py,sha256=3RdxSBPb1kOX4Sfd4d1qXFW9ecuiRvGpOHadLCbeh1g,15183
|
|
39
42
|
biblicus/extractors/__init__.py,sha256=ci3oldbdQZ8meAfHccM48CqQtZsPSRg3HkPrBSZF15M,2673
|
|
40
43
|
biblicus/extractors/base.py,sha256=ka-nz_1zHPr4TS9sU4JfOoY-PJh7lbHPBOEBrbQFGSc,2171
|
|
41
44
|
biblicus/extractors/deepgram_stt.py,sha256=VI71i4lbE-EFHcvpNcCPRpT8z7A5IuaSrT1UaPyZ8UY,6323
|
|
42
45
|
biblicus/extractors/docling_granite_text.py,sha256=aFNx-HubvaMmVJHbNqk3CR_ilSwN96-phkaENT6E2B0,6879
|
|
43
46
|
biblicus/extractors/docling_smol_text.py,sha256=cSbQcT4O47MMcM6_pmQCvqgC5ferLvaxJnm3v9EQd0A,6811
|
|
44
|
-
biblicus/extractors/markitdown_text.py,sha256
|
|
47
|
+
biblicus/extractors/markitdown_text.py,sha256=ZvN2TFh65icTTdzCe7L-ZB8zTPP2mxQ4MhOOqSc81Z0,4547
|
|
45
48
|
biblicus/extractors/metadata_text.py,sha256=7FbEPp0K1mXc7FH1_c0KhPhPexF9U6eLd3TVY1vTp1s,3537
|
|
46
49
|
biblicus/extractors/openai_stt.py,sha256=fggErIu6YN6tXbleNTuROhfYi7zDgMd2vD_ecXZ7eXs,7162
|
|
47
|
-
biblicus/extractors/paddleocr_vl_text.py,sha256=
|
|
50
|
+
biblicus/extractors/paddleocr_vl_text.py,sha256=59csxihkqK0lELpAtK2YLcfbSUvNGiuOw7CwPa_0l_c,11692
|
|
48
51
|
biblicus/extractors/pass_through_text.py,sha256=DNxkCwpH2bbXjPGPEQwsx8kfqXi6rIxXNY_n3TU2-WI,2777
|
|
49
52
|
biblicus/extractors/pdf_text.py,sha256=YtUphgLVxyWJXew6ZsJ8wBRh67Y5ri4ZTRlMmq3g1Bk,3255
|
|
50
53
|
biblicus/extractors/pipeline.py,sha256=LY6eM3ypw50MDB2cPEQqZrjxkhVvIc6sv4UEhHdNDrE,3208
|
|
@@ -54,9 +57,9 @@ biblicus/extractors/select_override.py,sha256=gSpffFmn1ux9pGtFvHD5Uu_LO8TmmJC4L_
|
|
|
54
57
|
biblicus/extractors/select_smart_override.py,sha256=-sLMnNoeXbCB3dO9zflQq324eHuLbd6hpveSwduXP-U,6763
|
|
55
58
|
biblicus/extractors/select_text.py,sha256=w0ATmDy3tWWbOObzW87jGZuHbgXllUhotX5XyySLs-o,3395
|
|
56
59
|
biblicus/extractors/unstructured_text.py,sha256=l2S_wD_htu7ZHoJQNQtP-kGlEgOeKV_w2IzAC93lePE,3564
|
|
57
|
-
biblicus-0.
|
|
58
|
-
biblicus-0.
|
|
59
|
-
biblicus-0.
|
|
60
|
-
biblicus-0.
|
|
61
|
-
biblicus-0.
|
|
62
|
-
biblicus-0.
|
|
60
|
+
biblicus-0.11.0.dist-info/licenses/LICENSE,sha256=lw44GXFG_Q0fS8m5VoEvv_xtdBXK26pBcbSPUCXee_Q,1078
|
|
61
|
+
biblicus-0.11.0.dist-info/METADATA,sha256=zrJESYGfGLu7Iq1I--GPIkEY9gXDb9szBIuenlWor7I,27765
|
|
62
|
+
biblicus-0.11.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
63
|
+
biblicus-0.11.0.dist-info/entry_points.txt,sha256=BZmO4H8Uz00fyi1RAFryOCGfZgX7eHWkY2NE-G54U5A,47
|
|
64
|
+
biblicus-0.11.0.dist-info/top_level.txt,sha256=sUD_XVZwDxZ29-FBv1MknTGh4mgDXznGuP28KJY_WKc,9
|
|
65
|
+
biblicus-0.11.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|