biblicus 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
biblicus/inference.py ADDED
@@ -0,0 +1,104 @@
1
+ """
2
+ Inference backend abstraction for machine learning powered components.
3
+
4
+ This module provides reusable configuration and credential resolution patterns for components
5
+ that can execute locally or via API providers.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ from enum import Enum
12
+ from typing import Optional
13
+
14
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
15
+
16
+
17
class InferenceBackendMode(str, Enum):
    """
    Enumerates where an inference backend executes.

    Members also subclass ``str``, so each member compares equal to its raw string value.
    """

    # Run inference locally.
    LOCAL = "local"
    # Run inference through an application programming interface provider.
    API = "api"
22
+
23
+
24
class ApiProvider(str, Enum):
    """
    Enumerates the application programming interface providers usable for inference.

    Members also subclass ``str``, so each member compares equal to its raw string value.
    """

    # HuggingFace provider.
    HUGGINGFACE = "huggingface"
    # OpenAI provider.
    OPENAI = "openai"
29
+
30
+
31
class InferenceBackendConfig(BaseModel):
    """
    Backend selection settings shared by inference-powered components.

    Embed this model inside an extractor or transformer configuration to expose one
    uniform switch between local execution and application programming interface execution.
    Unknown fields are rejected.

    :ivar mode: Execution mode, local (the default) or application programming interface.
    :vartype mode: InferenceBackendMode
    :ivar api_provider: Provider to call; required when mode is application programming interface.
    :vartype api_provider: ApiProvider or None
    :ivar api_key: Optional key that overrides any key resolved elsewhere for this config.
    :vartype api_key: str or None
    :ivar model_id: Optional model identifier sent with application programming interface requests.
    :vartype model_id: str or None
    """

    model_config = ConfigDict(extra="forbid")

    mode: InferenceBackendMode = InferenceBackendMode.LOCAL
    api_provider: Optional[ApiProvider] = None
    api_key: Optional[str] = None
    model_id: Optional[str] = None

    @model_validator(mode="after")
    def _validate_api_provider_required(self) -> "InferenceBackendConfig":
        """
        Reject application programming interface mode when no provider was chosen.

        :return: The validated configuration instance.
        :rtype: InferenceBackendConfig
        :raises ValueError: If mode is application programming interface and api_provider is None.
        """
        provider_missing = self.api_provider is None
        if provider_missing and self.mode == InferenceBackendMode.API:
            raise ValueError("api_provider is required when mode is 'api'")
        return self
60
+
61
+
62
def resolve_api_key(
    provider: ApiProvider,
    *,
    config_override: Optional[str] = None,
) -> Optional[str]:
    """
    Resolve an application programming interface key with precedence rules.

    Precedence order (highest to lowest):
    1. Explicit config override parameter
    2. Environment variable for the provider
    3. User configuration file

    Blank values are never treated as keys: an empty ``config_override`` or an empty
    environment variable falls through to the next source.

    :param provider: Application programming interface provider to resolve key for.
    :type provider: ApiProvider
    :param config_override: Optional explicit key from configuration.
    :type config_override: str or None
    :return: Resolved application programming interface key or None if unavailable.
    :rtype: str or None
    """
    # Truthiness check (not ``is not None``) so that an empty override does not
    # short-circuit resolution with an unusable key; this mirrors the environment check below.
    if config_override:
        return config_override

    # One row per provider: (environment variable name, user configuration attribute name).
    sources = {
        ApiProvider.HUGGINGFACE: ("HUGGINGFACE_API_KEY", "huggingface"),
        ApiProvider.OPENAI: ("OPENAI_API_KEY", "openai"),
    }
    source = sources.get(provider)
    if source is None:
        return None
    env_name, section_name = source

    env_key = os.environ.get(env_name)
    if env_key:
        return env_key

    # Imported lazily, and only once the user configuration file is actually needed.
    from .user_config import load_user_config

    section = getattr(load_user_config(), section_name)
    if section is None:
        return None
    return section.api_key
biblicus/models.py CHANGED
@@ -399,6 +399,8 @@ class ExtractedText(BaseModel):
399
399
  :vartype producer_extractor_id: str
400
400
  :ivar source_step_index: Optional pipeline step index where this text originated.
401
401
  :vartype source_step_index: int or None
402
+ :ivar confidence: Optional confidence score from 0.0 to 1.0.
403
+ :vartype confidence: float or None
402
404
  """
403
405
 
404
406
  model_config = ConfigDict(extra="forbid")
@@ -406,6 +408,7 @@ class ExtractedText(BaseModel):
406
408
  text: str
407
409
  producer_extractor_id: str = Field(min_length=1)
408
410
  source_step_index: Optional[int] = Field(default=None, ge=1)
411
+ confidence: Optional[float] = Field(default=None, ge=0.0, le=1.0)
409
412
 
410
413
 
411
414
  class ExtractionStepOutput(BaseModel):
@@ -426,6 +429,8 @@ class ExtractionStepOutput(BaseModel):
426
429
  :vartype producer_extractor_id: str or None
427
430
  :ivar source_step_index: Optional step index that supplied the text for selection-style extractors.
428
431
  :vartype source_step_index: int or None
432
+ :ivar confidence: Optional confidence score from 0.0 to 1.0.
433
+ :vartype confidence: float or None
429
434
  :ivar error_type: Optional error type name for errored steps.
430
435
  :vartype error_type: str or None
431
436
  :ivar error_message: Optional error message for errored steps.
@@ -441,5 +446,6 @@ class ExtractionStepOutput(BaseModel):
441
446
  text_characters: int = Field(default=0, ge=0)
442
447
  producer_extractor_id: Optional[str] = None
443
448
  source_step_index: Optional[int] = Field(default=None, ge=1)
449
+ confidence: Optional[float] = Field(default=None, ge=0.0, le=1.0)
444
450
  error_type: Optional[str] = None
445
451
  error_message: Optional[str] = None
biblicus/user_config.py CHANGED
@@ -29,17 +29,49 @@ class OpenAiUserConfig(BaseModel):
29
29
  api_key: str = Field(min_length=1)
30
30
 
31
31
 
32
class HuggingFaceUserConfig(BaseModel):
    """
    User configuration section for HuggingFace integrations.

    Unknown fields are rejected.

    :ivar api_key: Non-empty HuggingFace API key used for authenticated requests.
    :vartype api_key: str
    """

    model_config = ConfigDict(extra="forbid")

    # An empty string is rejected by the minimum-length constraint.
    api_key: str = Field(min_length=1)
43
+
44
+
45
class DeepgramUserConfig(BaseModel):
    """
    User configuration section for Deepgram integrations.

    Unknown fields are rejected.

    :ivar api_key: Non-empty Deepgram API key used for authenticated requests.
    :vartype api_key: str
    """

    model_config = ConfigDict(extra="forbid")

    # An empty string is rejected by the minimum-length constraint.
    api_key: str = Field(min_length=1)
56
+
57
+
32
58
class BiblicusUserConfig(BaseModel):
    """
    Top-level parsed user configuration for Biblicus.

    Each provider section is optional and defaults to None. Unknown fields are rejected.

    :ivar openai: Optional OpenAI configuration.
    :vartype openai: OpenAiUserConfig or None
    :ivar huggingface: Optional HuggingFace configuration.
    :vartype huggingface: HuggingFaceUserConfig or None
    :ivar deepgram: Optional Deepgram configuration.
    :vartype deepgram: DeepgramUserConfig or None
    """

    model_config = ConfigDict(extra="forbid")

    # Provider sections; None means the section is not configured.
    openai: Optional[OpenAiUserConfig] = None
    huggingface: Optional[HuggingFaceUserConfig] = None
    deepgram: Optional[DeepgramUserConfig] = None
43
75
 
44
76
 
45
77
  def default_user_config_paths(
@@ -136,3 +168,47 @@ def resolve_openai_api_key(*, config: Optional[BiblicusUserConfig] = None) -> Op
136
168
  if loaded.openai is None:
137
169
  return None
138
170
  return loaded.openai.api_key
171
+
172
+
173
def resolve_huggingface_api_key(
    *, config: Optional[BiblicusUserConfig] = None
) -> Optional[str]:
    """
    Look up the HuggingFace API key, preferring the process environment.

    A non-empty ``HUGGINGFACE_API_KEY`` environment variable wins. Otherwise the key comes from
    the ``huggingface`` section of the user configuration, which is loaded on demand when
    ``config`` is not supplied.

    :param config: Optional pre-loaded user configuration.
    :type config: BiblicusUserConfig or None
    :return: API key string, or None when no key is available.
    :rtype: str or None
    """
    from_environment = os.environ.get("HUGGINGFACE_API_KEY")
    if from_environment:
        return from_environment

    # Only load the user configuration when the caller did not hand one in.
    settings = config if config else load_user_config()
    section = settings.huggingface
    return section.api_key if section is not None else None
193
+
194
+
195
def resolve_deepgram_api_key(
    *, config: Optional[BiblicusUserConfig] = None
) -> Optional[str]:
    """
    Look up the Deepgram API key, preferring the process environment.

    A non-empty ``DEEPGRAM_API_KEY`` environment variable wins. Otherwise the key comes from
    the ``deepgram`` section of the user configuration, which is loaded on demand when
    ``config`` is not supplied.

    :param config: Optional pre-loaded user configuration.
    :type config: BiblicusUserConfig or None
    :return: API key string, or None when no key is available.
    :rtype: str or None
    """
    from_environment = os.environ.get("DEEPGRAM_API_KEY")
    if from_environment:
        return from_environment

    # Only load the user configuration when the caller did not hand one in.
    settings = config if config else load_user_config()
    section = settings.deepgram
    return section.api_key if section is not None else None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: biblicus
3
- Version: 0.6.0
3
+ Version: 0.8.0
4
4
  Summary: Command line interface and Python library for corpus ingestion, retrieval, and evaluation.
5
5
  License: MIT
6
6
  Requires-Python: >=3.9
@@ -25,6 +25,21 @@ Requires-Dist: unstructured>=0.12.0; extra == "unstructured"
25
25
  Requires-Dist: python-docx>=1.1.0; extra == "unstructured"
26
26
  Provides-Extra: ocr
27
27
  Requires-Dist: rapidocr-onnxruntime>=1.3.0; extra == "ocr"
28
+ Provides-Extra: paddleocr
29
+ Requires-Dist: paddleocr>=2.7.0; extra == "paddleocr"
30
+ Requires-Dist: paddlepaddle>=2.5.0; extra == "paddleocr"
31
+ Requires-Dist: huggingface_hub>=0.20.0; extra == "paddleocr"
32
+ Requires-Dist: requests>=2.28.0; extra == "paddleocr"
33
+ Provides-Extra: markitdown
34
+ Requires-Dist: markitdown[all]>=0.1.0; python_version >= "3.10" and extra == "markitdown"
35
+ Provides-Extra: deepgram
36
+ Requires-Dist: deepgram-sdk>=3.0; extra == "deepgram"
37
+ Provides-Extra: docling
38
+ Requires-Dist: docling[vlm]>=2.0.0; extra == "docling"
39
+ Provides-Extra: docling-mlx
40
+ Requires-Dist: docling[mlx-vlm]>=2.0.0; extra == "docling-mlx"
41
+ Provides-Extra: topic-modeling
42
+ Requires-Dist: bertopic>=0.15.0; extra == "topic-modeling"
28
43
  Dynamic: license-file
29
44
 
30
45
  # Biblicus
@@ -67,7 +82,7 @@ If you want to run a real, executable version of this story, use `scripts/readme
67
82
  This simplified sequence diagram shows the same idea at a high level.
68
83
 
69
84
  ```mermaid
70
- %%{init: {"theme": "base", "themeVariables": {"primaryColor": "#f3e5f5", "primaryTextColor": "#111111", "primaryBorderColor": "#8e24aa", "lineColor": "#90a4ae", "secondaryColor": "#eceff1", "tertiaryColor": "#ffffff", "noteBkgColor": "#ffffff", "noteTextColor": "#111111", "actorBkg": "#f3e5f5", "actorBorder": "#8e24aa", "actorTextColor": "#111111"}}}%%
85
+ %%{init: {"theme": "base", "themeVariables": {"background": "#ffffff", "primaryColor": "#f3e5f5", "primaryTextColor": "#111111", "primaryBorderColor": "#8e24aa", "lineColor": "#90a4ae", "secondaryColor": "#eceff1", "tertiaryColor": "#ffffff", "noteBkgColor": "#ffffff", "noteTextColor": "#111111", "actorBkg": "#f3e5f5", "actorBorder": "#8e24aa", "actorTextColor": "#111111"}}}%%
71
86
  sequenceDiagram
72
87
  participant App as Your assistant code
73
88
  participant KB as Knowledge base
@@ -106,7 +121,7 @@ In a coding assistant, retrieval is often triggered by what the user is doing ri
106
121
  This diagram shows two sequential Biblicus calls. They are shown separately to make the boundaries explicit: retrieval returns evidence, and context pack building consumes evidence.
107
122
 
108
123
  ```mermaid
109
- %%{init: {"theme": "base", "themeVariables": {"primaryColor": "#f3e5f5", "primaryTextColor": "#111111", "primaryBorderColor": "#8e24aa", "lineColor": "#90a4ae", "secondaryColor": "#eceff1", "tertiaryColor": "#ffffff", "noteBkgColor": "#ffffff", "noteTextColor": "#111111", "actorBkg": "#f3e5f5", "actorBorder": "#8e24aa", "actorTextColor": "#111111"}}}%%
124
+ %%{init: {"theme": "base", "themeVariables": {"background": "#ffffff", "primaryColor": "#f3e5f5", "primaryTextColor": "#111111", "primaryBorderColor": "#8e24aa", "lineColor": "#90a4ae", "secondaryColor": "#eceff1", "tertiaryColor": "#ffffff", "noteBkgColor": "#ffffff", "noteTextColor": "#111111", "actorBkg": "#f3e5f5", "actorBorder": "#8e24aa", "actorTextColor": "#111111"}}}%%
110
125
  sequenceDiagram
111
126
  participant User
112
127
  participant App as Your assistant code
@@ -158,8 +173,14 @@ python3 -m pip install biblicus
158
173
  Some extractors are optional so the base install stays small.
159
174
 
160
175
  - Optical character recognition for images: `python3 -m pip install "biblicus[ocr]"`
161
- - Speech to text transcription: `python3 -m pip install "biblicus[openai]"` (requires an OpenAI API key in `~/.biblicus/config.yml` or `./.biblicus/config.yml`)
176
+ - Advanced optical character recognition with PaddleOCR: `python3 -m pip install "biblicus[paddleocr]"`
177
+ - Document understanding with Docling VLM: `python3 -m pip install "biblicus[docling]"`
178
+ - Document understanding with Docling VLM and MLX acceleration: `python3 -m pip install "biblicus[docling-mlx]"`
179
+ - Speech to text transcription with OpenAI: `python3 -m pip install "biblicus[openai]"` (requires an OpenAI API key in `~/.biblicus/config.yml` or `./.biblicus/config.yml`)
180
+ - Speech to text transcription with Deepgram: `python3 -m pip install "biblicus[deepgram]"` (requires a Deepgram API key in `~/.biblicus/config.yml` or `./.biblicus/config.yml`)
162
181
  - Broad document parsing fallback: `python3 -m pip install "biblicus[unstructured]"`
182
+ - MarkItDown document conversion (requires Python 3.10 or higher): `python3 -m pip install "biblicus[markitdown]"`
183
+ - Topic modeling analysis with BERTopic: `python3 -m pip install "biblicus[topic-modeling]"`
163
184
 
164
185
  ## Quick start
165
186
 
@@ -417,6 +438,7 @@ The documents below follow the pipeline from raw items to model context:
417
438
 
418
439
  - [Corpus][corpus]
419
440
  - [Text extraction][text-extraction]
441
+ - [Speech to text][speech-to-text]
420
442
  - [Knowledge base][knowledge-base]
421
443
  - [Backends][backends]
422
444
  - [Context packs][context-packs]
@@ -465,7 +487,97 @@ corpus/
465
487
  Two backends are included.
466
488
 
467
489
  - `scan` is a minimal baseline that scans raw items directly.
468
- - `sqlite-full-text-search` is a practical baseline that builds a full text search index in Sqlite.
490
+ - `sqlite-full-text-search` is a practical baseline that builds a full text search index in SQLite.
491
+
492
+ For detailed documentation including configuration options, performance characteristics, and usage examples, see the [Backend Reference][backend-reference].
493
+
494
+ ## Extraction backends
495
+
496
+ These extractors are built in. Optional ones require extra dependencies. See [text extraction documentation][text-extraction] for details.
497
+
498
+ ### Text and document extraction
499
+
500
+ - [`pass-through-text`](docs/extractors/text-document/pass-through.md) reads text items and strips Markdown front matter.
501
+ - [`metadata-text`](docs/extractors/text-document/metadata.md) turns catalog metadata into a small text artifact.
502
+ - [`pdf-text`](docs/extractors/text-document/pdf.md) extracts text from Portable Document Format items with `pypdf`.
503
+ - [`unstructured`](docs/extractors/text-document/unstructured.md) provides broad document parsing (optional).
504
+ - [`markitdown`](docs/extractors/text-document/markitdown.md) converts many formats into Markdown-like text (optional).
505
+
506
+ ### Optical character recognition
507
+
508
+ - [`ocr-rapidocr`](docs/extractors/ocr/rapidocr.md) does optical character recognition on images (optional).
509
+ - [`ocr-paddleocr-vl`](docs/extractors/ocr/paddleocr-vl.md) does advanced optical character recognition with PaddleOCR vision-language model (optional).
510
+
511
+ ### Vision-language models
512
+
513
+ - [`docling-smol`](docs/extractors/vlm-document/docling-smol.md) uses the SmolDocling-256M vision-language model for fast document understanding (optional).
514
+ - [`docling-granite`](docs/extractors/vlm-document/docling-granite.md) uses the Granite Docling-258M vision-language model for high-accuracy extraction (optional).
515
+
516
+ ### Speech to text
517
+
518
+ - [`stt-openai`](docs/extractors/speech-to-text/openai.md) performs speech to text on audio using OpenAI (optional).
519
+ - [`stt-deepgram`](docs/extractors/speech-to-text/deepgram.md) performs speech to text on audio using Deepgram (optional).
520
+
521
+ ### Pipeline utilities
522
+
523
+ - [`select-text`](docs/extractors/pipeline-utilities/select-text.md) chooses one prior extraction result in a pipeline.
524
+ - [`select-longest-text`](docs/extractors/pipeline-utilities/select-longest.md) chooses the longest prior extraction result.
525
+ - [`select-override`](docs/extractors/pipeline-utilities/select-override.md) chooses the last extraction result for matching media types in a pipeline.
526
+ - [`select-smart-override`](docs/extractors/pipeline-utilities/select-smart-override.md) intelligently chooses between extraction results based on confidence and content quality.
527
+
528
+ For detailed documentation on all extractors, see the [Extractor Reference][extractor-reference].
529
+
530
+ ## Topic modeling analysis
531
+
532
+ Biblicus can run analysis pipelines on extracted text without changing the raw corpus. Topic modeling is the first
533
+ analysis backend. It reads an extraction run, optionally applies an LLM-driven extraction pass, applies lexical
534
+ processing, runs BERTopic, and optionally applies an LLM fine-tuning pass to label topics. The output is structured
535
+ JavaScript Object Notation.
536
+
537
+ Run a topic analysis using a recipe file:
538
+
539
+ ```
540
+ biblicus analyze topics --corpus corpora/example --recipe recipes/topic-modeling.yml --extraction-run pipeline:<run_id>
541
+ ```
542
+
543
+ If `--extraction-run` is omitted, Biblicus uses the most recent extraction run and emits a warning about
544
+ reproducibility. The analysis output is stored under:
545
+
546
+ ```
547
+ .biblicus/runs/analysis/topic-modeling/<run_id>/output.json
548
+ ```
549
+
550
+ Minimal recipe example:
551
+
552
+ ```yaml
553
+ schema_version: 1
554
+ text_source:
555
+ sample_size: 200
556
+ llm_extraction:
557
+ enabled: false
558
+ lexical_processing:
559
+ enabled: true
560
+ lowercase: true
561
+ strip_punctuation: false
562
+ collapse_whitespace: true
563
+ bertopic_analysis:
564
+ parameters:
565
+ min_topic_size: 8
566
+ nr_topics: 10
567
+ llm_fine_tuning:
568
+ enabled: false
569
+ ```
570
+
571
+ LLM extraction and fine-tuning require `biblicus[openai]` and a configured OpenAI API key.
572
+ Recipe files are validated strictly against the topic modeling schema, so type mismatches or unknown fields are errors.
573
+
574
+ For a repeatable, real-world integration run that downloads a Wikipedia corpus and executes topic modeling, use:
575
+
576
+ ```
577
+ python3 scripts/topic_modeling_integration.py --corpus corpora/wiki_demo --force
578
+ ```
579
+
580
+ See `docs/TOPIC_MODELING.md` for parameter examples and per-topic output behavior.
469
581
 
470
582
  ## Integration corpus and evaluation dataset
471
583
 
@@ -522,6 +634,9 @@ License terms are in `LICENSE`.
522
634
  [corpus]: docs/CORPUS.md
523
635
  [knowledge-base]: docs/KNOWLEDGE_BASE.md
524
636
  [text-extraction]: docs/EXTRACTION.md
637
+ [extractor-reference]: docs/extractors/index.md
638
+ [backend-reference]: docs/backends/index.md
639
+ [speech-to-text]: docs/STT.md
525
640
  [user-configuration]: docs/USER_CONFIGURATION.md
526
641
  [backends]: docs/BACKENDS.md
527
642
  [context-packs]: docs/CONTEXT_PACK.md
@@ -1,48 +1,62 @@
1
- biblicus/__init__.py,sha256=jxBNIMVKudpRsbzdiE5CmU6nIjgnNhCRq0OZLSwt_kM,495
1
+ biblicus/__init__.py,sha256=XhgZfXIpkQ5_SzHj-2Vqt_N3hvx6TSOv6KMdac6HfaI,495
2
2
  biblicus/__main__.py,sha256=ipfkUoTlocVnrQDM69C7TeBqQxmHVeiWMRaT3G9rtnk,117
3
- biblicus/cli.py,sha256=hBau464XNdSGdWeOCE2Q7dm0P8I4sR0W-NgVT0wPmh4,27724
4
- biblicus/constants.py,sha256=R6fZDoLVMCwgKvTaxEx7G0CstwHGaUTlW9MsmNLDZ44,269
3
+ biblicus/cli.py,sha256=GVmZlCSZPUMBbq69yjN16f4xNw71edlFbGPHX3300oI,32643
4
+ biblicus/constants.py,sha256=-JaHI3Dngte2drawx93cGWxFVobbgIuaVhmjUJpf4GI,333
5
5
  biblicus/context.py,sha256=qnT9CH7_ldoPcg-rxnUOtRhheOmpDAbF8uqhf8OdjC4,5832
6
- biblicus/corpus.py,sha256=gF1RNl6fdz7wplzpHEIkEBkhYxHgKTKguBR_kD9IgUw,54109
6
+ biblicus/corpus.py,sha256=Pq2OvXom7giwD1tuWoM3RhFnak5YFx5bCh6JTd6JYtI,55554
7
7
  biblicus/crawl.py,sha256=n8rXBMnziBK9vtKQQCXYOpBzqsPCswj2PzVJUb370KY,6250
8
8
  biblicus/errors.py,sha256=uMajd5DvgnJ_-jq5sbeom1GV8DPUc-kojBaECFi6CsY,467
9
9
  biblicus/evaluation.py,sha256=5xWpb-8f49Osh9aHzo1ab3AXOmls3Imc5rdnEC0pN-8,8143
10
10
  biblicus/evidence_processing.py,sha256=EMv1AkV_Eufk-poBz9nRR1dZgC-QewvI-NrULBUGVGA,6074
11
- biblicus/extraction.py,sha256=VEjBjIpaBboftGgEcpDj7z7um41e5uDZpP_7acQg7fw,19448
11
+ biblicus/extraction.py,sha256=20lRxz6Te6IcA4d-rfT4qjJtgRG_c4YvrqfXNA7EYfs,19738
12
12
  biblicus/frontmatter.py,sha256=JOGjIDzbbOkebQw2RzA-3WDVMAMtJta2INjS4e7-LMg,2463
13
13
  biblicus/hook_logging.py,sha256=IMvde-JhVWrx9tNz3eDJ1CY_rr5Sj7DZ2YNomYCZbz0,5366
14
14
  biblicus/hook_manager.py,sha256=ZCAkE5wLvn4lnQz8jho_o0HGEC9KdQd9qitkAEUQRcw,6997
15
15
  biblicus/hooks.py,sha256=OHQOmOi7rUcQqYWVeod4oPe8nVLepD7F_SlN7O_-BsE,7863
16
16
  biblicus/ignore.py,sha256=fyjt34E6tWNNrm1FseOhgH2MgryyVBQVzxhKL5s4aio,1800
17
+ biblicus/inference.py,sha256=_k00AIPoXD2lruiTB-JUagtY4f_WKcdzA3axwiq1tck,3512
17
18
  biblicus/knowledge_base.py,sha256=JmlJw8WD_fgstuq1PyWVzU9kzvVzyv7_xOvhS70xwUw,6654
18
- biblicus/models.py,sha256=6SWQ2Czg9O3zjuam8a4m8V3LlEgcGLbEctYDB6F1rRs,15317
19
+ biblicus/models.py,sha256=vlvPP7AOZGtnHSq47-s9YW-fqLwjgYR6NBcSfeC8YKk,15665
19
20
  biblicus/retrieval.py,sha256=A1SI4WK5cX-WbtN6FJ0QQxqlEOtQhddLrL0LZIuoTC4,4180
20
21
  biblicus/sources.py,sha256=EFy8-rQNLsyzz-98mH-z8gEHMYbqigcNFKLaR92KfDE,7241
21
22
  biblicus/time.py,sha256=3BSKOSo7R10K-0Dzrbdtl3fh5_yShTYqfdlKvvdkx7M,485
22
23
  biblicus/uris.py,sha256=xXD77lqsT9NxbyzI1spX9Y5a3-U6sLYMnpeSAV7g-nM,2013
23
- biblicus/user_config.py,sha256=DqO08yLn82DhTiFpmIyyLj_J0nMbrtE8xieTj2Cgd6A,4287
24
+ biblicus/user_config.py,sha256=okK57CRmT0W_yrc45tMPRl_abT7-D96IOrCBZtKtumM,6507
24
25
  biblicus/_vendor/dotyaml/__init__.py,sha256=e4zbejeJRwlD4I0q3YvotMypO19lXqmT8iyU1q6SvhY,376
25
26
  biblicus/_vendor/dotyaml/interpolation.py,sha256=PfUAEEOTFobv7Ox0E6nAxht6BqhHIDe4hP32fZn5TOs,1992
26
27
  biblicus/_vendor/dotyaml/loader.py,sha256=KePkjyhKZSvQZphmlmlzTYZJBQsqL5qhtGV1y7G6wzM,5624
27
28
  biblicus/_vendor/dotyaml/transformer.py,sha256=2AKPS8DMOPuYtzmM-dlwIqVbARfbBH5jYV1m5qpR49E,3725
29
+ biblicus/analysis/__init__.py,sha256=TrKsE2GmdZDr3OARo2poa9H0powo0bjiEEWVx0tZmEg,1192
30
+ biblicus/analysis/base.py,sha256=gB4ilvyMpiWU1m_ydy2dIHGP96ZFIFvVUL9iVDZKPJM,1265
31
+ biblicus/analysis/llm.py,sha256=VjkZDKauHCDfj-TP-bTbI6a9WAXEIDe8bEiwErPx9xc,3309
32
+ biblicus/analysis/models.py,sha256=XocDiEVF7ud53hd9eCFTuMXS68U-eBthpe7a6J9j6uU,17824
33
+ biblicus/analysis/schema.py,sha256=MCiAQJmijVk8iM8rOUYbzyaDwsMR-Oo86iZU5NCbDMM,435
34
+ biblicus/analysis/topic_modeling.py,sha256=Y_9Auh47_wRD4LXVZ_c-S7AYeO72wLu39CHHa_ZLunI,18352
28
35
  biblicus/backends/__init__.py,sha256=wLXIumV51l6ZIKzjoKKeU7AgIxGOryG7T7ls3a_Fv98,1212
29
36
  biblicus/backends/base.py,sha256=Erfj9dXg0nkRKnEcNjHR9_0Ddb2B1NvbmRksVm_g1dU,1776
30
37
  biblicus/backends/scan.py,sha256=hdNnQWqi5IH6j95w30BZHxLJ0W9PTaOkqfWJuxCCEMI,12478
31
38
  biblicus/backends/sqlite_full_text_search.py,sha256=KgmwOiKvkA0pv7vD0V7bcOdDx_nZIOfuIN6Z4Ij7I68,16516
32
- biblicus/extractors/__init__.py,sha256=X3pu18QL85IBpYf56l6_5PUxFPhEN5qLTlOrxYpfGck,1776
39
+ biblicus/extractors/__init__.py,sha256=ci3oldbdQZ8meAfHccM48CqQtZsPSRg3HkPrBSZF15M,2673
33
40
  biblicus/extractors/base.py,sha256=ka-nz_1zHPr4TS9sU4JfOoY-PJh7lbHPBOEBrbQFGSc,2171
41
+ biblicus/extractors/deepgram_stt.py,sha256=VI71i4lbE-EFHcvpNcCPRpT8z7A5IuaSrT1UaPyZ8UY,6323
42
+ biblicus/extractors/docling_granite_text.py,sha256=aFNx-HubvaMmVJHbNqk3CR_ilSwN96-phkaENT6E2B0,6879
43
+ biblicus/extractors/docling_smol_text.py,sha256=cSbQcT4O47MMcM6_pmQCvqgC5ferLvaxJnm3v9EQd0A,6811
44
+ biblicus/extractors/markitdown_text.py,sha256=-7N8ebi3pYfNPnplccyy3qvsKi6uImC1xyo_dSDiD10,4546
34
45
  biblicus/extractors/metadata_text.py,sha256=7FbEPp0K1mXc7FH1_c0KhPhPexF9U6eLd3TVY1vTp1s,3537
35
46
  biblicus/extractors/openai_stt.py,sha256=fggErIu6YN6tXbleNTuROhfYi7zDgMd2vD_ecXZ7eXs,7162
47
+ biblicus/extractors/paddleocr_vl_text.py,sha256=augbxZ-kx22yHvFR1b6CUAS2I6ktXFsJx8nLWRfvdOA,11722
36
48
  biblicus/extractors/pass_through_text.py,sha256=DNxkCwpH2bbXjPGPEQwsx8kfqXi6rIxXNY_n3TU2-WI,2777
37
49
  biblicus/extractors/pdf_text.py,sha256=YtUphgLVxyWJXew6ZsJ8wBRh67Y5ri4ZTRlMmq3g1Bk,3255
38
50
  biblicus/extractors/pipeline.py,sha256=LY6eM3ypw50MDB2cPEQqZrjxkhVvIc6sv4UEhHdNDrE,3208
39
- biblicus/extractors/rapidocr_text.py,sha256=OMAuZealLSSTFVVmBalT-AFJy2pEpHyyvpuWxlnY-GU,4531
51
+ biblicus/extractors/rapidocr_text.py,sha256=StvizEha5BkEG7i5KJmnOUtji89p5pghF4w8iQ-WwFk,4776
40
52
  biblicus/extractors/select_longest_text.py,sha256=wRveXAfYLdj7CpGuo4RoD7zE6SIfylRCbv40z2azO0k,3702
53
+ biblicus/extractors/select_override.py,sha256=gSpffFmn1ux9pGtFvHD5Uu_LO8TmmJC4L_mvjehiSec,4014
54
+ biblicus/extractors/select_smart_override.py,sha256=-sLMnNoeXbCB3dO9zflQq324eHuLbd6hpveSwduXP-U,6763
41
55
  biblicus/extractors/select_text.py,sha256=w0ATmDy3tWWbOObzW87jGZuHbgXllUhotX5XyySLs-o,3395
42
56
  biblicus/extractors/unstructured_text.py,sha256=l2S_wD_htu7ZHoJQNQtP-kGlEgOeKV_w2IzAC93lePE,3564
43
- biblicus-0.6.0.dist-info/licenses/LICENSE,sha256=lw44GXFG_Q0fS8m5VoEvv_xtdBXK26pBcbSPUCXee_Q,1078
44
- biblicus-0.6.0.dist-info/METADATA,sha256=NXcMvQZklQCSukUOGcZaLSw_aqUm6wFojy6k_pfZvzc,21311
45
- biblicus-0.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
46
- biblicus-0.6.0.dist-info/entry_points.txt,sha256=BZmO4H8Uz00fyi1RAFryOCGfZgX7eHWkY2NE-G54U5A,47
47
- biblicus-0.6.0.dist-info/top_level.txt,sha256=sUD_XVZwDxZ29-FBv1MknTGh4mgDXznGuP28KJY_WKc,9
48
- biblicus-0.6.0.dist-info/RECORD,,
57
+ biblicus-0.8.0.dist-info/licenses/LICENSE,sha256=lw44GXFG_Q0fS8m5VoEvv_xtdBXK26pBcbSPUCXee_Q,1078
58
+ biblicus-0.8.0.dist-info/METADATA,sha256=I4zW3JWMOmyh4tBpR-D2MGAl9YCp9IqtFo8wxoNA1qQ,27116
59
+ biblicus-0.8.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
60
+ biblicus-0.8.0.dist-info/entry_points.txt,sha256=BZmO4H8Uz00fyi1RAFryOCGfZgX7eHWkY2NE-G54U5A,47
61
+ biblicus-0.8.0.dist-info/top_level.txt,sha256=sUD_XVZwDxZ29-FBv1MknTGh4mgDXznGuP28KJY_WKc,9
62
+ biblicus-0.8.0.dist-info/RECORD,,