biblicus 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biblicus-0.9.0/LICENSE +21 -0
- biblicus-0.9.0/MANIFEST.in +21 -0
- biblicus-0.9.0/PKG-INFO +654 -0
- biblicus-0.9.0/README.md +608 -0
- biblicus-0.9.0/THIRD_PARTY_NOTICES.md +36 -0
- biblicus-0.9.0/datasets/wikipedia_mini.json +37 -0
- biblicus-0.9.0/docs/ANALYSIS.md +36 -0
- biblicus-0.9.0/docs/ARCHITECTURE.md +180 -0
- biblicus-0.9.0/docs/BACKENDS.md +39 -0
- biblicus-0.9.0/docs/CONTEXT_PACK.md +61 -0
- biblicus-0.9.0/docs/CORPUS.md +116 -0
- biblicus-0.9.0/docs/CORPUS_DESIGN.md +404 -0
- biblicus-0.9.0/docs/DEMOS.md +396 -0
- biblicus-0.9.0/docs/EXTRACTION.md +195 -0
- biblicus-0.9.0/docs/FEATURE_INDEX.md +265 -0
- biblicus-0.9.0/docs/KNOWLEDGE_BASE.md +68 -0
- biblicus-0.9.0/docs/ROADMAP.md +111 -0
- biblicus-0.9.0/docs/STT.md +89 -0
- biblicus-0.9.0/docs/TESTING.md +53 -0
- biblicus-0.9.0/docs/TOPIC_MODELING.md +159 -0
- biblicus-0.9.0/docs/USER_CONFIGURATION.md +49 -0
- biblicus-0.9.0/docs/api.rst +51 -0
- biblicus-0.9.0/docs/backends/index.md +242 -0
- biblicus-0.9.0/docs/backends/scan.md +327 -0
- biblicus-0.9.0/docs/backends/sqlite-full-text-search.md +487 -0
- biblicus-0.9.0/docs/conf.py +55 -0
- biblicus-0.9.0/docs/extractors/index.md +135 -0
- biblicus-0.9.0/docs/extractors/ocr/index.md +141 -0
- biblicus-0.9.0/docs/extractors/ocr/paddleocr-vl.md +456 -0
- biblicus-0.9.0/docs/extractors/ocr/rapidocr.md +359 -0
- biblicus-0.9.0/docs/extractors/pipeline-utilities/index.md +234 -0
- biblicus-0.9.0/docs/extractors/pipeline-utilities/pipeline.md +542 -0
- biblicus-0.9.0/docs/extractors/pipeline-utilities/select-longest.md +404 -0
- biblicus-0.9.0/docs/extractors/pipeline-utilities/select-override.md +402 -0
- biblicus-0.9.0/docs/extractors/pipeline-utilities/select-smart-override.md +472 -0
- biblicus-0.9.0/docs/extractors/pipeline-utilities/select-text.md +339 -0
- biblicus-0.9.0/docs/extractors/speech-to-text/deepgram.md +482 -0
- biblicus-0.9.0/docs/extractors/speech-to-text/index.md +158 -0
- biblicus-0.9.0/docs/extractors/speech-to-text/openai.md +449 -0
- biblicus-0.9.0/docs/extractors/text-document/index.md +107 -0
- biblicus-0.9.0/docs/extractors/text-document/markitdown.md +394 -0
- biblicus-0.9.0/docs/extractors/text-document/metadata.md +335 -0
- biblicus-0.9.0/docs/extractors/text-document/pass-through.md +253 -0
- biblicus-0.9.0/docs/extractors/text-document/pdf.md +339 -0
- biblicus-0.9.0/docs/extractors/text-document/unstructured.md +405 -0
- biblicus-0.9.0/docs/extractors/vlm-document/docling-granite.md +311 -0
- biblicus-0.9.0/docs/extractors/vlm-document/docling-smol.md +269 -0
- biblicus-0.9.0/docs/extractors/vlm-document/index.md +229 -0
- biblicus-0.9.0/docs/index.rst +28 -0
- biblicus-0.9.0/features/analysis_schema.feature +58 -0
- biblicus-0.9.0/features/backend_validation.feature +14 -0
- biblicus-0.9.0/features/biblicus_corpus.feature +99 -0
- biblicus-0.9.0/features/cli_entrypoint.feature +6 -0
- biblicus-0.9.0/features/cli_parsing.feature +26 -0
- biblicus-0.9.0/features/cli_step_spec_parsing.feature +41 -0
- biblicus-0.9.0/features/content_sniffing.feature +111 -0
- biblicus-0.9.0/features/context_pack.feature +42 -0
- biblicus-0.9.0/features/context_pack_cli.feature +29 -0
- biblicus-0.9.0/features/corpus_edge_cases.feature +133 -0
- biblicus-0.9.0/features/corpus_identity.feature +14 -0
- biblicus-0.9.0/features/corpus_purge.feature +31 -0
- biblicus-0.9.0/features/crawl.feature +81 -0
- biblicus-0.9.0/features/docling_granite_extractor.feature +202 -0
- biblicus-0.9.0/features/docling_smol_extractor.feature +202 -0
- biblicus-0.9.0/features/environment.py +387 -0
- biblicus-0.9.0/features/error_cases.feature +170 -0
- biblicus-0.9.0/features/evaluation.feature +80 -0
- biblicus-0.9.0/features/evidence_processing.feature +25 -0
- biblicus-0.9.0/features/extraction_error_handling.feature +32 -0
- biblicus-0.9.0/features/extraction_run_lifecycle.feature +117 -0
- biblicus-0.9.0/features/extraction_selection.feature +72 -0
- biblicus-0.9.0/features/extraction_selection_longest.feature +66 -0
- biblicus-0.9.0/features/extractor_pipeline.feature +105 -0
- biblicus-0.9.0/features/extractor_validation.feature +7 -0
- biblicus-0.9.0/features/frontmatter.feature +16 -0
- biblicus-0.9.0/features/hook_config_validation.feature +28 -0
- biblicus-0.9.0/features/hook_error_handling.feature +15 -0
- biblicus-0.9.0/features/import_tree.feature +54 -0
- biblicus-0.9.0/features/inference_backend.feature +117 -0
- biblicus-0.9.0/features/ingest_sources.feature +38 -0
- biblicus-0.9.0/features/integration_audio_samples.feature +13 -0
- biblicus-0.9.0/features/integration_image_samples.feature +11 -0
- biblicus-0.9.0/features/integration_mixed_corpus.feature +15 -0
- biblicus-0.9.0/features/integration_mixed_extraction.feature +15 -0
- biblicus-0.9.0/features/integration_ocr_image_extraction.feature +11 -0
- biblicus-0.9.0/features/integration_pdf_retrieval.feature +20 -0
- biblicus-0.9.0/features/integration_pdf_samples.feature +8 -0
- biblicus-0.9.0/features/integration_unstructured_extraction.feature +11 -0
- biblicus-0.9.0/features/integration_wikipedia.feature +7 -0
- biblicus-0.9.0/features/knowledge_base.feature +55 -0
- biblicus-0.9.0/features/lifecycle_hooks.feature +96 -0
- biblicus-0.9.0/features/markitdown_extractor.feature +99 -0
- biblicus-0.9.0/features/model_validation.feature +6 -0
- biblicus-0.9.0/features/ocr_extractor.feature +61 -0
- biblicus-0.9.0/features/paddleocr_vl_extractor.feature +299 -0
- biblicus-0.9.0/features/paddleocr_vl_parse_api_response.feature +18 -0
- biblicus-0.9.0/features/pdf_text_extraction.feature +41 -0
- biblicus-0.9.0/features/python_api.feature +74 -0
- biblicus-0.9.0/features/python_hook_logging.feature +10 -0
- biblicus-0.9.0/features/query_processing.feature +27 -0
- biblicus-0.9.0/features/recipe_file_extraction.feature +35 -0
- biblicus-0.9.0/features/retrieval_budget.feature +7 -0
- biblicus-0.9.0/features/retrieval_scan.feature +77 -0
- biblicus-0.9.0/features/retrieval_sqlite_full_text_search.feature +59 -0
- biblicus-0.9.0/features/retrieval_uses_extraction_run.feature +110 -0
- biblicus-0.9.0/features/retrieval_utilities.feature +43 -0
- biblicus-0.9.0/features/select_override.feature +126 -0
- biblicus-0.9.0/features/smart_override_selection.feature +406 -0
- biblicus-0.9.0/features/source_loading.feature +9 -0
- biblicus-0.9.0/features/steps/analysis_steps.py +249 -0
- biblicus-0.9.0/features/steps/backend_steps.py +126 -0
- biblicus-0.9.0/features/steps/cli_parsing_steps.py +76 -0
- biblicus-0.9.0/features/steps/cli_steps.py +1025 -0
- biblicus-0.9.0/features/steps/context_pack_steps.py +115 -0
- biblicus-0.9.0/features/steps/crawl_steps.py +68 -0
- biblicus-0.9.0/features/steps/deepgram_steps.py +222 -0
- biblicus-0.9.0/features/steps/docling_steps.py +360 -0
- biblicus-0.9.0/features/steps/evidence_processing_steps.py +47 -0
- biblicus-0.9.0/features/steps/extraction_run_lifecycle_steps.py +148 -0
- biblicus-0.9.0/features/steps/extraction_steps.py +640 -0
- biblicus-0.9.0/features/steps/extractor_steps.py +97 -0
- biblicus-0.9.0/features/steps/frontmatter_steps.py +53 -0
- biblicus-0.9.0/features/steps/inference_steps.py +63 -0
- biblicus-0.9.0/features/steps/knowledge_base_steps.py +90 -0
- biblicus-0.9.0/features/steps/markitdown_steps.py +173 -0
- biblicus-0.9.0/features/steps/model_steps.py +34 -0
- biblicus-0.9.0/features/steps/openai_steps.py +312 -0
- biblicus-0.9.0/features/steps/paddleocr_mock_steps.py +48 -0
- biblicus-0.9.0/features/steps/paddleocr_vl_steps.py +196 -0
- biblicus-0.9.0/features/steps/paddleocr_vl_unit_steps.py +108 -0
- biblicus-0.9.0/features/steps/pdf_steps.py +115 -0
- biblicus-0.9.0/features/steps/python_api_steps.py +416 -0
- biblicus-0.9.0/features/steps/rapidocr_steps.py +145 -0
- biblicus-0.9.0/features/steps/requests_mock_steps.py +158 -0
- biblicus-0.9.0/features/steps/retrieval_steps.py +563 -0
- biblicus-0.9.0/features/steps/stt_deepgram_steps.py +93 -0
- biblicus-0.9.0/features/steps/stt_steps.py +93 -0
- biblicus-0.9.0/features/steps/topic_modeling_steps.py +318 -0
- biblicus-0.9.0/features/steps/unstructured_steps.py +143 -0
- biblicus-0.9.0/features/steps/user_config_steps.py +183 -0
- biblicus-0.9.0/features/streaming_ingest.feature +11 -0
- biblicus-0.9.0/features/stt_deepgram_extractor.feature +142 -0
- biblicus-0.9.0/features/stt_extractor.feature +139 -0
- biblicus-0.9.0/features/text_extraction_runs.feature +85 -0
- biblicus-0.9.0/features/token_budget.feature +37 -0
- biblicus-0.9.0/features/topic_modeling.feature +1078 -0
- biblicus-0.9.0/features/unstructured_extractor.feature +62 -0
- biblicus-0.9.0/features/user_config.feature +85 -0
- biblicus-0.9.0/pyproject.toml +131 -0
- biblicus-0.9.0/scripts/download_ag_news.py +151 -0
- biblicus-0.9.0/scripts/download_audio_samples.py +200 -0
- biblicus-0.9.0/scripts/download_image_samples.py +180 -0
- biblicus-0.9.0/scripts/download_mixed_samples.py +239 -0
- biblicus-0.9.0/scripts/download_pdf_samples.py +136 -0
- biblicus-0.9.0/scripts/download_wikipedia.py +155 -0
- biblicus-0.9.0/scripts/readme_end_to_end_demo.py +81 -0
- biblicus-0.9.0/scripts/test.py +123 -0
- biblicus-0.9.0/scripts/topic_modeling_integration.py +314 -0
- biblicus-0.9.0/scripts/wikipedia_rag_demo.py +212 -0
- biblicus-0.9.0/setup.cfg +4 -0
- biblicus-0.9.0/src/biblicus/__init__.py +30 -0
- biblicus-0.9.0/src/biblicus/__main__.py +8 -0
- biblicus-0.9.0/src/biblicus/_vendor/dotyaml/__init__.py +14 -0
- biblicus-0.9.0/src/biblicus/_vendor/dotyaml/interpolation.py +63 -0
- biblicus-0.9.0/src/biblicus/_vendor/dotyaml/loader.py +181 -0
- biblicus-0.9.0/src/biblicus/_vendor/dotyaml/transformer.py +135 -0
- biblicus-0.9.0/src/biblicus/analysis/__init__.py +40 -0
- biblicus-0.9.0/src/biblicus/analysis/base.py +49 -0
- biblicus-0.9.0/src/biblicus/analysis/llm.py +106 -0
- biblicus-0.9.0/src/biblicus/analysis/models.py +554 -0
- biblicus-0.9.0/src/biblicus/analysis/schema.py +18 -0
- biblicus-0.9.0/src/biblicus/analysis/topic_modeling.py +585 -0
- biblicus-0.9.0/src/biblicus/backends/__init__.py +42 -0
- biblicus-0.9.0/src/biblicus/backends/base.py +65 -0
- biblicus-0.9.0/src/biblicus/backends/scan.py +375 -0
- biblicus-0.9.0/src/biblicus/backends/sqlite_full_text_search.py +487 -0
- biblicus-0.9.0/src/biblicus/cli.py +953 -0
- biblicus-0.9.0/src/biblicus/constants.py +14 -0
- biblicus-0.9.0/src/biblicus/context.py +183 -0
- biblicus-0.9.0/src/biblicus/corpus.py +1573 -0
- biblicus-0.9.0/src/biblicus/crawl.py +186 -0
- biblicus-0.9.0/src/biblicus/errors.py +15 -0
- biblicus-0.9.0/src/biblicus/evaluation.py +257 -0
- biblicus-0.9.0/src/biblicus/evidence_processing.py +201 -0
- biblicus-0.9.0/src/biblicus/extraction.py +536 -0
- biblicus-0.9.0/src/biblicus/extractors/__init__.py +58 -0
- biblicus-0.9.0/src/biblicus/extractors/base.py +68 -0
- biblicus-0.9.0/src/biblicus/extractors/deepgram_stt.py +166 -0
- biblicus-0.9.0/src/biblicus/extractors/docling_granite_text.py +188 -0
- biblicus-0.9.0/src/biblicus/extractors/docling_smol_text.py +188 -0
- biblicus-0.9.0/src/biblicus/extractors/markitdown_text.py +128 -0
- biblicus-0.9.0/src/biblicus/extractors/metadata_text.py +106 -0
- biblicus-0.9.0/src/biblicus/extractors/openai_stt.py +180 -0
- biblicus-0.9.0/src/biblicus/extractors/paddleocr_vl_text.py +305 -0
- biblicus-0.9.0/src/biblicus/extractors/pass_through_text.py +84 -0
- biblicus-0.9.0/src/biblicus/extractors/pdf_text.py +100 -0
- biblicus-0.9.0/src/biblicus/extractors/pipeline.py +105 -0
- biblicus-0.9.0/src/biblicus/extractors/rapidocr_text.py +136 -0
- biblicus-0.9.0/src/biblicus/extractors/select_longest_text.py +105 -0
- biblicus-0.9.0/src/biblicus/extractors/select_override.py +121 -0
- biblicus-0.9.0/src/biblicus/extractors/select_smart_override.py +187 -0
- biblicus-0.9.0/src/biblicus/extractors/select_text.py +100 -0
- biblicus-0.9.0/src/biblicus/extractors/unstructured_text.py +100 -0
- biblicus-0.9.0/src/biblicus/frontmatter.py +89 -0
- biblicus-0.9.0/src/biblicus/hook_logging.py +180 -0
- biblicus-0.9.0/src/biblicus/hook_manager.py +203 -0
- biblicus-0.9.0/src/biblicus/hooks.py +261 -0
- biblicus-0.9.0/src/biblicus/ignore.py +64 -0
- biblicus-0.9.0/src/biblicus/inference.py +104 -0
- biblicus-0.9.0/src/biblicus/knowledge_base.py +191 -0
- biblicus-0.9.0/src/biblicus/models.py +451 -0
- biblicus-0.9.0/src/biblicus/retrieval.py +133 -0
- biblicus-0.9.0/src/biblicus/sources.py +212 -0
- biblicus-0.9.0/src/biblicus/time.py +17 -0
- biblicus-0.9.0/src/biblicus/uris.py +63 -0
- biblicus-0.9.0/src/biblicus/user_config.py +214 -0
- biblicus-0.9.0/src/biblicus.egg-info/PKG-INFO +654 -0
- biblicus-0.9.0/src/biblicus.egg-info/SOURCES.txt +220 -0
- biblicus-0.9.0/src/biblicus.egg-info/dependency_links.txt +1 -0
- biblicus-0.9.0/src/biblicus.egg-info/entry_points.txt +2 -0
- biblicus-0.9.0/src/biblicus.egg-info/requires.txt +49 -0
- biblicus-0.9.0/src/biblicus.egg-info/top_level.txt +1 -0
biblicus-0.9.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Biblicus Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
biblicus-0.9.0/MANIFEST.in
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include LICENSE
|
|
3
|
+
include THIRD_PARTY_NOTICES.md
|
|
4
|
+
include .biblicus/config.example.yml
|
|
5
|
+
include pyproject.toml
|
|
6
|
+
|
|
7
|
+
recursive-include src *.py
|
|
8
|
+
recursive-include docs *.rst *.md *.py
|
|
9
|
+
recursive-include features *.feature *.py
|
|
10
|
+
recursive-include scripts *.py
|
|
11
|
+
recursive-include datasets *.json
|
|
12
|
+
|
|
13
|
+
prune corpora
|
|
14
|
+
prune reports
|
|
15
|
+
prune docs/_build
|
|
16
|
+
|
|
17
|
+
global-exclude *.pyc
|
|
18
|
+
global-exclude *.pyo
|
|
19
|
+
global-exclude __pycache__/*
|
|
20
|
+
global-exclude .DS_Store
|
|
21
|
+
global-exclude .coverage
|