biblicus 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222)
  1. biblicus-0.9.0/LICENSE +21 -0
  2. biblicus-0.9.0/MANIFEST.in +21 -0
  3. biblicus-0.9.0/PKG-INFO +654 -0
  4. biblicus-0.9.0/README.md +608 -0
  5. biblicus-0.9.0/THIRD_PARTY_NOTICES.md +36 -0
  6. biblicus-0.9.0/datasets/wikipedia_mini.json +37 -0
  7. biblicus-0.9.0/docs/ANALYSIS.md +36 -0
  8. biblicus-0.9.0/docs/ARCHITECTURE.md +180 -0
  9. biblicus-0.9.0/docs/BACKENDS.md +39 -0
  10. biblicus-0.9.0/docs/CONTEXT_PACK.md +61 -0
  11. biblicus-0.9.0/docs/CORPUS.md +116 -0
  12. biblicus-0.9.0/docs/CORPUS_DESIGN.md +404 -0
  13. biblicus-0.9.0/docs/DEMOS.md +396 -0
  14. biblicus-0.9.0/docs/EXTRACTION.md +195 -0
  15. biblicus-0.9.0/docs/FEATURE_INDEX.md +265 -0
  16. biblicus-0.9.0/docs/KNOWLEDGE_BASE.md +68 -0
  17. biblicus-0.9.0/docs/ROADMAP.md +111 -0
  18. biblicus-0.9.0/docs/STT.md +89 -0
  19. biblicus-0.9.0/docs/TESTING.md +53 -0
  20. biblicus-0.9.0/docs/TOPIC_MODELING.md +159 -0
  21. biblicus-0.9.0/docs/USER_CONFIGURATION.md +49 -0
  22. biblicus-0.9.0/docs/api.rst +51 -0
  23. biblicus-0.9.0/docs/backends/index.md +242 -0
  24. biblicus-0.9.0/docs/backends/scan.md +327 -0
  25. biblicus-0.9.0/docs/backends/sqlite-full-text-search.md +487 -0
  26. biblicus-0.9.0/docs/conf.py +55 -0
  27. biblicus-0.9.0/docs/extractors/index.md +135 -0
  28. biblicus-0.9.0/docs/extractors/ocr/index.md +141 -0
  29. biblicus-0.9.0/docs/extractors/ocr/paddleocr-vl.md +456 -0
  30. biblicus-0.9.0/docs/extractors/ocr/rapidocr.md +359 -0
  31. biblicus-0.9.0/docs/extractors/pipeline-utilities/index.md +234 -0
  32. biblicus-0.9.0/docs/extractors/pipeline-utilities/pipeline.md +542 -0
  33. biblicus-0.9.0/docs/extractors/pipeline-utilities/select-longest.md +404 -0
  34. biblicus-0.9.0/docs/extractors/pipeline-utilities/select-override.md +402 -0
  35. biblicus-0.9.0/docs/extractors/pipeline-utilities/select-smart-override.md +472 -0
  36. biblicus-0.9.0/docs/extractors/pipeline-utilities/select-text.md +339 -0
  37. biblicus-0.9.0/docs/extractors/speech-to-text/deepgram.md +482 -0
  38. biblicus-0.9.0/docs/extractors/speech-to-text/index.md +158 -0
  39. biblicus-0.9.0/docs/extractors/speech-to-text/openai.md +449 -0
  40. biblicus-0.9.0/docs/extractors/text-document/index.md +107 -0
  41. biblicus-0.9.0/docs/extractors/text-document/markitdown.md +394 -0
  42. biblicus-0.9.0/docs/extractors/text-document/metadata.md +335 -0
  43. biblicus-0.9.0/docs/extractors/text-document/pass-through.md +253 -0
  44. biblicus-0.9.0/docs/extractors/text-document/pdf.md +339 -0
  45. biblicus-0.9.0/docs/extractors/text-document/unstructured.md +405 -0
  46. biblicus-0.9.0/docs/extractors/vlm-document/docling-granite.md +311 -0
  47. biblicus-0.9.0/docs/extractors/vlm-document/docling-smol.md +269 -0
  48. biblicus-0.9.0/docs/extractors/vlm-document/index.md +229 -0
  49. biblicus-0.9.0/docs/index.rst +28 -0
  50. biblicus-0.9.0/features/analysis_schema.feature +58 -0
  51. biblicus-0.9.0/features/backend_validation.feature +14 -0
  52. biblicus-0.9.0/features/biblicus_corpus.feature +99 -0
  53. biblicus-0.9.0/features/cli_entrypoint.feature +6 -0
  54. biblicus-0.9.0/features/cli_parsing.feature +26 -0
  55. biblicus-0.9.0/features/cli_step_spec_parsing.feature +41 -0
  56. biblicus-0.9.0/features/content_sniffing.feature +111 -0
  57. biblicus-0.9.0/features/context_pack.feature +42 -0
  58. biblicus-0.9.0/features/context_pack_cli.feature +29 -0
  59. biblicus-0.9.0/features/corpus_edge_cases.feature +133 -0
  60. biblicus-0.9.0/features/corpus_identity.feature +14 -0
  61. biblicus-0.9.0/features/corpus_purge.feature +31 -0
  62. biblicus-0.9.0/features/crawl.feature +81 -0
  63. biblicus-0.9.0/features/docling_granite_extractor.feature +202 -0
  64. biblicus-0.9.0/features/docling_smol_extractor.feature +202 -0
  65. biblicus-0.9.0/features/environment.py +387 -0
  66. biblicus-0.9.0/features/error_cases.feature +170 -0
  67. biblicus-0.9.0/features/evaluation.feature +80 -0
  68. biblicus-0.9.0/features/evidence_processing.feature +25 -0
  69. biblicus-0.9.0/features/extraction_error_handling.feature +32 -0
  70. biblicus-0.9.0/features/extraction_run_lifecycle.feature +117 -0
  71. biblicus-0.9.0/features/extraction_selection.feature +72 -0
  72. biblicus-0.9.0/features/extraction_selection_longest.feature +66 -0
  73. biblicus-0.9.0/features/extractor_pipeline.feature +105 -0
  74. biblicus-0.9.0/features/extractor_validation.feature +7 -0
  75. biblicus-0.9.0/features/frontmatter.feature +16 -0
  76. biblicus-0.9.0/features/hook_config_validation.feature +28 -0
  77. biblicus-0.9.0/features/hook_error_handling.feature +15 -0
  78. biblicus-0.9.0/features/import_tree.feature +54 -0
  79. biblicus-0.9.0/features/inference_backend.feature +117 -0
  80. biblicus-0.9.0/features/ingest_sources.feature +38 -0
  81. biblicus-0.9.0/features/integration_audio_samples.feature +13 -0
  82. biblicus-0.9.0/features/integration_image_samples.feature +11 -0
  83. biblicus-0.9.0/features/integration_mixed_corpus.feature +15 -0
  84. biblicus-0.9.0/features/integration_mixed_extraction.feature +15 -0
  85. biblicus-0.9.0/features/integration_ocr_image_extraction.feature +11 -0
  86. biblicus-0.9.0/features/integration_pdf_retrieval.feature +20 -0
  87. biblicus-0.9.0/features/integration_pdf_samples.feature +8 -0
  88. biblicus-0.9.0/features/integration_unstructured_extraction.feature +11 -0
  89. biblicus-0.9.0/features/integration_wikipedia.feature +7 -0
  90. biblicus-0.9.0/features/knowledge_base.feature +55 -0
  91. biblicus-0.9.0/features/lifecycle_hooks.feature +96 -0
  92. biblicus-0.9.0/features/markitdown_extractor.feature +99 -0
  93. biblicus-0.9.0/features/model_validation.feature +6 -0
  94. biblicus-0.9.0/features/ocr_extractor.feature +61 -0
  95. biblicus-0.9.0/features/paddleocr_vl_extractor.feature +299 -0
  96. biblicus-0.9.0/features/paddleocr_vl_parse_api_response.feature +18 -0
  97. biblicus-0.9.0/features/pdf_text_extraction.feature +41 -0
  98. biblicus-0.9.0/features/python_api.feature +74 -0
  99. biblicus-0.9.0/features/python_hook_logging.feature +10 -0
  100. biblicus-0.9.0/features/query_processing.feature +27 -0
  101. biblicus-0.9.0/features/recipe_file_extraction.feature +35 -0
  102. biblicus-0.9.0/features/retrieval_budget.feature +7 -0
  103. biblicus-0.9.0/features/retrieval_scan.feature +77 -0
  104. biblicus-0.9.0/features/retrieval_sqlite_full_text_search.feature +59 -0
  105. biblicus-0.9.0/features/retrieval_uses_extraction_run.feature +110 -0
  106. biblicus-0.9.0/features/retrieval_utilities.feature +43 -0
  107. biblicus-0.9.0/features/select_override.feature +126 -0
  108. biblicus-0.9.0/features/smart_override_selection.feature +406 -0
  109. biblicus-0.9.0/features/source_loading.feature +9 -0
  110. biblicus-0.9.0/features/steps/analysis_steps.py +249 -0
  111. biblicus-0.9.0/features/steps/backend_steps.py +126 -0
  112. biblicus-0.9.0/features/steps/cli_parsing_steps.py +76 -0
  113. biblicus-0.9.0/features/steps/cli_steps.py +1025 -0
  114. biblicus-0.9.0/features/steps/context_pack_steps.py +115 -0
  115. biblicus-0.9.0/features/steps/crawl_steps.py +68 -0
  116. biblicus-0.9.0/features/steps/deepgram_steps.py +222 -0
  117. biblicus-0.9.0/features/steps/docling_steps.py +360 -0
  118. biblicus-0.9.0/features/steps/evidence_processing_steps.py +47 -0
  119. biblicus-0.9.0/features/steps/extraction_run_lifecycle_steps.py +148 -0
  120. biblicus-0.9.0/features/steps/extraction_steps.py +640 -0
  121. biblicus-0.9.0/features/steps/extractor_steps.py +97 -0
  122. biblicus-0.9.0/features/steps/frontmatter_steps.py +53 -0
  123. biblicus-0.9.0/features/steps/inference_steps.py +63 -0
  124. biblicus-0.9.0/features/steps/knowledge_base_steps.py +90 -0
  125. biblicus-0.9.0/features/steps/markitdown_steps.py +173 -0
  126. biblicus-0.9.0/features/steps/model_steps.py +34 -0
  127. biblicus-0.9.0/features/steps/openai_steps.py +312 -0
  128. biblicus-0.9.0/features/steps/paddleocr_mock_steps.py +48 -0
  129. biblicus-0.9.0/features/steps/paddleocr_vl_steps.py +196 -0
  130. biblicus-0.9.0/features/steps/paddleocr_vl_unit_steps.py +108 -0
  131. biblicus-0.9.0/features/steps/pdf_steps.py +115 -0
  132. biblicus-0.9.0/features/steps/python_api_steps.py +416 -0
  133. biblicus-0.9.0/features/steps/rapidocr_steps.py +145 -0
  134. biblicus-0.9.0/features/steps/requests_mock_steps.py +158 -0
  135. biblicus-0.9.0/features/steps/retrieval_steps.py +563 -0
  136. biblicus-0.9.0/features/steps/stt_deepgram_steps.py +93 -0
  137. biblicus-0.9.0/features/steps/stt_steps.py +93 -0
  138. biblicus-0.9.0/features/steps/topic_modeling_steps.py +318 -0
  139. biblicus-0.9.0/features/steps/unstructured_steps.py +143 -0
  140. biblicus-0.9.0/features/steps/user_config_steps.py +183 -0
  141. biblicus-0.9.0/features/streaming_ingest.feature +11 -0
  142. biblicus-0.9.0/features/stt_deepgram_extractor.feature +142 -0
  143. biblicus-0.9.0/features/stt_extractor.feature +139 -0
  144. biblicus-0.9.0/features/text_extraction_runs.feature +85 -0
  145. biblicus-0.9.0/features/token_budget.feature +37 -0
  146. biblicus-0.9.0/features/topic_modeling.feature +1078 -0
  147. biblicus-0.9.0/features/unstructured_extractor.feature +62 -0
  148. biblicus-0.9.0/features/user_config.feature +85 -0
  149. biblicus-0.9.0/pyproject.toml +131 -0
  150. biblicus-0.9.0/scripts/download_ag_news.py +151 -0
  151. biblicus-0.9.0/scripts/download_audio_samples.py +200 -0
  152. biblicus-0.9.0/scripts/download_image_samples.py +180 -0
  153. biblicus-0.9.0/scripts/download_mixed_samples.py +239 -0
  154. biblicus-0.9.0/scripts/download_pdf_samples.py +136 -0
  155. biblicus-0.9.0/scripts/download_wikipedia.py +155 -0
  156. biblicus-0.9.0/scripts/readme_end_to_end_demo.py +81 -0
  157. biblicus-0.9.0/scripts/test.py +123 -0
  158. biblicus-0.9.0/scripts/topic_modeling_integration.py +314 -0
  159. biblicus-0.9.0/scripts/wikipedia_rag_demo.py +212 -0
  160. biblicus-0.9.0/setup.cfg +4 -0
  161. biblicus-0.9.0/src/biblicus/__init__.py +30 -0
  162. biblicus-0.9.0/src/biblicus/__main__.py +8 -0
  163. biblicus-0.9.0/src/biblicus/_vendor/dotyaml/__init__.py +14 -0
  164. biblicus-0.9.0/src/biblicus/_vendor/dotyaml/interpolation.py +63 -0
  165. biblicus-0.9.0/src/biblicus/_vendor/dotyaml/loader.py +181 -0
  166. biblicus-0.9.0/src/biblicus/_vendor/dotyaml/transformer.py +135 -0
  167. biblicus-0.9.0/src/biblicus/analysis/__init__.py +40 -0
  168. biblicus-0.9.0/src/biblicus/analysis/base.py +49 -0
  169. biblicus-0.9.0/src/biblicus/analysis/llm.py +106 -0
  170. biblicus-0.9.0/src/biblicus/analysis/models.py +554 -0
  171. biblicus-0.9.0/src/biblicus/analysis/schema.py +18 -0
  172. biblicus-0.9.0/src/biblicus/analysis/topic_modeling.py +585 -0
  173. biblicus-0.9.0/src/biblicus/backends/__init__.py +42 -0
  174. biblicus-0.9.0/src/biblicus/backends/base.py +65 -0
  175. biblicus-0.9.0/src/biblicus/backends/scan.py +375 -0
  176. biblicus-0.9.0/src/biblicus/backends/sqlite_full_text_search.py +487 -0
  177. biblicus-0.9.0/src/biblicus/cli.py +953 -0
  178. biblicus-0.9.0/src/biblicus/constants.py +14 -0
  179. biblicus-0.9.0/src/biblicus/context.py +183 -0
  180. biblicus-0.9.0/src/biblicus/corpus.py +1573 -0
  181. biblicus-0.9.0/src/biblicus/crawl.py +186 -0
  182. biblicus-0.9.0/src/biblicus/errors.py +15 -0
  183. biblicus-0.9.0/src/biblicus/evaluation.py +257 -0
  184. biblicus-0.9.0/src/biblicus/evidence_processing.py +201 -0
  185. biblicus-0.9.0/src/biblicus/extraction.py +536 -0
  186. biblicus-0.9.0/src/biblicus/extractors/__init__.py +58 -0
  187. biblicus-0.9.0/src/biblicus/extractors/base.py +68 -0
  188. biblicus-0.9.0/src/biblicus/extractors/deepgram_stt.py +166 -0
  189. biblicus-0.9.0/src/biblicus/extractors/docling_granite_text.py +188 -0
  190. biblicus-0.9.0/src/biblicus/extractors/docling_smol_text.py +188 -0
  191. biblicus-0.9.0/src/biblicus/extractors/markitdown_text.py +128 -0
  192. biblicus-0.9.0/src/biblicus/extractors/metadata_text.py +106 -0
  193. biblicus-0.9.0/src/biblicus/extractors/openai_stt.py +180 -0
  194. biblicus-0.9.0/src/biblicus/extractors/paddleocr_vl_text.py +305 -0
  195. biblicus-0.9.0/src/biblicus/extractors/pass_through_text.py +84 -0
  196. biblicus-0.9.0/src/biblicus/extractors/pdf_text.py +100 -0
  197. biblicus-0.9.0/src/biblicus/extractors/pipeline.py +105 -0
  198. biblicus-0.9.0/src/biblicus/extractors/rapidocr_text.py +136 -0
  199. biblicus-0.9.0/src/biblicus/extractors/select_longest_text.py +105 -0
  200. biblicus-0.9.0/src/biblicus/extractors/select_override.py +121 -0
  201. biblicus-0.9.0/src/biblicus/extractors/select_smart_override.py +187 -0
  202. biblicus-0.9.0/src/biblicus/extractors/select_text.py +100 -0
  203. biblicus-0.9.0/src/biblicus/extractors/unstructured_text.py +100 -0
  204. biblicus-0.9.0/src/biblicus/frontmatter.py +89 -0
  205. biblicus-0.9.0/src/biblicus/hook_logging.py +180 -0
  206. biblicus-0.9.0/src/biblicus/hook_manager.py +203 -0
  207. biblicus-0.9.0/src/biblicus/hooks.py +261 -0
  208. biblicus-0.9.0/src/biblicus/ignore.py +64 -0
  209. biblicus-0.9.0/src/biblicus/inference.py +104 -0
  210. biblicus-0.9.0/src/biblicus/knowledge_base.py +191 -0
  211. biblicus-0.9.0/src/biblicus/models.py +451 -0
  212. biblicus-0.9.0/src/biblicus/retrieval.py +133 -0
  213. biblicus-0.9.0/src/biblicus/sources.py +212 -0
  214. biblicus-0.9.0/src/biblicus/time.py +17 -0
  215. biblicus-0.9.0/src/biblicus/uris.py +63 -0
  216. biblicus-0.9.0/src/biblicus/user_config.py +214 -0
  217. biblicus-0.9.0/src/biblicus.egg-info/PKG-INFO +654 -0
  218. biblicus-0.9.0/src/biblicus.egg-info/SOURCES.txt +220 -0
  219. biblicus-0.9.0/src/biblicus.egg-info/dependency_links.txt +1 -0
  220. biblicus-0.9.0/src/biblicus.egg-info/entry_points.txt +2 -0
  221. biblicus-0.9.0/src/biblicus.egg-info/requires.txt +49 -0
  222. biblicus-0.9.0/src/biblicus.egg-info/top_level.txt +1 -0
biblicus-0.9.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Biblicus Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
biblicus-0.9.0/MANIFEST.in ADDED
@@ -0,0 +1,21 @@
1
+ include README.md
2
+ include LICENSE
3
+ include THIRD_PARTY_NOTICES.md
4
+ include .biblicus/config.example.yml
5
+ include pyproject.toml
6
+
7
+ recursive-include src *.py
8
+ recursive-include docs *.rst *.md *.py
9
+ recursive-include features *.feature *.py
10
+ recursive-include scripts *.py
11
+ recursive-include datasets *.json
12
+
13
+ prune corpora
14
+ prune reports
15
+ prune docs/_build
16
+
17
+ global-exclude *.pyc
18
+ global-exclude *.pyo
19
+ global-exclude __pycache__/*
20
+ global-exclude .DS_Store
21
+ global-exclude .coverage