biblicus 1.0.0__tar.gz → 1.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (442)
  1. {biblicus-1.0.0/src/biblicus.egg-info → biblicus-1.1.1}/PKG-INFO +52 -43
  2. {biblicus-1.0.0 → biblicus-1.1.1}/README.md +51 -42
  3. {biblicus-1.0.0 → biblicus-1.1.1}/docs/CHUNKING.md +1 -1
  4. {biblicus-1.0.0 → biblicus-1.1.1}/docs/CORPUS.md +2 -2
  5. {biblicus-1.0.0 → biblicus-1.1.1}/docs/PROFILING.md +17 -17
  6. biblicus-1.0.0/docs/ANALYSIS.md → biblicus-1.1.1/docs/analysis.md +28 -28
  7. biblicus-1.1.1/docs/architecture.md +107 -0
  8. {biblicus-1.0.0 → biblicus-1.1.1}/docs/backends/embedding-index-file.md +2 -2
  9. {biblicus-1.0.0 → biblicus-1.1.1}/docs/backends/embedding-index-inmemory.md +2 -2
  10. {biblicus-1.0.0 → biblicus-1.1.1}/docs/backends/index.md +20 -20
  11. {biblicus-1.0.0 → biblicus-1.1.1}/docs/backends/scan.md +21 -21
  12. {biblicus-1.0.0 → biblicus-1.1.1}/docs/backends/sqlite-full-text-search.md +22 -22
  13. {biblicus-1.0.0 → biblicus-1.1.1}/docs/backends/tf-vector.md +5 -5
  14. biblicus-1.0.0/docs/BACKENDS.md → biblicus-1.1.1/docs/backends.md +7 -7
  15. {biblicus-1.0.0 → biblicus-1.1.1}/docs/conf.py +3 -1
  16. biblicus-1.0.0/docs/CONTEXT_ENGINE.md → biblicus-1.1.1/docs/context-engine.md +28 -3
  17. biblicus-1.0.0/docs/CONTEXT_PACK.md → biblicus-1.1.1/docs/context-pack.md +1 -1
  18. biblicus-1.0.0/docs/CORPUS_DESIGN.md → biblicus-1.1.1/docs/corpus-design.md +13 -13
  19. biblicus-1.0.0/docs/DEMOS.md → biblicus-1.1.1/docs/demos.md +44 -131
  20. biblicus-1.1.1/docs/embedding-retrieval.md +68 -0
  21. biblicus-1.0.0/docs/EXTRACTION_EVALUATION.md → biblicus-1.1.1/docs/extraction-evaluation.md +13 -13
  22. biblicus-1.0.0/docs/EXTRACTION.md → biblicus-1.1.1/docs/extraction.md +19 -19
  23. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/index.md +1 -1
  24. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/ocr/paddleocr-vl.md +4 -4
  25. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/ocr/rapidocr.md +3 -3
  26. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/pipeline-utilities/pipeline.md +8 -8
  27. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/pipeline-utilities/select-longest.md +4 -4
  28. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/pipeline-utilities/select-override.md +4 -4
  29. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/pipeline-utilities/select-smart-override.md +4 -4
  30. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/pipeline-utilities/select-text.md +4 -4
  31. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/speech-to-text/deepgram.md +4 -4
  32. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/speech-to-text/openai.md +4 -4
  33. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/text-document/markitdown.md +3 -3
  34. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/text-document/metadata.md +4 -4
  35. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/text-document/pass-through.md +5 -5
  36. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/text-document/pdf.md +3 -3
  37. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/text-document/unstructured.md +3 -3
  38. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/vlm-document/docling-granite.md +4 -4
  39. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/vlm-document/docling-smol.md +4 -4
  40. biblicus-1.0.0/docs/FEATURE_INDEX.md → biblicus-1.1.1/docs/feature-index.md +29 -29
  41. {biblicus-1.0.0 → biblicus-1.1.1}/docs/index.rst +39 -42
  42. biblicus-1.0.0/docs/KNOWLEDGE_BASE.md → biblicus-1.1.1/docs/knowledge-base.md +5 -5
  43. biblicus-1.0.0/docs/MARKOV_ANALYSIS.md → biblicus-1.1.1/docs/markov-analysis.md +27 -22
  44. biblicus-1.0.0/docs/RETRIEVAL_EVALUATION.md → biblicus-1.1.1/docs/retrieval-evaluation.md +15 -15
  45. biblicus-1.0.0/docs/RETRIEVAL_QUALITY.md → biblicus-1.1.1/docs/retrieval-quality.md +5 -5
  46. biblicus-1.0.0/docs/RETRIEVAL.md → biblicus-1.1.1/docs/retrieval.md +12 -12
  47. biblicus-1.0.0/docs/ROADMAP.md → biblicus-1.1.1/docs/roadmap.md +3 -3
  48. biblicus-1.0.0/docs/TEXT_ANNOTATE.md → biblicus-1.1.1/docs/text-annotate.md +39 -9
  49. biblicus-1.0.0/docs/TEXT_EXTRACT.md → biblicus-1.1.1/docs/text-extract.md +105 -55
  50. biblicus-1.0.0/docs/TEXT_LINK.md → biblicus-1.1.1/docs/text-link.md +18 -8
  51. biblicus-1.0.0/docs/TEXT_REDACT.md → biblicus-1.1.1/docs/text-redact.md +28 -13
  52. biblicus-1.0.0/docs/TEXT_SLICE.md → biblicus-1.1.1/docs/text-slice.md +44 -24
  53. biblicus-1.1.1/docs/text-utilities.md +414 -0
  54. biblicus-1.0.0/docs/TOPIC_MODELING.md → biblicus-1.1.1/docs/topic-modeling.md +13 -13
  55. {biblicus-1.0.0 → biblicus-1.1.1}/docs/use_cases/sequence_markov.md +7 -7
  56. {biblicus-1.0.0 → biblicus-1.1.1}/docs/use_cases/text_folder_search.md +1 -1
  57. biblicus-1.0.0/docs/UTILITIES.md → biblicus-1.1.1/docs/utilities.md +3 -3
  58. {biblicus-1.0.0 → biblicus-1.1.1}/features/89_context_engine_internal_branches.feature +21 -0
  59. {biblicus-1.0.0 → biblicus-1.1.1}/features/90_embedding_index_evidence_fallback.feature +2 -2
  60. {biblicus-1.0.0 → biblicus-1.1.1}/features/analysis_schema.feature +6 -6
  61. biblicus-1.1.1/features/backend_validation.feature +14 -0
  62. {biblicus-1.0.0 → biblicus-1.1.1}/features/cli_entrypoint.feature +1 -1
  63. {biblicus-1.0.0 → biblicus-1.1.1}/features/cli_step_spec_parsing.feature +5 -5
  64. biblicus-1.1.1/features/context_engine_retrieval_internal_branches.feature +6 -0
  65. {biblicus-1.0.0 → biblicus-1.1.1}/features/context_engine_retrieve_context_pack.feature +10 -10
  66. {biblicus-1.0.0 → biblicus-1.1.1}/features/context_pack_cli.feature +5 -5
  67. {biblicus-1.0.0 → biblicus-1.1.1}/features/corpus_edge_cases.feature +3 -3
  68. {biblicus-1.0.0 → biblicus-1.1.1}/features/corpus_purge.feature +4 -4
  69. {biblicus-1.0.0 → biblicus-1.1.1}/features/docling_granite_extractor.feature +36 -36
  70. {biblicus-1.0.0 → biblicus-1.1.1}/features/docling_smol_extractor.feature +36 -36
  71. {biblicus-1.0.0 → biblicus-1.1.1}/features/embedding_retrieval.feature +47 -47
  72. {biblicus-1.0.0 → biblicus-1.1.1}/features/error_cases.feature +36 -36
  73. {biblicus-1.0.0 → biblicus-1.1.1}/features/evaluation.feature +13 -13
  74. {biblicus-1.0.0 → biblicus-1.1.1}/features/extraction_error_handling.feature +10 -10
  75. {biblicus-1.0.0 → biblicus-1.1.1}/features/extraction_evaluation.feature +28 -28
  76. {biblicus-1.0.0 → biblicus-1.1.1}/features/extraction_evaluation_lab.feature +1 -1
  77. biblicus-1.1.1/features/extraction_run_lifecycle.feature +117 -0
  78. {biblicus-1.0.0 → biblicus-1.1.1}/features/extraction_selection.feature +8 -8
  79. {biblicus-1.0.0 → biblicus-1.1.1}/features/extraction_selection_longest.feature +7 -7
  80. {biblicus-1.0.0 → biblicus-1.1.1}/features/extractor_pipeline.feature +15 -15
  81. {biblicus-1.0.0 → biblicus-1.1.1}/features/import_tree.feature +3 -3
  82. {biblicus-1.0.0 → biblicus-1.1.1}/features/inference_backend.feature +12 -12
  83. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_audio_samples.feature +2 -2
  84. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_mixed_extraction.feature +6 -6
  85. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_ocr_image_extraction.feature +4 -4
  86. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_pdf_retrieval.feature +3 -3
  87. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_text_annotate.feature +2 -2
  88. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_text_extract.feature +2 -2
  89. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_unstructured_extraction.feature +2 -2
  90. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_use_cases.feature +1 -1
  91. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_use_cases_sequence_markov.feature +2 -2
  92. {biblicus-1.0.0 → biblicus-1.1.1}/features/markitdown_extractor.feature +24 -24
  93. {biblicus-1.0.0 → biblicus-1.1.1}/features/markov_analysis.feature +4 -4
  94. {biblicus-1.0.0 → biblicus-1.1.1}/features/markov_analysis_categorical.feature +3 -3
  95. {biblicus-1.0.0 → biblicus-1.1.1}/features/markov_analysis_llm.feature +4 -4
  96. {biblicus-1.0.0 → biblicus-1.1.1}/features/markov_analysis_topic_modeling.feature +4 -4
  97. {biblicus-1.0.0 → biblicus-1.1.1}/features/markov_analysis_variants.feature +70 -70
  98. {biblicus-1.0.0 → biblicus-1.1.1}/features/markov_internal_branches.feature +8 -8
  99. {biblicus-1.0.0 → biblicus-1.1.1}/features/markov_schema.feature +39 -39
  100. {biblicus-1.0.0 → biblicus-1.1.1}/features/ocr_extractor.feature +9 -9
  101. {biblicus-1.0.0 → biblicus-1.1.1}/features/paddleocr_vl_extractor.feature +32 -32
  102. {biblicus-1.0.0 → biblicus-1.1.1}/features/pdf_text_extraction.feature +13 -13
  103. {biblicus-1.0.0 → biblicus-1.1.1}/features/profiling.feature +35 -35
  104. {biblicus-1.0.0 → biblicus-1.1.1}/features/profiling_config_overrides.feature +4 -4
  105. {biblicus-1.0.0 → biblicus-1.1.1}/features/query_processing.feature +2 -2
  106. {biblicus-1.0.0 → biblicus-1.1.1}/features/recipe_cascading.feature +12 -12
  107. biblicus-1.1.1/features/recipe_file_extraction.feature +35 -0
  108. {biblicus-1.0.0 → biblicus-1.1.1}/features/recipe_utilities.feature +2 -2
  109. biblicus-1.1.1/features/retrieval_build_recipes.feature +19 -0
  110. {biblicus-1.0.0 → biblicus-1.1.1}/features/retrieval_evaluation_lab.feature +1 -1
  111. {biblicus-1.0.0 → biblicus-1.1.1}/features/retrieval_quality.feature +37 -37
  112. {biblicus-1.0.0 → biblicus-1.1.1}/features/retrieval_scan.feature +14 -14
  113. {biblicus-1.0.0 → biblicus-1.1.1}/features/retrieval_sqlite_full_text_search.feature +12 -12
  114. {biblicus-1.0.0 → biblicus-1.1.1}/features/retrieval_uses_extraction_run.feature +28 -28
  115. {biblicus-1.0.0 → biblicus-1.1.1}/features/retrieval_utilities.feature +5 -5
  116. {biblicus-1.0.0 → biblicus-1.1.1}/features/select_override.feature +10 -10
  117. {biblicus-1.0.0 → biblicus-1.1.1}/features/smart_override_selection.feature +27 -27
  118. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/analysis_steps.py +28 -25
  119. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/backend_steps.py +47 -40
  120. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/cli_steps.py +11 -11
  121. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_engine_full_paths_steps.py +8 -8
  122. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_engine_internal_steps.py +200 -1
  123. biblicus-1.1.1/features/steps/context_engine_retrieval_internal_steps.py +114 -0
  124. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_engine_retrieve_context_pack_steps.py +24 -22
  125. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_pack_steps.py +20 -20
  126. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/docling_steps.py +6 -6
  127. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/embedding_index_evidence_steps.py +25 -24
  128. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/embedding_index_internal_steps.py +1 -1
  129. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/embedding_retrieval_coverage_steps.py +42 -32
  130. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/extraction_evaluation_lab_steps.py +1 -1
  131. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/extraction_evaluation_steps.py +7 -7
  132. biblicus-1.1.1/features/steps/extraction_run_lifecycle_steps.py +156 -0
  133. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/extraction_steps.py +241 -193
  134. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/extractor_steps.py +2 -2
  135. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/markov_embeddings_error_steps.py +3 -3
  136. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/markov_internal_steps.py +49 -49
  137. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/markov_schema_steps.py +143 -111
  138. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/markov_steps.py +69 -64
  139. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/model_steps.py +2 -2
  140. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/paddleocr_vl_steps.py +5 -0
  141. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/profiling_steps.py +82 -37
  142. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/recipe_steps.py +5 -1
  143. biblicus-1.1.1/features/steps/retrieval_build_recipe_steps.py +66 -0
  144. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/retrieval_evaluation_lab_steps.py +3 -1
  145. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/retrieval_quality_steps.py +28 -23
  146. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/retrieval_steps.py +104 -76
  147. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/text_annotate_steps.py +4 -2
  148. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/text_extract_steps.py +24 -12
  149. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/text_link_steps.py +4 -2
  150. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/text_redact_steps.py +4 -2
  151. biblicus-1.1.1/features/steps/text_tool_loop_steps.py +138 -0
  152. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/tf_vector_internal_steps.py +1 -1
  153. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/topic_modeling_steps.py +46 -34
  154. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/use_cases_steps.py +3 -3
  155. {biblicus-1.0.0 → biblicus-1.1.1}/features/stt_deepgram_extractor.feature +13 -13
  156. {biblicus-1.0.0 → biblicus-1.1.1}/features/stt_extractor.feature +14 -14
  157. {biblicus-1.0.0 → biblicus-1.1.1}/features/text_extraction_runs.feature +29 -29
  158. {biblicus-1.0.0 → biblicus-1.1.1}/features/text_utilities.feature +26 -0
  159. {biblicus-1.0.0 → biblicus-1.1.1}/features/topic_modeling.feature +117 -117
  160. {biblicus-1.0.0 → biblicus-1.1.1}/features/unstructured_extractor.feature +15 -15
  161. {biblicus-1.0.0 → biblicus-1.1.1}/features/use_cases.feature +3 -3
  162. {biblicus-1.0.0 → biblicus-1.1.1}/features/user_config.feature +2 -2
  163. {biblicus-1.0.0 → biblicus-1.1.1}/pyproject.toml +1 -1
  164. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/extraction_evaluation_demo.py +12 -12
  165. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/extraction_evaluation_lab.py +12 -12
  166. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/markov_analysis_demo.py +77 -71
  167. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/markov_cached_segments_demo.py +88 -76
  168. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/markov_run_report.py +8 -8
  169. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/profiling_demo.py +22 -22
  170. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/readme_end_to_end_demo.py +11 -7
  171. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/retrieval_evaluation_lab.py +20 -20
  172. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/topic_modeling_integration.py +28 -28
  173. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/use_cases/notes_to_context_pack_demo.py +10 -6
  174. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/use_cases/sequence_markov_demo.py +37 -31
  175. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/use_cases/text_folder_search_demo.py +14 -14
  176. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/wikipedia_rag_demo.py +13 -13
  177. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/__init__.py +5 -5
  178. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/analysis/__init__.py +1 -1
  179. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/analysis/base.py +10 -10
  180. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/analysis/markov.py +78 -68
  181. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/analysis/models.py +47 -47
  182. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/analysis/profiling.py +58 -48
  183. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/analysis/topic_modeling.py +56 -51
  184. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/cli.py +224 -177
  185. biblicus-1.0.0/src/biblicus/recipes.py → biblicus-1.1.1/src/biblicus/configuration.py +14 -14
  186. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/constants.py +2 -2
  187. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/context_engine/assembler.py +49 -19
  188. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/context_engine/retrieval.py +46 -42
  189. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/corpus.py +116 -108
  190. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/errors.py +3 -3
  191. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/evaluation.py +27 -25
  192. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extraction.py +103 -98
  193. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extraction_evaluation.py +26 -26
  194. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/deepgram_stt.py +7 -7
  195. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/docling_granite_text.py +11 -11
  196. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/docling_smol_text.py +11 -11
  197. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/markitdown_text.py +4 -4
  198. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/openai_stt.py +7 -7
  199. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/paddleocr_vl_text.py +20 -18
  200. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/pipeline.py +8 -8
  201. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/rapidocr_text.py +3 -3
  202. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/unstructured_text.py +3 -3
  203. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/hooks.py +4 -4
  204. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/knowledge_base.py +33 -31
  205. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/models.py +78 -78
  206. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/retrieval.py +47 -40
  207. biblicus-1.1.1/src/biblicus/retrievers/__init__.py +50 -0
  208. biblicus-1.1.1/src/biblicus/retrievers/base.py +65 -0
  209. {biblicus-1.0.0/src/biblicus/backends → biblicus-1.1.1/src/biblicus/retrievers}/embedding_index_common.py +44 -41
  210. {biblicus-1.0.0/src/biblicus/backends → biblicus-1.1.1/src/biblicus/retrievers}/embedding_index_file.py +87 -58
  211. {biblicus-1.0.0/src/biblicus/backends → biblicus-1.1.1/src/biblicus/retrievers}/embedding_index_inmemory.py +88 -59
  212. biblicus-1.1.1/src/biblicus/retrievers/hybrid.py +301 -0
  213. {biblicus-1.0.0/src/biblicus/backends → biblicus-1.1.1/src/biblicus/retrievers}/scan.py +83 -73
  214. {biblicus-1.0.0/src/biblicus/backends → biblicus-1.1.1/src/biblicus/retrievers}/sqlite_full_text_search.py +115 -101
  215. {biblicus-1.0.0/src/biblicus/backends → biblicus-1.1.1/src/biblicus/retrievers}/tf_vector.py +87 -77
  216. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/text/prompts.py +16 -8
  217. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/text/tool_loop.py +63 -5
  218. {biblicus-1.0.0 → biblicus-1.1.1/src/biblicus.egg-info}/PKG-INFO +52 -43
  219. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus.egg-info/SOURCES.txt +42 -43
  220. biblicus-1.1.1/tests/test_text_extract_tool_calls.py +110 -0
  221. biblicus-1.1.1/tests/test_text_utility_tool_calls.py +314 -0
  222. biblicus-1.1.1/tests/test_tool_loop_safeguards.py +171 -0
  223. biblicus-1.0.0/docs/ARCHITECTURE.md +0 -46
  224. biblicus-1.0.0/docs/ARCHITECTURE_DETAIL.md +0 -267
  225. biblicus-1.0.0/docs/EMBEDDING_RETRIEVAL.md +0 -57
  226. biblicus-1.0.0/docs/PR_FAQ_CONTEXT_ENGINE.md +0 -43
  227. biblicus-1.0.0/docs/PR_FAQ_EMBEDDING_RETRIEVAL.md +0 -105
  228. biblicus-1.0.0/docs/PR_FAQ_TEXT_ANNOTATE.md +0 -118
  229. biblicus-1.0.0/docs/TEXT_UTILITIES.md +0 -137
  230. biblicus-1.0.0/features/backend_validation.feature +0 -14
  231. biblicus-1.0.0/features/context_engine_retrieval_internal_branches.feature +0 -6
  232. biblicus-1.0.0/features/extraction_run_lifecycle.feature +0 -117
  233. biblicus-1.0.0/features/recipe_file_extraction.feature +0 -35
  234. biblicus-1.0.0/features/retrieval_build_recipes.feature +0 -19
  235. biblicus-1.0.0/features/steps/context_engine_retrieval_internal_steps.py +0 -113
  236. biblicus-1.0.0/features/steps/extraction_run_lifecycle_steps.py +0 -152
  237. biblicus-1.0.0/features/steps/retrieval_build_recipe_steps.py +0 -64
  238. biblicus-1.0.0/features/steps/text_tool_loop_steps.py +0 -36
  239. biblicus-1.0.0/src/biblicus/backends/__init__.py +0 -50
  240. biblicus-1.0.0/src/biblicus/backends/base.py +0 -65
  241. biblicus-1.0.0/src/biblicus/backends/hybrid.py +0 -292
  242. {biblicus-1.0.0 → biblicus-1.1.1}/LICENSE +0 -0
  243. {biblicus-1.0.0 → biblicus-1.1.1}/MANIFEST.in +0 -0
  244. {biblicus-1.0.0 → biblicus-1.1.1}/THIRD_PARTY_NOTICES.md +0 -0
  245. {biblicus-1.0.0 → biblicus-1.1.1}/datasets/extraction_lab/labels.json +0 -0
  246. {biblicus-1.0.0 → biblicus-1.1.1}/datasets/retrieval_lab/labels.json +0 -0
  247. {biblicus-1.0.0 → biblicus-1.1.1}/datasets/wikipedia_mini.json +0 -0
  248. {biblicus-1.0.0 → biblicus-1.1.1}/docs/STT.md +0 -0
  249. {biblicus-1.0.0 → biblicus-1.1.1}/docs/TESTING.md +0 -0
  250. {biblicus-1.0.0 → biblicus-1.1.1}/docs/api.rst +0 -0
  251. biblicus-1.0.0/docs/CONTEXT_ENGINE_DEMO.md → biblicus-1.1.1/docs/context-engine-demo.md +0 -0
  252. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/ocr/index.md +0 -0
  253. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/pipeline-utilities/index.md +0 -0
  254. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/speech-to-text/index.md +0 -0
  255. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/text-document/index.md +0 -0
  256. {biblicus-1.0.0 → biblicus-1.1.1}/docs/extractors/vlm-document/index.md +0 -0
  257. biblicus-1.0.0/docs/USE_CASES.md → biblicus-1.1.1/docs/use-cases.md +0 -0
  258. {biblicus-1.0.0 → biblicus-1.1.1}/docs/use_cases/notes_to_context_pack.md +0 -0
  259. {biblicus-1.0.0 → biblicus-1.1.1}/docs/use_cases/text_redact.md +0 -0
  260. biblicus-1.0.0/docs/USER_CONFIGURATION.md → biblicus-1.1.1/docs/user-configuration.md +0 -0
  261. {biblicus-1.0.0 → biblicus-1.1.1}/features/70_context_retriever.feature +0 -0
  262. {biblicus-1.0.0 → biblicus-1.1.1}/features/71_context_compaction.feature +0 -0
  263. {biblicus-1.0.0 → biblicus-1.1.1}/features/72_context_history_compaction.feature +0 -0
  264. {biblicus-1.0.0 → biblicus-1.1.1}/features/73_context_nested_compaction.feature +0 -0
  265. {biblicus-1.0.0 → biblicus-1.1.1}/features/74_context_regeneration.feature +0 -0
  266. {biblicus-1.0.0 → biblicus-1.1.1}/features/75_context_default_regeneration.feature +0 -0
  267. {biblicus-1.0.0 → biblicus-1.1.1}/features/76_context_pack_budget_weights.feature +0 -0
  268. {biblicus-1.0.0 → biblicus-1.1.1}/features/77_context_default_pack_priority.feature +0 -0
  269. {biblicus-1.0.0 → biblicus-1.1.1}/features/78_context_default_pack_weights.feature +0 -0
  270. {biblicus-1.0.0 → biblicus-1.1.1}/features/79_context_nested_context_packs.feature +0 -0
  271. {biblicus-1.0.0 → biblicus-1.1.1}/features/80_context_nested_pack_budget_cap.feature +0 -0
  272. {biblicus-1.0.0 → biblicus-1.1.1}/features/81_context_nested_regeneration.feature +0 -0
  273. {biblicus-1.0.0 → biblicus-1.1.1}/features/82_context_explicit_regeneration.feature +0 -0
  274. {biblicus-1.0.0 → biblicus-1.1.1}/features/83_context_explicit_pack_priority.feature +0 -0
  275. {biblicus-1.0.0 → biblicus-1.1.1}/features/84_context_explicit_pack_weights.feature +0 -0
  276. {biblicus-1.0.0 → biblicus-1.1.1}/features/85_context_expansion.feature +0 -0
  277. {biblicus-1.0.0 → biblicus-1.1.1}/features/86_context_engine_errors.feature +0 -0
  278. {biblicus-1.0.0 → biblicus-1.1.1}/features/87_context_compactor_strategies.feature +0 -0
  279. {biblicus-1.0.0 → biblicus-1.1.1}/features/88_context_engine_model_validation.feature +0 -0
  280. {biblicus-1.0.0 → biblicus-1.1.1}/features/91_tf_vector_internal_branches.feature +0 -0
  281. {biblicus-1.0.0 → biblicus-1.1.1}/features/93_context_engine_full_paths.feature +0 -0
  282. {biblicus-1.0.0 → biblicus-1.1.1}/features/ai_llm.feature +0 -0
  283. {biblicus-1.0.0 → biblicus-1.1.1}/features/ai_models.feature +0 -0
  284. {biblicus-1.0.0 → biblicus-1.1.1}/features/biblicus_corpus.feature +0 -0
  285. {biblicus-1.0.0 → biblicus-1.1.1}/features/cli_parsing.feature +0 -0
  286. {biblicus-1.0.0 → biblicus-1.1.1}/features/content_sniffing.feature +0 -0
  287. {biblicus-1.0.0 → biblicus-1.1.1}/features/context_pack.feature +0 -0
  288. {biblicus-1.0.0 → biblicus-1.1.1}/features/context_pack_policies.feature +0 -0
  289. {biblicus-1.0.0 → biblicus-1.1.1}/features/corpus_identity.feature +0 -0
  290. {biblicus-1.0.0 → biblicus-1.1.1}/features/corpus_internal_branches.feature +0 -0
  291. {biblicus-1.0.0 → biblicus-1.1.1}/features/crawl.feature +0 -0
  292. {biblicus-1.0.0 → biblicus-1.1.1}/features/embedding_index_internal_branches.feature +0 -0
  293. {biblicus-1.0.0 → biblicus-1.1.1}/features/embeddings.feature +0 -0
  294. {biblicus-1.0.0 → biblicus-1.1.1}/features/environment.py +0 -0
  295. {biblicus-1.0.0 → biblicus-1.1.1}/features/evidence_processing.feature +0 -0
  296. {biblicus-1.0.0 → biblicus-1.1.1}/features/extractor_validation.feature +0 -0
  297. {biblicus-1.0.0 → biblicus-1.1.1}/features/frontmatter.feature +0 -0
  298. {biblicus-1.0.0 → biblicus-1.1.1}/features/hook_config_validation.feature +0 -0
  299. {biblicus-1.0.0 → biblicus-1.1.1}/features/hook_error_handling.feature +0 -0
  300. {biblicus-1.0.0 → biblicus-1.1.1}/features/hook_logging_internal_branches.feature +0 -0
  301. {biblicus-1.0.0 → biblicus-1.1.1}/features/ingest_namespacing.feature +0 -0
  302. {biblicus-1.0.0 → biblicus-1.1.1}/features/ingest_sources.feature +0 -0
  303. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_image_samples.feature +0 -0
  304. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_mixed_corpus.feature +0 -0
  305. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_pdf_samples.feature +0 -0
  306. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_text_link.feature +0 -0
  307. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_text_redact.feature +0 -0
  308. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_text_slice.feature +0 -0
  309. {biblicus-1.0.0 → biblicus-1.1.1}/features/integration_wikipedia.feature +0 -0
  310. {biblicus-1.0.0 → biblicus-1.1.1}/features/knowledge_base.feature +0 -0
  311. {biblicus-1.0.0 → biblicus-1.1.1}/features/lifecycle_hooks.feature +0 -0
  312. {biblicus-1.0.0 → biblicus-1.1.1}/features/markov_embeddings_errors.feature +0 -0
  313. {biblicus-1.0.0 → biblicus-1.1.1}/features/markov_start_end_labels.feature +0 -0
  314. {biblicus-1.0.0 → biblicus-1.1.1}/features/model_validation.feature +0 -0
  315. {biblicus-1.0.0 → biblicus-1.1.1}/features/paddleocr_vl_parse_api_response.feature +0 -0
  316. {biblicus-1.0.0 → biblicus-1.1.1}/features/python_api.feature +0 -0
  317. {biblicus-1.0.0 → biblicus-1.1.1}/features/python_hook_logging.feature +0 -0
  318. {biblicus-1.0.0 → biblicus-1.1.1}/features/retrieval_budget.feature +0 -0
  319. {biblicus-1.0.0 → biblicus-1.1.1}/features/select_override_defaults.feature +0 -0
  320. {biblicus-1.0.0 → biblicus-1.1.1}/features/source_helper_internal_branches.feature +0 -0
  321. {biblicus-1.0.0 → biblicus-1.1.1}/features/source_loading.feature +0 -0
  322. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/ai_llm_steps.py +0 -0
  323. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/ai_models_steps.py +0 -0
  324. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/cli_parsing_steps.py +0 -0
  325. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_compaction_steps.py +0 -0
  326. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_compactor_steps.py +0 -0
  327. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_default_pack_priority_steps.py +0 -0
  328. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_default_pack_weights_steps.py +0 -0
  329. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_default_regeneration_steps.py +0 -0
  330. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_engine_error_steps.py +0 -0
  331. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_engine_model_steps.py +0 -0
  332. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_engine_registry.py +0 -0
  333. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_engine_retriever.py +0 -0
  334. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_expansion_steps.py +0 -0
  335. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_explicit_pack_priority_steps.py +0 -0
  336. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_explicit_pack_weights_steps.py +0 -0
  337. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_explicit_regeneration_steps.py +0 -0
  338. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_history_compaction_steps.py +0 -0
  339. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_nested_compaction_steps.py +0 -0
  340. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_nested_context_packs_steps.py +0 -0
  341. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_nested_pack_budget_cap_steps.py +0 -0
  342. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_nested_regeneration_steps.py +0 -0
  343. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_pack_budget_steps.py +0 -0
  344. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_regeneration_steps.py +0 -0
  345. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/context_retriever_steps.py +0 -0
  346. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/corpus_internal_steps.py +0 -0
  347. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/crawl_steps.py +0 -0
  348. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/deepgram_steps.py +0 -0
  349. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/embeddings_steps.py +0 -0
  350. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/evidence_processing_steps.py +0 -0
  351. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/frontmatter_steps.py +0 -0
  352. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/hook_logging_steps.py +0 -0
  353. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/inference_steps.py +0 -0
  354. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/knowledge_base_steps.py +0 -0
  355. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/markitdown_steps.py +0 -0
  356. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/markov_start_end_steps.py +0 -0
  357. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/openai_steps.py +0 -0
  358. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/paddleocr_mock_steps.py +0 -0
  359. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/paddleocr_vl_unit_steps.py +0 -0
  360. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/pdf_steps.py +0 -0
  361. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/python_api_steps.py +0 -0
  362. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/rapidocr_steps.py +0 -0
  363. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/requests_mock_steps.py +0 -0
  364. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/select_override_defaults_steps.py +0 -0
  365. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/source_helper_steps.py +0 -0
  366. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/stt_deepgram_steps.py +0 -0
  367. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/stt_steps.py +0 -0
  368. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/text_internal_steps.py +0 -0
  369. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/text_link_internal_steps.py +0 -0
  370. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/text_mock_steps.py +0 -0
  371. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/text_slice_steps.py +0 -0
  372. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/unstructured_steps.py +0 -0
  373. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/user_config_steps.py +0 -0
  374. {biblicus-1.0.0 → biblicus-1.1.1}/features/steps/wikitext_steps.py +0 -0
  375. {biblicus-1.0.0 → biblicus-1.1.1}/features/streaming_ingest.feature +0 -0
  376. {biblicus-1.0.0 → biblicus-1.1.1}/features/text_annotate.feature +0 -0
  377. {biblicus-1.0.0 → biblicus-1.1.1}/features/text_extract.feature +0 -0
  378. {biblicus-1.0.0 → biblicus-1.1.1}/features/text_internal_branches.feature +0 -0
  379. {biblicus-1.0.0 → biblicus-1.1.1}/features/text_link.feature +0 -0
  380. {biblicus-1.0.0 → biblicus-1.1.1}/features/text_link_internal_branches.feature +0 -0
  381. {biblicus-1.0.0 → biblicus-1.1.1}/features/text_mock.feature +0 -0
  382. {biblicus-1.0.0 → biblicus-1.1.1}/features/text_redact.feature +0 -0
  383. {biblicus-1.0.0 → biblicus-1.1.1}/features/text_slice.feature +0 -0
  384. {biblicus-1.0.0 → biblicus-1.1.1}/features/token_budget.feature +0 -0
  385. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/demo_context_engine.py +0 -0
  386. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/download_ag_news.py +0 -0
  387. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/download_audio_samples.py +0 -0
  388. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/download_image_samples.py +0 -0
  389. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/download_mixed_samples.py +0 -0
  390. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/download_pdf_samples.py +0 -0
  391. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/download_wikipedia.py +0 -0
  392. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/test.py +0 -0
  393. {biblicus-1.0.0 → biblicus-1.1.1}/scripts/use_cases/text_redact_demo.py +0 -0
  394. {biblicus-1.0.0 → biblicus-1.1.1}/setup.cfg +0 -0
  395. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/__main__.py +0 -0
  396. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/_vendor/dotyaml/__init__.py +0 -0
  397. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/_vendor/dotyaml/interpolation.py +0 -0
  398. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/_vendor/dotyaml/loader.py +0 -0
  399. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/_vendor/dotyaml/transformer.py +0 -0
  400. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/ai/__init__.py +0 -0
  401. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/ai/embeddings.py +0 -0
  402. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/ai/llm.py +0 -0
  403. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/ai/models.py +0 -0
  404. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/analysis/schema.py +0 -0
  405. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/chunking.py +0 -0
  406. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/context.py +0 -0
  407. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/context_engine/__init__.py +0 -0
  408. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/context_engine/compaction.py +0 -0
  409. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/context_engine/models.py +0 -0
  410. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/crawl.py +0 -0
  411. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/embedding_providers.py +0 -0
  412. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/evidence_processing.py +0 -0
  413. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/__init__.py +0 -0
  414. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/base.py +0 -0
  415. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/metadata_text.py +0 -0
  416. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/pass_through_text.py +0 -0
  417. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/pdf_text.py +0 -0
  418. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/select_longest_text.py +0 -0
  419. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/select_override.py +0 -0
  420. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/select_smart_override.py +0 -0
  421. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/extractors/select_text.py +0 -0
  422. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/frontmatter.py +0 -0
  423. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/hook_logging.py +0 -0
  424. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/hook_manager.py +0 -0
  425. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/ignore.py +0 -0
  426. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/inference.py +0 -0
  427. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/sources.py +0 -0
  428. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/text/__init__.py +0 -0
  429. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/text/annotate.py +0 -0
  430. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/text/extract.py +0 -0
  431. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/text/link.py +0 -0
  432. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/text/markup.py +0 -0
  433. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/text/models.py +0 -0
  434. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/text/redact.py +0 -0
  435. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/text/slice.py +0 -0
  436. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/time.py +0 -0
  437. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/uris.py +0 -0
  438. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus/user_config.py +0 -0
  439. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus.egg-info/dependency_links.txt +0 -0
  440. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus.egg-info/entry_points.txt +0 -0
  441. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus.egg-info/requires.txt +0 -0
  442. {biblicus-1.0.0 → biblicus-1.1.1}/src/biblicus.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: biblicus
3
- Version: 1.0.0
3
+ Version: 1.1.1
4
4
  Summary: Command line interface and Python library for corpus ingestion, retrieval, and evaluation.
5
5
  License: MIT
6
6
  Requires-Python: >=3.9
@@ -80,10 +80,10 @@ See [retrieval augmented generation overview] for a short introduction to the id
80
80
  ## Analysis highlights
81
81
 
82
82
  - `biblicus analyze markov` learns a directed, weighted state transition graph over segmented text.
83
- - YAML recipes support cascading composition plus dotted `--config key=value` overrides.
83
+ - YAML configurations support cascading composition plus dotted `--config key=value` overrides.
84
84
  - Text extract splits long texts with an LLM by inserting XML tags in-place for structured spans.
85
- - See `docs/MARKOV_ANALYSIS.md` for Markov analysis details and runnable demos.
86
- - See `docs/TEXT_EXTRACT.md` for the text extract utility and examples.
85
+ - See `docs/markov-analysis.md` for Markov analysis details and runnable demos.
86
+ - See `docs/text-extract.md` for the text extract utility and examples.
87
87
 
88
88
  ## Start with a knowledge base
89
89
 
@@ -167,7 +167,7 @@ sequenceDiagram
167
167
 
168
168
  - You can ingest raw material once, then try many retrieval approaches over time.
169
169
  - You can keep raw files readable and portable, without locking your data inside a database.
170
- - You can evaluate retrieval runs against shared datasets and compare backends using the same corpus.
170
+ - You can evaluate retrieval snapshots against shared datasets and compare backends using the same corpus.
171
171
 
172
172
  ## Typical flow
173
173
 
@@ -176,7 +176,7 @@ sequenceDiagram
176
176
  - Crawl a website section into corpus items when you want a repeatable “import from the web” workflow.
177
177
  - Run extraction when you want derived text artifacts from non-text sources.
178
178
  - Reindex to refresh the catalog after edits.
179
- - Build a retrieval run with a backend.
179
+ - Build a retrieval snapshot with a backend.
180
180
  - Query the snapshot to collect evidence and evaluate it with datasets.
181
181
 
182
182
  ## Install
@@ -292,7 +292,7 @@ for note_title, note_text in notes:
292
292
  corpus.ingest_note(note_text, title=note_title, tags=["memory"])
293
293
 
294
294
  backend = get_backend("scan")
295
- run = backend.build_run(corpus, recipe_name="Story demo", config={})
295
+ run = backend.build_run(corpus, configuration_name="Story demo", config={})
296
296
  budget = QueryBudget(max_total_items=5, maximum_total_characters=2000, max_items_per_source=None)
297
297
  result = backend.query(
298
298
  corpus,
@@ -336,8 +336,8 @@ Example output:
336
336
  "maximum_total_characters": 2000,
337
337
  "max_items_per_source": null
338
338
  },
339
- "run_id": "RUN_ID",
340
- "recipe_id": "RECIPE_ID",
339
+ "snapshot_id": "RUN_ID",
340
+ "configuration_id": "RECIPE_ID",
341
341
  "backend_id": "scan",
342
342
  "generated_at": "2026-01-29T00:00:00.000000Z",
343
343
  "evidence": [
@@ -352,8 +352,8 @@ Example output:
352
352
  "span_start": null,
353
353
  "span_end": null,
354
354
  "stage": "scan",
355
- "recipe_id": "RECIPE_ID",
356
- "run_id": "RUN_ID",
355
+ "configuration_id": "RECIPE_ID",
356
+ "snapshot_id": "RUN_ID",
357
357
  "hash": null
358
358
  }
359
359
  ],
@@ -422,7 +422,7 @@ flowchart TB
422
422
 
423
423
  subgraph RowExtraction[Pluggable: extraction pipeline]
424
424
  direction TB
425
- Catalog --> Extract[Extract pipeline] --> ExtractedText[Extracted text artifacts] --> ExtractionRun[Extraction run manifest]
425
+ Catalog --> Extract[Extract pipeline] --> ExtractedText[Extracted text artifacts] --> ExtractionRun[Extraction snapshot manifest]
426
426
  end
427
427
 
428
428
  subgraph RowRetrieval[Pluggable: retrieval backend]
@@ -484,7 +484,7 @@ From Python, the same flow is available through the Corpus class and backend int
484
484
  - Ingest notes with `Corpus.ingest_note`.
485
485
  - Ingest files or web addresses with `Corpus.ingest_source`.
486
486
  - List items with `Corpus.list_items`.
487
- - Build a retrieval run with `get_backend` and `backend.build_run`.
487
+ - Build a retrieval snapshot with `get_backend` and `backend.build_run`.
488
488
  - Query a snapshot with `backend.query`.
489
489
  - Evaluate with `evaluate_run`.
490
490
 
@@ -530,13 +530,13 @@ corpus/
530
530
  runs/
531
531
  extraction/
532
532
  pipeline/
533
- <run id>/
533
+ <snapshot id>/
534
534
  manifest.json
535
535
  text/
536
536
  <item id>.txt
537
537
  retrieval/
538
538
  <backend id>/
539
- <run id>/
539
+ <snapshot id>/
540
540
  manifest.json
541
541
  ```
542
542
 
@@ -552,9 +552,9 @@ For detailed documentation including configuration options, performance characte
552
552
 
553
553
  ## Retrieval documentation
554
554
 
555
- For the retrieval pipeline overview and run artifacts, see `docs/RETRIEVAL.md`. For retrieval quality upgrades
556
- (tuned lexical baseline, reranking, hybrid retrieval), see `docs/RETRIEVAL_QUALITY.md`. For evaluation workflows
557
- and dataset formats, see `docs/RETRIEVAL_EVALUATION.md`. For a runnable walkthrough, use the retrieval evaluation lab
555
+ For the retrieval pipeline overview and snapshot artifacts, see `docs/retrieval.md`. For retrieval quality upgrades
556
+ (tuned lexical baseline, reranking, hybrid retrieval), see `docs/retrieval-quality.md`. For evaluation workflows
557
+ and dataset formats, see `docs/retrieval-evaluation.md`. For a runnable walkthrough, use the retrieval evaluation lab
558
558
  script (`scripts/retrieval_evaluation_lab.py`).
559
559
 
560
560
  ## Extraction backends
@@ -594,7 +594,7 @@ These extractors are built in. Optional ones require extra dependencies. See [te
594
594
  For detailed documentation on all extractors, see the [Extractor Reference][extractor-reference].
595
595
 
596
596
  For extraction evaluation workflows, dataset formats, and report interpretation, see
597
- `docs/EXTRACTION_EVALUATION.md`.
597
+ `docs/extraction-evaluation.md`.
598
598
 
599
599
  ## Text extract utility
600
600
 
@@ -602,39 +602,39 @@ Text extract is a reusable analysis utility that lets a model insert XML tags in
602
602
  entire document. It returns structured spans and the marked-up text, and it is used as a segmentation option in Markov
603
603
  analysis.
604
604
 
605
- See `docs/TEXT_EXTRACT.md` for the utility API and examples, and `docs/MARKOV_ANALYSIS.md` for the Markov integration.
605
+ See `docs/text-extract.md` for the utility API and examples, and `docs/markov-analysis.md` for the Markov integration.
606
606
 
607
607
  ## Text slice utility
608
608
 
609
609
  Text slice is a reusable analysis utility that lets a model insert `<slice/>` markers into a long text without
610
610
  re-emitting the entire document. It returns ordered slices and the marked-up text for auditing and reuse.
611
611
 
612
- See `docs/TEXT_SLICE.md` for the utility API and examples.
612
+ See `docs/text-slice.md` for the utility API and examples.
613
613
 
614
614
  ## Topic modeling analysis
615
615
 
616
616
  Biblicus can run analysis pipelines on extracted text without changing the raw corpus. Profiling and topic modeling
617
617
  are the first analysis backends. Profiling summarizes corpus composition and extraction coverage. Topic modeling reads
618
- an extraction run, optionally applies an LLM-driven extraction pass, applies lexical processing, runs BERTopic, and
618
+ an extraction snapshot, optionally applies an LLM-driven extraction pass, applies lexical processing, runs BERTopic, and
619
619
  optionally applies an LLM fine-tuning pass to label topics. The output is structured JavaScript Object Notation.
620
620
 
621
- See `docs/ANALYSIS.md` for the analysis pipeline overview, `docs/PROFILING.md` for profiling, and
622
- `docs/TOPIC_MODELING.md` for topic modeling details.
621
+ See `docs/analysis.md` for the analysis pipeline overview, `docs/profiling.md` for profiling, and
622
+ `docs/topic-modeling.md` for topic modeling details.
623
623
 
624
- Run a topic analysis using a recipe file:
624
+ Run a topic analysis using a configuration file:
625
625
 
626
626
  ```
627
- biblicus analyze topics --corpus corpora/example --recipe recipes/topic-modeling.yml --extraction-run pipeline:<run_id>
627
+ biblicus analyze topics --corpus corpora/example --configuration configurations/topic-modeling.yml --extraction-run pipeline:<snapshot_id>
628
628
  ```
629
629
 
630
- If `--extraction-run` is omitted, Biblicus uses the most recent extraction run and emits a warning about
630
+ If `--extraction-run` is omitted, Biblicus uses the most recent extraction snapshot and emits a warning about
631
631
  reproducibility. The analysis output is stored under:
632
632
 
633
633
  ```
634
- .biblicus/runs/analysis/topic-modeling/<run_id>/output.json
634
+ .biblicus/runs/analysis/topic-modeling/<snapshot_id>/output.json
635
635
  ```
636
636
 
637
- Minimal recipe example:
637
+ Minimal configuration example:
638
638
 
639
639
  ```yaml
640
640
  schema_version: 1
@@ -659,7 +659,7 @@ llm_fine_tuning:
659
659
  ```
660
660
 
661
661
  LLM extraction and fine-tuning require `biblicus[openai]` and a configured OpenAI API key.
662
- Recipe files are validated strictly against the topic modeling schema, so type mismatches or unknown fields are errors.
662
+ Configuration files are validated strictly against the topic modeling schema, so type mismatches or unknown fields are errors.
663
663
  AG News integration runs require `biblicus[datasets]` in addition to `biblicus[topic-modeling]`.
664
664
 
665
665
  For a repeatable, real-world integration run that downloads AG News and executes topic modeling, use:
@@ -668,7 +668,7 @@ For a repeatable, real-world integration run that downloads AG News and executes
668
668
  python scripts/topic_modeling_integration.py --corpus corpora/ag_news_demo --force
669
669
  ```
670
670
 
671
- See `docs/TOPIC_MODELING.md` for parameter examples and per-topic output behavior.
671
+ See `docs/topic-modeling.md` for parameter examples and per-topic output behavior.
672
672
 
673
673
  ## Integration corpus and evaluation dataset
674
674
 
@@ -712,25 +712,34 @@ Build the documentation:
712
712
  python -m sphinx -b html docs docs/_build/html
713
713
  ```
714
714
 
715
+ Preview the documentation locally:
716
+
717
+ ```
718
+ cd docs/_build/html
719
+ python -m http.server
720
+ ```
721
+
722
+ Open `http://localhost:8000` in your browser.
723
+
715
724
  ## License
716
725
 
717
726
  License terms are in `LICENSE`.
718
727
 
719
728
  [retrieval augmented generation overview]: https://en.wikipedia.org/wiki/Retrieval-augmented_generation
720
- [architecture]: docs/ARCHITECTURE.md
721
- [roadmap]: docs/ROADMAP.md
722
- [feature-index]: docs/FEATURE_INDEX.md
723
- [corpus]: docs/CORPUS.md
724
- [knowledge-base]: docs/KNOWLEDGE_BASE.md
725
- [text-extraction]: docs/EXTRACTION.md
729
+ [architecture]: docs/architecture.md
730
+ [roadmap]: docs/roadmap.md
731
+ [feature-index]: docs/feature-index.md
732
+ [corpus]: docs/corpus.md
733
+ [knowledge-base]: docs/knowledge-base.md
734
+ [text-extraction]: docs/extraction.md
726
735
  [extractor-reference]: docs/extractors/index.md
727
736
  [backend-reference]: docs/backends/index.md
728
- [speech-to-text]: docs/STT.md
729
- [user-configuration]: docs/USER_CONFIGURATION.md
730
- [backends]: docs/BACKENDS.md
731
- [context-packs]: docs/CONTEXT_PACK.md
732
- [demos]: docs/DEMOS.md
733
- [testing]: docs/TESTING.md
737
+ [speech-to-text]: docs/stt.md
738
+ [user-configuration]: docs/user-configuration.md
739
+ [backends]: docs/backends.md
740
+ [context-packs]: docs/context-pack.md
741
+ [demos]: docs/demos.md
742
+ [testing]: docs/testing.md
734
743
 
735
744
  [continuous-integration-badge]: https://github.com/AnthusAI/Biblicus/actions/workflows/ci.yml/badge.svg?branch=main
736
745
  [coverage-badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/AnthusAI/Biblicus/main/coverage_badge.json
@@ -26,10 +26,10 @@ See [retrieval augmented generation overview] for a short introduction to the id
26
26
  ## Analysis highlights
27
27
 
28
28
  - `biblicus analyze markov` learns a directed, weighted state transition graph over segmented text.
29
- - YAML recipes support cascading composition plus dotted `--config key=value` overrides.
29
+ - YAML configurations support cascading composition plus dotted `--config key=value` overrides.
30
30
  - Text extract splits long texts with an LLM by inserting XML tags in-place for structured spans.
31
- - See `docs/MARKOV_ANALYSIS.md` for Markov analysis details and runnable demos.
32
- - See `docs/TEXT_EXTRACT.md` for the text extract utility and examples.
31
+ - See `docs/markov-analysis.md` for Markov analysis details and runnable demos.
32
+ - See `docs/text-extract.md` for the text extract utility and examples.
33
33
 
34
34
  ## Start with a knowledge base
35
35
 
@@ -113,7 +113,7 @@ sequenceDiagram
113
113
 
114
114
  - You can ingest raw material once, then try many retrieval approaches over time.
115
115
  - You can keep raw files readable and portable, without locking your data inside a database.
116
- - You can evaluate retrieval runs against shared datasets and compare backends using the same corpus.
116
+ - You can evaluate retrieval snapshots against shared datasets and compare backends using the same corpus.
117
117
 
118
118
  ## Typical flow
119
119
 
@@ -122,7 +122,7 @@ sequenceDiagram
122
122
  - Crawl a website section into corpus items when you want a repeatable “import from the web” workflow.
123
123
  - Run extraction when you want derived text artifacts from non-text sources.
124
124
  - Reindex to refresh the catalog after edits.
125
- - Build a retrieval run with a backend.
125
+ - Build a retrieval snapshot with a backend.
126
126
  - Query the snapshot to collect evidence and evaluate it with datasets.
127
127
 
128
128
  ## Install
@@ -238,7 +238,7 @@ for note_title, note_text in notes:
238
238
  corpus.ingest_note(note_text, title=note_title, tags=["memory"])
239
239
 
240
240
  backend = get_backend("scan")
241
- run = backend.build_run(corpus, recipe_name="Story demo", config={})
241
+ run = backend.build_run(corpus, configuration_name="Story demo", config={})
242
242
  budget = QueryBudget(max_total_items=5, maximum_total_characters=2000, max_items_per_source=None)
243
243
  result = backend.query(
244
244
  corpus,
@@ -282,8 +282,8 @@ Example output:
282
282
  "maximum_total_characters": 2000,
283
283
  "max_items_per_source": null
284
284
  },
285
- "run_id": "RUN_ID",
286
- "recipe_id": "RECIPE_ID",
285
+ "snapshot_id": "RUN_ID",
286
+ "configuration_id": "RECIPE_ID",
287
287
  "backend_id": "scan",
288
288
  "generated_at": "2026-01-29T00:00:00.000000Z",
289
289
  "evidence": [
@@ -298,8 +298,8 @@ Example output:
298
298
  "span_start": null,
299
299
  "span_end": null,
300
300
  "stage": "scan",
301
- "recipe_id": "RECIPE_ID",
302
- "run_id": "RUN_ID",
301
+ "configuration_id": "RECIPE_ID",
302
+ "snapshot_id": "RUN_ID",
303
303
  "hash": null
304
304
  }
305
305
  ],
@@ -368,7 +368,7 @@ flowchart TB
368
368
 
369
369
  subgraph RowExtraction[Pluggable: extraction pipeline]
370
370
  direction TB
371
- Catalog --> Extract[Extract pipeline] --> ExtractedText[Extracted text artifacts] --> ExtractionRun[Extraction run manifest]
371
+ Catalog --> Extract[Extract pipeline] --> ExtractedText[Extracted text artifacts] --> ExtractionRun[Extraction snapshot manifest]
372
372
  end
373
373
 
374
374
  subgraph RowRetrieval[Pluggable: retrieval backend]
@@ -430,7 +430,7 @@ From Python, the same flow is available through the Corpus class and backend int
430
430
  - Ingest notes with `Corpus.ingest_note`.
431
431
  - Ingest files or web addresses with `Corpus.ingest_source`.
432
432
  - List items with `Corpus.list_items`.
433
- - Build a retrieval run with `get_backend` and `backend.build_run`.
433
+ - Build a retrieval snapshot with `get_backend` and `backend.build_run`.
434
434
  - Query a snapshot with `backend.query`.
435
435
  - Evaluate with `evaluate_run`.
436
436
 
@@ -476,13 +476,13 @@ corpus/
476
476
  runs/
477
477
  extraction/
478
478
  pipeline/
479
- <run id>/
479
+ <snapshot id>/
480
480
  manifest.json
481
481
  text/
482
482
  <item id>.txt
483
483
  retrieval/
484
484
  <backend id>/
485
- <run id>/
485
+ <snapshot id>/
486
486
  manifest.json
487
487
  ```
488
488
 
@@ -498,9 +498,9 @@ For detailed documentation including configuration options, performance characte
498
498
 
499
499
  ## Retrieval documentation
500
500
 
501
- For the retrieval pipeline overview and run artifacts, see `docs/RETRIEVAL.md`. For retrieval quality upgrades
502
- (tuned lexical baseline, reranking, hybrid retrieval), see `docs/RETRIEVAL_QUALITY.md`. For evaluation workflows
503
- and dataset formats, see `docs/RETRIEVAL_EVALUATION.md`. For a runnable walkthrough, use the retrieval evaluation lab
501
+ For the retrieval pipeline overview and snapshot artifacts, see `docs/retrieval.md`. For retrieval quality upgrades
502
+ (tuned lexical baseline, reranking, hybrid retrieval), see `docs/retrieval-quality.md`. For evaluation workflows
503
+ and dataset formats, see `docs/retrieval-evaluation.md`. For a runnable walkthrough, use the retrieval evaluation lab
504
504
  script (`scripts/retrieval_evaluation_lab.py`).
505
505
 
506
506
  ## Extraction backends
@@ -540,7 +540,7 @@ These extractors are built in. Optional ones require extra dependencies. See [te
540
540
  For detailed documentation on all extractors, see the [Extractor Reference][extractor-reference].
541
541
 
542
542
  For extraction evaluation workflows, dataset formats, and report interpretation, see
543
- `docs/EXTRACTION_EVALUATION.md`.
543
+ `docs/extraction-evaluation.md`.
544
544
 
545
545
  ## Text extract utility
546
546
 
@@ -548,39 +548,39 @@ Text extract is a reusable analysis utility that lets a model insert XML tags in
548
548
  entire document. It returns structured spans and the marked-up text, and it is used as a segmentation option in Markov
549
549
  analysis.
550
550
 
551
- See `docs/TEXT_EXTRACT.md` for the utility API and examples, and `docs/MARKOV_ANALYSIS.md` for the Markov integration.
551
+ See `docs/text-extract.md` for the utility API and examples, and `docs/markov-analysis.md` for the Markov integration.
552
552
 
553
553
  ## Text slice utility
554
554
 
555
555
  Text slice is a reusable analysis utility that lets a model insert `<slice/>` markers into a long text without
556
556
  re-emitting the entire document. It returns ordered slices and the marked-up text for auditing and reuse.
557
557
 
558
- See `docs/TEXT_SLICE.md` for the utility API and examples.
558
+ See `docs/text-slice.md` for the utility API and examples.
559
559
 
560
560
  ## Topic modeling analysis
561
561
 
562
562
  Biblicus can run analysis pipelines on extracted text without changing the raw corpus. Profiling and topic modeling
563
563
  are the first analysis backends. Profiling summarizes corpus composition and extraction coverage. Topic modeling reads
564
- an extraction run, optionally applies an LLM-driven extraction pass, applies lexical processing, runs BERTopic, and
564
+ an extraction snapshot, optionally applies an LLM-driven extraction pass, applies lexical processing, runs BERTopic, and
565
565
  optionally applies an LLM fine-tuning pass to label topics. The output is structured JavaScript Object Notation.
566
566
 
567
- See `docs/ANALYSIS.md` for the analysis pipeline overview, `docs/PROFILING.md` for profiling, and
568
- `docs/TOPIC_MODELING.md` for topic modeling details.
567
+ See `docs/analysis.md` for the analysis pipeline overview, `docs/profiling.md` for profiling, and
568
+ `docs/topic-modeling.md` for topic modeling details.
569
569
 
570
- Run a topic analysis using a recipe file:
570
+ Run a topic analysis using a configuration file:
571
571
 
572
572
  ```
573
- biblicus analyze topics --corpus corpora/example --recipe recipes/topic-modeling.yml --extraction-run pipeline:<run_id>
573
+ biblicus analyze topics --corpus corpora/example --configuration configurations/topic-modeling.yml --extraction-run pipeline:<snapshot_id>
574
574
  ```
575
575
 
576
- If `--extraction-run` is omitted, Biblicus uses the most recent extraction run and emits a warning about
576
+ If `--extraction-run` is omitted, Biblicus uses the most recent extraction snapshot and emits a warning about
577
577
  reproducibility. The analysis output is stored under:
578
578
 
579
579
  ```
580
- .biblicus/runs/analysis/topic-modeling/<run_id>/output.json
580
+ .biblicus/runs/analysis/topic-modeling/<snapshot_id>/output.json
581
581
  ```
582
582
 
583
- Minimal recipe example:
583
+ Minimal configuration example:
584
584
 
585
585
  ```yaml
586
586
  schema_version: 1
@@ -605,7 +605,7 @@ llm_fine_tuning:
605
605
  ```
606
606
 
607
607
  LLM extraction and fine-tuning require `biblicus[openai]` and a configured OpenAI API key.
608
- Recipe files are validated strictly against the topic modeling schema, so type mismatches or unknown fields are errors.
608
+ Configuration files are validated strictly against the topic modeling schema, so type mismatches or unknown fields are errors.
609
609
  AG News integration runs require `biblicus[datasets]` in addition to `biblicus[topic-modeling]`.
610
610
 
611
611
  For a repeatable, real-world integration run that downloads AG News and executes topic modeling, use:
@@ -614,7 +614,7 @@ For a repeatable, real-world integration run that downloads AG News and executes
614
614
  python scripts/topic_modeling_integration.py --corpus corpora/ag_news_demo --force
615
615
  ```
616
616
 
617
- See `docs/TOPIC_MODELING.md` for parameter examples and per-topic output behavior.
617
+ See `docs/topic-modeling.md` for parameter examples and per-topic output behavior.
618
618
 
619
619
  ## Integration corpus and evaluation dataset
620
620
 
@@ -658,25 +658,34 @@ Build the documentation:
658
658
  python -m sphinx -b html docs docs/_build/html
659
659
  ```
660
660
 
661
+ Preview the documentation locally:
662
+
663
+ ```
664
+ cd docs/_build/html
665
+ python -m http.server
666
+ ```
667
+
668
+ Open `http://localhost:8000` in your browser.
669
+
661
670
  ## License
662
671
 
663
672
  License terms are in `LICENSE`.
664
673
 
665
674
  [retrieval augmented generation overview]: https://en.wikipedia.org/wiki/Retrieval-augmented_generation
666
- [architecture]: docs/ARCHITECTURE.md
667
- [roadmap]: docs/ROADMAP.md
668
- [feature-index]: docs/FEATURE_INDEX.md
669
- [corpus]: docs/CORPUS.md
670
- [knowledge-base]: docs/KNOWLEDGE_BASE.md
671
- [text-extraction]: docs/EXTRACTION.md
675
+ [architecture]: docs/architecture.md
676
+ [roadmap]: docs/roadmap.md
677
+ [feature-index]: docs/feature-index.md
678
+ [corpus]: docs/corpus.md
679
+ [knowledge-base]: docs/knowledge-base.md
680
+ [text-extraction]: docs/extraction.md
672
681
  [extractor-reference]: docs/extractors/index.md
673
682
  [backend-reference]: docs/backends/index.md
674
- [speech-to-text]: docs/STT.md
675
- [user-configuration]: docs/USER_CONFIGURATION.md
676
- [backends]: docs/BACKENDS.md
677
- [context-packs]: docs/CONTEXT_PACK.md
678
- [demos]: docs/DEMOS.md
679
- [testing]: docs/TESTING.md
683
+ [speech-to-text]: docs/stt.md
684
+ [user-configuration]: docs/user-configuration.md
685
+ [backends]: docs/backends.md
686
+ [context-packs]: docs/context-pack.md
687
+ [demos]: docs/demos.md
688
+ [testing]: docs/testing.md
680
689
 
681
690
  [continuous-integration-badge]: https://github.com/AnthusAI/Biblicus/actions/workflows/ci.yml/badge.svg?branch=main
682
691
  [coverage-badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/AnthusAI/Biblicus/main/coverage_badge.json
@@ -8,7 +8,7 @@ returns evidence with chunk boundaries so you can trace results back to the orig
8
8
 
9
9
  ## Chunkers are pluggable
10
10
 
11
- Chunking is a pluggable interface selected by identifier in a retrieval recipe:
11
+ Chunking is a pluggable interface selected by identifier in a retrieval configuration:
12
12
 
13
13
  - `chunker_id`
14
14
  - `chunker_config` (Pydantic validated; `extra="forbid"`)
@@ -18,7 +18,7 @@ corpus/
18
18
  config.json
19
19
  catalog.json
20
20
  runs/
21
- <run manifests and artifacts>
21
+ <snapshot manifests and artifacts>
22
22
  ```
23
23
 
24
24
  ## Core concepts
@@ -137,7 +137,7 @@ python -m biblicus reindex --corpus corpora/example
137
137
  ## Reproducibility checklist
138
138
 
139
139
  - Keep raw files and sidecars in source control or backed up as immutable inputs.
140
- - Record the catalog timestamp when comparing run outputs.
140
+ - Record the catalog timestamp when comparing snapshot outputs.
141
141
  - Prefer `import-tree` for reproducible ingest of existing folder structures.
142
142
 
143
143
  ## Common pitfalls
@@ -20,22 +20,22 @@ The output is structured JSON that can be stored, versioned, and compared across
20
20
  biblicus analyze profile --corpus corpora/example --extraction-run pipeline:RUN_ID
21
21
  ```
22
22
 
23
- If you omit `--extraction-run`, Biblicus uses the latest extraction run and emits a reproducibility warning.
23
+ If you omit `--extraction-run`, Biblicus uses the latest extraction snapshot and emits a reproducibility warning.
24
24
 
25
- To customize profiling metrics, pass a recipe file:
25
+ To customize profiling metrics, pass a configuration file:
26
26
 
27
27
  ```
28
- biblicus analyze profile --corpus corpora/example --recipe recipes/profiling.yml --extraction-run pipeline:RUN_ID
28
+ biblicus analyze profile --corpus corpora/example --configuration configurations/profiling.yml --extraction-run pipeline:RUN_ID
29
29
  ```
30
30
 
31
- Profiling recipes support cascading composition. Pass multiple `--recipe` files; later recipes override earlier recipes
31
+ Profiling configurations support cascading composition. Pass multiple `--configuration` files; later files override earlier ones
32
32
  via a deep merge:
33
33
 
34
34
  ```
35
35
  biblicus analyze profile \
36
36
  --corpus corpora/example \
37
- --recipe recipes/profiling/base.yml \
38
- --recipe recipes/profiling/strict.yml \
37
+ --configuration configurations/profiling/base.yml \
38
+ --configuration configurations/profiling/strict.yml \
39
39
  --extraction-run pipeline:RUN_ID
40
40
  ```
41
41
 
@@ -44,14 +44,14 @@ To override the composed configuration view from the command line, use `--config
44
44
  ```
45
45
  biblicus analyze profile \
46
46
  --corpus corpora/example \
47
- --recipe recipes/profiling/base.yml \
47
+ --configuration configurations/profiling/base.yml \
48
48
  --config sample_size=200 \
49
49
  --extraction-run pipeline:RUN_ID
50
50
  ```
51
51
 
52
- ### Profiling recipe configuration
52
+ ### Profiling configuration fields
53
53
 
54
- Profiling recipes use the analysis schema version and accept these fields:
54
+ Profiling configurations use the analysis schema version and accept these fields:
55
55
 
56
56
  - `schema_version`: analysis schema version, currently `1`
57
57
  - `sample_size`: optional cap for distribution calculations
@@ -60,7 +60,7 @@ Profiling recipes use the analysis schema version and accept these fields:
60
60
  - `top_tag_count`: maximum number of tags to list in `top_tags`
61
61
  - `tag_filters`: optional list of tags to include in tag coverage metrics
62
62
 
63
- Example recipe:
63
+ Example configuration:
64
64
 
65
65
  ```
66
66
  schema_version: 1
@@ -84,7 +84,7 @@ corpus = Corpus.open(Path("corpora/example"))
84
84
  backend = get_analysis_backend("profiling")
85
85
  output = backend.run_analysis(
86
86
  corpus,
87
- recipe_name="default",
87
+ configuration_name="default",
88
88
  config={
89
89
  "schema_version": 1,
90
90
  "sample_size": 500,
@@ -93,9 +93,9 @@ output = backend.run_analysis(
93
93
  "top_tag_count": 10,
94
94
  "tag_filters": ["ag_news"],
95
95
  },
96
- extraction_run=ExtractionRunReference(
96
+ extraction_snapshot=ExtractionRunReference(
97
97
  extractor_id="pipeline",
98
- run_id="RUN_ID",
98
+ snapshot_id="RUN_ID",
99
99
  ),
100
100
  )
101
101
  print(output.model_dump())
@@ -106,7 +106,7 @@ print(output.model_dump())
106
106
  Profiling output is stored under:
107
107
 
108
108
  ```
109
- .biblicus/runs/analysis/profiling/<run_id>/output.json
109
+ .biblicus/runs/analysis/profiling/<snapshot_id>/output.json
110
110
  ```
111
111
 
112
112
  ## Reading the report
@@ -138,17 +138,17 @@ through extraction and how much was missing or empty.
138
138
 
139
139
  ## Comparing profiling runs
140
140
 
141
- Use the same extraction run and recipe configuration whenever you compare profiling outputs:
141
+ Use the same extraction snapshot and profiling configuration whenever you compare profiling outputs:
142
142
 
143
143
  1) Run profiling on two corpus snapshots.
144
144
  2) Compare `raw_items.total_items`, media type counts, and tag coverage.
145
145
  3) Compare `extracted_text` coverage to spot extraction regressions.
146
146
 
147
- Record the run identifiers and catalog timestamps so you can trace differences later.
147
+ Record the snapshot identifiers and catalog timestamps so you can trace differences later.
148
148
 
149
149
  ## Common pitfalls
150
150
 
151
- - Profiling without specifying an extraction run, which makes comparisons harder to reproduce.
151
+ - Profiling without specifying an extraction snapshot, which makes comparisons harder to reproduce.
152
152
  - Comparing runs with different `sample_size` or `min_text_characters` settings.
153
153
  - Interpreting tag counts without noting the `tag_filters` applied.
154
154