biblicus 0.13.0__tar.gz → 0.14.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252)
  1. {biblicus-0.13.0/src/biblicus.egg-info → biblicus-0.14.0}/PKG-INFO +3 -2
  2. {biblicus-0.13.0 → biblicus-0.14.0}/README.md +2 -1
  3. biblicus-0.14.0/datasets/retrieval_lab/labels.json +25 -0
  4. {biblicus-0.13.0 → biblicus-0.14.0}/docs/DEMOS.md +11 -0
  5. {biblicus-0.13.0 → biblicus-0.14.0}/docs/FEATURE_INDEX.md +5 -0
  6. biblicus-0.14.0/docs/RETRIEVAL.md +96 -0
  7. biblicus-0.14.0/docs/RETRIEVAL_EVALUATION.md +181 -0
  8. biblicus-0.14.0/docs/RETRIEVAL_QUALITY.md +106 -0
  9. biblicus-0.14.0/features/retrieval_evaluation_lab.feature +10 -0
  10. biblicus-0.14.0/features/steps/retrieval_evaluation_lab_steps.py +77 -0
  11. {biblicus-0.13.0 → biblicus-0.14.0}/pyproject.toml +1 -1
  12. biblicus-0.14.0/scripts/retrieval_evaluation_lab.py +284 -0
  13. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/__init__.py +1 -1
  14. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/backends/hybrid.py +6 -1
  15. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/context.py +2 -2
  16. {biblicus-0.13.0 → biblicus-0.14.0/src/biblicus.egg-info}/PKG-INFO +3 -2
  17. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus.egg-info/SOURCES.txt +4 -0
  18. biblicus-0.13.0/docs/RETRIEVAL.md +0 -47
  19. biblicus-0.13.0/docs/RETRIEVAL_EVALUATION.md +0 -74
  20. biblicus-0.13.0/docs/RETRIEVAL_QUALITY.md +0 -42
  21. {biblicus-0.13.0 → biblicus-0.14.0}/LICENSE +0 -0
  22. {biblicus-0.13.0 → biblicus-0.14.0}/MANIFEST.in +0 -0
  23. {biblicus-0.13.0 → biblicus-0.14.0}/THIRD_PARTY_NOTICES.md +0 -0
  24. {biblicus-0.13.0 → biblicus-0.14.0}/datasets/extraction_lab/labels.json +0 -0
  25. {biblicus-0.13.0 → biblicus-0.14.0}/datasets/wikipedia_mini.json +0 -0
  26. {biblicus-0.13.0 → biblicus-0.14.0}/docs/ANALYSIS.md +0 -0
  27. {biblicus-0.13.0 → biblicus-0.14.0}/docs/ARCHITECTURE.md +0 -0
  28. {biblicus-0.13.0 → biblicus-0.14.0}/docs/BACKENDS.md +0 -0
  29. {biblicus-0.13.0 → biblicus-0.14.0}/docs/CONTEXT_PACK.md +0 -0
  30. {biblicus-0.13.0 → biblicus-0.14.0}/docs/CORPUS.md +0 -0
  31. {biblicus-0.13.0 → biblicus-0.14.0}/docs/CORPUS_DESIGN.md +0 -0
  32. {biblicus-0.13.0 → biblicus-0.14.0}/docs/EXTRACTION.md +0 -0
  33. {biblicus-0.13.0 → biblicus-0.14.0}/docs/EXTRACTION_EVALUATION.md +0 -0
  34. {biblicus-0.13.0 → biblicus-0.14.0}/docs/KNOWLEDGE_BASE.md +0 -0
  35. {biblicus-0.13.0 → biblicus-0.14.0}/docs/PROFILING.md +0 -0
  36. {biblicus-0.13.0 → biblicus-0.14.0}/docs/ROADMAP.md +0 -0
  37. {biblicus-0.13.0 → biblicus-0.14.0}/docs/STT.md +0 -0
  38. {biblicus-0.13.0 → biblicus-0.14.0}/docs/TESTING.md +0 -0
  39. {biblicus-0.13.0 → biblicus-0.14.0}/docs/TOPIC_MODELING.md +0 -0
  40. {biblicus-0.13.0 → biblicus-0.14.0}/docs/USER_CONFIGURATION.md +0 -0
  41. {biblicus-0.13.0 → biblicus-0.14.0}/docs/api.rst +0 -0
  42. {biblicus-0.13.0 → biblicus-0.14.0}/docs/backends/index.md +0 -0
  43. {biblicus-0.13.0 → biblicus-0.14.0}/docs/backends/scan.md +0 -0
  44. {biblicus-0.13.0 → biblicus-0.14.0}/docs/backends/sqlite-full-text-search.md +0 -0
  45. {biblicus-0.13.0 → biblicus-0.14.0}/docs/backends/vector.md +0 -0
  46. {biblicus-0.13.0 → biblicus-0.14.0}/docs/conf.py +0 -0
  47. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/index.md +0 -0
  48. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/ocr/index.md +0 -0
  49. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/ocr/paddleocr-vl.md +0 -0
  50. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/ocr/rapidocr.md +0 -0
  51. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/pipeline-utilities/index.md +0 -0
  52. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/pipeline-utilities/pipeline.md +0 -0
  53. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/pipeline-utilities/select-longest.md +0 -0
  54. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/pipeline-utilities/select-override.md +0 -0
  55. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/pipeline-utilities/select-smart-override.md +0 -0
  56. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/pipeline-utilities/select-text.md +0 -0
  57. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/speech-to-text/deepgram.md +0 -0
  58. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/speech-to-text/index.md +0 -0
  59. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/speech-to-text/openai.md +0 -0
  60. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/text-document/index.md +0 -0
  61. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/text-document/markitdown.md +0 -0
  62. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/text-document/metadata.md +0 -0
  63. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/text-document/pass-through.md +0 -0
  64. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/text-document/pdf.md +0 -0
  65. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/text-document/unstructured.md +0 -0
  66. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/vlm-document/docling-granite.md +0 -0
  67. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/vlm-document/docling-smol.md +0 -0
  68. {biblicus-0.13.0 → biblicus-0.14.0}/docs/extractors/vlm-document/index.md +0 -0
  69. {biblicus-0.13.0 → biblicus-0.14.0}/docs/index.rst +0 -0
  70. {biblicus-0.13.0 → biblicus-0.14.0}/features/analysis_schema.feature +0 -0
  71. {biblicus-0.13.0 → biblicus-0.14.0}/features/backend_validation.feature +0 -0
  72. {biblicus-0.13.0 → biblicus-0.14.0}/features/biblicus_corpus.feature +0 -0
  73. {biblicus-0.13.0 → biblicus-0.14.0}/features/cli_entrypoint.feature +0 -0
  74. {biblicus-0.13.0 → biblicus-0.14.0}/features/cli_parsing.feature +0 -0
  75. {biblicus-0.13.0 → biblicus-0.14.0}/features/cli_step_spec_parsing.feature +0 -0
  76. {biblicus-0.13.0 → biblicus-0.14.0}/features/content_sniffing.feature +0 -0
  77. {biblicus-0.13.0 → biblicus-0.14.0}/features/context_pack.feature +0 -0
  78. {biblicus-0.13.0 → biblicus-0.14.0}/features/context_pack_cli.feature +0 -0
  79. {biblicus-0.13.0 → biblicus-0.14.0}/features/context_pack_policies.feature +0 -0
  80. {biblicus-0.13.0 → biblicus-0.14.0}/features/corpus_edge_cases.feature +0 -0
  81. {biblicus-0.13.0 → biblicus-0.14.0}/features/corpus_identity.feature +0 -0
  82. {biblicus-0.13.0 → biblicus-0.14.0}/features/corpus_purge.feature +0 -0
  83. {biblicus-0.13.0 → biblicus-0.14.0}/features/crawl.feature +0 -0
  84. {biblicus-0.13.0 → biblicus-0.14.0}/features/docling_granite_extractor.feature +0 -0
  85. {biblicus-0.13.0 → biblicus-0.14.0}/features/docling_smol_extractor.feature +0 -0
  86. {biblicus-0.13.0 → biblicus-0.14.0}/features/environment.py +0 -0
  87. {biblicus-0.13.0 → biblicus-0.14.0}/features/error_cases.feature +0 -0
  88. {biblicus-0.13.0 → biblicus-0.14.0}/features/evaluation.feature +0 -0
  89. {biblicus-0.13.0 → biblicus-0.14.0}/features/evidence_processing.feature +0 -0
  90. {biblicus-0.13.0 → biblicus-0.14.0}/features/extraction_error_handling.feature +0 -0
  91. {biblicus-0.13.0 → biblicus-0.14.0}/features/extraction_evaluation.feature +0 -0
  92. {biblicus-0.13.0 → biblicus-0.14.0}/features/extraction_evaluation_lab.feature +0 -0
  93. {biblicus-0.13.0 → biblicus-0.14.0}/features/extraction_run_lifecycle.feature +0 -0
  94. {biblicus-0.13.0 → biblicus-0.14.0}/features/extraction_selection.feature +0 -0
  95. {biblicus-0.13.0 → biblicus-0.14.0}/features/extraction_selection_longest.feature +0 -0
  96. {biblicus-0.13.0 → biblicus-0.14.0}/features/extractor_pipeline.feature +0 -0
  97. {biblicus-0.13.0 → biblicus-0.14.0}/features/extractor_validation.feature +0 -0
  98. {biblicus-0.13.0 → biblicus-0.14.0}/features/frontmatter.feature +0 -0
  99. {biblicus-0.13.0 → biblicus-0.14.0}/features/hook_config_validation.feature +0 -0
  100. {biblicus-0.13.0 → biblicus-0.14.0}/features/hook_error_handling.feature +0 -0
  101. {biblicus-0.13.0 → biblicus-0.14.0}/features/import_tree.feature +0 -0
  102. {biblicus-0.13.0 → biblicus-0.14.0}/features/inference_backend.feature +0 -0
  103. {biblicus-0.13.0 → biblicus-0.14.0}/features/ingest_sources.feature +0 -0
  104. {biblicus-0.13.0 → biblicus-0.14.0}/features/integration_audio_samples.feature +0 -0
  105. {biblicus-0.13.0 → biblicus-0.14.0}/features/integration_image_samples.feature +0 -0
  106. {biblicus-0.13.0 → biblicus-0.14.0}/features/integration_mixed_corpus.feature +0 -0
  107. {biblicus-0.13.0 → biblicus-0.14.0}/features/integration_mixed_extraction.feature +0 -0
  108. {biblicus-0.13.0 → biblicus-0.14.0}/features/integration_ocr_image_extraction.feature +0 -0
  109. {biblicus-0.13.0 → biblicus-0.14.0}/features/integration_pdf_retrieval.feature +0 -0
  110. {biblicus-0.13.0 → biblicus-0.14.0}/features/integration_pdf_samples.feature +0 -0
  111. {biblicus-0.13.0 → biblicus-0.14.0}/features/integration_unstructured_extraction.feature +0 -0
  112. {biblicus-0.13.0 → biblicus-0.14.0}/features/integration_wikipedia.feature +0 -0
  113. {biblicus-0.13.0 → biblicus-0.14.0}/features/knowledge_base.feature +0 -0
  114. {biblicus-0.13.0 → biblicus-0.14.0}/features/lifecycle_hooks.feature +0 -0
  115. {biblicus-0.13.0 → biblicus-0.14.0}/features/markitdown_extractor.feature +0 -0
  116. {biblicus-0.13.0 → biblicus-0.14.0}/features/model_validation.feature +0 -0
  117. {biblicus-0.13.0 → biblicus-0.14.0}/features/ocr_extractor.feature +0 -0
  118. {biblicus-0.13.0 → biblicus-0.14.0}/features/paddleocr_vl_extractor.feature +0 -0
  119. {biblicus-0.13.0 → biblicus-0.14.0}/features/paddleocr_vl_parse_api_response.feature +0 -0
  120. {biblicus-0.13.0 → biblicus-0.14.0}/features/pdf_text_extraction.feature +0 -0
  121. {biblicus-0.13.0 → biblicus-0.14.0}/features/profiling.feature +0 -0
  122. {biblicus-0.13.0 → biblicus-0.14.0}/features/python_api.feature +0 -0
  123. {biblicus-0.13.0 → biblicus-0.14.0}/features/python_hook_logging.feature +0 -0
  124. {biblicus-0.13.0 → biblicus-0.14.0}/features/query_processing.feature +0 -0
  125. {biblicus-0.13.0 → biblicus-0.14.0}/features/recipe_file_extraction.feature +0 -0
  126. {biblicus-0.13.0 → biblicus-0.14.0}/features/retrieval_budget.feature +0 -0
  127. {biblicus-0.13.0 → biblicus-0.14.0}/features/retrieval_quality.feature +0 -0
  128. {biblicus-0.13.0 → biblicus-0.14.0}/features/retrieval_scan.feature +0 -0
  129. {biblicus-0.13.0 → biblicus-0.14.0}/features/retrieval_sqlite_full_text_search.feature +0 -0
  130. {biblicus-0.13.0 → biblicus-0.14.0}/features/retrieval_uses_extraction_run.feature +0 -0
  131. {biblicus-0.13.0 → biblicus-0.14.0}/features/retrieval_utilities.feature +0 -0
  132. {biblicus-0.13.0 → biblicus-0.14.0}/features/select_override.feature +0 -0
  133. {biblicus-0.13.0 → biblicus-0.14.0}/features/smart_override_selection.feature +0 -0
  134. {biblicus-0.13.0 → biblicus-0.14.0}/features/source_loading.feature +0 -0
  135. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/analysis_steps.py +0 -0
  136. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/backend_steps.py +0 -0
  137. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/cli_parsing_steps.py +0 -0
  138. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/cli_steps.py +0 -0
  139. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/context_pack_steps.py +0 -0
  140. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/crawl_steps.py +0 -0
  141. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/deepgram_steps.py +0 -0
  142. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/docling_steps.py +0 -0
  143. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/evidence_processing_steps.py +0 -0
  144. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/extraction_evaluation_lab_steps.py +0 -0
  145. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/extraction_evaluation_steps.py +0 -0
  146. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/extraction_run_lifecycle_steps.py +0 -0
  147. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/extraction_steps.py +0 -0
  148. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/extractor_steps.py +0 -0
  149. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/frontmatter_steps.py +0 -0
  150. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/inference_steps.py +0 -0
  151. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/knowledge_base_steps.py +0 -0
  152. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/markitdown_steps.py +0 -0
  153. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/model_steps.py +0 -0
  154. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/openai_steps.py +0 -0
  155. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/paddleocr_mock_steps.py +0 -0
  156. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/paddleocr_vl_steps.py +0 -0
  157. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/paddleocr_vl_unit_steps.py +0 -0
  158. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/pdf_steps.py +0 -0
  159. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/profiling_steps.py +0 -0
  160. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/python_api_steps.py +0 -0
  161. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/rapidocr_steps.py +0 -0
  162. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/requests_mock_steps.py +0 -0
  163. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/retrieval_quality_steps.py +0 -0
  164. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/retrieval_steps.py +0 -0
  165. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/stt_deepgram_steps.py +0 -0
  166. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/stt_steps.py +0 -0
  167. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/topic_modeling_steps.py +0 -0
  168. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/unstructured_steps.py +0 -0
  169. {biblicus-0.13.0 → biblicus-0.14.0}/features/steps/user_config_steps.py +0 -0
  170. {biblicus-0.13.0 → biblicus-0.14.0}/features/streaming_ingest.feature +0 -0
  171. {biblicus-0.13.0 → biblicus-0.14.0}/features/stt_deepgram_extractor.feature +0 -0
  172. {biblicus-0.13.0 → biblicus-0.14.0}/features/stt_extractor.feature +0 -0
  173. {biblicus-0.13.0 → biblicus-0.14.0}/features/text_extraction_runs.feature +0 -0
  174. {biblicus-0.13.0 → biblicus-0.14.0}/features/token_budget.feature +0 -0
  175. {biblicus-0.13.0 → biblicus-0.14.0}/features/topic_modeling.feature +0 -0
  176. {biblicus-0.13.0 → biblicus-0.14.0}/features/unstructured_extractor.feature +0 -0
  177. {biblicus-0.13.0 → biblicus-0.14.0}/features/user_config.feature +0 -0
  178. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/download_ag_news.py +0 -0
  179. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/download_audio_samples.py +0 -0
  180. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/download_image_samples.py +0 -0
  181. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/download_mixed_samples.py +0 -0
  182. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/download_pdf_samples.py +0 -0
  183. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/download_wikipedia.py +0 -0
  184. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/extraction_evaluation_demo.py +0 -0
  185. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/extraction_evaluation_lab.py +0 -0
  186. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/profiling_demo.py +0 -0
  187. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/readme_end_to_end_demo.py +0 -0
  188. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/test.py +0 -0
  189. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/topic_modeling_integration.py +0 -0
  190. {biblicus-0.13.0 → biblicus-0.14.0}/scripts/wikipedia_rag_demo.py +0 -0
  191. {biblicus-0.13.0 → biblicus-0.14.0}/setup.cfg +0 -0
  192. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/__main__.py +0 -0
  193. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/_vendor/dotyaml/__init__.py +0 -0
  194. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/_vendor/dotyaml/interpolation.py +0 -0
  195. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/_vendor/dotyaml/loader.py +0 -0
  196. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/_vendor/dotyaml/transformer.py +0 -0
  197. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/analysis/__init__.py +0 -0
  198. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/analysis/base.py +0 -0
  199. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/analysis/llm.py +0 -0
  200. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/analysis/models.py +0 -0
  201. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/analysis/profiling.py +0 -0
  202. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/analysis/schema.py +0 -0
  203. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/analysis/topic_modeling.py +0 -0
  204. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/backends/__init__.py +0 -0
  205. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/backends/base.py +0 -0
  206. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/backends/scan.py +0 -0
  207. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/backends/sqlite_full_text_search.py +0 -0
  208. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/backends/vector.py +0 -0
  209. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/cli.py +0 -0
  210. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/constants.py +0 -0
  211. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/corpus.py +0 -0
  212. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/crawl.py +0 -0
  213. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/errors.py +0 -0
  214. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/evaluation.py +0 -0
  215. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/evidence_processing.py +0 -0
  216. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extraction.py +0 -0
  217. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extraction_evaluation.py +0 -0
  218. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/__init__.py +0 -0
  219. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/base.py +0 -0
  220. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/deepgram_stt.py +0 -0
  221. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/docling_granite_text.py +0 -0
  222. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/docling_smol_text.py +0 -0
  223. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/markitdown_text.py +0 -0
  224. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/metadata_text.py +0 -0
  225. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/openai_stt.py +0 -0
  226. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/paddleocr_vl_text.py +0 -0
  227. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/pass_through_text.py +0 -0
  228. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/pdf_text.py +0 -0
  229. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/pipeline.py +0 -0
  230. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/rapidocr_text.py +0 -0
  231. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/select_longest_text.py +0 -0
  232. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/select_override.py +0 -0
  233. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/select_smart_override.py +0 -0
  234. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/select_text.py +0 -0
  235. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/extractors/unstructured_text.py +0 -0
  236. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/frontmatter.py +0 -0
  237. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/hook_logging.py +0 -0
  238. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/hook_manager.py +0 -0
  239. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/hooks.py +0 -0
  240. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/ignore.py +0 -0
  241. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/inference.py +0 -0
  242. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/knowledge_base.py +0 -0
  243. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/models.py +0 -0
  244. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/retrieval.py +0 -0
  245. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/sources.py +0 -0
  246. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/time.py +0 -0
  247. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/uris.py +0 -0
  248. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus/user_config.py +0 -0
  249. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus.egg-info/dependency_links.txt +0 -0
  250. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus.egg-info/entry_points.txt +0 -0
  251. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus.egg-info/requires.txt +0 -0
  252. {biblicus-0.13.0 → biblicus-0.14.0}/src/biblicus.egg-info/top_level.txt +0 -0

{biblicus-0.13.0/src/biblicus.egg-info → biblicus-0.14.0}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: biblicus
- Version: 0.13.0
+ Version: 0.14.0
  Summary: Command line interface and Python library for corpus ingestion, retrieval, and evaluation.
  License: MIT
  Requires-Python: >=3.9

@@ -498,7 +498,8 @@ For detailed documentation including configuration options, performance characte

  For the retrieval pipeline overview and run artifacts, see `docs/RETRIEVAL.md`. For retrieval quality upgrades
  (tuned lexical baseline, reranking, hybrid retrieval), see `docs/RETRIEVAL_QUALITY.md`. For evaluation workflows
- and dataset formats, see `docs/RETRIEVAL_EVALUATION.md`.
+ and dataset formats, see `docs/RETRIEVAL_EVALUATION.md`. For a runnable walkthrough, use the retrieval evaluation lab
+ script (`scripts/retrieval_evaluation_lab.py`).

  ## Extraction backends

{biblicus-0.13.0 → biblicus-0.14.0}/README.md

@@ -452,7 +452,8 @@ For detailed documentation including configuration options, performance characte

  For the retrieval pipeline overview and run artifacts, see `docs/RETRIEVAL.md`. For retrieval quality upgrades
  (tuned lexical baseline, reranking, hybrid retrieval), see `docs/RETRIEVAL_QUALITY.md`. For evaluation workflows
- and dataset formats, see `docs/RETRIEVAL_EVALUATION.md`.
+ and dataset formats, see `docs/RETRIEVAL_EVALUATION.md`. For a runnable walkthrough, use the retrieval evaluation lab
+ script (`scripts/retrieval_evaluation_lab.py`).

  ## Extraction backends

biblicus-0.14.0/datasets/retrieval_lab/labels.json

@@ -0,0 +1,25 @@
+ {
+   "schema_version": 1,
+   "name": "retrieval-evaluation-lab",
+   "description": "Bundled labels for the retrieval evaluation lab.",
+   "queries": [
+     {
+       "query_id": "q1",
+       "query_text": "alpha unique",
+       "expected_filename": "alpha.txt",
+       "kind": "gold"
+     },
+     {
+       "query_id": "q2",
+       "query_text": "beta unique",
+       "expected_filename": "beta.txt",
+       "kind": "gold"
+     },
+     {
+       "query_id": "q3",
+       "query_text": "gamma unique",
+       "expected_filename": "gamma.txt",
+       "kind": "gold"
+     }
+   ]
+ }
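
These bundled labels are plain JSON, so they are easy to inspect outside the lab. As a minimal sketch, assuming the repository layout shown in the file list above (this is ordinary `json`/`pathlib` usage, not the package's own dataset loader):

```python
# Illustrative sketch: read the bundled labels file and sanity-check the
# fields visible in the diff above. Not the biblicus loader.
import json
from pathlib import Path

labels = json.loads(Path("datasets/retrieval_lab/labels.json").read_text())
assert labels["schema_version"] == 1
for query in labels["queries"]:
    assert query["kind"] == "gold"
    print(query["query_id"], query["query_text"], "->", query["expected_filename"])
```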

{biblicus-0.13.0 → biblicus-0.14.0}/docs/DEMOS.md

@@ -225,6 +225,17 @@ python3 scripts/extraction_evaluation_lab.py --corpus corpora/extraction_eval_la

  The lab writes a generated dataset file and evaluation output path and prints both in the command output.

+ ### Retrieval evaluation lab run
+
+ Use the retrieval evaluation lab to build a tiny corpus, run extraction, build a retrieval backend, and evaluate it
+ against bundled labels:
+
+ ```
+ python3 scripts/retrieval_evaluation_lab.py --corpus corpora/retrieval_eval_lab --force
+ ```
+
+ The script prints the dataset path, retrieval run identifier, and evaluation output location.
+
  Run with a larger corpus and a higher topic count:

  ```

{biblicus-0.13.0 → biblicus-0.14.0}/docs/FEATURE_INDEX.md

@@ -200,10 +200,15 @@ What it does:

  - Evaluates retrieval runs against datasets and budgets.

+ Documentation:
+
+ - `docs/RETRIEVAL_EVALUATION.md`
+
  Behavior specifications:

  - `features/evaluation.feature`
  - `features/model_validation.feature`
+ - `features/retrieval_evaluation_lab.feature`

  Primary implementation:

biblicus-0.14.0/docs/RETRIEVAL.md

@@ -0,0 +1,96 @@
+ # Retrieval
+
+ Biblicus treats retrieval as a reproducible, explicit pipeline stage that transforms a corpus into structured evidence.
+ Retrieval is separated from extraction and context shaping so each can be evaluated independently and swapped without
+ rewriting ingestion.
+
+ ## Retrieval concepts
+
+ - **Backend**: a pluggable retrieval implementation that can build and query runs.
+ - **Run**: a recorded retrieval build for a corpus and extraction run.
+ - **Evidence**: structured output containing identifiers, provenance, and scores.
+ - **Stage**: explicit steps such as retrieve, rerank, and filter.
+
+ ## How retrieval runs work
+
+ 1) Ingest raw items into a corpus.
+ 2) Build an extraction run to produce text artifacts.
+ 3) Build a retrieval run with a backend, referencing the extraction run.
+ 4) Query the run to return evidence.
+
+ Retrieval runs are stored under:
+
+ ```
+ .biblicus/runs/retrieval/<backend_id>/<run_id>/
+ ```
+
+ ## A minimal run you can execute
+
+ This walkthrough uses the full text search backend and produces evidence you can inspect immediately.
+
+ ```
+ rm -rf corpora/retrieval_demo
+ python3 -m biblicus init corpora/retrieval_demo
+ printf "alpha beta\n" > /tmp/retrieval-alpha.txt
+ printf "beta gamma\n" > /tmp/retrieval-beta.txt
+ python3 -m biblicus ingest --corpus corpora/retrieval_demo /tmp/retrieval-alpha.txt
+ python3 -m biblicus ingest --corpus corpora/retrieval_demo /tmp/retrieval-beta.txt
+
+ python3 -m biblicus extract build --corpus corpora/retrieval_demo --step pass-through-text
+ python3 -m biblicus build --corpus corpora/retrieval_demo --backend sqlite-full-text-search
+ python3 -m biblicus query --corpus corpora/retrieval_demo --query "beta"
+ ```
+
+ The query output is structured evidence with identifiers and scores. That evidence is the primary output for evaluation
+ and downstream context packing.
+
+ ## Backends
+
+ See `docs/backends/index.md` for backend selection and configuration.
+
+ ## Choosing a backend
+
+ Start with the simplest backend that answers your question:
+
+ - `scan` for tiny corpora or sanity checks.
+ - `sqlite-full-text-search` for a practical lexical baseline.
+ - `vector` when you want deterministic term-frequency similarity without external dependencies.
+
+ You can compare them with the same dataset and budget using the retrieval evaluation workflow.
+
+ ## Evaluation
+
+ Retrieval runs are evaluated against datasets with explicit budgets. See `docs/RETRIEVAL_EVALUATION.md` for the
+ dataset format and workflow, `docs/FEATURE_INDEX.md` for the behavior specifications, and `docs/CONTEXT_PACK.md` for
+ how evidence feeds into context packs.
+
+ ## Labs and demos
+
+ When you want a repeatable example with bundled data, use the retrieval evaluation lab:
+
+ ```
+ python3 scripts/retrieval_evaluation_lab.py --corpus corpora/retrieval_eval_lab --force
+ ```
+
+ The lab builds a tiny corpus, runs extraction, builds a retrieval run, and evaluates it. It prints the dataset path and
+ evaluation output so you can open the JavaScript Object Notation directly.
+
+ ## Reproducibility checklist
+
+ Use these habits when you want repeatable retrieval experiments:
+
+ - Record the extraction run identifier and pass it explicitly when you build a retrieval run.
+ - Keep evaluation datasets in source control and treat them as immutable inputs.
+ - Capture the full retrieval run identifier when you compare outputs across backends.
+
+ ## Why the separation matters
+
+ Keeping extraction and retrieval distinct makes it possible to:
+
+ - Reuse the same extracted artifacts across many retrieval backends.
+ - Compare backends against the same corpus and dataset inputs.
+ - Record and audit retrieval decisions without mixing in prompting or context formatting.
+
+ ## Retrieval quality
+
+ For retrieval quality upgrades, see `docs/RETRIEVAL_QUALITY.md`.

biblicus-0.14.0/docs/RETRIEVAL_EVALUATION.md

@@ -0,0 +1,181 @@
+ # Retrieval evaluation
+
+ Biblicus evaluates retrieval runs against deterministic datasets so quality comparisons are repeatable across backends
+ and corpora. Evaluations keep the evidence-first model intact by reporting per-query evidence alongside summary
+ metrics.
+
+ ## Dataset format
+
+ Retrieval datasets are stored as JavaScript Object Notation files with a strict schema:
+
+ ```json
+ {
+   "schema_version": 1,
+   "name": "example-dataset",
+   "description": "Small hand-labeled dataset for smoke tests.",
+   "queries": [
+     {
+       "query_id": "q-001",
+       "query_text": "alpha",
+       "expected_item_id": "item-id-123",
+       "kind": "gold"
+     }
+   ]
+ }
+ ```
+
+ Each query includes either an `expected_item_id` or an `expected_source_uri`. The `kind` field records whether the
+ query is hand-labeled (`gold`) or synthetic.
+
+ ## Metrics primer
+
+ Retrieval evaluation reports a small set of textbook metrics:
+
+ - **Hit rate**: the fraction of queries that retrieved the expected item at any rank.
+ - **Precision-at-k**: hit rate normalized by the evidence budget (`max_total_items`).
+ - **Mean reciprocal rank**: the average of `1 / rank` for the first matching item per query.
+
+ These metrics are deterministic for the same corpus, run, dataset, and budget.
+
+ ## Running an evaluation
+
+ Use the command-line interface to evaluate a retrieval run against a dataset:
+
+ ```bash
+ biblicus eval --corpus corpora/example --run <run_id> --dataset datasets/retrieval.json \
+   --max-total-items 5 --max-total-characters 2000 --max-items-per-source 5
+ ```
+
+ If `--run` is omitted, the latest retrieval run is used. Evaluations are deterministic for the same corpus, run, and
+ budget.
+
+ ## End-to-end evaluation example
+
+ This example builds a tiny corpus, creates a retrieval run, and evaluates it against a minimal dataset:
+
+ ```
+ rm -rf corpora/retrieval_eval_demo
+ python3 -m biblicus init corpora/retrieval_eval_demo
+ printf "alpha apple\n" > /tmp/eval-alpha.txt
+ printf "beta banana\n" > /tmp/eval-beta.txt
+ python3 -m biblicus ingest --corpus corpora/retrieval_eval_demo /tmp/eval-alpha.txt
+ python3 -m biblicus ingest --corpus corpora/retrieval_eval_demo /tmp/eval-beta.txt
+
+ python3 -m biblicus extract build --corpus corpora/retrieval_eval_demo --step pass-through-text
+ python3 -m biblicus build --corpus corpora/retrieval_eval_demo --backend sqlite-full-text-search
+
+ cat > /tmp/retrieval_eval_dataset.json <<'JSON'
+ {
+   "schema_version": 1,
+   "name": "retrieval-eval-demo",
+   "description": "Minimal dataset for evaluation walkthroughs.",
+   "queries": [
+     {
+       "query_id": "q1",
+       "query_text": "apple",
+       "expected_item_id": "ITEM_ID_FOR_ALPHA",
+       "kind": "gold"
+     }
+   ]
+ }
+ JSON
+ ```
+
+ Replace `ITEM_ID_FOR_ALPHA` with the item identifier from `biblicus list`, then run:
+
+ ```
+ python3 -m biblicus eval --corpus corpora/retrieval_eval_demo --dataset /tmp/retrieval_eval_dataset.json \
+   --max-total-items 3 --max-total-characters 2000 --max-items-per-source 5
+ ```
+
+ ## Retrieval evaluation lab
+
+ The retrieval evaluation lab ships with bundled files and labels so you can run a deterministic end-to-end evaluation
+ without external dependencies.
+
+ ```
+ python3 scripts/retrieval_evaluation_lab.py --corpus corpora/retrieval_eval_lab --force
+ ```
+
+ The script prints a summary that includes the generated dataset path, the retrieval run identifier, and the evaluation
+ output path.
+
+ ## Output
+
+ The evaluation output includes:
+
+ - Dataset metadata (name, description, query count).
+ - Run metadata (backend ID, run ID, evaluation timestamp).
+ - Metrics (hit rate, precision-at-k, mean reciprocal rank).
+ - System diagnostics (latency percentiles and index size).
+
+ The output is JavaScript Object Notation suitable for downstream reporting.
+
+ Example snippet:
+
+ ```json
+ {
+   "dataset": {
+     "name": "retrieval-eval-demo",
+     "description": "Minimal dataset for evaluation walkthroughs.",
+     "queries": 1
+   },
+   "backend_id": "sqlite-full-text-search",
+   "run_id": "RUN_ID",
+   "evaluated_at": "2024-01-01T00:00:00Z",
+   "metrics": {
+     "hit_rate": 1.0,
+     "precision_at_max_total_items": 0.3333333333333333,
+     "mean_reciprocal_rank": 1.0
+   },
+   "system": {
+     "average_latency_milliseconds": 1.2,
+     "percentile_95_latency_milliseconds": 2.4,
+     "index_bytes": 2048.0
+   }
+ }
+ ```
+
+ The `metrics` section is the primary signal for retriever quality. The `system` section helps compare performance and
+ storage costs across backends.
+
+ ## What to record for comparisons
+
+ When you compare retrieval runs, capture the same inputs every time:
+
+ - Corpus path (and whether the catalog has been reindexed).
+ - Extraction run identifier used by the retrieval run.
+ - Retrieval backend identifier and run identifier.
+ - Evaluation dataset path and schema version.
+ - Evidence budget values.
+
+ This metadata allows you to rerun the evaluation and explain differences between results.
+
+ ## Common pitfalls
+
+ - Evaluating against a dataset built for a different corpus or extraction run.
+ - Changing budgets between runs and expecting metrics to be comparable.
+ - Using stale item identifiers after reindexing or re-ingesting content.
+
+ ## Python usage
+
+ ```python
+ from pathlib import Path
+
+ from biblicus.corpus import Corpus
+ from biblicus.evaluation import evaluate_run, load_dataset
+ from biblicus.models import QueryBudget
+
+ corpus = Corpus.open("corpora/example")
+ run = corpus.load_run("<run_id>")
+ dataset = load_dataset(Path("datasets/retrieval.json"))
+ budget = QueryBudget(max_total_items=5, max_total_characters=2000, max_items_per_source=5)
+ result = evaluate_run(corpus=corpus, run=run, dataset=dataset, budget=budget)
+ print(result.model_dump_json(indent=2))
+ ```
+
+ ## Design notes
+
+ - Evaluation is reproducible by construction: the run manifest, dataset, and budget fully determine the results.
+ - The evaluation workflow expects retrieval stages to remain explicit in the run artifacts.
+ - Reports are portable, so comparisons across backends and corpora are straightforward.
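
The metrics primer in this new document can be made concrete with a short sketch. The data shapes below (`ranked_results`, `expected`) are hypothetical stand-ins, not the biblicus evidence model; the package's own implementation (`src/biblicus/evaluation.py` in the file list above) is authoritative.

```python
# Illustrative only: the three reported metrics computed from ranked results.
# `ranked_results` maps query_id -> ordered list of retrieved item ids;
# `expected` maps query_id -> the expected item id. Hypothetical shapes.

def hit_rate(ranked_results, expected):
    # Fraction of queries that retrieved the expected item at any rank.
    hits = sum(1 for q, ranked in ranked_results.items() if expected[q] in ranked)
    return hits / len(ranked_results)

def mean_reciprocal_rank(ranked_results, expected):
    # Average of 1 / rank for the first matching item per query (ranks are 1-based).
    total = 0.0
    for q, ranked in ranked_results.items():
        if expected[q] in ranked:
            total += 1.0 / (ranked.index(expected[q]) + 1)
    return total / len(ranked_results)

def precision_at_max_total_items(ranked_results, expected, max_total_items):
    # Hit rate normalized by the evidence budget, per the primer above.
    return hit_rate(ranked_results, expected) / max_total_items
```

With one query whose expected item ranks first and `max_total_items=3`, this reproduces the example output above: hit rate 1.0, mean reciprocal rank 1.0, precision 0.3333....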

biblicus-0.14.0/docs/RETRIEVAL_QUALITY.md

@@ -0,0 +1,106 @@
+ # Retrieval quality upgrades
+
+ This document describes the retrieval quality upgrades available in Biblicus. It is a reference for how retrieval
+ quality is expressed in runs and how to interpret the signals in artifacts and evidence.
+
+ ## Goals
+
+ - Improve relevance without losing determinism or reproducibility.
+ - Keep retrieval stages explicit and visible in run artifacts.
+ - Preserve the evidence-first output model.
+
+ ## Available upgrades
+
+ ### 1) Tuned lexical baseline
+
+ Biblicus exposes the knobs you use to shape lexical relevance without losing determinism:
+
+ - BM25-style scoring with configurable parameters.
+ - N-gram range controls.
+ - Stop word strategy per backend.
+ - Field weighting (for example: title, body, metadata).
+
+ Example configuration (SQLite full text search):
+
+ ```
+ python3 -m biblicus build --corpus corpora/demo --backend sqlite-full-text-search \
+   --config chunk_size=200 \
+   --config chunk_overlap=50 \
+   --config snippet_characters=120 \
+   --config ngram_min=1 \
+   --config ngram_max=2
+ ```
+
+ ### 2) Reranking stage
+
+ The optional rerank stage keeps retrieval quality transparent. It re-scores a bounded candidate set and
+ records rerank scores alongside retrieve scores in evidence metadata.
+
+ Example configuration:
+
+ ```
+ python3 -m biblicus build --corpus corpora/demo --backend sqlite-full-text-search \
+   --config rerank_enabled=true \
+   --config rerank_model=cross-encoder \
+   --config rerank_top_k=10
+ ```
+
+ ### 3) Hybrid retrieval
+
+ Hybrid retrieval combines lexical and vector signals. It expands candidate pools for each component backend, fuses
+ scores with explicit weights, and then applies the final budget.
+
+ Example configuration:
+
+ ```
+ python3 -m biblicus build --corpus corpora/demo --backend hybrid \
+   --config lexical_backend=sqlite-full-text-search \
+   --config embedding_backend=vector \
+   --config lexical_weight=0.7 \
+   --config embedding_weight=0.3
+ ```
+
+ Evidence items record both stage scores in `stage_scores` and preserve the hybrid weights in the run metadata so
+ evaluation can interpret how the fused ranking was produced.
+
+ ## Evaluation guidance
+
+ Evaluation keeps the retrieval stages explicit and makes comparisons easy:
+
+ - Measure hit rate, precision-at-k, and mean reciprocal rank against shared datasets.
+ - Use the retrieval evaluation lab for a repeatable walkthrough (`scripts/retrieval_evaluation_lab.py`).
+ - Run artifacts capture each stage and configuration for auditability.
+ - Deterministic settings remain available as the default baseline.
+
+ ## Interpreting evidence signals
+
+ Evidence returned by retrieval runs includes a `stage` label and optional `stage_scores` map:
+
+ - `stage` identifies the last stage that produced the evidence (for example, `retrieve`, `rerank`, `hybrid`).
+ - `stage_scores` contains per-stage scores so you can compare lexical and vector contributions in hybrid runs.
+
+ Use these fields to understand how a candidate moved through the pipeline and why it ranked where it did.
+
+ ## Budget awareness
+
+ Budgets shape every retrieval comparison:
+
+ - `max_total_items` limits the evidence list length and defines the denominator for precision-at-k.
+ - `max_total_characters` controls how much text can survive into evidence outputs.
+ - `max_items_per_source` prevents one source from dominating the final list.
+
+ When you compare backends, keep budgets constant and note any candidate expansion in hybrid runs so fused rankings are
+ drawn from comparable pools.
+
+ ## Non-goals
+
+ - Automated hyperparameter tuning.
+ - Hidden fallback stages that obscure retrieval behavior.
+ - UI-driven tuning in this phase.
+
+ ## Summary
+
+ Retrieval quality upgrades in Biblicus keep determinism intact while making scoring richer and more interpretable.
+ Start with tuned lexical baselines, add reranking when you need sharper relevance, and reach for hybrid retrieval when
+ you want to balance lexical precision with semantic similarity signals. Evaluate each change with the same dataset and
+ budget so improvements remain credible and reproducible.
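
The weighted fusion this document describes can be sketched in a few lines. This is a minimal illustration, assuming per-item score maps from each component backend; the function and dict shapes are hypothetical, and the actual implementation is `src/biblicus/backends/hybrid.py` from the file list above.

```python
# Illustrative weighted fusion of lexical and embedding scores, mirroring the
# lexical_weight / embedding_weight configuration shown above. Hypothetical
# shapes: each input maps item_id -> score from that component backend.

def fuse_scores(lexical, embedding, lexical_weight=0.7, embedding_weight=0.3):
    """Combine per-item scores from two backends into one descending ranking."""
    fused = {}
    for item_id in set(lexical) | set(embedding):
        score = (lexical_weight * lexical.get(item_id, 0.0)
                 + embedding_weight * embedding.get(item_id, 0.0))
        # Keep per-stage scores so evidence can explain the fused rank,
        # analogous to the stage_scores map described above.
        fused[item_id] = {
            "score": score,
            "stage_scores": {
                "lexical": lexical.get(item_id, 0.0),
                "embedding": embedding.get(item_id, 0.0),
            },
        }
    return sorted(fused.items(), key=lambda kv: kv[1]["score"], reverse=True)
```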

biblicus-0.14.0/features/retrieval_evaluation_lab.feature

@@ -0,0 +1,10 @@
+ Feature: Retrieval evaluation lab
+   The retrieval evaluation lab provides a deterministic walkthrough with bundled data.
+
+   Scenario: Retrieval evaluation lab reports expected metrics
+     When I run the retrieval evaluation lab with corpus "corpus" and dataset "dataset.json"
+     Then the retrieval evaluation lab dataset file exists
+     And the retrieval evaluation lab output file exists
+     And the retrieval evaluation lab metrics include hit_rate 1
+     And the retrieval evaluation lab metrics include mean_reciprocal_rank 1
+     And the retrieval evaluation lab metrics include precision_at_max_total_items 0.3333333333333333

biblicus-0.14.0/features/steps/retrieval_evaluation_lab_steps.py

@@ -0,0 +1,77 @@
+ from __future__ import annotations
+
+ import json
+ import math
+ import subprocess
+ from pathlib import Path
+
+ from behave import then, when
+
+
+ def _corpus_path(context, name: str) -> Path:
+     return (context.workdir / name).resolve()
+
+
+ def _parse_json_output(standard_output: str) -> dict[str, object]:
+     return json.loads(standard_output)
+
+
+ def _expect_metric(metrics: dict[str, object], key: str, expected: float) -> None:
+     actual = float(metrics[key])
+     assert math.isclose(actual, expected, rel_tol=1e-12, abs_tol=1e-12)
+
+
+ @when('I run the retrieval evaluation lab with corpus "{corpus_name}" and dataset "{dataset_name}"')
+ def step_run_retrieval_evaluation_lab(context, corpus_name: str, dataset_name: str) -> None:
+     corpus = _corpus_path(context, corpus_name)
+     dataset_path = (context.workdir / dataset_name).resolve()
+     result = subprocess.run(
+         [
+             "python3",
+             "scripts/retrieval_evaluation_lab.py",
+             "--corpus",
+             str(corpus),
+             "--dataset-path",
+             str(dataset_path),
+             "--force",
+         ],
+         cwd=context.repo_root,
+         capture_output=True,
+         text=True,
+         check=False,
+     )
+     context.last_result = result
+     assert result.returncode == 0, result.stderr
+     context.retrieval_lab_summary = _parse_json_output(result.stdout)
+
+
+ @then("the retrieval evaluation lab dataset file exists")
+ def step_retrieval_lab_dataset_exists(context) -> None:
+     summary = context.retrieval_lab_summary
+     dataset_path = Path(summary["dataset_path"])
+     assert dataset_path.is_file()
+
+
+ @then("the retrieval evaluation lab output file exists")
+ def step_retrieval_lab_output_exists(context) -> None:
+     summary = context.retrieval_lab_summary
+     output_path = Path(summary["evaluation_output_path"])
+     assert output_path.is_file()
+
+
+ @then("the retrieval evaluation lab metrics include hit_rate {expected:g}")
+ def step_retrieval_lab_hit_rate(context, expected: float) -> None:
+     metrics = context.retrieval_lab_summary["metrics"]
+     _expect_metric(metrics, "hit_rate", expected)
+
+
+ @then("the retrieval evaluation lab metrics include mean_reciprocal_rank {expected:g}")
+ def step_retrieval_lab_mean_reciprocal_rank(context, expected: float) -> None:
+     metrics = context.retrieval_lab_summary["metrics"]
+     _expect_metric(metrics, "mean_reciprocal_rank", expected)
+
+
+ @then("the retrieval evaluation lab metrics include precision_at_max_total_items {expected:g}")
+ def step_retrieval_lab_precision_at_max_total_items(context, expected: float) -> None:
+     metrics = context.retrieval_lab_summary["metrics"]
+     _expect_metric(metrics, "precision_at_max_total_items", expected)

{biblicus-0.13.0 → biblicus-0.14.0}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "biblicus"
- version = "0.13.0"
+ version = "0.14.0"
  description = "Command line interface and Python library for corpus ingestion, retrieval, and evaluation."
  readme = "README.md"
  requires-python = ">=3.9"