biblicus 0.11.0__tar.gz → 0.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235)
  1. {biblicus-0.11.0/src/biblicus.egg-info → biblicus-0.12.0}/PKG-INFO +1 -1
  2. {biblicus-0.11.0 → biblicus-0.12.0}/docs/CONTEXT_PACK.md +37 -1
  3. {biblicus-0.11.0 → biblicus-0.12.0}/docs/FEATURE_INDEX.md +1 -0
  4. {biblicus-0.11.0 → biblicus-0.12.0}/docs/RETRIEVAL_QUALITY.md +1 -1
  5. {biblicus-0.11.0 → biblicus-0.12.0}/docs/ROADMAP.md +15 -36
  6. {biblicus-0.11.0 → biblicus-0.12.0}/features/context_pack_cli.feature +25 -0
  7. biblicus-0.12.0/features/context_pack_policies.feature +92 -0
  8. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/cli_steps.py +51 -0
  9. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/context_pack_steps.py +88 -0
  10. {biblicus-0.11.0 → biblicus-0.12.0}/pyproject.toml +1 -1
  11. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/__init__.py +1 -1
  12. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/cli.py +30 -1
  13. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/context.py +138 -4
  14. {biblicus-0.11.0 → biblicus-0.12.0/src/biblicus.egg-info}/PKG-INFO +1 -1
  15. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus.egg-info/SOURCES.txt +1 -0
  16. {biblicus-0.11.0 → biblicus-0.12.0}/LICENSE +0 -0
  17. {biblicus-0.11.0 → biblicus-0.12.0}/MANIFEST.in +0 -0
  18. {biblicus-0.11.0 → biblicus-0.12.0}/README.md +0 -0
  19. {biblicus-0.11.0 → biblicus-0.12.0}/THIRD_PARTY_NOTICES.md +0 -0
  20. {biblicus-0.11.0 → biblicus-0.12.0}/datasets/wikipedia_mini.json +0 -0
  21. {biblicus-0.11.0 → biblicus-0.12.0}/docs/ANALYSIS.md +0 -0
  22. {biblicus-0.11.0 → biblicus-0.12.0}/docs/ARCHITECTURE.md +0 -0
  23. {biblicus-0.11.0 → biblicus-0.12.0}/docs/BACKENDS.md +0 -0
  24. {biblicus-0.11.0 → biblicus-0.12.0}/docs/CORPUS.md +0 -0
  25. {biblicus-0.11.0 → biblicus-0.12.0}/docs/CORPUS_DESIGN.md +0 -0
  26. {biblicus-0.11.0 → biblicus-0.12.0}/docs/DEMOS.md +0 -0
  27. {biblicus-0.11.0 → biblicus-0.12.0}/docs/EXTRACTION.md +0 -0
  28. {biblicus-0.11.0 → biblicus-0.12.0}/docs/KNOWLEDGE_BASE.md +0 -0
  29. {biblicus-0.11.0 → biblicus-0.12.0}/docs/PROFILING.md +0 -0
  30. {biblicus-0.11.0 → biblicus-0.12.0}/docs/RETRIEVAL.md +0 -0
  31. {biblicus-0.11.0 → biblicus-0.12.0}/docs/RETRIEVAL_EVALUATION.md +0 -0
  32. {biblicus-0.11.0 → biblicus-0.12.0}/docs/STT.md +0 -0
  33. {biblicus-0.11.0 → biblicus-0.12.0}/docs/TESTING.md +0 -0
  34. {biblicus-0.11.0 → biblicus-0.12.0}/docs/TOPIC_MODELING.md +0 -0
  35. {biblicus-0.11.0 → biblicus-0.12.0}/docs/USER_CONFIGURATION.md +0 -0
  36. {biblicus-0.11.0 → biblicus-0.12.0}/docs/api.rst +0 -0
  37. {biblicus-0.11.0 → biblicus-0.12.0}/docs/backends/index.md +0 -0
  38. {biblicus-0.11.0 → biblicus-0.12.0}/docs/backends/scan.md +0 -0
  39. {biblicus-0.11.0 → biblicus-0.12.0}/docs/backends/sqlite-full-text-search.md +0 -0
  40. {biblicus-0.11.0 → biblicus-0.12.0}/docs/conf.py +0 -0
  41. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/index.md +0 -0
  42. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/ocr/index.md +0 -0
  43. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/ocr/paddleocr-vl.md +0 -0
  44. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/ocr/rapidocr.md +0 -0
  45. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/pipeline-utilities/index.md +0 -0
  46. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/pipeline-utilities/pipeline.md +0 -0
  47. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/pipeline-utilities/select-longest.md +0 -0
  48. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/pipeline-utilities/select-override.md +0 -0
  49. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/pipeline-utilities/select-smart-override.md +0 -0
  50. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/pipeline-utilities/select-text.md +0 -0
  51. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/speech-to-text/deepgram.md +0 -0
  52. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/speech-to-text/index.md +0 -0
  53. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/speech-to-text/openai.md +0 -0
  54. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/text-document/index.md +0 -0
  55. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/text-document/markitdown.md +0 -0
  56. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/text-document/metadata.md +0 -0
  57. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/text-document/pass-through.md +0 -0
  58. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/text-document/pdf.md +0 -0
  59. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/text-document/unstructured.md +0 -0
  60. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/vlm-document/docling-granite.md +0 -0
  61. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/vlm-document/docling-smol.md +0 -0
  62. {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/vlm-document/index.md +0 -0
  63. {biblicus-0.11.0 → biblicus-0.12.0}/docs/index.rst +0 -0
  64. {biblicus-0.11.0 → biblicus-0.12.0}/features/analysis_schema.feature +0 -0
  65. {biblicus-0.11.0 → biblicus-0.12.0}/features/backend_validation.feature +0 -0
  66. {biblicus-0.11.0 → biblicus-0.12.0}/features/biblicus_corpus.feature +0 -0
  67. {biblicus-0.11.0 → biblicus-0.12.0}/features/cli_entrypoint.feature +0 -0
  68. {biblicus-0.11.0 → biblicus-0.12.0}/features/cli_parsing.feature +0 -0
  69. {biblicus-0.11.0 → biblicus-0.12.0}/features/cli_step_spec_parsing.feature +0 -0
  70. {biblicus-0.11.0 → biblicus-0.12.0}/features/content_sniffing.feature +0 -0
  71. {biblicus-0.11.0 → biblicus-0.12.0}/features/context_pack.feature +0 -0
  72. {biblicus-0.11.0 → biblicus-0.12.0}/features/corpus_edge_cases.feature +0 -0
  73. {biblicus-0.11.0 → biblicus-0.12.0}/features/corpus_identity.feature +0 -0
  74. {biblicus-0.11.0 → biblicus-0.12.0}/features/corpus_purge.feature +0 -0
  75. {biblicus-0.11.0 → biblicus-0.12.0}/features/crawl.feature +0 -0
  76. {biblicus-0.11.0 → biblicus-0.12.0}/features/docling_granite_extractor.feature +0 -0
  77. {biblicus-0.11.0 → biblicus-0.12.0}/features/docling_smol_extractor.feature +0 -0
  78. {biblicus-0.11.0 → biblicus-0.12.0}/features/environment.py +0 -0
  79. {biblicus-0.11.0 → biblicus-0.12.0}/features/error_cases.feature +0 -0
  80. {biblicus-0.11.0 → biblicus-0.12.0}/features/evaluation.feature +0 -0
  81. {biblicus-0.11.0 → biblicus-0.12.0}/features/evidence_processing.feature +0 -0
  82. {biblicus-0.11.0 → biblicus-0.12.0}/features/extraction_error_handling.feature +0 -0
  83. {biblicus-0.11.0 → biblicus-0.12.0}/features/extraction_run_lifecycle.feature +0 -0
  84. {biblicus-0.11.0 → biblicus-0.12.0}/features/extraction_selection.feature +0 -0
  85. {biblicus-0.11.0 → biblicus-0.12.0}/features/extraction_selection_longest.feature +0 -0
  86. {biblicus-0.11.0 → biblicus-0.12.0}/features/extractor_pipeline.feature +0 -0
  87. {biblicus-0.11.0 → biblicus-0.12.0}/features/extractor_validation.feature +0 -0
  88. {biblicus-0.11.0 → biblicus-0.12.0}/features/frontmatter.feature +0 -0
  89. {biblicus-0.11.0 → biblicus-0.12.0}/features/hook_config_validation.feature +0 -0
  90. {biblicus-0.11.0 → biblicus-0.12.0}/features/hook_error_handling.feature +0 -0
  91. {biblicus-0.11.0 → biblicus-0.12.0}/features/import_tree.feature +0 -0
  92. {biblicus-0.11.0 → biblicus-0.12.0}/features/inference_backend.feature +0 -0
  93. {biblicus-0.11.0 → biblicus-0.12.0}/features/ingest_sources.feature +0 -0
  94. {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_audio_samples.feature +0 -0
  95. {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_image_samples.feature +0 -0
  96. {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_mixed_corpus.feature +0 -0
  97. {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_mixed_extraction.feature +0 -0
  98. {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_ocr_image_extraction.feature +0 -0
  99. {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_pdf_retrieval.feature +0 -0
  100. {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_pdf_samples.feature +0 -0
  101. {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_unstructured_extraction.feature +0 -0
  102. {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_wikipedia.feature +0 -0
  103. {biblicus-0.11.0 → biblicus-0.12.0}/features/knowledge_base.feature +0 -0
  104. {biblicus-0.11.0 → biblicus-0.12.0}/features/lifecycle_hooks.feature +0 -0
  105. {biblicus-0.11.0 → biblicus-0.12.0}/features/markitdown_extractor.feature +0 -0
  106. {biblicus-0.11.0 → biblicus-0.12.0}/features/model_validation.feature +0 -0
  107. {biblicus-0.11.0 → biblicus-0.12.0}/features/ocr_extractor.feature +0 -0
  108. {biblicus-0.11.0 → biblicus-0.12.0}/features/paddleocr_vl_extractor.feature +0 -0
  109. {biblicus-0.11.0 → biblicus-0.12.0}/features/paddleocr_vl_parse_api_response.feature +0 -0
  110. {biblicus-0.11.0 → biblicus-0.12.0}/features/pdf_text_extraction.feature +0 -0
  111. {biblicus-0.11.0 → biblicus-0.12.0}/features/profiling.feature +0 -0
  112. {biblicus-0.11.0 → biblicus-0.12.0}/features/python_api.feature +0 -0
  113. {biblicus-0.11.0 → biblicus-0.12.0}/features/python_hook_logging.feature +0 -0
  114. {biblicus-0.11.0 → biblicus-0.12.0}/features/query_processing.feature +0 -0
  115. {biblicus-0.11.0 → biblicus-0.12.0}/features/recipe_file_extraction.feature +0 -0
  116. {biblicus-0.11.0 → biblicus-0.12.0}/features/retrieval_budget.feature +0 -0
  117. {biblicus-0.11.0 → biblicus-0.12.0}/features/retrieval_quality.feature +0 -0
  118. {biblicus-0.11.0 → biblicus-0.12.0}/features/retrieval_scan.feature +0 -0
  119. {biblicus-0.11.0 → biblicus-0.12.0}/features/retrieval_sqlite_full_text_search.feature +0 -0
  120. {biblicus-0.11.0 → biblicus-0.12.0}/features/retrieval_uses_extraction_run.feature +0 -0
  121. {biblicus-0.11.0 → biblicus-0.12.0}/features/retrieval_utilities.feature +0 -0
  122. {biblicus-0.11.0 → biblicus-0.12.0}/features/select_override.feature +0 -0
  123. {biblicus-0.11.0 → biblicus-0.12.0}/features/smart_override_selection.feature +0 -0
  124. {biblicus-0.11.0 → biblicus-0.12.0}/features/source_loading.feature +0 -0
  125. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/analysis_steps.py +0 -0
  126. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/backend_steps.py +0 -0
  127. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/cli_parsing_steps.py +0 -0
  128. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/crawl_steps.py +0 -0
  129. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/deepgram_steps.py +0 -0
  130. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/docling_steps.py +0 -0
  131. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/evidence_processing_steps.py +0 -0
  132. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/extraction_run_lifecycle_steps.py +0 -0
  133. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/extraction_steps.py +0 -0
  134. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/extractor_steps.py +0 -0
  135. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/frontmatter_steps.py +0 -0
  136. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/inference_steps.py +0 -0
  137. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/knowledge_base_steps.py +0 -0
  138. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/markitdown_steps.py +0 -0
  139. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/model_steps.py +0 -0
  140. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/openai_steps.py +0 -0
  141. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/paddleocr_mock_steps.py +0 -0
  142. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/paddleocr_vl_steps.py +0 -0
  143. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/paddleocr_vl_unit_steps.py +0 -0
  144. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/pdf_steps.py +0 -0
  145. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/profiling_steps.py +0 -0
  146. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/python_api_steps.py +0 -0
  147. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/rapidocr_steps.py +0 -0
  148. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/requests_mock_steps.py +0 -0
  149. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/retrieval_quality_steps.py +0 -0
  150. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/retrieval_steps.py +0 -0
  151. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/stt_deepgram_steps.py +0 -0
  152. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/stt_steps.py +0 -0
  153. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/topic_modeling_steps.py +0 -0
  154. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/unstructured_steps.py +0 -0
  155. {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/user_config_steps.py +0 -0
  156. {biblicus-0.11.0 → biblicus-0.12.0}/features/streaming_ingest.feature +0 -0
  157. {biblicus-0.11.0 → biblicus-0.12.0}/features/stt_deepgram_extractor.feature +0 -0
  158. {biblicus-0.11.0 → biblicus-0.12.0}/features/stt_extractor.feature +0 -0
  159. {biblicus-0.11.0 → biblicus-0.12.0}/features/text_extraction_runs.feature +0 -0
  160. {biblicus-0.11.0 → biblicus-0.12.0}/features/token_budget.feature +0 -0
  161. {biblicus-0.11.0 → biblicus-0.12.0}/features/topic_modeling.feature +0 -0
  162. {biblicus-0.11.0 → biblicus-0.12.0}/features/unstructured_extractor.feature +0 -0
  163. {biblicus-0.11.0 → biblicus-0.12.0}/features/user_config.feature +0 -0
  164. {biblicus-0.11.0 → biblicus-0.12.0}/scripts/download_ag_news.py +0 -0
  165. {biblicus-0.11.0 → biblicus-0.12.0}/scripts/download_audio_samples.py +0 -0
  166. {biblicus-0.11.0 → biblicus-0.12.0}/scripts/download_image_samples.py +0 -0
  167. {biblicus-0.11.0 → biblicus-0.12.0}/scripts/download_mixed_samples.py +0 -0
  168. {biblicus-0.11.0 → biblicus-0.12.0}/scripts/download_pdf_samples.py +0 -0
  169. {biblicus-0.11.0 → biblicus-0.12.0}/scripts/download_wikipedia.py +0 -0
  170. {biblicus-0.11.0 → biblicus-0.12.0}/scripts/profiling_demo.py +0 -0
  171. {biblicus-0.11.0 → biblicus-0.12.0}/scripts/readme_end_to_end_demo.py +0 -0
  172. {biblicus-0.11.0 → biblicus-0.12.0}/scripts/test.py +0 -0
  173. {biblicus-0.11.0 → biblicus-0.12.0}/scripts/topic_modeling_integration.py +0 -0
  174. {biblicus-0.11.0 → biblicus-0.12.0}/scripts/wikipedia_rag_demo.py +0 -0
  175. {biblicus-0.11.0 → biblicus-0.12.0}/setup.cfg +0 -0
  176. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/__main__.py +0 -0
  177. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/_vendor/dotyaml/__init__.py +0 -0
  178. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/_vendor/dotyaml/interpolation.py +0 -0
  179. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/_vendor/dotyaml/loader.py +0 -0
  180. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/_vendor/dotyaml/transformer.py +0 -0
  181. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/__init__.py +0 -0
  182. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/base.py +0 -0
  183. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/llm.py +0 -0
  184. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/models.py +0 -0
  185. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/profiling.py +0 -0
  186. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/schema.py +0 -0
  187. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/topic_modeling.py +0 -0
  188. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/backends/__init__.py +0 -0
  189. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/backends/base.py +0 -0
  190. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/backends/hybrid.py +0 -0
  191. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/backends/scan.py +0 -0
  192. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/backends/sqlite_full_text_search.py +0 -0
  193. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/backends/vector.py +0 -0
  194. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/constants.py +0 -0
  195. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/corpus.py +0 -0
  196. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/crawl.py +0 -0
  197. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/errors.py +0 -0
  198. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/evaluation.py +0 -0
  199. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/evidence_processing.py +0 -0
  200. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extraction.py +0 -0
  201. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/__init__.py +0 -0
  202. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/base.py +0 -0
  203. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/deepgram_stt.py +0 -0
  204. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/docling_granite_text.py +0 -0
  205. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/docling_smol_text.py +0 -0
  206. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/markitdown_text.py +0 -0
  207. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/metadata_text.py +0 -0
  208. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/openai_stt.py +0 -0
  209. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/paddleocr_vl_text.py +0 -0
  210. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/pass_through_text.py +0 -0
  211. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/pdf_text.py +0 -0
  212. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/pipeline.py +0 -0
  213. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/rapidocr_text.py +0 -0
  214. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/select_longest_text.py +0 -0
  215. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/select_override.py +0 -0
  216. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/select_smart_override.py +0 -0
  217. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/select_text.py +0 -0
  218. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/unstructured_text.py +0 -0
  219. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/frontmatter.py +0 -0
  220. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/hook_logging.py +0 -0
  221. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/hook_manager.py +0 -0
  222. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/hooks.py +0 -0
  223. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/ignore.py +0 -0
  224. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/inference.py +0 -0
  225. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/knowledge_base.py +0 -0
  226. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/models.py +0 -0
  227. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/retrieval.py +0 -0
  228. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/sources.py +0 -0
  229. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/time.py +0 -0
  230. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/uris.py +0 -0
  231. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/user_config.py +0 -0
  232. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus.egg-info/dependency_links.txt +0 -0
  233. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus.egg-info/entry_points.txt +0 -0
  234. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus.egg-info/requires.txt +0 -0
  235. {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: biblicus
3
- Version: 0.11.0
3
+ Version: 0.12.0
4
4
  Summary: Command line interface and Python library for corpus ingestion, retrieval, and evaluation.
5
5
  License: MIT
6
6
  Requires-Python: >=3.9
@@ -23,13 +23,49 @@ context_pack = build_context_pack(result, policy=policy)
23
23
  print(context_pack.text)
24
24
  ```
25
25
 
26
+ ## Policy surfaces
27
+
28
+ Context pack policies make ordering and formatting explicit.
29
+
30
+ ### Ordering
31
+
32
+ Use `ordering` to control how evidence blocks are arranged before joining:
33
+
34
+ - `rank`: use the evidence rank as provided by retrieval.
35
+ - `score`: sort by score (descending) and then item identifier.
36
+ - `source`: group by source uniform resource identifier, then sort by score.
37
+
38
+ ### Metadata inclusion
39
+
40
+ Set `include_metadata=True` to prepend metadata to each block. Metadata includes:
41
+
42
+ - `item_id`
43
+ - `source_uri`
44
+ - `score`
45
+ - `stage`
46
+
47
+ ### Character budgets
48
+
49
+ Character budgets drop trailing blocks until the context pack fits the specified limit. This keeps context shaping
50
+ deterministic without relying on a tokenizer.
51
+
52
+ In Python:
53
+
54
+ ```python
55
+ from biblicus.context import CharacterBudget, ContextPackPolicy, fit_context_pack_to_character_budget
56
+
57
+ policy = ContextPackPolicy(join_with="\n\n", ordering="score", include_metadata=True)
58
+ fitted = fit_context_pack_to_character_budget(context_pack, policy=policy, character_budget=CharacterBudget(max_characters=500))
59
+ print(fitted.text)
60
+ ```
61
+
26
62
  ## Command-line interface
27
63
 
28
64
  The command-line interface can build a context pack from a retrieval result by reading JavaScript Object Notation from standard input.
29
65
 
30
66
  ```bash
31
67
  biblicus query --corpus corpora/example --query "primary button style preference" \
32
- | biblicus context-pack build
68
+ | biblicus context-pack build --ordering score --include-metadata --max-characters 500
33
69
  ```
34
70
 
35
71
  ## What context pack building does
@@ -204,6 +204,7 @@ Documentation:
204
204
  Behavior specifications:
205
205
 
206
206
  - `features/context_pack.feature`
207
+ - `features/context_pack_policies.feature`
207
208
  - `features/token_budget.feature`
208
209
 
209
210
  Primary implementation:
@@ -1,7 +1,7 @@
1
1
  # Retrieval quality upgrades
2
2
 
3
3
  This document describes the retrieval quality upgrades available in Biblicus. It is a reference for how retrieval
4
- quality is expressed in runs and should be read alongside `docs/ROADMAP.md`.
4
+ quality is expressed in runs and how to interpret the signals in artifacts and evidence.
5
5
 
6
6
  ## Goals
7
7
 
@@ -17,49 +17,27 @@ If you are looking for what already exists, start with:
17
17
  - Raw corpus items remain readable, portable files.
18
18
  - Derived artifacts are stored under the corpus and can coexist for multiple implementations.
19
19
 
20
- ## Next: retrieval evaluation and datasets
20
+ ## Completed foundations
21
21
 
22
- Goal: make evaluation results easier to interpret and compare.
22
+ These are the capability slices that already exist and have end-to-end behavior specifications.
23
23
 
24
- Deliverables:
25
-
26
- - A dataset authoring workflow that supports small hand-labeled sets and larger synthetic sets.
27
- - A report that includes per-query diagnostics and a clear summary.
28
-
29
- Acceptance checks:
30
-
31
- - Dataset formats are versioned when they change.
32
- - Reports remain deterministic for the same inputs.
33
-
34
- ## Next: retrieval quality upgrades
35
-
36
- Goal: make retrieval relevance stronger while keeping deterministic baselines and clear evaluation.
37
-
38
- Deliverables:
39
-
40
- - A tuned lexical baseline (for example: BM25 configuration, n-grams, field weighting, stop word controls).
41
- - A reranking stage that can refine top-N results with either a cross-encoder or an LLM re-ranker.
42
- - A hybrid retrieval mode that combines lexical signals with embeddings and exposes weights explicitly.
43
-
44
- Acceptance checks:
24
+ ### Retrieval evaluation and datasets
45
25
 
46
- - Accuracy-at-k improves on the same evaluation datasets without regressions in determinism.
47
- - Retrieval stages are explicitly recorded (retrieve, rerank, filter) in the output artifacts.
26
+ - Dataset authoring workflow for small hand-labeled sets and larger synthetic sets.
27
+ - Evaluation reports with per-query diagnostics and summary metrics.
28
+ - Versioned dataset formats and deterministic reports for stable inputs.
48
29
 
49
- ## Next: context pack policy surfaces
30
+ ### Retrieval quality upgrades
50
31
 
51
- Goal: make context shaping policies easier to evaluate and swap.
32
+ - Tuned lexical baseline with BM25, n-gram range controls, and stop word policies.
33
+ - Reranking stage for top-N candidates with explicit stage metadata.
34
+ - Hybrid retrieval with explicit fusion weights and stage-level scores.
52
35
 
53
- Deliverables:
54
-
55
- - A clear set of context pack policy variants (formatting, ordering, metadata inclusion).
56
- - Token budget strategies that can use a real tokenizer.
57
- - Documentation that explains where context shaping fits in the pipeline.
58
-
59
- Acceptance checks:
36
+ ### Context pack policy surfaces
60
37
 
61
- - Behavior specifications cover policy selection and budgeting behaviors.
62
- - Example outputs show how context packs differ across policies.
38
+ - Policy variants for formatting, ordering, and metadata inclusion.
39
+ - Token and character budget strategies with explicit selectors.
40
+ - Documentation and examples that show how policy choices change outputs.
63
41
 
64
42
  ## Next: extraction evaluation harness
65
43
 
@@ -82,6 +60,7 @@ Goal: provide lightweight analysis utilities that summarize corpus themes and gu
82
60
 
83
61
  Deliverables:
84
62
 
63
+ - Basic corpus profiling with deterministic metrics for raw items and extracted text.
85
64
  - Hidden Markov modeling analysis for sequence-driven corpora.
86
65
  - A way to compare analysis outputs across corpora or corpus snapshots.
87
66
 
@@ -23,6 +23,31 @@ Feature: Context pack command-line interface
23
23
  one two three
24
24
  """
25
25
 
26
+ Scenario: Context pack build can include metadata
27
+ Given a retrieval result exists with sourced evidence:
28
+ | source_uri | score | text |
29
+ | source-a | 10.0 | alpha |
30
+ When I run "context-pack build" joining with "\n\n" ordering "score" and including metadata
31
+ Then the context pack build output text equals:
32
+ """
33
+ item_id: item-1
34
+ source_uri: source-a
35
+ score: 10.0
36
+ stage: scan
37
+ alpha
38
+ """
39
+
40
+ Scenario: Context pack build can fit to a character budget
41
+ Given a retrieval result exists with evidence text:
42
+ | text |
43
+ | alpha |
44
+ | beta |
45
+ When I run "context-pack build" joining with "\n\n" and character budget 6
46
+ Then the context pack build output text equals:
47
+ """
48
+ alpha
49
+ """
50
+
26
51
  Scenario: Context pack build fails without retrieval result on standard input
27
52
  When I run "context-pack build" with empty standard input
28
53
  Then the command fails with exit code 2
@@ -0,0 +1,92 @@
1
+ Feature: Context pack policies
2
+ Context pack policies control evidence ordering, metadata inclusion, and budgets.
3
+
4
+ Scenario: Score ordering sorts evidence by score
5
+ Given a retrieval result exists with scored evidence:
6
+ | score | text |
7
+ | 1.0 | beta |
8
+ | 5.0 | alpha |
9
+ When I build a context pack from that retrieval result with policy:
10
+ | key | value |
11
+ | join_with | \n\n |
12
+ | ordering | score |
13
+ | include_metadata | false |
14
+ Then the context pack text equals:
15
+ """
16
+ alpha
17
+
18
+ beta
19
+ """
20
+
21
+ Scenario: Source ordering groups evidence by source
22
+ Given a retrieval result exists with sourced evidence:
23
+ | source_uri | score | text |
24
+ | source-b | 1.0 | beta |
25
+ | source-a | 2.0 | alpha |
26
+ | source-a | 1.0 | delta |
27
+ When I build a context pack from that retrieval result with policy:
28
+ | key | value |
29
+ | join_with | \n\n |
30
+ | ordering | source |
31
+ | include_metadata | false |
32
+ Then the context pack text equals:
33
+ """
34
+ alpha
35
+
36
+ delta
37
+
38
+ beta
39
+ """
40
+
41
+ Scenario: Metadata inclusion prepends block metadata
42
+ Given a retrieval result exists with sourced evidence:
43
+ | source_uri | score | text |
44
+ | source-a | 10.0 | alpha |
45
+ When I build a context pack from that retrieval result with policy:
46
+ | key | value |
47
+ | join_with | \n\n |
48
+ | ordering | rank |
49
+ | include_metadata | true |
50
+ Then the context pack text equals:
51
+ """
52
+ item_id: item-1
53
+ source_uri: source-a
54
+ score: 10.0
55
+ stage: scan
56
+ alpha
57
+ """
58
+
59
+ Scenario: Character budgets drop trailing blocks
60
+ Given a retrieval result exists with evidence text:
61
+ | text |
62
+ | alpha |
63
+ | beta |
64
+ When I build a context pack from that retrieval result with policy:
65
+ | key | value |
66
+ | join_with | \n\n |
67
+ | ordering | rank |
68
+ | include_metadata | false |
69
+ And I fit the context pack to a character budget of 6 characters
70
+ Then the context pack text equals:
71
+ """
72
+ alpha
73
+ """
74
+
75
+ Scenario: Character budgets can produce empty context packs
76
+ Given a retrieval result exists with evidence text:
77
+ | text |
78
+ | alpha |
79
+ When I build a context pack from that retrieval result with policy:
80
+ | key | value |
81
+ | join_with | \n\n |
82
+ | ordering | rank |
83
+ | include_metadata | false |
84
+ And I fit the context pack to a character budget of 1 characters
85
+ Then the context pack text is empty
86
+
87
+ Scenario: Unknown ordering raises a policy error
88
+ Given a retrieval result exists with evidence text:
89
+ | text |
90
+ | alpha |
91
+ When I attempt to build a context pack with invalid ordering "mystery"
92
+ Then the context pack ordering error mentions "Unknown context pack ordering"
@@ -97,6 +97,57 @@ def step_context_pack_build_with_token_budget_from_standard_input(
97
97
  context.context_pack_build_output = json.loads(result.stdout)
98
98
 
99
99
 
100
+ @when(
101
+ 'I run "context-pack build" joining with "{join_with}" ordering "{ordering}" and including metadata'
102
+ )
103
+ def step_context_pack_build_with_metadata_from_standard_input(
104
+ context, join_with: str, ordering: str
105
+ ) -> None:
106
+ decoded_join_with = bytes(join_with, "utf-8").decode("unicode_escape")
107
+ retrieval_result_json = context.retrieval_result.model_dump_json(indent=2)
108
+ result = run_biblicus(
109
+ context,
110
+ [
111
+ "context-pack",
112
+ "build",
113
+ "--join-with",
114
+ decoded_join_with,
115
+ "--ordering",
116
+ ordering,
117
+ "--include-metadata",
118
+ ],
119
+ input_text=retrieval_result_json,
120
+ )
121
+ context.last_result = result
122
+ assert result.returncode == 0, result.stderr
123
+ context.context_pack_build_output = json.loads(result.stdout)
124
+
125
+
126
+ @when(
127
+ 'I run "context-pack build" joining with "{join_with}" and character budget {max_characters:d}'
128
+ )
129
+ def step_context_pack_build_with_character_budget_from_standard_input(
130
+ context, join_with: str, max_characters: int
131
+ ) -> None:
132
+ decoded_join_with = bytes(join_with, "utf-8").decode("unicode_escape")
133
+ retrieval_result_json = context.retrieval_result.model_dump_json(indent=2)
134
+ result = run_biblicus(
135
+ context,
136
+ [
137
+ "context-pack",
138
+ "build",
139
+ "--join-with",
140
+ decoded_join_with,
141
+ "--max-characters",
142
+ str(max_characters),
143
+ ],
144
+ input_text=retrieval_result_json,
145
+ )
146
+ context.last_result = result
147
+ assert result.returncode == 0, result.stderr
148
+ context.context_pack_build_output = json.loads(result.stdout)
149
+
150
+
100
151
  @when('I run "context-pack build" with empty standard input')
101
152
  def step_context_pack_build_with_empty_standard_input(context) -> None:
102
153
  result = run_biblicus(context, ["context-pack", "build", "--join-with", "\n\n"], input_text="")
@@ -3,9 +3,11 @@ from __future__ import annotations
3
3
  from behave import given, then, when
4
4
 
5
5
  from biblicus.context import (
6
+ CharacterBudget,
6
7
  ContextPackPolicy,
7
8
  TokenBudget,
8
9
  build_context_pack,
10
+ fit_context_pack_to_character_budget,
9
11
  fit_context_pack_to_token_budget,
10
12
  )
11
13
  from biblicus.models import Evidence, QueryBudget, RetrievalResult
@@ -80,6 +82,41 @@ def given_retrieval_result_exists_with_scored_evidence(context) -> None:
80
82
  )
81
83
 
82
84
 
85
+ @given("a retrieval result exists with sourced evidence:")
86
+ def given_retrieval_result_exists_with_sourced_evidence(context) -> None:
87
+ evidence_items = []
88
+ for rank_value, row in enumerate(context.table, start=1):
89
+ score_value = float(row["score"])
90
+ source_uri_value = row["source_uri"]
91
+ text_value = row["text"]
92
+ content_ref_value = None if str(text_value).strip() else "content-ref"
93
+ evidence_items.append(
94
+ Evidence(
95
+ item_id=f"item-{rank_value}",
96
+ source_uri=source_uri_value,
97
+ media_type="text/plain",
98
+ score=score_value,
99
+ rank=rank_value,
100
+ text=text_value,
101
+ content_ref=content_ref_value,
102
+ stage="scan",
103
+ recipe_id="recipe",
104
+ run_id="run",
105
+ )
106
+ )
107
+
108
+ context.retrieval_result = RetrievalResult(
109
+ query_text="query",
110
+ budget=QueryBudget(max_total_items=10),
111
+ run_id="run",
112
+ recipe_id="recipe",
113
+ backend_id="scan",
114
+ generated_at=utc_now_iso(),
115
+ evidence=evidence_items,
116
+ stats={},
117
+ )
118
+
119
+
83
120
  @given("the second evidence item has no text payload")
84
121
  def given_second_evidence_item_has_no_text_payload(context) -> None:
85
122
  context.retrieval_result.evidence[1] = context.retrieval_result.evidence[1].model_copy(
@@ -96,6 +133,31 @@ def when_build_context_pack_from_retrieval_result(context, join_with: str) -> No
96
133
  )
97
134
 
98
135
 
136
+ @when("I build a context pack from that retrieval result with policy:")
137
+ def when_build_context_pack_from_retrieval_result_with_policy(context) -> None:
138
+ settings = {}
139
+ for row in context.table:
140
+ if "key" in row.headings and "value" in row.headings:
141
+ key = row["key"]
142
+ value = row["value"]
143
+ else:
144
+ key = row[0]
145
+ value = row[1]
146
+ settings[str(key).strip()] = str(value).strip()
147
+ join_with_raw = settings.get("join_with", "\\n\\n")
148
+ ordering = settings.get("ordering", "rank")
149
+ include_metadata = settings.get("include_metadata", "false").lower() == "true"
150
+ decoded_join_with = bytes(join_with_raw, "utf-8").decode("unicode_escape")
151
+ context.context_pack_policy = ContextPackPolicy(
152
+ join_with=decoded_join_with,
153
+ ordering=ordering,
154
+ include_metadata=include_metadata,
155
+ )
156
+ context.context_pack = build_context_pack(
157
+ context.retrieval_result, policy=context.context_pack_policy
158
+ )
159
+
160
+
99
161
  @then("the context pack text equals:")
100
162
  def then_context_pack_text_equals(context) -> None:
101
163
  assert context.context_pack.text == context.text
@@ -110,6 +172,32 @@ def when_fit_context_pack_to_token_budget(context, max_tokens: int) -> None:
110
172
  )
111
173
 
112
174
 
175
+ @when("I fit the context pack to a character budget of {max_characters:d} characters")
176
+ def when_fit_context_pack_to_character_budget(context, max_characters: int) -> None:
177
+ context.context_pack = fit_context_pack_to_character_budget(
178
+ context.context_pack,
179
+ policy=context.context_pack_policy,
180
+ character_budget=CharacterBudget(max_characters=max_characters),
181
+ )
182
+
183
+
184
+ @when('I attempt to build a context pack with invalid ordering "{ordering}"')
185
+ def when_attempt_build_context_pack_with_invalid_ordering(context, ordering: str) -> None:
186
+ policy = ContextPackPolicy(join_with="\n\n").model_copy(update={"ordering": ordering})
187
+ try:
188
+ _ = build_context_pack(context.retrieval_result, policy=policy)
189
+ context.ordering_error = None
190
+ except ValueError as exc:
191
+ context.ordering_error = exc
192
+
193
+
194
+ @then('the context pack ordering error mentions "{message}"')
195
+ def then_context_pack_ordering_error_mentions(context, message: str) -> None:
196
+ error = getattr(context, "ordering_error", None)
197
+ assert error is not None
198
+ assert message in str(error)
199
+
200
+
113
201
  @then("the context pack text is empty")
114
202
  def then_context_pack_text_is_empty(context) -> None:
115
203
  assert context.context_pack.text == ""
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "biblicus"
7
- version = "0.11.0"
7
+ version = "0.12.0"
8
8
  description = "Command line interface and Python library for corpus ingestion, retrieval, and evaluation."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -27,4 +27,4 @@ __all__ = [
27
27
  "RetrievalRun",
28
28
  ]
29
29
 
30
- __version__ = "0.11.0"
30
+ __version__ = "0.12.0"
@@ -15,9 +15,11 @@ from pydantic import ValidationError
15
15
  from .analysis import get_analysis_backend
16
16
  from .backends import get_backend
17
17
  from .context import (
18
+ CharacterBudget,
18
19
  ContextPackPolicy,
19
20
  TokenBudget,
20
21
  build_context_pack,
22
+ fit_context_pack_to_character_budget,
21
23
  fit_context_pack_to_token_budget,
22
24
  )
23
25
  from .corpus import Corpus
@@ -568,7 +570,11 @@ def cmd_context_pack_build(arguments: argparse.Namespace) -> int:
568
570
  )
569
571
  retrieval_result = RetrievalResult.model_validate_json(input_text)
570
572
  join_with = bytes(arguments.join_with, "utf-8").decode("unicode_escape")
571
- policy = ContextPackPolicy(join_with=join_with)
573
+ policy = ContextPackPolicy(
574
+ join_with=join_with,
575
+ ordering=arguments.ordering,
576
+ include_metadata=arguments.include_metadata,
577
+ )
572
578
  context_pack = build_context_pack(retrieval_result, policy=policy)
573
579
  if arguments.max_tokens is not None:
574
580
  context_pack = fit_context_pack_to_token_budget(
@@ -576,6 +582,12 @@ def cmd_context_pack_build(arguments: argparse.Namespace) -> int:
576
582
  policy=policy,
577
583
  token_budget=TokenBudget(max_tokens=int(arguments.max_tokens)),
578
584
  )
585
+ if arguments.max_characters is not None:
586
+ context_pack = fit_context_pack_to_character_budget(
587
+ context_pack,
588
+ policy=policy,
589
+ character_budget=CharacterBudget(max_characters=int(arguments.max_characters)),
590
+ )
579
591
  print(
580
592
  json.dumps(
581
593
  {
@@ -921,12 +933,29 @@ def build_parser() -> argparse.ArgumentParser:
921
933
  default="\\n\\n",
922
934
  help="Separator between evidence blocks (escape sequences supported, default is two newlines).",
923
935
  )
936
+ p_context_pack_build.add_argument(
937
+ "--ordering",
938
+ choices=["rank", "score", "source"],
939
+ default="rank",
940
+ help="Evidence ordering policy (rank, score, source).",
941
+ )
942
+ p_context_pack_build.add_argument(
943
+ "--include-metadata",
944
+ action="store_true",
945
+ help="Include evidence metadata in each context pack block.",
946
+ )
924
947
  p_context_pack_build.add_argument(
925
948
  "--max-tokens",
926
949
  default=None,
927
950
  type=int,
928
951
  help="Optional token budget for the final context pack using the naive-whitespace tokenizer.",
929
952
  )
953
+ p_context_pack_build.add_argument(
954
+ "--max-characters",
955
+ default=None,
956
+ type=int,
957
+ help="Optional character budget for the final context pack.",
958
+ )
930
959
  p_context_pack_build.set_defaults(func=cmd_context_pack_build)
931
960
 
932
961
  p_eval = sub.add_parser("eval", help="Evaluate a run against a dataset.")