biblicus 0.11.0__tar.gz → 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {biblicus-0.11.0/src/biblicus.egg-info → biblicus-0.12.0}/PKG-INFO +1 -1
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/CONTEXT_PACK.md +37 -1
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/FEATURE_INDEX.md +1 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/RETRIEVAL_QUALITY.md +1 -1
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/ROADMAP.md +15 -36
- {biblicus-0.11.0 → biblicus-0.12.0}/features/context_pack_cli.feature +25 -0
- biblicus-0.12.0/features/context_pack_policies.feature +92 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/cli_steps.py +51 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/context_pack_steps.py +88 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/pyproject.toml +1 -1
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/__init__.py +1 -1
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/cli.py +30 -1
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/context.py +138 -4
- {biblicus-0.11.0 → biblicus-0.12.0/src/biblicus.egg-info}/PKG-INFO +1 -1
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus.egg-info/SOURCES.txt +1 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/LICENSE +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/MANIFEST.in +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/README.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/THIRD_PARTY_NOTICES.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/datasets/wikipedia_mini.json +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/ANALYSIS.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/ARCHITECTURE.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/BACKENDS.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/CORPUS.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/CORPUS_DESIGN.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/DEMOS.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/EXTRACTION.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/KNOWLEDGE_BASE.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/PROFILING.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/RETRIEVAL.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/RETRIEVAL_EVALUATION.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/STT.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/TESTING.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/TOPIC_MODELING.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/USER_CONFIGURATION.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/api.rst +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/backends/index.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/backends/scan.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/backends/sqlite-full-text-search.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/conf.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/index.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/ocr/index.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/ocr/paddleocr-vl.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/ocr/rapidocr.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/pipeline-utilities/index.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/pipeline-utilities/pipeline.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/pipeline-utilities/select-longest.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/pipeline-utilities/select-override.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/pipeline-utilities/select-smart-override.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/pipeline-utilities/select-text.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/speech-to-text/deepgram.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/speech-to-text/index.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/speech-to-text/openai.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/text-document/index.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/text-document/markitdown.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/text-document/metadata.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/text-document/pass-through.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/text-document/pdf.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/text-document/unstructured.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/vlm-document/docling-granite.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/vlm-document/docling-smol.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/extractors/vlm-document/index.md +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/docs/index.rst +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/analysis_schema.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/backend_validation.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/biblicus_corpus.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/cli_entrypoint.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/cli_parsing.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/cli_step_spec_parsing.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/content_sniffing.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/context_pack.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/corpus_edge_cases.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/corpus_identity.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/corpus_purge.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/crawl.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/docling_granite_extractor.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/docling_smol_extractor.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/environment.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/error_cases.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/evaluation.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/evidence_processing.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/extraction_error_handling.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/extraction_run_lifecycle.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/extraction_selection.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/extraction_selection_longest.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/extractor_pipeline.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/extractor_validation.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/frontmatter.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/hook_config_validation.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/hook_error_handling.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/import_tree.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/inference_backend.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/ingest_sources.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_audio_samples.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_image_samples.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_mixed_corpus.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_mixed_extraction.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_ocr_image_extraction.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_pdf_retrieval.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_pdf_samples.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_unstructured_extraction.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/integration_wikipedia.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/knowledge_base.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/lifecycle_hooks.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/markitdown_extractor.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/model_validation.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/ocr_extractor.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/paddleocr_vl_extractor.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/paddleocr_vl_parse_api_response.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/pdf_text_extraction.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/profiling.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/python_api.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/python_hook_logging.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/query_processing.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/recipe_file_extraction.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/retrieval_budget.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/retrieval_quality.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/retrieval_scan.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/retrieval_sqlite_full_text_search.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/retrieval_uses_extraction_run.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/retrieval_utilities.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/select_override.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/smart_override_selection.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/source_loading.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/analysis_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/backend_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/cli_parsing_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/crawl_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/deepgram_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/docling_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/evidence_processing_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/extraction_run_lifecycle_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/extraction_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/extractor_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/frontmatter_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/inference_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/knowledge_base_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/markitdown_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/model_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/openai_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/paddleocr_mock_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/paddleocr_vl_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/paddleocr_vl_unit_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/pdf_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/profiling_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/python_api_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/rapidocr_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/requests_mock_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/retrieval_quality_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/retrieval_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/stt_deepgram_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/stt_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/topic_modeling_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/unstructured_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/steps/user_config_steps.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/streaming_ingest.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/stt_deepgram_extractor.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/stt_extractor.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/text_extraction_runs.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/token_budget.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/topic_modeling.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/unstructured_extractor.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/features/user_config.feature +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/scripts/download_ag_news.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/scripts/download_audio_samples.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/scripts/download_image_samples.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/scripts/download_mixed_samples.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/scripts/download_pdf_samples.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/scripts/download_wikipedia.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/scripts/profiling_demo.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/scripts/readme_end_to_end_demo.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/scripts/test.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/scripts/topic_modeling_integration.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/scripts/wikipedia_rag_demo.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/setup.cfg +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/__main__.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/_vendor/dotyaml/__init__.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/_vendor/dotyaml/interpolation.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/_vendor/dotyaml/loader.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/_vendor/dotyaml/transformer.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/__init__.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/base.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/llm.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/models.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/profiling.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/schema.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/analysis/topic_modeling.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/backends/__init__.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/backends/base.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/backends/hybrid.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/backends/scan.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/backends/sqlite_full_text_search.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/backends/vector.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/constants.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/corpus.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/crawl.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/errors.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/evaluation.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/evidence_processing.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extraction.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/__init__.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/base.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/deepgram_stt.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/docling_granite_text.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/docling_smol_text.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/markitdown_text.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/metadata_text.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/openai_stt.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/paddleocr_vl_text.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/pass_through_text.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/pdf_text.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/pipeline.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/rapidocr_text.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/select_longest_text.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/select_override.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/select_smart_override.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/select_text.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/extractors/unstructured_text.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/frontmatter.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/hook_logging.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/hook_manager.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/hooks.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/ignore.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/inference.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/knowledge_base.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/models.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/retrieval.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/sources.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/time.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/uris.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus/user_config.py +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus.egg-info/dependency_links.txt +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus.egg-info/entry_points.txt +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus.egg-info/requires.txt +0 -0
- {biblicus-0.11.0 → biblicus-0.12.0}/src/biblicus.egg-info/top_level.txt +0 -0
|
@@ -23,13 +23,49 @@ context_pack = build_context_pack(result, policy=policy)
|
|
|
23
23
|
print(context_pack.text)
|
|
24
24
|
```
|
|
25
25
|
|
|
26
|
+
## Policy surfaces
|
|
27
|
+
|
|
28
|
+
Context pack policies make ordering and formatting explicit.
|
|
29
|
+
|
|
30
|
+
### Ordering
|
|
31
|
+
|
|
32
|
+
Use `ordering` to control how evidence blocks are arranged before joining:
|
|
33
|
+
|
|
34
|
+
- `rank`: use the evidence rank as provided by retrieval.
|
|
35
|
+
- `score`: sort by score (descending) and then item identifier.
|
|
36
|
+
- `source`: group by source uniform resource identifier, then sort by score.
|
|
37
|
+
|
|
38
|
+
### Metadata inclusion
|
|
39
|
+
|
|
40
|
+
Set `include_metadata=True` to prepend metadata to each block. Metadata includes:
|
|
41
|
+
|
|
42
|
+
- `item_id`
|
|
43
|
+
- `source_uri`
|
|
44
|
+
- `score`
|
|
45
|
+
- `stage`
|
|
46
|
+
|
|
47
|
+
### Character budgets
|
|
48
|
+
|
|
49
|
+
Character budgets drop trailing blocks until the context pack fits the specified limit. This keeps context shaping
|
|
50
|
+
deterministic without relying on a tokenizer.
|
|
51
|
+
|
|
52
|
+
In Python:
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from biblicus.context import CharacterBudget, ContextPackPolicy, fit_context_pack_to_character_budget
|
|
56
|
+
|
|
57
|
+
policy = ContextPackPolicy(join_with="\n\n", ordering="score", include_metadata=True)
|
|
58
|
+
fitted = fit_context_pack_to_character_budget(context_pack, policy=policy, character_budget=CharacterBudget(max_characters=500))
|
|
59
|
+
print(fitted.text)
|
|
60
|
+
```
|
|
61
|
+
|
|
26
62
|
## Command-line interface
|
|
27
63
|
|
|
28
64
|
The command-line interface can build a context pack from a retrieval result by reading JavaScript Object Notation from standard input.
|
|
29
65
|
|
|
30
66
|
```bash
|
|
31
67
|
biblicus query --corpus corpora/example --query "primary button style preference" \\
|
|
32
|
-
| biblicus context-pack build
|
|
68
|
+
| biblicus context-pack build --ordering score --include-metadata --max-characters 500
|
|
33
69
|
```
|
|
34
70
|
|
|
35
71
|
## What context pack building does
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Retrieval quality upgrades
|
|
2
2
|
|
|
3
3
|
This document describes the retrieval quality upgrades available in Biblicus. It is a reference for how retrieval
|
|
4
|
-
quality is expressed in runs and
|
|
4
|
+
quality is expressed in runs and how to interpret the signals in artifacts and evidence.
|
|
5
5
|
|
|
6
6
|
## Goals
|
|
7
7
|
|
|
@@ -17,49 +17,27 @@ If you are looking for what already exists, start with:
|
|
|
17
17
|
- Raw corpus items remain readable, portable files.
|
|
18
18
|
- Derived artifacts are stored under the corpus and can coexist for multiple implementations.
|
|
19
19
|
|
|
20
|
-
##
|
|
20
|
+
## Completed foundations
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
These are the capability slices that already exist and have end-to-end behavior specifications.
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
- A dataset authoring workflow that supports small hand-labeled sets and larger synthetic sets.
|
|
27
|
-
- A report that includes per-query diagnostics and a clear summary.
|
|
28
|
-
|
|
29
|
-
Acceptance checks:
|
|
30
|
-
|
|
31
|
-
- Dataset formats are versioned when they change.
|
|
32
|
-
- Reports remain deterministic for the same inputs.
|
|
33
|
-
|
|
34
|
-
## Next: retrieval quality upgrades
|
|
35
|
-
|
|
36
|
-
Goal: make retrieval relevance stronger while keeping deterministic baselines and clear evaluation.
|
|
37
|
-
|
|
38
|
-
Deliverables:
|
|
39
|
-
|
|
40
|
-
- A tuned lexical baseline (for example: BM25 configuration, n-grams, field weighting, stop word controls).
|
|
41
|
-
- A reranking stage that can refine top-N results with either a cross-encoder or an LLM re-ranker.
|
|
42
|
-
- A hybrid retrieval mode that combines lexical signals with embeddings and exposes weights explicitly.
|
|
43
|
-
|
|
44
|
-
Acceptance checks:
|
|
24
|
+
### Retrieval evaluation and datasets
|
|
45
25
|
|
|
46
|
-
-
|
|
47
|
-
-
|
|
26
|
+
- Dataset authoring workflow for small hand-labeled sets and larger synthetic sets.
|
|
27
|
+
- Evaluation reports with per-query diagnostics and summary metrics.
|
|
28
|
+
- Versioned dataset formats and deterministic reports for stable inputs.
|
|
48
29
|
|
|
49
|
-
|
|
30
|
+
### Retrieval quality upgrades
|
|
50
31
|
|
|
51
|
-
|
|
32
|
+
- Tuned lexical baseline with BM25, n-gram range controls, and stop word policies.
|
|
33
|
+
- Reranking stage for top-N candidates with explicit stage metadata.
|
|
34
|
+
- Hybrid retrieval with explicit fusion weights and stage-level scores.
|
|
52
35
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
- A clear set of context pack policy variants (formatting, ordering, metadata inclusion).
|
|
56
|
-
- Token budget strategies that can use a real tokenizer.
|
|
57
|
-
- Documentation that explains where context shaping fits in the pipeline.
|
|
58
|
-
|
|
59
|
-
Acceptance checks:
|
|
36
|
+
### Context pack policy surfaces
|
|
60
37
|
|
|
61
|
-
-
|
|
62
|
-
-
|
|
38
|
+
- Policy variants for formatting, ordering, and metadata inclusion.
|
|
39
|
+
- Token and character budget strategies with explicit selectors.
|
|
40
|
+
- Documentation and examples that show how policy choices change outputs.
|
|
63
41
|
|
|
64
42
|
## Next: extraction evaluation harness
|
|
65
43
|
|
|
@@ -82,6 +60,7 @@ Goal: provide lightweight analysis utilities that summarize corpus themes and gu
|
|
|
82
60
|
|
|
83
61
|
Deliverables:
|
|
84
62
|
|
|
63
|
+
- Basic corpus profiling with deterministic metrics for raw items and extracted text.
|
|
85
64
|
- Hidden Markov modeling analysis for sequence-driven corpora.
|
|
86
65
|
- A way to compare analysis outputs across corpora or corpus snapshots.
|
|
87
66
|
|
|
@@ -23,6 +23,31 @@ Feature: Context pack command-line interface
|
|
|
23
23
|
one two three
|
|
24
24
|
"""
|
|
25
25
|
|
|
26
|
+
Scenario: Context pack build can include metadata
|
|
27
|
+
Given a retrieval result exists with sourced evidence:
|
|
28
|
+
| source_uri | score | text |
|
|
29
|
+
| source-a | 10.0 | alpha |
|
|
30
|
+
When I run "context-pack build" joining with "\n\n" ordering "score" and including metadata
|
|
31
|
+
Then the context pack build output text equals:
|
|
32
|
+
"""
|
|
33
|
+
item_id: item-1
|
|
34
|
+
source_uri: source-a
|
|
35
|
+
score: 10.0
|
|
36
|
+
stage: scan
|
|
37
|
+
alpha
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
Scenario: Context pack build can fit to a character budget
|
|
41
|
+
Given a retrieval result exists with evidence text:
|
|
42
|
+
| text |
|
|
43
|
+
| alpha |
|
|
44
|
+
| beta |
|
|
45
|
+
When I run "context-pack build" joining with "\n\n" and character budget 6
|
|
46
|
+
Then the context pack build output text equals:
|
|
47
|
+
"""
|
|
48
|
+
alpha
|
|
49
|
+
"""
|
|
50
|
+
|
|
26
51
|
Scenario: Context pack build fails without retrieval result on standard input
|
|
27
52
|
When I run "context-pack build" with empty standard input
|
|
28
53
|
Then the command fails with exit code 2
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
Feature: Context pack policies
|
|
2
|
+
Context pack policies control evidence ordering, metadata inclusion, and budgets.
|
|
3
|
+
|
|
4
|
+
Scenario: Score ordering sorts evidence by score
|
|
5
|
+
Given a retrieval result exists with scored evidence:
|
|
6
|
+
| score | text |
|
|
7
|
+
| 1.0 | beta |
|
|
8
|
+
| 5.0 | alpha |
|
|
9
|
+
When I build a context pack from that retrieval result with policy:
|
|
10
|
+
| key | value |
|
|
11
|
+
| join_with | \n\n |
|
|
12
|
+
| ordering | score |
|
|
13
|
+
| include_metadata | false |
|
|
14
|
+
Then the context pack text equals:
|
|
15
|
+
"""
|
|
16
|
+
alpha
|
|
17
|
+
|
|
18
|
+
beta
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
Scenario: Source ordering groups evidence by source
|
|
22
|
+
Given a retrieval result exists with sourced evidence:
|
|
23
|
+
| source_uri | score | text |
|
|
24
|
+
| source-b | 1.0 | beta |
|
|
25
|
+
| source-a | 2.0 | alpha |
|
|
26
|
+
| source-a | 1.0 | delta |
|
|
27
|
+
When I build a context pack from that retrieval result with policy:
|
|
28
|
+
| key | value |
|
|
29
|
+
| join_with | \n\n |
|
|
30
|
+
| ordering | source |
|
|
31
|
+
| include_metadata | false |
|
|
32
|
+
Then the context pack text equals:
|
|
33
|
+
"""
|
|
34
|
+
alpha
|
|
35
|
+
|
|
36
|
+
delta
|
|
37
|
+
|
|
38
|
+
beta
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
Scenario: Metadata inclusion prepends block metadata
|
|
42
|
+
Given a retrieval result exists with sourced evidence:
|
|
43
|
+
| source_uri | score | text |
|
|
44
|
+
| source-a | 10.0 | alpha |
|
|
45
|
+
When I build a context pack from that retrieval result with policy:
|
|
46
|
+
| key | value |
|
|
47
|
+
| join_with | \n\n |
|
|
48
|
+
| ordering | rank |
|
|
49
|
+
| include_metadata | true |
|
|
50
|
+
Then the context pack text equals:
|
|
51
|
+
"""
|
|
52
|
+
item_id: item-1
|
|
53
|
+
source_uri: source-a
|
|
54
|
+
score: 10.0
|
|
55
|
+
stage: scan
|
|
56
|
+
alpha
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
Scenario: Character budgets drop trailing blocks
|
|
60
|
+
Given a retrieval result exists with evidence text:
|
|
61
|
+
| text |
|
|
62
|
+
| alpha |
|
|
63
|
+
| beta |
|
|
64
|
+
When I build a context pack from that retrieval result with policy:
|
|
65
|
+
| key | value |
|
|
66
|
+
| join_with | \n\n |
|
|
67
|
+
| ordering | rank |
|
|
68
|
+
| include_metadata | false |
|
|
69
|
+
And I fit the context pack to a character budget of 6 characters
|
|
70
|
+
Then the context pack text equals:
|
|
71
|
+
"""
|
|
72
|
+
alpha
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
Scenario: Character budgets can produce empty context packs
|
|
76
|
+
Given a retrieval result exists with evidence text:
|
|
77
|
+
| text |
|
|
78
|
+
| alpha |
|
|
79
|
+
When I build a context pack from that retrieval result with policy:
|
|
80
|
+
| key | value |
|
|
81
|
+
| join_with | \n\n |
|
|
82
|
+
| ordering | rank |
|
|
83
|
+
| include_metadata | false |
|
|
84
|
+
And I fit the context pack to a character budget of 1 characters
|
|
85
|
+
Then the context pack text is empty
|
|
86
|
+
|
|
87
|
+
Scenario: Unknown ordering raises a policy error
|
|
88
|
+
Given a retrieval result exists with evidence text:
|
|
89
|
+
| text |
|
|
90
|
+
| alpha |
|
|
91
|
+
When I attempt to build a context pack with invalid ordering "mystery"
|
|
92
|
+
Then the context pack ordering error mentions "Unknown context pack ordering"
|
|
@@ -97,6 +97,57 @@ def step_context_pack_build_with_token_budget_from_standard_input(
|
|
|
97
97
|
context.context_pack_build_output = json.loads(result.stdout)
|
|
98
98
|
|
|
99
99
|
|
|
100
|
+
@when(
|
|
101
|
+
'I run "context-pack build" joining with "{join_with}" ordering "{ordering}" and including metadata'
|
|
102
|
+
)
|
|
103
|
+
def step_context_pack_build_with_metadata_from_standard_input(
|
|
104
|
+
context, join_with: str, ordering: str
|
|
105
|
+
) -> None:
|
|
106
|
+
decoded_join_with = bytes(join_with, "utf-8").decode("unicode_escape")
|
|
107
|
+
retrieval_result_json = context.retrieval_result.model_dump_json(indent=2)
|
|
108
|
+
result = run_biblicus(
|
|
109
|
+
context,
|
|
110
|
+
[
|
|
111
|
+
"context-pack",
|
|
112
|
+
"build",
|
|
113
|
+
"--join-with",
|
|
114
|
+
decoded_join_with,
|
|
115
|
+
"--ordering",
|
|
116
|
+
ordering,
|
|
117
|
+
"--include-metadata",
|
|
118
|
+
],
|
|
119
|
+
input_text=retrieval_result_json,
|
|
120
|
+
)
|
|
121
|
+
context.last_result = result
|
|
122
|
+
assert result.returncode == 0, result.stderr
|
|
123
|
+
context.context_pack_build_output = json.loads(result.stdout)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@when(
|
|
127
|
+
'I run "context-pack build" joining with "{join_with}" and character budget {max_characters:d}'
|
|
128
|
+
)
|
|
129
|
+
def step_context_pack_build_with_character_budget_from_standard_input(
|
|
130
|
+
context, join_with: str, max_characters: int
|
|
131
|
+
) -> None:
|
|
132
|
+
decoded_join_with = bytes(join_with, "utf-8").decode("unicode_escape")
|
|
133
|
+
retrieval_result_json = context.retrieval_result.model_dump_json(indent=2)
|
|
134
|
+
result = run_biblicus(
|
|
135
|
+
context,
|
|
136
|
+
[
|
|
137
|
+
"context-pack",
|
|
138
|
+
"build",
|
|
139
|
+
"--join-with",
|
|
140
|
+
decoded_join_with,
|
|
141
|
+
"--max-characters",
|
|
142
|
+
str(max_characters),
|
|
143
|
+
],
|
|
144
|
+
input_text=retrieval_result_json,
|
|
145
|
+
)
|
|
146
|
+
context.last_result = result
|
|
147
|
+
assert result.returncode == 0, result.stderr
|
|
148
|
+
context.context_pack_build_output = json.loads(result.stdout)
|
|
149
|
+
|
|
150
|
+
|
|
100
151
|
@when('I run "context-pack build" with empty standard input')
|
|
101
152
|
def step_context_pack_build_with_empty_standard_input(context) -> None:
|
|
102
153
|
result = run_biblicus(context, ["context-pack", "build", "--join-with", "\n\n"], input_text="")
|
|
@@ -3,9 +3,11 @@ from __future__ import annotations
|
|
|
3
3
|
from behave import given, then, when
|
|
4
4
|
|
|
5
5
|
from biblicus.context import (
|
|
6
|
+
CharacterBudget,
|
|
6
7
|
ContextPackPolicy,
|
|
7
8
|
TokenBudget,
|
|
8
9
|
build_context_pack,
|
|
10
|
+
fit_context_pack_to_character_budget,
|
|
9
11
|
fit_context_pack_to_token_budget,
|
|
10
12
|
)
|
|
11
13
|
from biblicus.models import Evidence, QueryBudget, RetrievalResult
|
|
@@ -80,6 +82,41 @@ def given_retrieval_result_exists_with_scored_evidence(context) -> None:
|
|
|
80
82
|
)
|
|
81
83
|
|
|
82
84
|
|
|
85
|
+
@given("a retrieval result exists with sourced evidence:")
|
|
86
|
+
def given_retrieval_result_exists_with_sourced_evidence(context) -> None:
|
|
87
|
+
evidence_items = []
|
|
88
|
+
for rank_value, row in enumerate(context.table, start=1):
|
|
89
|
+
score_value = float(row["score"])
|
|
90
|
+
source_uri_value = row["source_uri"]
|
|
91
|
+
text_value = row["text"]
|
|
92
|
+
content_ref_value = None if str(text_value).strip() else "content-ref"
|
|
93
|
+
evidence_items.append(
|
|
94
|
+
Evidence(
|
|
95
|
+
item_id=f"item-{rank_value}",
|
|
96
|
+
source_uri=source_uri_value,
|
|
97
|
+
media_type="text/plain",
|
|
98
|
+
score=score_value,
|
|
99
|
+
rank=rank_value,
|
|
100
|
+
text=text_value,
|
|
101
|
+
content_ref=content_ref_value,
|
|
102
|
+
stage="scan",
|
|
103
|
+
recipe_id="recipe",
|
|
104
|
+
run_id="run",
|
|
105
|
+
)
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
context.retrieval_result = RetrievalResult(
|
|
109
|
+
query_text="query",
|
|
110
|
+
budget=QueryBudget(max_total_items=10),
|
|
111
|
+
run_id="run",
|
|
112
|
+
recipe_id="recipe",
|
|
113
|
+
backend_id="scan",
|
|
114
|
+
generated_at=utc_now_iso(),
|
|
115
|
+
evidence=evidence_items,
|
|
116
|
+
stats={},
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
|
|
83
120
|
@given("the second evidence item has no text payload")
|
|
84
121
|
def given_second_evidence_item_has_no_text_payload(context) -> None:
|
|
85
122
|
context.retrieval_result.evidence[1] = context.retrieval_result.evidence[1].model_copy(
|
|
@@ -96,6 +133,31 @@ def when_build_context_pack_from_retrieval_result(context, join_with: str) -> No
|
|
|
96
133
|
)
|
|
97
134
|
|
|
98
135
|
|
|
136
|
+
@when("I build a context pack from that retrieval result with policy:")
|
|
137
|
+
def when_build_context_pack_from_retrieval_result_with_policy(context) -> None:
|
|
138
|
+
settings = {}
|
|
139
|
+
for row in context.table:
|
|
140
|
+
if "key" in row.headings and "value" in row.headings:
|
|
141
|
+
key = row["key"]
|
|
142
|
+
value = row["value"]
|
|
143
|
+
else:
|
|
144
|
+
key = row[0]
|
|
145
|
+
value = row[1]
|
|
146
|
+
settings[str(key).strip()] = str(value).strip()
|
|
147
|
+
join_with_raw = settings.get("join_with", "\\n\\n")
|
|
148
|
+
ordering = settings.get("ordering", "rank")
|
|
149
|
+
include_metadata = settings.get("include_metadata", "false").lower() == "true"
|
|
150
|
+
decoded_join_with = bytes(join_with_raw, "utf-8").decode("unicode_escape")
|
|
151
|
+
context.context_pack_policy = ContextPackPolicy(
|
|
152
|
+
join_with=decoded_join_with,
|
|
153
|
+
ordering=ordering,
|
|
154
|
+
include_metadata=include_metadata,
|
|
155
|
+
)
|
|
156
|
+
context.context_pack = build_context_pack(
|
|
157
|
+
context.retrieval_result, policy=context.context_pack_policy
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
|
|
99
161
|
@then("the context pack text equals:")
|
|
100
162
|
def then_context_pack_text_equals(context) -> None:
|
|
101
163
|
assert context.context_pack.text == context.text
|
|
@@ -110,6 +172,32 @@ def when_fit_context_pack_to_token_budget(context, max_tokens: int) -> None:
|
|
|
110
172
|
)
|
|
111
173
|
|
|
112
174
|
|
|
175
|
+
@when("I fit the context pack to a character budget of {max_characters:d} characters")
|
|
176
|
+
def when_fit_context_pack_to_character_budget(context, max_characters: int) -> None:
|
|
177
|
+
context.context_pack = fit_context_pack_to_character_budget(
|
|
178
|
+
context.context_pack,
|
|
179
|
+
policy=context.context_pack_policy,
|
|
180
|
+
character_budget=CharacterBudget(max_characters=max_characters),
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@when('I attempt to build a context pack with invalid ordering "{ordering}"')
|
|
185
|
+
def when_attempt_build_context_pack_with_invalid_ordering(context, ordering: str) -> None:
|
|
186
|
+
policy = ContextPackPolicy(join_with="\n\n").model_copy(update={"ordering": ordering})
|
|
187
|
+
try:
|
|
188
|
+
_ = build_context_pack(context.retrieval_result, policy=policy)
|
|
189
|
+
context.ordering_error = None
|
|
190
|
+
except ValueError as exc:
|
|
191
|
+
context.ordering_error = exc
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@then('the context pack ordering error mentions "{message}"')
|
|
195
|
+
def then_context_pack_ordering_error_mentions(context, message: str) -> None:
|
|
196
|
+
error = getattr(context, "ordering_error", None)
|
|
197
|
+
assert error is not None
|
|
198
|
+
assert message in str(error)
|
|
199
|
+
|
|
200
|
+
|
|
113
201
|
@then("the context pack text is empty")
|
|
114
202
|
def then_context_pack_text_is_empty(context) -> None:
|
|
115
203
|
assert context.context_pack.text == ""
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "biblicus"
|
|
7
|
-
version = "0.11.0"
|
|
7
|
+
version = "0.12.0"
|
|
8
8
|
description = "Command line interface and Python library for corpus ingestion, retrieval, and evaluation."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -15,9 +15,11 @@ from pydantic import ValidationError
|
|
|
15
15
|
from .analysis import get_analysis_backend
|
|
16
16
|
from .backends import get_backend
|
|
17
17
|
from .context import (
|
|
18
|
+
CharacterBudget,
|
|
18
19
|
ContextPackPolicy,
|
|
19
20
|
TokenBudget,
|
|
20
21
|
build_context_pack,
|
|
22
|
+
fit_context_pack_to_character_budget,
|
|
21
23
|
fit_context_pack_to_token_budget,
|
|
22
24
|
)
|
|
23
25
|
from .corpus import Corpus
|
|
@@ -568,7 +570,11 @@ def cmd_context_pack_build(arguments: argparse.Namespace) -> int:
|
|
|
568
570
|
)
|
|
569
571
|
retrieval_result = RetrievalResult.model_validate_json(input_text)
|
|
570
572
|
join_with = bytes(arguments.join_with, "utf-8").decode("unicode_escape")
|
|
571
|
-
policy = ContextPackPolicy(join_with=join_with)
|
|
573
|
+
policy = ContextPackPolicy(
|
|
574
|
+
join_with=join_with,
|
|
575
|
+
ordering=arguments.ordering,
|
|
576
|
+
include_metadata=arguments.include_metadata,
|
|
577
|
+
)
|
|
572
578
|
context_pack = build_context_pack(retrieval_result, policy=policy)
|
|
573
579
|
if arguments.max_tokens is not None:
|
|
574
580
|
context_pack = fit_context_pack_to_token_budget(
|
|
@@ -576,6 +582,12 @@ def cmd_context_pack_build(arguments: argparse.Namespace) -> int:
|
|
|
576
582
|
policy=policy,
|
|
577
583
|
token_budget=TokenBudget(max_tokens=int(arguments.max_tokens)),
|
|
578
584
|
)
|
|
585
|
+
if arguments.max_characters is not None:
|
|
586
|
+
context_pack = fit_context_pack_to_character_budget(
|
|
587
|
+
context_pack,
|
|
588
|
+
policy=policy,
|
|
589
|
+
character_budget=CharacterBudget(max_characters=int(arguments.max_characters)),
|
|
590
|
+
)
|
|
579
591
|
print(
|
|
580
592
|
json.dumps(
|
|
581
593
|
{
|
|
@@ -921,12 +933,29 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
921
933
|
default="\\n\\n",
|
|
922
934
|
help="Separator between evidence blocks (escape sequences supported, default is two newlines).",
|
|
923
935
|
)
|
|
936
|
+
p_context_pack_build.add_argument(
|
|
937
|
+
"--ordering",
|
|
938
|
+
choices=["rank", "score", "source"],
|
|
939
|
+
default="rank",
|
|
940
|
+
help="Evidence ordering policy (rank, score, source).",
|
|
941
|
+
)
|
|
942
|
+
p_context_pack_build.add_argument(
|
|
943
|
+
"--include-metadata",
|
|
944
|
+
action="store_true",
|
|
945
|
+
help="Include evidence metadata in each context pack block.",
|
|
946
|
+
)
|
|
924
947
|
p_context_pack_build.add_argument(
|
|
925
948
|
"--max-tokens",
|
|
926
949
|
default=None,
|
|
927
950
|
type=int,
|
|
928
951
|
help="Optional token budget for the final context pack using the naive-whitespace tokenizer.",
|
|
929
952
|
)
|
|
953
|
+
p_context_pack_build.add_argument(
|
|
954
|
+
"--max-characters",
|
|
955
|
+
default=None,
|
|
956
|
+
type=int,
|
|
957
|
+
help="Optional character budget for the final context pack.",
|
|
958
|
+
)
|
|
930
959
|
p_context_pack_build.set_defaults(func=cmd_context_pack_build)
|
|
931
960
|
|
|
932
961
|
p_eval = sub.add_parser("eval", help="Evaluate a run against a dataset.")
|