biblicus 0.6.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. {biblicus-0.6.0/src/biblicus.egg-info → biblicus-0.7.0}/PKG-INFO +20 -3
  2. {biblicus-0.6.0 → biblicus-0.7.0}/README.md +17 -2
  3. {biblicus-0.6.0 → biblicus-0.7.0}/docs/DEMOS.md +19 -0
  4. {biblicus-0.6.0 → biblicus-0.7.0}/docs/EXTRACTION.md +21 -0
  5. {biblicus-0.6.0 → biblicus-0.7.0}/docs/FEATURE_INDEX.md +2 -0
  6. {biblicus-0.6.0 → biblicus-0.7.0}/docs/ROADMAP.md +15 -0
  7. {biblicus-0.6.0 → biblicus-0.7.0}/features/environment.py +26 -0
  8. biblicus-0.7.0/features/markitdown_extractor.feature +99 -0
  9. biblicus-0.7.0/features/steps/markitdown_steps.py +173 -0
  10. {biblicus-0.6.0 → biblicus-0.7.0}/pyproject.toml +5 -1
  11. {biblicus-0.6.0 → biblicus-0.7.0}/scripts/test.py +15 -4
  12. biblicus-0.7.0/scripts/wikipedia_rag_demo.py +212 -0
  13. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/__init__.py +1 -1
  14. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/extractors/__init__.py +2 -0
  15. biblicus-0.7.0/src/biblicus/extractors/markitdown_text.py +128 -0
  16. {biblicus-0.6.0 → biblicus-0.7.0/src/biblicus.egg-info}/PKG-INFO +20 -3
  17. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus.egg-info/SOURCES.txt +4 -0
  18. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus.egg-info/requires.txt +5 -0
  19. {biblicus-0.6.0 → biblicus-0.7.0}/LICENSE +0 -0
  20. {biblicus-0.6.0 → biblicus-0.7.0}/MANIFEST.in +0 -0
  21. {biblicus-0.6.0 → biblicus-0.7.0}/THIRD_PARTY_NOTICES.md +0 -0
  22. {biblicus-0.6.0 → biblicus-0.7.0}/datasets/wikipedia_mini.json +0 -0
  23. {biblicus-0.6.0 → biblicus-0.7.0}/docs/ARCHITECTURE.md +0 -0
  24. {biblicus-0.6.0 → biblicus-0.7.0}/docs/BACKENDS.md +0 -0
  25. {biblicus-0.6.0 → biblicus-0.7.0}/docs/CONTEXT_PACK.md +0 -0
  26. {biblicus-0.6.0 → biblicus-0.7.0}/docs/CORPUS.md +0 -0
  27. {biblicus-0.6.0 → biblicus-0.7.0}/docs/CORPUS_DESIGN.md +0 -0
  28. {biblicus-0.6.0 → biblicus-0.7.0}/docs/KNOWLEDGE_BASE.md +0 -0
  29. {biblicus-0.6.0 → biblicus-0.7.0}/docs/TESTING.md +0 -0
  30. {biblicus-0.6.0 → biblicus-0.7.0}/docs/USER_CONFIGURATION.md +0 -0
  31. {biblicus-0.6.0 → biblicus-0.7.0}/docs/api.rst +0 -0
  32. {biblicus-0.6.0 → biblicus-0.7.0}/docs/conf.py +0 -0
  33. {biblicus-0.6.0 → biblicus-0.7.0}/docs/index.rst +0 -0
  34. {biblicus-0.6.0 → biblicus-0.7.0}/features/backend_validation.feature +0 -0
  35. {biblicus-0.6.0 → biblicus-0.7.0}/features/biblicus_corpus.feature +0 -0
  36. {biblicus-0.6.0 → biblicus-0.7.0}/features/cli_entrypoint.feature +0 -0
  37. {biblicus-0.6.0 → biblicus-0.7.0}/features/cli_parsing.feature +0 -0
  38. {biblicus-0.6.0 → biblicus-0.7.0}/features/content_sniffing.feature +0 -0
  39. {biblicus-0.6.0 → biblicus-0.7.0}/features/context_pack.feature +0 -0
  40. {biblicus-0.6.0 → biblicus-0.7.0}/features/context_pack_cli.feature +0 -0
  41. {biblicus-0.6.0 → biblicus-0.7.0}/features/corpus_edge_cases.feature +0 -0
  42. {biblicus-0.6.0 → biblicus-0.7.0}/features/corpus_identity.feature +0 -0
  43. {biblicus-0.6.0 → biblicus-0.7.0}/features/corpus_purge.feature +0 -0
  44. {biblicus-0.6.0 → biblicus-0.7.0}/features/crawl.feature +0 -0
  45. {biblicus-0.6.0 → biblicus-0.7.0}/features/error_cases.feature +0 -0
  46. {biblicus-0.6.0 → biblicus-0.7.0}/features/evaluation.feature +0 -0
  47. {biblicus-0.6.0 → biblicus-0.7.0}/features/evidence_processing.feature +0 -0
  48. {biblicus-0.6.0 → biblicus-0.7.0}/features/extraction_error_handling.feature +0 -0
  49. {biblicus-0.6.0 → biblicus-0.7.0}/features/extraction_run_lifecycle.feature +0 -0
  50. {biblicus-0.6.0 → biblicus-0.7.0}/features/extraction_selection.feature +0 -0
  51. {biblicus-0.6.0 → biblicus-0.7.0}/features/extraction_selection_longest.feature +0 -0
  52. {biblicus-0.6.0 → biblicus-0.7.0}/features/extractor_pipeline.feature +0 -0
  53. {biblicus-0.6.0 → biblicus-0.7.0}/features/extractor_validation.feature +0 -0
  54. {biblicus-0.6.0 → biblicus-0.7.0}/features/frontmatter.feature +0 -0
  55. {biblicus-0.6.0 → biblicus-0.7.0}/features/hook_config_validation.feature +0 -0
  56. {biblicus-0.6.0 → biblicus-0.7.0}/features/hook_error_handling.feature +0 -0
  57. {biblicus-0.6.0 → biblicus-0.7.0}/features/import_tree.feature +0 -0
  58. {biblicus-0.6.0 → biblicus-0.7.0}/features/ingest_sources.feature +0 -0
  59. {biblicus-0.6.0 → biblicus-0.7.0}/features/integration_audio_samples.feature +0 -0
  60. {biblicus-0.6.0 → biblicus-0.7.0}/features/integration_image_samples.feature +0 -0
  61. {biblicus-0.6.0 → biblicus-0.7.0}/features/integration_mixed_corpus.feature +0 -0
  62. {biblicus-0.6.0 → biblicus-0.7.0}/features/integration_mixed_extraction.feature +0 -0
  63. {biblicus-0.6.0 → biblicus-0.7.0}/features/integration_ocr_image_extraction.feature +0 -0
  64. {biblicus-0.6.0 → biblicus-0.7.0}/features/integration_pdf_retrieval.feature +0 -0
  65. {biblicus-0.6.0 → biblicus-0.7.0}/features/integration_pdf_samples.feature +0 -0
  66. {biblicus-0.6.0 → biblicus-0.7.0}/features/integration_unstructured_extraction.feature +0 -0
  67. {biblicus-0.6.0 → biblicus-0.7.0}/features/integration_wikipedia.feature +0 -0
  68. {biblicus-0.6.0 → biblicus-0.7.0}/features/knowledge_base.feature +0 -0
  69. {biblicus-0.6.0 → biblicus-0.7.0}/features/lifecycle_hooks.feature +0 -0
  70. {biblicus-0.6.0 → biblicus-0.7.0}/features/model_validation.feature +0 -0
  71. {biblicus-0.6.0 → biblicus-0.7.0}/features/ocr_extractor.feature +0 -0
  72. {biblicus-0.6.0 → biblicus-0.7.0}/features/pdf_text_extraction.feature +0 -0
  73. {biblicus-0.6.0 → biblicus-0.7.0}/features/python_api.feature +0 -0
  74. {biblicus-0.6.0 → biblicus-0.7.0}/features/python_hook_logging.feature +0 -0
  75. {biblicus-0.6.0 → biblicus-0.7.0}/features/query_processing.feature +0 -0
  76. {biblicus-0.6.0 → biblicus-0.7.0}/features/retrieval_budget.feature +0 -0
  77. {biblicus-0.6.0 → biblicus-0.7.0}/features/retrieval_scan.feature +0 -0
  78. {biblicus-0.6.0 → biblicus-0.7.0}/features/retrieval_sqlite_full_text_search.feature +0 -0
  79. {biblicus-0.6.0 → biblicus-0.7.0}/features/retrieval_uses_extraction_run.feature +0 -0
  80. {biblicus-0.6.0 → biblicus-0.7.0}/features/retrieval_utilities.feature +0 -0
  81. {biblicus-0.6.0 → biblicus-0.7.0}/features/source_loading.feature +0 -0
  82. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/backend_steps.py +0 -0
  83. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/cli_parsing_steps.py +0 -0
  84. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/cli_steps.py +0 -0
  85. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/context_pack_steps.py +0 -0
  86. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/crawl_steps.py +0 -0
  87. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/evidence_processing_steps.py +0 -0
  88. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/extraction_run_lifecycle_steps.py +0 -0
  89. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/extraction_steps.py +0 -0
  90. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/extractor_steps.py +0 -0
  91. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/frontmatter_steps.py +0 -0
  92. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/knowledge_base_steps.py +0 -0
  93. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/model_steps.py +0 -0
  94. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/openai_steps.py +0 -0
  95. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/pdf_steps.py +0 -0
  96. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/python_api_steps.py +0 -0
  97. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/rapidocr_steps.py +0 -0
  98. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/retrieval_steps.py +0 -0
  99. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/stt_steps.py +0 -0
  100. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/unstructured_steps.py +0 -0
  101. {biblicus-0.6.0 → biblicus-0.7.0}/features/steps/user_config_steps.py +0 -0
  102. {biblicus-0.6.0 → biblicus-0.7.0}/features/streaming_ingest.feature +0 -0
  103. {biblicus-0.6.0 → biblicus-0.7.0}/features/stt_extractor.feature +0 -0
  104. {biblicus-0.6.0 → biblicus-0.7.0}/features/text_extraction_runs.feature +0 -0
  105. {biblicus-0.6.0 → biblicus-0.7.0}/features/token_budget.feature +0 -0
  106. {biblicus-0.6.0 → biblicus-0.7.0}/features/unstructured_extractor.feature +0 -0
  107. {biblicus-0.6.0 → biblicus-0.7.0}/features/user_config.feature +0 -0
  108. {biblicus-0.6.0 → biblicus-0.7.0}/scripts/download_audio_samples.py +0 -0
  109. {biblicus-0.6.0 → biblicus-0.7.0}/scripts/download_image_samples.py +0 -0
  110. {biblicus-0.6.0 → biblicus-0.7.0}/scripts/download_mixed_samples.py +0 -0
  111. {biblicus-0.6.0 → biblicus-0.7.0}/scripts/download_pdf_samples.py +0 -0
  112. {biblicus-0.6.0 → biblicus-0.7.0}/scripts/download_wikipedia.py +0 -0
  113. {biblicus-0.6.0 → biblicus-0.7.0}/scripts/readme_end_to_end_demo.py +0 -0
  114. {biblicus-0.6.0 → biblicus-0.7.0}/setup.cfg +0 -0
  115. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/__main__.py +0 -0
  116. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/_vendor/dotyaml/__init__.py +0 -0
  117. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/_vendor/dotyaml/interpolation.py +0 -0
  118. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/_vendor/dotyaml/loader.py +0 -0
  119. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/_vendor/dotyaml/transformer.py +0 -0
  120. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/backends/__init__.py +0 -0
  121. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/backends/base.py +0 -0
  122. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/backends/scan.py +0 -0
  123. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/backends/sqlite_full_text_search.py +0 -0
  124. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/cli.py +0 -0
  125. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/constants.py +0 -0
  126. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/context.py +0 -0
  127. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/corpus.py +0 -0
  128. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/crawl.py +0 -0
  129. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/errors.py +0 -0
  130. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/evaluation.py +0 -0
  131. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/evidence_processing.py +0 -0
  132. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/extraction.py +0 -0
  133. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/extractors/base.py +0 -0
  134. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/extractors/metadata_text.py +0 -0
  135. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/extractors/openai_stt.py +0 -0
  136. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/extractors/pass_through_text.py +0 -0
  137. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/extractors/pdf_text.py +0 -0
  138. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/extractors/pipeline.py +0 -0
  139. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/extractors/rapidocr_text.py +0 -0
  140. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/extractors/select_longest_text.py +0 -0
  141. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/extractors/select_text.py +0 -0
  142. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/extractors/unstructured_text.py +0 -0
  143. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/frontmatter.py +0 -0
  144. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/hook_logging.py +0 -0
  145. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/hook_manager.py +0 -0
  146. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/hooks.py +0 -0
  147. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/ignore.py +0 -0
  148. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/knowledge_base.py +0 -0
  149. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/models.py +0 -0
  150. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/retrieval.py +0 -0
  151. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/sources.py +0 -0
  152. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/time.py +0 -0
  153. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/uris.py +0 -0
  154. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus/user_config.py +0 -0
  155. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus.egg-info/dependency_links.txt +0 -0
  156. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus.egg-info/entry_points.txt +0 -0
  157. {biblicus-0.6.0 → biblicus-0.7.0}/src/biblicus.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: biblicus
3
- Version: 0.6.0
3
+ Version: 0.7.0
4
4
  Summary: Command line interface and Python library for corpus ingestion, retrieval, and evaluation.
5
5
  License: MIT
6
6
  Requires-Python: >=3.9
@@ -25,6 +25,8 @@ Requires-Dist: unstructured>=0.12.0; extra == "unstructured"
25
25
  Requires-Dist: python-docx>=1.1.0; extra == "unstructured"
26
26
  Provides-Extra: ocr
27
27
  Requires-Dist: rapidocr-onnxruntime>=1.3.0; extra == "ocr"
28
+ Provides-Extra: markitdown
29
+ Requires-Dist: markitdown[all]>=0.1.0; python_version >= "3.10" and extra == "markitdown"
28
30
  Dynamic: license-file
29
31
 
30
32
  # Biblicus
@@ -67,7 +69,7 @@ If you want to run a real, executable version of this story, use `scripts/readme
67
69
  This simplified sequence diagram shows the same idea at a high level.
68
70
 
69
71
  ```mermaid
70
- %%{init: {"theme": "base", "themeVariables": {"primaryColor": "#f3e5f5", "primaryTextColor": "#111111", "primaryBorderColor": "#8e24aa", "lineColor": "#90a4ae", "secondaryColor": "#eceff1", "tertiaryColor": "#ffffff", "noteBkgColor": "#ffffff", "noteTextColor": "#111111", "actorBkg": "#f3e5f5", "actorBorder": "#8e24aa", "actorTextColor": "#111111"}}}%%
72
+ %%{init: {"theme": "base", "themeVariables": {"background": "#ffffff", "primaryColor": "#f3e5f5", "primaryTextColor": "#111111", "primaryBorderColor": "#8e24aa", "lineColor": "#90a4ae", "secondaryColor": "#eceff1", "tertiaryColor": "#ffffff", "noteBkgColor": "#ffffff", "noteTextColor": "#111111", "actorBkg": "#f3e5f5", "actorBorder": "#8e24aa", "actorTextColor": "#111111"}}}%%
71
73
  sequenceDiagram
72
74
  participant App as Your assistant code
73
75
  participant KB as Knowledge base
@@ -106,7 +108,7 @@ In a coding assistant, retrieval is often triggered by what the user is doing ri
106
108
  This diagram shows two sequential Biblicus calls. They are shown separately to make the boundaries explicit: retrieval returns evidence, and context pack building consumes evidence.
107
109
 
108
110
  ```mermaid
109
- %%{init: {"theme": "base", "themeVariables": {"primaryColor": "#f3e5f5", "primaryTextColor": "#111111", "primaryBorderColor": "#8e24aa", "lineColor": "#90a4ae", "secondaryColor": "#eceff1", "tertiaryColor": "#ffffff", "noteBkgColor": "#ffffff", "noteTextColor": "#111111", "actorBkg": "#f3e5f5", "actorBorder": "#8e24aa", "actorTextColor": "#111111"}}}%%
111
+ %%{init: {"theme": "base", "themeVariables": {"background": "#ffffff", "primaryColor": "#f3e5f5", "primaryTextColor": "#111111", "primaryBorderColor": "#8e24aa", "lineColor": "#90a4ae", "secondaryColor": "#eceff1", "tertiaryColor": "#ffffff", "noteBkgColor": "#ffffff", "noteTextColor": "#111111", "actorBkg": "#f3e5f5", "actorBorder": "#8e24aa", "actorTextColor": "#111111"}}}%%
110
112
  sequenceDiagram
111
113
  participant User
112
114
  participant App as Your assistant code
@@ -160,6 +162,7 @@ Some extractors are optional so the base install stays small.
160
162
  - Optical character recognition for images: `python3 -m pip install "biblicus[ocr]"`
161
163
  - Speech to text transcription: `python3 -m pip install "biblicus[openai]"` (requires an OpenAI API key in `~/.biblicus/config.yml` or `./.biblicus/config.yml`)
162
164
  - Broad document parsing fallback: `python3 -m pip install "biblicus[unstructured]"`
165
+ - MarkItDown document conversion (requires Python 3.10 or higher): `python3 -m pip install "biblicus[markitdown]"`
163
166
 
164
167
  ## Quick start
165
168
 
@@ -467,6 +470,20 @@ Two backends are included.
467
470
  - `scan` is a minimal baseline that scans raw items directly.
468
471
  - `sqlite-full-text-search` is a practical baseline that builds a full text search index in Sqlite.
469
472
 
473
+ ## Extraction backends
474
+
475
+ These extractors are built in. Optional ones require extra dependencies.
476
+
477
+ - `pass-through-text` reads text items and strips Markdown front matter.
478
+ - `metadata-text` turns catalog metadata into a small text artifact.
479
+ - `pdf-text` extracts text from Portable Document Format items with `pypdf`.
480
+ - `select-text` chooses one prior extraction result in a pipeline.
481
+ - `select-longest-text` chooses the longest prior extraction result.
482
+ - `ocr-rapidocr` does optical character recognition on images (optional).
483
+ - `stt-openai` performs speech to text on audio (optional).
484
+ - `unstructured` provides broad document parsing (optional).
485
+ - `markitdown` converts many formats into Markdown-like text (optional).
486
+
470
487
  ## Integration corpus and evaluation dataset
471
488
 
472
489
  Use `scripts/download_wikipedia.py` to download a small integration corpus from Wikipedia when running tests or demos. The repository does not include that content.
@@ -38,7 +38,7 @@ If you want to run a real, executable version of this story, use `scripts/readme
38
38
  This simplified sequence diagram shows the same idea at a high level.
39
39
 
40
40
  ```mermaid
41
- %%{init: {"theme": "base", "themeVariables": {"primaryColor": "#f3e5f5", "primaryTextColor": "#111111", "primaryBorderColor": "#8e24aa", "lineColor": "#90a4ae", "secondaryColor": "#eceff1", "tertiaryColor": "#ffffff", "noteBkgColor": "#ffffff", "noteTextColor": "#111111", "actorBkg": "#f3e5f5", "actorBorder": "#8e24aa", "actorTextColor": "#111111"}}}%%
41
+ %%{init: {"theme": "base", "themeVariables": {"background": "#ffffff", "primaryColor": "#f3e5f5", "primaryTextColor": "#111111", "primaryBorderColor": "#8e24aa", "lineColor": "#90a4ae", "secondaryColor": "#eceff1", "tertiaryColor": "#ffffff", "noteBkgColor": "#ffffff", "noteTextColor": "#111111", "actorBkg": "#f3e5f5", "actorBorder": "#8e24aa", "actorTextColor": "#111111"}}}%%
42
42
  sequenceDiagram
43
43
  participant App as Your assistant code
44
44
  participant KB as Knowledge base
@@ -77,7 +77,7 @@ In a coding assistant, retrieval is often triggered by what the user is doing ri
77
77
  This diagram shows two sequential Biblicus calls. They are shown separately to make the boundaries explicit: retrieval returns evidence, and context pack building consumes evidence.
78
78
 
79
79
  ```mermaid
80
- %%{init: {"theme": "base", "themeVariables": {"primaryColor": "#f3e5f5", "primaryTextColor": "#111111", "primaryBorderColor": "#8e24aa", "lineColor": "#90a4ae", "secondaryColor": "#eceff1", "tertiaryColor": "#ffffff", "noteBkgColor": "#ffffff", "noteTextColor": "#111111", "actorBkg": "#f3e5f5", "actorBorder": "#8e24aa", "actorTextColor": "#111111"}}}%%
80
+ %%{init: {"theme": "base", "themeVariables": {"background": "#ffffff", "primaryColor": "#f3e5f5", "primaryTextColor": "#111111", "primaryBorderColor": "#8e24aa", "lineColor": "#90a4ae", "secondaryColor": "#eceff1", "tertiaryColor": "#ffffff", "noteBkgColor": "#ffffff", "noteTextColor": "#111111", "actorBkg": "#f3e5f5", "actorBorder": "#8e24aa", "actorTextColor": "#111111"}}}%%
81
81
  sequenceDiagram
82
82
  participant User
83
83
  participant App as Your assistant code
@@ -131,6 +131,7 @@ Some extractors are optional so the base install stays small.
131
131
  - Optical character recognition for images: `python3 -m pip install "biblicus[ocr]"`
132
132
  - Speech to text transcription: `python3 -m pip install "biblicus[openai]"` (requires an OpenAI API key in `~/.biblicus/config.yml` or `./.biblicus/config.yml`)
133
133
  - Broad document parsing fallback: `python3 -m pip install "biblicus[unstructured]"`
134
+ - MarkItDown document conversion (requires Python 3.10 or higher): `python3 -m pip install "biblicus[markitdown]"`
134
135
 
135
136
  ## Quick start
136
137
 
@@ -438,6 +439,20 @@ Two backends are included.
438
439
  - `scan` is a minimal baseline that scans raw items directly.
439
440
  - `sqlite-full-text-search` is a practical baseline that builds a full text search index in Sqlite.
440
441
 
442
+ ## Extraction backends
443
+
444
+ These extractors are built in. Optional ones require extra dependencies.
445
+
446
+ - `pass-through-text` reads text items and strips Markdown front matter.
447
+ - `metadata-text` turns catalog metadata into a small text artifact.
448
+ - `pdf-text` extracts text from Portable Document Format items with `pypdf`.
449
+ - `select-text` chooses one prior extraction result in a pipeline.
450
+ - `select-longest-text` chooses the longest prior extraction result.
451
+ - `ocr-rapidocr` does optical character recognition on images (optional).
452
+ - `stt-openai` performs speech to text on audio (optional).
453
+ - `unstructured` provides broad document parsing (optional).
454
+ - `markitdown` converts many formats into Markdown-like text (optional).
455
+
441
456
  ## Integration corpus and evaluation dataset
442
457
 
443
458
  Use `scripts/download_wikipedia.py` to download a small integration corpus from Wikipedia when running tests or demos. The repository does not include that content.
@@ -221,6 +221,25 @@ python3 -m biblicus build --corpus corpora/pdf_samples --backend sqlite-full-tex
221
221
  python3 -m biblicus query --corpus corpora/pdf_samples --query "Dummy PDF file"
222
222
  ```
223
223
 
224
+ ### Wikipedia retrieval demo (Python)
225
+
226
+ This example downloads a few Wikipedia summaries about retrieval and knowledge bases, builds an extraction run, creates a local full text index, and returns evidence plus a context pack.
227
+
228
+ ```
229
+ rm -rf corpora/wikipedia_rag_demo
230
+ python3 scripts/wikipedia_rag_demo.py --corpus corpora/wikipedia_rag_demo --force
231
+ ```
232
+
233
+ ### MarkItDown extraction demo (Python 3.10+)
234
+
235
+ MarkItDown requires Python 3.10 or higher. This example uses the `py311` conda environment to run the extractor over the mixed sample corpus.
236
+
237
+ ```
238
+ conda run -n py311 python -m pip install -e . "markitdown[all]"
239
+ conda run -n py311 python scripts/download_mixed_samples.py --corpus corpora/markitdown_demo_py311 --force
240
+ conda run -n py311 python -m biblicus extract build --corpus corpora/markitdown_demo_py311 --step markitdown
241
+ ```
242
+
224
243
  ### Mixed modality integration corpus
225
244
 
226
245
  This example assembles a tiny mixed corpus with a Markdown note, a Hypertext Markup Language page, an image, a Portable Document Format file with extractable text, and a generated Portable Document Format file with no extractable text.
@@ -71,6 +71,27 @@ To install:
71
71
  python3 -m pip install "biblicus[unstructured]"
72
72
  ```
73
73
 
74
+ `markitdown`
75
+
76
+ - Converts common document formats into Markdown-like text
77
+ - Backed by the optional `markitdown` dependency
78
+ - Requires Python 3.10 or higher
79
+ - Skips items that are already text so the pass-through extractor remains the canonical choice for text items
80
+ - This means it will not process `text/html` or other text media types unless that policy changes
81
+
82
+ To install:
83
+
84
+ ```
85
+ python3 -m pip install "biblicus[markitdown]"
86
+ ```
87
+
88
+ Example:
89
+
90
+ ```
91
+ python3 -m biblicus extract build --corpus corpora/extraction-demo \
92
+ --step markitdown
93
+ ```
94
+
74
95
  `ocr-rapidocr`
75
96
 
76
97
  - Optical character recognition for image items
@@ -123,6 +123,7 @@ What it does:
123
123
  - Includes a Portable Document Format text extractor plugin.
124
124
  - Includes a speech to text extractor plugin for audio items.
125
125
  - Includes a selection extractor step for choosing extracted text within a pipeline.
126
+ - Includes a MarkItDown extractor plugin for document conversion.
126
127
 
127
128
  Documentation:
128
129
 
@@ -139,6 +140,7 @@ Behavior specifications:
139
140
  - `features/ocr_extractor.feature`
140
141
  - `features/stt_extractor.feature`
141
142
  - `features/unstructured_extractor.feature`
143
+ - `features/markitdown_extractor.feature`
142
144
  - `features/integration_unstructured_extraction.feature`
143
145
 
144
146
  Primary implementation:
@@ -124,6 +124,21 @@ Acceptance checks:
124
124
 
125
125
  These are valuable, but intentionally not the near-term focus while retrieval becomes practical end to end.
126
126
 
127
+ ### In-memory corpus for ephemeral workflows
128
+
129
+ Goal: allow programmatic, temporary corpora that live in memory for short-lived agents or tests.
130
+
131
+ Deliverables:
132
+
133
+ - A memory-backed corpus implementation that supports the same ingestion and catalog APIs.
134
+ - A serialization option for snapshots so ephemeral corpora can be persisted when needed.
135
+ - Documentation that explains tradeoffs versus file-based corpora.
136
+
137
+ Acceptance checks:
138
+
139
+ - Behavior specifications cover ingestion, listing, and reindexing in memory.
140
+ - Retrieval and extraction can operate on the in-memory corpus without special casing.
141
+
127
142
  ### Extractor datasets and evaluation harness
128
143
 
129
144
  Goal: compare extraction approaches in a way that is measurable, repeatable, and useful for practical engineering decisions.
@@ -134,6 +134,32 @@ def after_scenario(context, scenario) -> None:
134
134
  sys.modules.pop(name, None)
135
135
  context._fake_rapidocr_unavailable_installed = False
136
136
  context._fake_rapidocr_unavailable_original_modules = {}
137
+ if getattr(context, "_fake_markitdown_installed", False):
138
+ original_modules = getattr(context, "_fake_markitdown_original_modules", {})
139
+ for name in [
140
+ "markitdown",
141
+ ]:
142
+ if name in original_modules:
143
+ sys.modules[name] = original_modules[name]
144
+ else:
145
+ sys.modules.pop(name, None)
146
+ context._fake_markitdown_installed = False
147
+ context._fake_markitdown_original_modules = {}
148
+ if getattr(context, "_fake_markitdown_unavailable_installed", False):
149
+ original_modules = getattr(context, "_fake_markitdown_unavailable_original_modules", {})
150
+ for name in [
151
+ "markitdown",
152
+ ]:
153
+ if name in original_modules:
154
+ sys.modules[name] = original_modules[name]
155
+ else:
156
+ sys.modules.pop(name, None)
157
+ context._fake_markitdown_unavailable_installed = False
158
+ context._fake_markitdown_unavailable_original_modules = {}
159
+ original_sys_version_info = getattr(context, "_original_sys_version_info", None)
160
+ if original_sys_version_info is not None:
161
+ sys.version_info = original_sys_version_info
162
+ context._original_sys_version_info = None
137
163
  if hasattr(context, "_tmp"):
138
164
  context._tmp.cleanup()
139
165
 
@@ -0,0 +1,99 @@
1
+ Feature: MarkItDown extractor plugin
2
+ The MarkItDown extractor converts non-text items into Markdown-like text and is backed by an optional dependency.
3
+
4
+ Scenario: MarkItDown extractor requires an optional dependency
5
+ Given I initialized a corpus at "corpus"
6
+ And the MarkItDown dependency is unavailable
7
+ And a Portable Document Format file "hello.pdf" exists with text "Hello"
8
+ When I ingest the file "hello.pdf" into corpus "corpus"
9
+ And I attempt to build a "markitdown" extraction run in corpus "corpus"
10
+ Then the command fails with exit code 2
11
+ And standard error includes "biblicus[markitdown]"
12
+
13
+ Scenario: MarkItDown extractor rejects unsupported Python versions
14
+ Given I initialized a corpus at "corpus"
15
+ And a fake MarkItDown library is available but marked as real
16
+ And a Portable Document Format file "hello.pdf" exists with text "Hello"
17
+ When I ingest the file "hello.pdf" into corpus "corpus"
18
+ And I attempt to build a "markitdown" extraction run in corpus "corpus"
19
+ Then the command fails with exit code 2
20
+ And standard error includes "Python 3.10"
21
+
22
+ Scenario: MarkItDown extractor skips text items
23
+ Given I initialized a corpus at "corpus"
24
+ And a fake MarkItDown library is available
25
+ When I ingest the text "alpha" with title "Alpha" and tags "a" into corpus "corpus"
26
+ And I build a "markitdown" extraction run in corpus "corpus"
27
+ Then the extraction run does not include extracted text for the last ingested item
28
+
29
+ Scenario: MarkItDown extractor produces extracted text for a non-text item
30
+ Given I initialized a corpus at "corpus"
31
+ And a fake MarkItDown library is available that returns text "Extracted by MarkItDown" for filename "doc.pdf"
32
+ And a binary file "doc.pdf" exists
33
+ When I ingest the file "doc.pdf" into corpus "corpus"
34
+ And I build a "markitdown" extraction run in corpus "corpus"
35
+ Then the extraction run includes extracted text for the last ingested item
36
+ And the extracted text for the last ingested item equals "Extracted by MarkItDown"
37
+ And the extraction run item provenance uses extractor "markitdown"
38
+
39
+ Scenario: MarkItDown extractor records empty output when it cannot extract text
40
+ Given I initialized a corpus at "corpus"
41
+ And a fake MarkItDown library is available that returns empty output for filename "empty.pdf"
42
+ And a binary file "empty.pdf" exists
43
+ When I ingest the file "empty.pdf" into corpus "corpus"
44
+ And I build a "markitdown" extraction run in corpus "corpus"
45
+ Then the extraction run includes extracted text for the last ingested item
46
+ And the extracted text for the last ingested item is empty
47
+ And the extraction run stats include extracted_empty_items 1
48
+
49
+ Scenario: MarkItDown extractor records empty output when conversion returns None
50
+ Given I initialized a corpus at "corpus"
51
+ And a fake MarkItDown library is available that returns None for filename "none.pdf"
52
+ And a binary file "none.pdf" exists
53
+ When I ingest the file "none.pdf" into corpus "corpus"
54
+ And I build a "markitdown" extraction run in corpus "corpus"
55
+ Then the extraction run includes extracted text for the last ingested item
56
+ And the extracted text for the last ingested item is empty
57
+ And the extraction run stats include extracted_empty_items 1
58
+
59
+ Scenario: MarkItDown extractor accepts string results
60
+ Given I initialized a corpus at "corpus"
61
+ And a fake MarkItDown library is available that returns a string for filename "string.pdf"
62
+ And a binary file "string.pdf" exists
63
+ When I ingest the file "string.pdf" into corpus "corpus"
64
+ And I build a "markitdown" extraction run in corpus "corpus"
65
+ Then the extraction run includes extracted text for the last ingested item
66
+ And the extracted text for the last ingested item equals "Extracted by MarkItDown"
67
+ And the extraction run item provenance uses extractor "markitdown"
68
+
69
+ Scenario: MarkItDown extractor records empty output for non-text conversion output
70
+ Given I initialized a corpus at "corpus"
71
+ And a fake MarkItDown library is available that returns non-text output for filename "nonstr.pdf"
72
+ And a binary file "nonstr.pdf" exists
73
+ When I ingest the file "nonstr.pdf" into corpus "corpus"
74
+ And I build a "markitdown" extraction run in corpus "corpus"
75
+ Then the extracted text for the last ingested item is empty
76
+ And the extraction run stats include extracted_empty_items 1
77
+
78
+ Scenario: MarkItDown extractor ignores whitespace output
79
+ Given I initialized a corpus at "corpus"
80
+ And a fake MarkItDown library is available that returns whitespace output for filename "whitespace.pdf"
81
+ And a binary file "whitespace.pdf" exists
82
+ When I ingest the file "whitespace.pdf" into corpus "corpus"
83
+ And I build a "markitdown" extraction run in corpus "corpus"
84
+ Then the extracted text for the last ingested item is empty
85
+ And the extraction run stats include extracted_empty_items 1
86
+
87
+ Scenario: MarkItDown extractor records per-item errors and continues
88
+ Given I initialized a corpus at "corpus"
89
+ And a fake MarkItDown library is available that raises a RuntimeError for filename "boom.pdf"
90
+ And a binary file "boom.pdf" exists
91
+ And a fake MarkItDown library is available that returns text "ok" for filename "ok.pdf"
92
+ And a binary file "ok.pdf" exists
93
+ When I ingest the file "boom.pdf" into corpus "corpus"
94
+ And I ingest the file "ok.pdf" into corpus "corpus"
95
+ And I build a "markitdown" extraction run in corpus "corpus"
96
+ Then the extracted text for the last ingested item equals "ok"
97
+ And the extraction run includes an errored result for the first ingested item
98
+ And the extraction run error type for the first ingested item equals "RuntimeError"
99
+ And the extraction run stats include errored_items 1
@@ -0,0 +1,173 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ import types
5
+ from dataclasses import dataclass
6
+ from typing import Dict, Optional
7
+
8
+ from behave import given
9
+
10
+
11
@dataclass
class _FakeMarkItDownBehavior:
    """
    Describe how the fake MarkItDown converter should respond for one filename.

    Instances are stored in the per-scenario behavior registry and looked up by the fake
    ``convert`` method.
    """

    # Selector for the fake response. The step definitions in this module use "text", "empty",
    # "none", "string", "nonstring", "whitespace", and "error".
    mode: str
    # Text payload for the modes that return text; left as None for the other modes.
    text: Optional[str] = None
15
+
16
+
17
def _ensure_fake_markitdown_behaviors(context) -> Dict[str, _FakeMarkItDownBehavior]:
    """
    Return the fake MarkItDown behavior registry stored on the behave context.

    The registry is created and attached as ``context.fake_markitdown_behaviors`` the first time
    it is requested; later calls return the same dictionary.

    :param context: Behave context object.
    :return: Mapping from filename to the behavior configured for that filename.
    :rtype: Dict[str, _FakeMarkItDownBehavior]
    """
    behaviors = getattr(context, "fake_markitdown_behaviors", None)
    if behaviors is None:
        behaviors = {}
        context.fake_markitdown_behaviors = behaviors
    return behaviors


def _install_fake_markitdown_module(context) -> None:
    """
    Install a fake ``markitdown`` module into ``sys.modules``.

    The fake module exposes a ``MarkItDown`` class whose ``convert`` method answers according to
    the behavior registry on the context, and carries ``__biblicus_fake__ = True``. Any module
    previously registered under the name ``markitdown`` is recorded on the context as
    ``_fake_markitdown_original_modules``. The function is a no-op when the context is already
    flagged with ``_fake_markitdown_installed``.

    :param context: Behave context object.
    :return: None.
    :rtype: None
    """
    if getattr(context, "_fake_markitdown_installed", False):
        return

    # Remember whatever was registered before so it can be put back later.
    original_modules: Dict[str, object] = {}
    if "markitdown" in sys.modules:
        original_modules["markitdown"] = sys.modules["markitdown"]

    behaviors = _ensure_fake_markitdown_behaviors(context)

    class _ConversionResult:
        """Minimal stand-in for a conversion result exposing ``text_content``."""

        def __init__(self, text_content: object) -> None:
            self.text_content = text_content

    class MarkItDown:
        """Fake converter driven by the behavior registry captured at install time."""

        def __init__(self, *, enable_plugins: bool = False) -> None:
            self.enable_plugins = enable_plugins

        def convert(self, filename: object) -> object:
            """
            Return the configured fake response for ``filename``.

            :param filename: Path of the file to convert, as a string or path-like object.
            :return: A conversion result, a bare string, or None, depending on the mode.
            :rtype: object
            :raises RuntimeError: When the configured mode is ``"error"``.
            """
            # Accept path objects and backslash separators as well as "/"-separated strings.
            path_text = str(filename).replace("\\", "/")
            base_name = path_text.rsplit("/", 1)[-1]
            # Drop everything up to the first "--" so the lookup uses the trailing name;
            # split returns the whole name unchanged when no "--" is present.
            normalized_name = base_name.split("--", 1)[-1]
            behavior = behaviors.get(normalized_name)
            if behavior is None:
                return _ConversionResult("")
            mode = behavior.mode
            if mode == "error":
                raise RuntimeError("fake markitdown error")
            if mode == "none":
                return None
            if mode == "string":
                # Deliberately a bare string rather than a result object.
                return behavior.text or ""
            if mode == "nonstring":
                return _ConversionResult(123)
            if mode == "whitespace":
                return _ConversionResult(" ")
            if mode == "text":
                return _ConversionResult(behavior.text or "")
            # "empty" and any unrecognized mode both yield an empty result.
            return _ConversionResult("")

    markitdown_module = types.ModuleType("markitdown")
    markitdown_module.MarkItDown = MarkItDown
    markitdown_module.__biblicus_fake__ = True

    sys.modules["markitdown"] = markitdown_module

    context._fake_markitdown_installed = True
    context._fake_markitdown_original_modules = original_modules
78
+
79
+
80
def _install_markitdown_unavailable_module(context) -> None:
    """
    Replace ``markitdown`` in ``sys.modules`` with an empty placeholder module.

    The placeholder defines no ``MarkItDown`` attribute, presumably so that code importing that
    name from it fails as if the dependency were missing. Any module previously registered under
    ``markitdown`` is recorded on the context as ``_fake_markitdown_unavailable_original_modules``.
    Calling this again for a context already flagged as installed does nothing.

    :param context: Behave context object.
    :return: None.
    :rtype: None
    """
    if getattr(context, "_fake_markitdown_unavailable_installed", False):
        return

    # Snapshot the current registration, if any, before overwriting it.
    previous_modules: Dict[str, object] = {
        name: sys.modules[name] for name in ("markitdown",) if name in sys.modules
    }

    sys.modules["markitdown"] = types.ModuleType("markitdown")

    context._fake_markitdown_unavailable_installed = True
    context._fake_markitdown_unavailable_original_modules = previous_modules
94
+
95
+
96
@given("a fake MarkItDown library is available")
def step_fake_markitdown_available(context) -> None:
    """
    Make the fake ``markitdown`` module importable without registering any per-file behavior.

    :param context: Behave context object.
    :return: None.
    :rtype: None
    """
    _install_fake_markitdown_module(context)
99
+
100
+
101
@given(
    'a fake MarkItDown library is available that returns text "{text}" for filename "{filename}"'
)
def step_fake_markitdown_returns_text(context, text: str, filename: str) -> None:
    """
    Register a ``"text"`` behavior carrying ``text`` for ``filename``.

    :param context: Behave context object.
    :param text: Text stored on the registered behavior.
    :param filename: Registry key the behavior is stored under.
    :return: None.
    :rtype: None
    """
    _install_fake_markitdown_module(context)
    registry = _ensure_fake_markitdown_behaviors(context)
    registry[filename] = _FakeMarkItDownBehavior(mode="text", text=text)
108
+
109
+
110
@given(
    'a fake MarkItDown library is available that returns empty output for filename "{filename}"'
)
def step_fake_markitdown_returns_empty(context, filename: str) -> None:
    """
    Register an ``"empty"`` behavior for ``filename``.

    :param context: Behave context object.
    :param filename: Registry key the behavior is stored under.
    :return: None.
    :rtype: None
    """
    _install_fake_markitdown_module(context)
    registry = _ensure_fake_markitdown_behaviors(context)
    registry[filename] = _FakeMarkItDownBehavior(mode="empty", text=None)
117
+
118
+
119
@given('a fake MarkItDown library is available that returns None for filename "{filename}"')
def step_fake_markitdown_returns_none(context, filename: str) -> None:
    """
    Register a ``"none"`` behavior for ``filename``.

    :param context: Behave context object.
    :param filename: Registry key the behavior is stored under.
    :return: None.
    :rtype: None
    """
    _install_fake_markitdown_module(context)
    registry = _ensure_fake_markitdown_behaviors(context)
    registry[filename] = _FakeMarkItDownBehavior(mode="none", text=None)
124
+
125
+
126
@given('a fake MarkItDown library is available that returns a string for filename "{filename}"')
def step_fake_markitdown_returns_string(context, filename: str) -> None:
    """
    Register a ``"string"`` behavior with a fixed text payload for ``filename``.

    :param context: Behave context object.
    :param filename: Registry key the behavior is stored under.
    :return: None.
    :rtype: None
    """
    _install_fake_markitdown_module(context)
    registry = _ensure_fake_markitdown_behaviors(context)
    registry[filename] = _FakeMarkItDownBehavior(mode="string", text="Extracted by MarkItDown")
131
+
132
+
133
@given(
    'a fake MarkItDown library is available that returns non-text output for filename "{filename}"'
)
def step_fake_markitdown_returns_nonstring(context, filename: str) -> None:
    """
    Register a ``"nonstring"`` behavior for ``filename``.

    :param context: Behave context object.
    :param filename: Registry key the behavior is stored under.
    :return: None.
    :rtype: None
    """
    _install_fake_markitdown_module(context)
    registry = _ensure_fake_markitdown_behaviors(context)
    registry[filename] = _FakeMarkItDownBehavior(mode="nonstring", text=None)
140
+
141
+
142
@given(
    'a fake MarkItDown library is available that raises a RuntimeError for filename "{filename}"'
)
def step_fake_markitdown_raises_error(context, filename: str) -> None:
    """
    Register an ``"error"`` behavior for ``filename``.

    :param context: Behave context object.
    :param filename: Registry key the behavior is stored under.
    :return: None.
    :rtype: None
    """
    _install_fake_markitdown_module(context)
    registry = _ensure_fake_markitdown_behaviors(context)
    registry[filename] = _FakeMarkItDownBehavior(mode="error", text=None)
149
+
150
+
151
@given(
    'a fake MarkItDown library is available that returns whitespace output for filename "{filename}"'
)
def step_fake_markitdown_returns_whitespace(context, filename: str) -> None:
    """
    Register a ``"whitespace"`` behavior for ``filename``.

    :param context: Behave context object.
    :param filename: Registry key the behavior is stored under.
    :return: None.
    :rtype: None
    """
    _install_fake_markitdown_module(context)
    registry = _ensure_fake_markitdown_behaviors(context)
    registry[filename] = _FakeMarkItDownBehavior(mode="whitespace", text=None)
158
+
159
+
160
@given("the MarkItDown dependency is unavailable")
def step_markitdown_dependency_unavailable(context) -> None:
    """
    Swap the ``markitdown`` entry in ``sys.modules`` for an empty placeholder module.

    :param context: Behave context object.
    :return: None.
    :rtype: None
    """
    _install_markitdown_unavailable_module(context)
163
+
164
+
165
@given("a fake MarkItDown library is available but marked as real")
def step_fake_markitdown_marked_real(context) -> None:
    """
    Install the fake ``markitdown`` module, clear its fake marker, and report Python 3.9.

    The module's ``__biblicus_fake__`` attribute is set to ``False`` and ``sys.version_info`` is
    replaced with a 3.9.0 value. The real ``sys.version_info`` is saved once on the context as
    ``_original_sys_version_info``; this step does not restore it.
    NOTE(review): restoration presumably happens in the behave environment hooks — confirm.

    :param context: Behave context object.
    :return: None.
    :rtype: None
    """
    from collections import namedtuple

    _install_fake_markitdown_module(context)
    markitdown_module = sys.modules.get("markitdown")
    if markitdown_module is not None:
        markitdown_module.__biblicus_fake__ = False
    if not hasattr(context, "_original_sys_version_info"):
        context._original_sys_version_info = sys.version_info
    # Use a named tuple rather than a plain tuple so attribute access such as
    # ``sys.version_info.minor`` keeps working while the patch is active; tuple comparisons
    # like ``sys.version_info < (3, 10)`` behave the same as with a plain tuple.
    fake_version_info = namedtuple(
        "version_info", ["major", "minor", "micro", "releaselevel", "serial"]
    )
    sys.version_info = fake_version_info(3, 9, 0, "final", 0)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "biblicus"
7
- version = "0.6.0"
7
+ version = "0.7.0"
8
8
  description = "Command line interface and Python library for corpus ingestion, retrieval, and evaluation."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -36,6 +36,9 @@ unstructured = [
36
36
  ocr = [
37
37
  "rapidocr-onnxruntime>=1.3.0",
38
38
  ]
39
+ markitdown = [
40
+ "markitdown[all]>=0.1.0; python_version>='3.10'",
41
+ ]
39
42
 
40
43
  [project.scripts]
41
44
  biblicus = "biblicus.cli:main"
@@ -54,6 +57,7 @@ omit = ["*/biblicus/_vendor/*"]
54
57
  [tool.coverage.report]
55
58
  show_missing = true
56
59
  skip_covered = false
60
+ fail_under = 100
57
61
  exclude_lines = [
58
62
  "pragma: no cover",
59
63
  "if __name__ == .__main__.:",
@@ -64,6 +64,8 @@ def main() -> int:
64
64
  Scenarios that require the optional Unstructured dependency are tagged ``@unstructured``
65
65
  and are excluded unless you also pass ``--unstructured``.
66
66
 
67
+ The coverage report enforces the configured minimum coverage threshold.
68
+
67
69
  :return: Exit code.
68
70
  :rtype: int
69
71
  """
@@ -100,12 +102,21 @@ def main() -> int:
100
102
  behave_args.extend(["--tags", "~@ocr"])
101
103
  if args.integration and not args.unstructured:
102
104
  behave_args.extend(["--tags", "~@unstructured"])
103
- rc = _run([sys.executable, "-m", "coverage", "run", "-m", "behave", *behave_args], env=env)
104
- _run([sys.executable, "-m", "coverage", "report", "-m"], env=env)
105
- _run([sys.executable, "-m", "coverage", "html", "-d", str(htmlcov_dir)], env=env)
105
+ behave_exit_code = _run(
106
+ [sys.executable, "-m", "coverage", "run", "-m", "behave", *behave_args],
107
+ env=env,
108
+ )
109
+ coverage_report_exit_code = _run(
110
+ [sys.executable, "-m", "coverage", "report", "-m"],
111
+ env=env,
112
+ )
113
+ coverage_html_exit_code = _run(
114
+ [sys.executable, "-m", "coverage", "html", "-d", str(htmlcov_dir)],
115
+ env=env,
116
+ )
106
117
 
107
118
  print(f"Coverage report in Hypertext Markup Language: {htmlcov_dir / 'index.html'}")
108
- return int(rc)
119
+ return int(max(behave_exit_code, coverage_report_exit_code, coverage_html_exit_code))
109
120
 
110
121
 
111
122
  if __name__ == "__main__":