deepdoc 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. {deepdoc-1.2.0 → deepdoc-1.3.0}/PKG-INFO +68 -21
  2. {deepdoc-1.2.0 → deepdoc-1.3.0}/README.md +67 -20
  3. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/__init__.py +1 -1
  4. deepdoc-1.3.0/deepdoc/benchmark_v2.py +682 -0
  5. deepdoc-1.3.0/deepdoc/chatbot/deep_research.py +537 -0
  6. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/chatbot/indexer.py +12 -0
  7. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/chatbot/persistence.py +19 -0
  8. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/chatbot/service.py +318 -81
  9. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/chatbot/settings.py +15 -7
  10. deepdoc-1.3.0/deepdoc/chatbot/source_archive.py +180 -0
  11. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/cli.py +202 -3
  12. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/config.py +15 -7
  13. deepdoc-1.3.0/deepdoc/generator/__init__.py +23 -0
  14. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/generator/generation.py +4 -0
  15. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/generator/post_processors.py +124 -14
  16. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/generator/validation.py +116 -13
  17. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/llm/json_utils.py +9 -2
  18. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/pipeline_v2.py +4 -0
  19. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc.egg-info/PKG-INFO +68 -21
  20. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc.egg-info/SOURCES.txt +3 -0
  21. {deepdoc-1.2.0 → deepdoc-1.3.0}/pyproject.toml +1 -1
  22. deepdoc-1.3.0/tests/test_benchmark_scorecard.py +409 -0
  23. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_chatbot_config.py +18 -0
  24. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_chatbot_query.py +919 -159
  25. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_chatbot_scaffold.py +17 -6
  26. deepdoc-1.3.0/tests/test_chatbot_source_archive.py +132 -0
  27. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_fumadocs_builder.py +163 -32
  28. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_generation_evidence.py +185 -0
  29. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_llm_json_utils.py +42 -3
  30. deepdoc-1.2.0/deepdoc/benchmark_v2.py +0 -150
  31. deepdoc-1.2.0/deepdoc/chatbot/deep_research.py +0 -333
  32. deepdoc-1.2.0/deepdoc/generator/__init__.py +0 -4
  33. {deepdoc-1.2.0 → deepdoc-1.3.0}/LICENSE +0 -0
  34. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/__main__.py +0 -0
  35. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/_legacy_types.py +0 -0
  36. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/call_graph.py +0 -0
  37. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/chatbot/__init__.py +0 -0
  38. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/chatbot/chunker.py +0 -0
  39. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/chatbot/docs_summary.py +0 -0
  40. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/chatbot/embeddings.py +0 -0
  41. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/chatbot/linking.py +0 -0
  42. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/chatbot/providers.py +0 -0
  43. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/chatbot/scaffold.py +0 -0
  44. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/chatbot/types.py +0 -0
  45. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/generator/evidence.py +0 -0
  46. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/llm/__init__.py +0 -0
  47. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/llm/client.py +0 -0
  48. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/llm/litellm_compat.py +0 -0
  49. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/manifest.py +0 -0
  50. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/openapi.py +0 -0
  51. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/__init__.py +0 -0
  52. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/api_detector.py +0 -0
  53. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/base.py +0 -0
  54. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/go_parser.py +0 -0
  55. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/js_ts_parser.py +0 -0
  56. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/php_parser.py +0 -0
  57. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/python_parser.py +0 -0
  58. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/registry.py +0 -0
  59. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/__init__.py +0 -0
  60. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/base.py +0 -0
  61. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/common.py +0 -0
  62. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/detector.py +0 -0
  63. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/django.py +0 -0
  64. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/express.py +0 -0
  65. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/falcon.py +0 -0
  66. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/fastify.py +0 -0
  67. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/go.py +0 -0
  68. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/js_shared.py +0 -0
  69. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/laravel.py +0 -0
  70. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/nestjs.py +0 -0
  71. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/python_shared.py +0 -0
  72. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/registry.py +0 -0
  73. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/routes/repo_resolver.py +0 -0
  74. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/parser/vue_parser.py +0 -0
  75. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/persistence_v2.py +0 -0
  76. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/planner/__init__.py +0 -0
  77. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/planner/common.py +0 -0
  78. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/planner/engine.py +0 -0
  79. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/planner/heuristics.py +0 -0
  80. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/planner/specializations.py +0 -0
  81. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/planner/utils.py +0 -0
  82. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/prompts_v2.py +0 -0
  83. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/py.typed +0 -0
  84. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/scanner/__init__.py +0 -0
  85. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/scanner/artifacts.py +0 -0
  86. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/scanner/clustering.py +0 -0
  87. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/scanner/common.py +0 -0
  88. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/scanner/database.py +0 -0
  89. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/scanner/endpoints.py +0 -0
  90. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/scanner/integrations.py +0 -0
  91. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/scanner/runtime.py +0 -0
  92. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/scanner/utils.py +0 -0
  93. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/site/__init__.py +0 -0
  94. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/site/builder/__init__.py +0 -0
  95. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/site/builder/common.py +0 -0
  96. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/site/builder/engine.py +0 -0
  97. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/site/builder/mdx_utils.py +0 -0
  98. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/site/builder/templates.py +0 -0
  99. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/smart_update_v2.py +0 -0
  100. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/source_metadata.py +0 -0
  101. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/updater_v2.py +0 -0
  102. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc/v2_models.py +0 -0
  103. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc.egg-info/dependency_links.txt +0 -0
  104. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc.egg-info/entry_points.txt +0 -0
  105. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc.egg-info/requires.txt +0 -0
  106. {deepdoc-1.2.0 → deepdoc-1.3.0}/deepdoc.egg-info/top_level.txt +0 -0
  107. {deepdoc-1.2.0 → deepdoc-1.3.0}/setup.cfg +0 -0
  108. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_call_graph.py +0 -0
  109. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_chatbot_embeddings.py +0 -0
  110. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_chatbot_eval.py +0 -0
  111. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_chatbot_index.py +0 -0
  112. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_chatbot_persistence.py +0 -0
  113. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_chatbot_providers.py +0 -0
  114. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_chatbot_relationship.py +0 -0
  115. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_classify.py +0 -0
  116. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_cli_generate.py +0 -0
  117. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_cli_serve.py +0 -0
  118. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_cli_update.py +0 -0
  119. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_framework_fixtures.py +0 -0
  120. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_framework_support.py +0 -0
  121. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_internal_docs_metadata.py +0 -0
  122. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_litellm_compat.py +0 -0
  123. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_parallel_pipeline.py +0 -0
  124. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_parser_ranges.py +0 -0
  125. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_planner_consolidation.py +0 -0
  126. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_planner_granularity.py +0 -0
  127. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_route_registry.py +0 -0
  128. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_runtime_scan.py +0 -0
  129. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_smart_update.py +0 -0
  130. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_stale.py +0 -0
  131. {deepdoc-1.2.0 → deepdoc-1.3.0}/tests/test_state.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoc
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: Auto-generate beautiful docs from any codebase
5
5
  Author: Pranav Kumar
6
6
  License: MIT
@@ -310,6 +310,22 @@ deepdoc config set output_dir documentation # Change output dir
310
310
  deepdoc config set llm.api_key_env AZURE_API_KEY # Change API key env var
311
311
  ```
312
312
 
313
+ ### `deepdoc benchmark`
314
+
315
+ Run planner benchmark cases and optionally generate a combined docs+chatbot quality scorecard.
316
+
317
+ ```bash
318
+ deepdoc benchmark --catalog benchmarks/catalog.json
319
+ deepdoc benchmark --repo /path/to/repo --gold benchmarks/gold.json
320
+ deepdoc benchmark --catalog benchmarks/catalog.json --chatbot-eval benchmarks/chatbot_eval.json
321
+ deepdoc benchmark --catalog benchmarks/catalog.json --chatbot-eval benchmarks/chatbot_eval.json --scorecard-out .deepdoc/quality_scorecard.json --strict-scorecard
322
+ deepdoc benchmark --generated-root /Users/apple/autodoc/docs --scorecard-out /Users/apple/autodoc/docs/_scorecards/latest.json
323
+ ```
324
+
325
+ Use `--strict-scorecard` to fail the command when completeness gates are not met.
326
+
327
+ When you do not have a hand-written benchmark catalog or chatbot eval file yet, use artifact mode (`--generated-root` or `--artifact-repo`) to compute a provisional scorecard directly from persisted `.deepdoc/` outputs.
328
+
313
329
  ---
314
330
 
315
331
  ## LLM Provider Setup
@@ -618,7 +634,9 @@ chatbot:
618
634
  base_url: ""
619
635
  api_version: ""
620
636
  temperature: 0.1
621
- max_tokens: 16000
637
+ max_tokens: 24000
638
+ continuation_retries: 2 # Auto-continue if answer ends abruptly
639
+ continuation_context_chars: 12000 # Tail chars included in continuation prompt
622
640
 
623
641
  embeddings: # LLM used for embedding code/docs
624
642
  provider: "azure"
@@ -643,16 +661,20 @@ chatbot:
643
661
  top_k_code: 15
644
662
  top_k_artifact: 8
645
663
  top_k_docs: 6
646
- top_k_relationship: 6
664
+ top_k_relationship: 8
647
665
  candidate_top_k_code: 30
648
666
  candidate_top_k_artifact: 16
649
667
  candidate_top_k_docs: 12
650
668
  candidate_top_k_relationship: 12
651
669
  max_prompt_code_chunks: 12
652
670
  max_prompt_artifact_chunks: 6
653
- max_prompt_doc_chunks: 4
654
- max_prompt_relationship_chunks: 4
655
- max_prompt_chars: 200000
671
+ max_prompt_doc_chunks: 6
672
+ max_prompt_relationship_chunks: 6
673
+ max_prompt_chars: 120000
674
+ fast_mode_use_llm_retrieval_steps: false # Fast mode skips expansion/rerank by default
675
+ fast_mode_iterative_retrieval: false # Fast mode skips second-pass follow-up retrieval
676
+ fast_mode_max_prompt_chars: 90000 # Smaller prompt budget for faster /query answers
677
+ deep_mode_max_prompt_chars: 140000 # Larger budget for /deep-research synthesis
656
678
  lexical_retrieval: true
657
679
  lexical_candidate_limit: 24
658
680
  query_expansion: true
@@ -666,7 +688,8 @@ chatbot:
666
688
  graph_neighbor_relationship_chunks_per_file: 2
667
689
  graph_neighbor_max_docs: 4
668
690
  rerank: true
669
- rerank_candidate_limit: 20
691
+ rerank_candidate_limit: 32
692
+ rerank_candidate_limit_per_kind: 8
670
693
  rerank_preview_chars: 450
671
694
  stitch_adjacent_code_chunks: true
672
695
  stitch_max_adjacent_chunks: 2
@@ -674,7 +697,8 @@ chatbot:
674
697
  live_fallback_max_files: 6
675
698
  live_fallback_max_per_file: 2
676
699
  live_fallback_context_lines: 12
677
- deep_research_chunk_chars: 1600
700
+ deep_research_chunk_chars: 3200
701
+ deep_research_top_k: 10
678
702
 
679
703
  chunking:
680
704
  code_chunk_lines: 120
@@ -709,7 +733,9 @@ chatbot:
709
733
  | `chatbot.answer.base_url` | `""` | Custom endpoint (for Azure, Ollama, etc.) |
710
734
  | `chatbot.answer.api_version` | `""` | Azure API version string |
711
735
  | `chatbot.answer.temperature` | `0.1` | Sampling temperature (lower = more deterministic) |
712
- | `chatbot.answer.max_tokens` | `16000` | Max tokens per answer |
736
+ | `chatbot.answer.max_tokens` | `24000` | Max tokens per answer |
737
+ | `chatbot.answer.continuation_retries` | `2` | Extra completion attempts when an answer appears truncated |
738
+ | `chatbot.answer.continuation_context_chars` | `12000` | Number of trailing chars passed when asking the model to continue |
713
739
  | **Embeddings LLM** | | |
714
740
  | `chatbot.embeddings.provider` | `azure` | Provider for the embedding model |
715
741
  | `chatbot.embeddings.model` | `azure/text-embedding-3-large` | Embedding model |
@@ -721,16 +747,20 @@ chatbot:
721
747
  | `chatbot.retrieval.top_k_code` | `15` | Top code chunks retrieved per query |
722
748
  | `chatbot.retrieval.top_k_artifact` | `8` | Top artifact chunks retrieved per query |
723
749
  | `chatbot.retrieval.top_k_docs` | `6` | Top generated-doc and repo-doc chunks retrieved per query |
724
- | `chatbot.retrieval.top_k_relationship` | `6` | Top relationship chunks retrieved per query |
750
+ | `chatbot.retrieval.top_k_relationship` | `8` | Top relationship chunks retrieved per query |
725
751
  | `chatbot.retrieval.candidate_top_k_code` | `30` | Candidate code chunks gathered before reranking |
726
752
  | `chatbot.retrieval.candidate_top_k_artifact` | `16` | Candidate artifact chunks gathered before reranking |
727
753
  | `chatbot.retrieval.candidate_top_k_docs` | `12` | Candidate doc chunks gathered before reranking |
728
754
  | `chatbot.retrieval.candidate_top_k_relationship` | `12` | Candidate relationship chunks gathered before reranking |
729
755
  | `chatbot.retrieval.max_prompt_code_chunks` | `12` | Max code chunks included in the final prompt |
730
756
  | `chatbot.retrieval.max_prompt_artifact_chunks` | `6` | Max artifact chunks in the final prompt |
731
- | `chatbot.retrieval.max_prompt_doc_chunks` | `4` | Max doc chunks in the final prompt |
732
- | `chatbot.retrieval.max_prompt_relationship_chunks` | `4` | Max relationship chunks included in the final prompt |
733
- | `chatbot.retrieval.max_prompt_chars` | `200000` | Total character budget for the assembled prompt |
757
+ | `chatbot.retrieval.max_prompt_doc_chunks` | `6` | Max doc chunks in the final prompt |
758
+ | `chatbot.retrieval.max_prompt_relationship_chunks` | `6` | Max relationship chunks included in the final prompt |
759
+ | `chatbot.retrieval.max_prompt_chars` | `120000` | Default character budget for assembled prompts |
760
+ | `chatbot.retrieval.fast_mode_use_llm_retrieval_steps` | `false` | In `/query` fast mode, disable LLM query expansion and reranking |
761
+ | `chatbot.retrieval.fast_mode_iterative_retrieval` | `false` | In `/query` fast mode, disable iterative follow-up retrieval |
762
+ | `chatbot.retrieval.fast_mode_max_prompt_chars` | `90000` | Prompt budget used by `/query` fast mode |
763
+ | `chatbot.retrieval.deep_mode_max_prompt_chars` | `140000` | Prompt budget used by `/deep-research` |
734
764
  | `chatbot.retrieval.lexical_retrieval` | `true` | Blend exact-match retrieval with embedding retrieval |
735
765
  | `chatbot.retrieval.lexical_candidate_limit` | `24` | Max lexical candidates gathered before merge/rerank |
736
766
  | `chatbot.retrieval.query_expansion` | `true` | Use LLM to generate alternative search queries |
@@ -744,7 +774,8 @@ chatbot:
744
774
  | `chatbot.retrieval.graph_neighbor_relationship_chunks_per_file` | `2` | Relationship chunks per linked file during graph expansion |
745
775
  | `chatbot.retrieval.graph_neighbor_max_docs` | `4` | Max linked docs pulled in during graph expansion |
746
776
  | `chatbot.retrieval.rerank` | `true` | Use LLM to rerank retrieved chunks |
747
- | `chatbot.retrieval.rerank_candidate_limit` | `20` | Max candidates sent to the reranker |
777
+ | `chatbot.retrieval.rerank_candidate_limit` | `32` | Max candidates sent to the reranker |
778
+ | `chatbot.retrieval.rerank_candidate_limit_per_kind` | `8` | Per-kind candidate cap before filling the global rerank pool |
748
779
  | `chatbot.retrieval.rerank_preview_chars` | `450` | Characters of each chunk shown to the reranker |
749
780
  | `chatbot.retrieval.stitch_adjacent_code_chunks` | `true` | Expand exact-match code hits with adjacent windows from the same file |
750
781
  | `chatbot.retrieval.stitch_max_adjacent_chunks` | `2` | Max adjacent code windows stitched onto a top hit |
@@ -752,7 +783,8 @@ chatbot:
752
783
  | `chatbot.retrieval.live_fallback_max_files` | `6` | Max repo files inspected during a deep-research live fallback |
753
784
  | `chatbot.retrieval.live_fallback_max_per_file` | `2` | Max fallback snippets returned per inspected file |
754
785
  | `chatbot.retrieval.live_fallback_context_lines` | `12` | Lines per fallback snippet around each exact match |
755
- | `chatbot.retrieval.deep_research_chunk_chars` | `1600` | Max chars per evidence chunk passed into deep-research step answers |
786
+ | `chatbot.retrieval.deep_research_chunk_chars` | `3200` | Max chars per evidence chunk passed into deep-research step answers |
787
+ | `chatbot.retrieval.deep_research_top_k` | `10` | Retrieved chunks per deep-research sub-question |
756
788
  | **Chunking** | | |
757
789
  | `chatbot.chunking.code_chunk_lines` | `120` | Lines per code chunk |
758
790
  | `chatbot.chunking.code_chunk_overlap` | `20` | Overlap lines between code chunks |
@@ -829,22 +861,24 @@ During `deepdoc generate`, six corpora are built and stored in `.deepdoc/chatbot
829
861
 
830
862
  ### Chatbot Query Pipeline
831
863
 
832
- When a user asks a question, the backend runs a multi-step retrieval pipeline:
864
+ When a user asks a question, the backend runs a mode-aware retrieval pipeline:
833
865
 
834
- 1. **Query expansion** — The LLM generates up to 3 alternative search queries to improve recall.
866
+ 1. **Query expansion** — In default/deep mode, the LLM can generate alternative search queries to improve recall. Fast mode disables this by default.
835
867
  2. **Embedding** — All queries are embedded using the configured embedding model.
836
868
  3. **Hybrid retrieval** — FAISS similarity search and exact-match lexical search both gather candidates from each corpus.
837
- 4. **Follow-up retrieval** — The backend can derive focused second-pass searches and pull linked files/docs via graph-neighbor expansion.
869
+ 4. **Follow-up retrieval** — The backend can derive focused second-pass searches and pull linked files/docs via graph-neighbor expansion. Fast mode can skip follow-up queries for lower latency.
838
870
  5. **Chunk stitching** — Exact-match code hits can pull adjacent code windows from the same file so larger implementations survive chunk boundaries.
839
- 6. **Reranking** — The LLM scores and reranks the retrieved chunks for relevance.
871
+ 6. **Reranking** — In default/deep mode, the LLM can rerank candidates for relevance. Fast mode disables this by default.
840
872
  7. **Prompt assembly** — Query-type-aware budgets reserve space for the most important evidence types within the character budget.
841
- 8. **Answer generation** — The answer LLM produces a grounded response with code, artifact, doc, repo-doc, relationship, and live-fallback citations when used.
873
+ 8. **Answer generation + continuity guard** — The answer LLM produces a grounded response, and if the output appears truncated (for example ending on a dangling heading), DeepDoc retries with a continuation prompt so the response finishes cleanly.
842
874
 
843
875
  `POST /deep-research` uses the same indexed corpora first, but it can also inspect a small bounded set of live repo files when exact-match evidence is missing from the index. This fallback respects the repo's exclude rules, skips oversized/binary files, and is only used in deep research mode.
844
876
 
877
+ `POST /query` and `POST /deep-research` now return `response_mode` in the payload (`fast`, `deep`, or `default`) so clients can confirm which retrieval profile generated the result.
878
+
845
879
  ### Chatbot API Endpoints
846
880
 
847
- The generated `chatbot_backend/` exposes two endpoints:
881
+ The generated `chatbot_backend/` exposes three endpoints:
848
882
 
849
883
  **Health check:**
850
884
  ```
@@ -865,6 +899,19 @@ POST /query
865
899
 
866
900
  The response includes the answer text, code citations (file path + line range), artifact citations, and links to relevant generated doc pages.
867
901
 
902
+ `/query` is optimized for speed: it runs retrieval in fast mode (no LLM query expansion/rerank by default) and returns an answer plus citations.
903
+
904
+ **Retrieve context only (no answer generation):**
905
+ ```
906
+ POST /query-context
907
+ {
908
+ "question": "Where is reshipping implemented?",
909
+ "history": []
910
+ }
911
+ ```
912
+
913
+ `/query-context` returns selected citations/chunks only. Use this endpoint to inspect retrieval quality independently from answer generation.
914
+
868
915
  ### Deploying the Chatbot
869
916
 
870
917
  For local development, `deepdoc serve` handles everything automatically. For production:
@@ -271,6 +271,22 @@ deepdoc config set output_dir documentation # Change output dir
271
271
  deepdoc config set llm.api_key_env AZURE_API_KEY # Change API key env var
272
272
  ```
273
273
 
274
+ ### `deepdoc benchmark`
275
+
276
+ Run planner benchmark cases and optionally generate a combined docs+chatbot quality scorecard.
277
+
278
+ ```bash
279
+ deepdoc benchmark --catalog benchmarks/catalog.json
280
+ deepdoc benchmark --repo /path/to/repo --gold benchmarks/gold.json
281
+ deepdoc benchmark --catalog benchmarks/catalog.json --chatbot-eval benchmarks/chatbot_eval.json
282
+ deepdoc benchmark --catalog benchmarks/catalog.json --chatbot-eval benchmarks/chatbot_eval.json --scorecard-out .deepdoc/quality_scorecard.json --strict-scorecard
283
+ deepdoc benchmark --generated-root /path/to/docs --scorecard-out /path/to/docs/_scorecards/latest.json
284
+ ```
285
+
286
+ Use `--strict-scorecard` to fail the command when completeness gates are not met.
287
+
288
+ When you do not have a hand-written benchmark catalog or chatbot eval file yet, use artifact mode (`--generated-root` or `--artifact-repo`) to compute a provisional scorecard directly from persisted `.deepdoc/` outputs.
289
+
274
290
  ---
275
291
 
276
292
  ## LLM Provider Setup
@@ -579,7 +595,9 @@ chatbot:
579
595
  base_url: ""
580
596
  api_version: ""
581
597
  temperature: 0.1
582
- max_tokens: 16000
598
+ max_tokens: 24000
599
+ continuation_retries: 2 # Auto-continue if answer ends abruptly
600
+ continuation_context_chars: 12000 # Tail chars included in continuation prompt
583
601
 
584
602
  embeddings: # LLM used for embedding code/docs
585
603
  provider: "azure"
@@ -604,16 +622,20 @@ chatbot:
604
622
  top_k_code: 15
605
623
  top_k_artifact: 8
606
624
  top_k_docs: 6
607
- top_k_relationship: 6
625
+ top_k_relationship: 8
608
626
  candidate_top_k_code: 30
609
627
  candidate_top_k_artifact: 16
610
628
  candidate_top_k_docs: 12
611
629
  candidate_top_k_relationship: 12
612
630
  max_prompt_code_chunks: 12
613
631
  max_prompt_artifact_chunks: 6
614
- max_prompt_doc_chunks: 4
615
- max_prompt_relationship_chunks: 4
616
- max_prompt_chars: 200000
632
+ max_prompt_doc_chunks: 6
633
+ max_prompt_relationship_chunks: 6
634
+ max_prompt_chars: 120000
635
+ fast_mode_use_llm_retrieval_steps: false # Fast mode skips expansion/rerank by default
636
+ fast_mode_iterative_retrieval: false # Fast mode skips second-pass follow-up retrieval
637
+ fast_mode_max_prompt_chars: 90000 # Smaller prompt budget for faster /query answers
638
+ deep_mode_max_prompt_chars: 140000 # Larger budget for /deep-research synthesis
617
639
  lexical_retrieval: true
618
640
  lexical_candidate_limit: 24
619
641
  query_expansion: true
@@ -627,7 +649,8 @@ chatbot:
627
649
  graph_neighbor_relationship_chunks_per_file: 2
628
650
  graph_neighbor_max_docs: 4
629
651
  rerank: true
630
- rerank_candidate_limit: 20
652
+ rerank_candidate_limit: 32
653
+ rerank_candidate_limit_per_kind: 8
631
654
  rerank_preview_chars: 450
632
655
  stitch_adjacent_code_chunks: true
633
656
  stitch_max_adjacent_chunks: 2
@@ -635,7 +658,8 @@ chatbot:
635
658
  live_fallback_max_files: 6
636
659
  live_fallback_max_per_file: 2
637
660
  live_fallback_context_lines: 12
638
- deep_research_chunk_chars: 1600
661
+ deep_research_chunk_chars: 3200
662
+ deep_research_top_k: 10
639
663
 
640
664
  chunking:
641
665
  code_chunk_lines: 120
@@ -670,7 +694,9 @@ chatbot:
670
694
  | `chatbot.answer.base_url` | `""` | Custom endpoint (for Azure, Ollama, etc.) |
671
695
  | `chatbot.answer.api_version` | `""` | Azure API version string |
672
696
  | `chatbot.answer.temperature` | `0.1` | Sampling temperature (lower = more deterministic) |
673
- | `chatbot.answer.max_tokens` | `16000` | Max tokens per answer |
697
+ | `chatbot.answer.max_tokens` | `24000` | Max tokens per answer |
698
+ | `chatbot.answer.continuation_retries` | `2` | Extra completion attempts when an answer appears truncated |
699
+ | `chatbot.answer.continuation_context_chars` | `12000` | Number of trailing chars passed when asking the model to continue |
674
700
  | **Embeddings LLM** | | |
675
701
  | `chatbot.embeddings.provider` | `azure` | Provider for the embedding model |
676
702
  | `chatbot.embeddings.model` | `azure/text-embedding-3-large` | Embedding model |
@@ -682,16 +708,20 @@ chatbot:
682
708
  | `chatbot.retrieval.top_k_code` | `15` | Top code chunks retrieved per query |
683
709
  | `chatbot.retrieval.top_k_artifact` | `8` | Top artifact chunks retrieved per query |
684
710
  | `chatbot.retrieval.top_k_docs` | `6` | Top generated-doc and repo-doc chunks retrieved per query |
685
- | `chatbot.retrieval.top_k_relationship` | `6` | Top relationship chunks retrieved per query |
711
+ | `chatbot.retrieval.top_k_relationship` | `8` | Top relationship chunks retrieved per query |
686
712
  | `chatbot.retrieval.candidate_top_k_code` | `30` | Candidate code chunks gathered before reranking |
687
713
  | `chatbot.retrieval.candidate_top_k_artifact` | `16` | Candidate artifact chunks gathered before reranking |
688
714
  | `chatbot.retrieval.candidate_top_k_docs` | `12` | Candidate doc chunks gathered before reranking |
689
715
  | `chatbot.retrieval.candidate_top_k_relationship` | `12` | Candidate relationship chunks gathered before reranking |
690
716
  | `chatbot.retrieval.max_prompt_code_chunks` | `12` | Max code chunks included in the final prompt |
691
717
  | `chatbot.retrieval.max_prompt_artifact_chunks` | `6` | Max artifact chunks in the final prompt |
692
- | `chatbot.retrieval.max_prompt_doc_chunks` | `4` | Max doc chunks in the final prompt |
693
- | `chatbot.retrieval.max_prompt_relationship_chunks` | `4` | Max relationship chunks included in the final prompt |
694
- | `chatbot.retrieval.max_prompt_chars` | `200000` | Total character budget for the assembled prompt |
718
+ | `chatbot.retrieval.max_prompt_doc_chunks` | `6` | Max doc chunks in the final prompt |
719
+ | `chatbot.retrieval.max_prompt_relationship_chunks` | `6` | Max relationship chunks included in the final prompt |
720
+ | `chatbot.retrieval.max_prompt_chars` | `120000` | Default character budget for assembled prompts |
721
+ | `chatbot.retrieval.fast_mode_use_llm_retrieval_steps` | `false` | In `/query` fast mode, disable LLM query expansion and reranking |
722
+ | `chatbot.retrieval.fast_mode_iterative_retrieval` | `false` | In `/query` fast mode, disable iterative follow-up retrieval |
723
+ | `chatbot.retrieval.fast_mode_max_prompt_chars` | `90000` | Prompt budget used by `/query` fast mode |
724
+ | `chatbot.retrieval.deep_mode_max_prompt_chars` | `140000` | Prompt budget used by `/deep-research` |
695
725
  | `chatbot.retrieval.lexical_retrieval` | `true` | Blend exact-match retrieval with embedding retrieval |
696
726
  | `chatbot.retrieval.lexical_candidate_limit` | `24` | Max lexical candidates gathered before merge/rerank |
697
727
  | `chatbot.retrieval.query_expansion` | `true` | Use LLM to generate alternative search queries |
@@ -705,7 +735,8 @@ chatbot:
705
735
  | `chatbot.retrieval.graph_neighbor_relationship_chunks_per_file` | `2` | Relationship chunks per linked file during graph expansion |
706
736
  | `chatbot.retrieval.graph_neighbor_max_docs` | `4` | Max linked docs pulled in during graph expansion |
707
737
  | `chatbot.retrieval.rerank` | `true` | Use LLM to rerank retrieved chunks |
708
- | `chatbot.retrieval.rerank_candidate_limit` | `20` | Max candidates sent to the reranker |
738
+ | `chatbot.retrieval.rerank_candidate_limit` | `32` | Max candidates sent to the reranker |
739
+ | `chatbot.retrieval.rerank_candidate_limit_per_kind` | `8` | Per-kind candidate cap before filling the global rerank pool |
709
740
  | `chatbot.retrieval.rerank_preview_chars` | `450` | Characters of each chunk shown to the reranker |
710
741
  | `chatbot.retrieval.stitch_adjacent_code_chunks` | `true` | Expand exact-match code hits with adjacent windows from the same file |
711
742
  | `chatbot.retrieval.stitch_max_adjacent_chunks` | `2` | Max adjacent code windows stitched onto a top hit |
@@ -713,7 +744,8 @@ chatbot:
713
744
  | `chatbot.retrieval.live_fallback_max_files` | `6` | Max repo files inspected during a deep-research live fallback |
714
745
  | `chatbot.retrieval.live_fallback_max_per_file` | `2` | Max fallback snippets returned per inspected file |
715
746
  | `chatbot.retrieval.live_fallback_context_lines` | `12` | Lines per fallback snippet around each exact match |
716
- | `chatbot.retrieval.deep_research_chunk_chars` | `1600` | Max chars per evidence chunk passed into deep-research step answers |
747
+ | `chatbot.retrieval.deep_research_chunk_chars` | `3200` | Max chars per evidence chunk passed into deep-research step answers |
748
+ | `chatbot.retrieval.deep_research_top_k` | `10` | Retrieved chunks per deep-research sub-question |
717
749
  | **Chunking** | | |
718
750
  | `chatbot.chunking.code_chunk_lines` | `120` | Lines per code chunk |
719
751
  | `chatbot.chunking.code_chunk_overlap` | `20` | Overlap lines between code chunks |
@@ -790,22 +822,24 @@ During `deepdoc generate`, six corpora are built and stored in `.deepdoc/chatbot
790
822
 
791
823
  ### Chatbot Query Pipeline
792
824
 
793
- When a user asks a question, the backend runs a multi-step retrieval pipeline:
825
+ When a user asks a question, the backend runs a mode-aware retrieval pipeline:
794
826
 
795
- 1. **Query expansion** — The LLM generates up to 3 alternative search queries to improve recall.
827
+ 1. **Query expansion** — In default/deep mode, the LLM can generate alternative search queries to improve recall. Fast mode disables this by default.
796
828
  2. **Embedding** — All queries are embedded using the configured embedding model.
797
829
  3. **Hybrid retrieval** — FAISS similarity search and exact-match lexical search both gather candidates from each corpus.
798
- 4. **Follow-up retrieval** — The backend can derive focused second-pass searches and pull linked files/docs via graph-neighbor expansion.
830
+ 4. **Follow-up retrieval** — The backend can derive focused second-pass searches and pull linked files/docs via graph-neighbor expansion. Fast mode can skip follow-up queries for lower latency.
799
831
  5. **Chunk stitching** — Exact-match code hits can pull adjacent code windows from the same file so larger implementations survive chunk boundaries.
800
- 6. **Reranking** — The LLM scores and reranks the retrieved chunks for relevance.
832
+ 6. **Reranking** — In default/deep mode, the LLM can rerank candidates for relevance. Fast mode disables this by default.
801
833
  7. **Prompt assembly** — Query-type-aware budgets reserve space for the most important evidence types within the character budget.
802
- 8. **Answer generation** — The answer LLM produces a grounded response with code, artifact, doc, repo-doc, relationship, and live-fallback citations when used.
834
+ 8. **Answer generation + continuity guard** — The answer LLM produces a grounded response, and if the output appears truncated (for example ending on a dangling heading), DeepDoc retries with a continuation prompt so the response finishes cleanly.
803
835
 
804
836
  `POST /deep-research` uses the same indexed corpora first, but it can also inspect a small bounded set of live repo files when exact-match evidence is missing from the index. This fallback respects the repo's exclude rules, skips oversized/binary files, and is only used in deep research mode.
805
837
 
838
+ `POST /query` and `POST /deep-research` now return `response_mode` in the payload (`fast`, `deep`, or `default`) so clients can confirm which retrieval profile generated the result.
839
+
806
840
  ### Chatbot API Endpoints
807
841
 
808
- The generated `chatbot_backend/` exposes two endpoints:
842
+ The generated `chatbot_backend/` exposes three endpoints:
809
843
 
810
844
  **Health check:**
811
845
  ```
@@ -826,6 +860,19 @@ POST /query
826
860
 
827
861
  The response includes the answer text, code citations (file path + line range), artifact citations, and links to relevant generated doc pages.
828
862
 
863
+ `/query` is optimized for speed: it runs retrieval in fast mode (no LLM query expansion/rerank by default) and returns an answer plus citations.
864
+
865
+ **Retrieve context only (no answer generation):**
866
+ ```
867
+ POST /query-context
868
+ {
869
+ "question": "Where is reshipping implemented?",
870
+ "history": []
871
+ }
872
+ ```
873
+
874
+ `/query-context` returns selected citations/chunks only. Use this endpoint to inspect retrieval quality independently from answer generation.
875
+
829
876
  ### Deploying the Chatbot
830
877
 
831
878
  For local development, `deepdoc serve` handles everything automatically. For production:
@@ -1,3 +1,3 @@
1
1
  """DeepDoc — Auto-generate beautiful docs from any codebase."""
2
2
 
3
- __version__ = "1.2.0"
3
+ __version__ = "1.3.0"