deepdoc 2.3.5__tar.gz → 2.3.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. {deepdoc-2.3.5 → deepdoc-2.3.6}/PKG-INFO +1 -1
  2. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/generator/__init__.py +1 -0
  3. deepdoc-2.3.6/deepdoc/generator/consistency.py +126 -0
  4. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/generator/evidence.py +72 -1
  5. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/generator/post_processors.py +23 -13
  6. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/pipeline_v2.py +6 -0
  7. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc.egg-info/PKG-INFO +1 -1
  8. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc.egg-info/SOURCES.txt +2 -0
  9. {deepdoc-2.3.5 → deepdoc-2.3.6}/pyproject.toml +1 -1
  10. deepdoc-2.3.6/tests/test_consistency_pass.py +112 -0
  11. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_generation_evidence.py +172 -0
  12. {deepdoc-2.3.5 → deepdoc-2.3.6}/LICENSE +0 -0
  13. {deepdoc-2.3.5 → deepdoc-2.3.6}/README.md +0 -0
  14. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/__init__.py +0 -0
  15. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/__main__.py +0 -0
  16. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/benchmark_v2.py +0 -0
  17. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/call_graph.py +0 -0
  18. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/changelog_writer.py +0 -0
  19. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/__init__.py +0 -0
  20. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/answer_mixin.py +0 -0
  21. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/chunker.py +0 -0
  22. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/constants.py +0 -0
  23. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/deep_research.py +0 -0
  24. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/docs_summary.py +0 -0
  25. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/embeddings.py +0 -0
  26. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/indexer.py +0 -0
  27. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/linking.py +0 -0
  28. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/live_fallback_mixin.py +0 -0
  29. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/persistence.py +0 -0
  30. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/providers.py +0 -0
  31. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/retrieval_mixin.py +0 -0
  32. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/routes.py +0 -0
  33. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/scaffold.py +0 -0
  34. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/service.py +0 -0
  35. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/settings.py +0 -0
  36. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/source_archive.py +0 -0
  37. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/symbol_index.py +0 -0
  38. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/chatbot/types.py +0 -0
  39. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/cli.py +0 -0
  40. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/config.py +0 -0
  41. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/generator/generation.py +0 -0
  42. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/generator/validation.py +0 -0
  43. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/llm/__init__.py +0 -0
  44. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/llm/client.py +0 -0
  45. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/llm/json_utils.py +0 -0
  46. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/llm/litellm_compat.py +0 -0
  47. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/manifest.py +0 -0
  48. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/openapi.py +0 -0
  49. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/__init__.py +0 -0
  50. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/api_detector.py +0 -0
  51. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/base.py +0 -0
  52. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/go_parser.py +0 -0
  53. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/js_ts_parser.py +0 -0
  54. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/php_parser.py +0 -0
  55. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/python_parser.py +0 -0
  56. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/registry.py +0 -0
  57. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/__init__.py +0 -0
  58. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/base.py +0 -0
  59. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/common.py +0 -0
  60. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/detector.py +0 -0
  61. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/django.py +0 -0
  62. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/express.py +0 -0
  63. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/falcon.py +0 -0
  64. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/fastify.py +0 -0
  65. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/go.py +0 -0
  66. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/js_shared.py +0 -0
  67. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/laravel.py +0 -0
  68. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/nestjs.py +0 -0
  69. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/python_shared.py +0 -0
  70. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/registry.py +0 -0
  71. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/routes/repo_resolver.py +0 -0
  72. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/parser/vue_parser.py +0 -0
  73. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/persistence_v2.py +0 -0
  74. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/planner/__init__.py +0 -0
  75. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/planner/bucket_injection.py +0 -0
  76. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/planner/bucket_refinement.py +0 -0
  77. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/planner/common.py +0 -0
  78. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/planner/endpoint_refs.py +0 -0
  79. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/planner/engine.py +0 -0
  80. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/planner/flow_candidates.py +0 -0
  81. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/planner/heuristics.py +0 -0
  82. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/planner/nav_shaping.py +0 -0
  83. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/planner/specializations.py +0 -0
  84. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/planner/topology.py +0 -0
  85. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/planner/utils.py +0 -0
  86. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/prompts/__init__.py +0 -0
  87. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/prompts/bucket_types.py +0 -0
  88. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/prompts/page_types.py +0 -0
  89. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/prompts/selectors.py +0 -0
  90. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/prompts/system.py +0 -0
  91. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/prompts/update.py +0 -0
  92. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/py.typed +0 -0
  93. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/scanner/__init__.py +0 -0
  94. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/scanner/artifacts.py +0 -0
  95. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/scanner/clustering.py +0 -0
  96. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/scanner/common.py +0 -0
  97. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/scanner/database.py +0 -0
  98. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/scanner/endpoints.py +0 -0
  99. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/scanner/integrations.py +0 -0
  100. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/scanner/runtime.py +0 -0
  101. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/scanner/utils.py +0 -0
  102. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/site/__init__.py +0 -0
  103. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/site/builder/__init__.py +0 -0
  104. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/site/builder/chatbot_components.py +0 -0
  105. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/site/builder/common.py +0 -0
  106. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/site/builder/engine.py +0 -0
  107. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/site/builder/mdx_utils.py +0 -0
  108. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/site/builder/scaffold_files.py +0 -0
  109. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/site/builder/templates.py +0 -0
  110. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/smart_update_v2.py +0 -0
  111. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/source_metadata.py +0 -0
  112. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/updater_v2.py +0 -0
  113. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc/v2_models.py +0 -0
  114. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc.egg-info/dependency_links.txt +0 -0
  115. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc.egg-info/entry_points.txt +0 -0
  116. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc.egg-info/requires.txt +0 -0
  117. {deepdoc-2.3.5 → deepdoc-2.3.6}/deepdoc.egg-info/top_level.txt +0 -0
  118. {deepdoc-2.3.5 → deepdoc-2.3.6}/setup.cfg +0 -0
  119. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_benchmark_scorecard.py +0 -0
  120. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_call_graph.py +0 -0
  121. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_changelog.py +0 -0
  122. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_chatbot_config.py +0 -0
  123. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_chatbot_embeddings.py +0 -0
  124. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_chatbot_eval.py +0 -0
  125. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_chatbot_index.py +0 -0
  126. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_chatbot_persistence.py +0 -0
  127. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_chatbot_providers.py +0 -0
  128. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_chatbot_query.py +0 -0
  129. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_chatbot_relationship.py +0 -0
  130. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_chatbot_scaffold.py +0 -0
  131. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_chatbot_source_archive.py +0 -0
  132. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_classify.py +0 -0
  133. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_cli_generate.py +0 -0
  134. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_cli_serve.py +0 -0
  135. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_cli_update.py +0 -0
  136. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_flow_candidates.py +0 -0
  137. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_framework_fixtures.py +0 -0
  138. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_framework_support.py +0 -0
  139. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_fumadocs_builder.py +0 -0
  140. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_internal_docs_metadata.py +0 -0
  141. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_litellm_compat.py +0 -0
  142. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_llm_json_utils.py +0 -0
  143. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_parallel_pipeline.py +0 -0
  144. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_parser_ranges.py +0 -0
  145. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_planner_consolidation.py +0 -0
  146. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_planner_granularity.py +0 -0
  147. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_route_registry.py +0 -0
  148. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_runtime_scan.py +0 -0
  149. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_smart_update.py +0 -0
  150. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_stale.py +0 -0
  151. {deepdoc-2.3.5 → deepdoc-2.3.6}/tests/test_state.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoc
3
- Version: 2.3.5
3
+ Version: 2.3.6
4
4
  Summary: Auto-generate beautiful docs from any codebase
5
5
  Author: Pranav Kumar
6
6
  License: MIT
@@ -1,3 +1,4 @@
1
+ from .consistency import CrossBucketConsistencyPass
1
2
  from .evidence import AssembledEvidence, FileEvidenceCard, EvidenceAssembler
2
3
  from .generation import (
3
4
  PageGenerator,
@@ -0,0 +1,126 @@
1
+ """Post-generation cross-bucket consistency pass.
2
+
3
+ After all pages are generated independently, makes a single LLM call to
4
+ identify cross-linking gaps — pages that discuss concepts documented
5
+ elsewhere but don't link to them — and injects a "See also" callout.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import re
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from rich.console import Console
16
+
17
+ from ..llm import LLMClient
18
+ from .generation import GenerationResult
19
+
20
+ console = Console()
21
+
22
+ CONSISTENCY_SYSTEM = (
23
+ "You are a documentation reviewer. "
24
+ "Your job is to identify cross-linking gaps between independently generated "
25
+ "documentation pages. A gap exists when page A discusses concepts that are clearly "
26
+ "documented on page B but contains no link to page B. "
27
+ "Return only valid JSON — no prose, no markdown fences."
28
+ )
29
+
30
+ _H2_RE = re.compile(r"^## (.+)", re.MULTILINE)
31
+
32
+
33
+ class CrossBucketConsistencyPass:
34
+ """Single post-generation LLM pass to detect and patch cross-link gaps."""
35
+
36
+ def __init__(self, llm: LLMClient, output_dir: Path, cfg: dict[str, Any]) -> None:
37
+ self.llm = llm
38
+ self.output_dir = output_dir
39
+ self.cfg = cfg
40
+
41
+ def run(self, results: list[GenerationResult]) -> int:
42
+ """Detect cross-link gaps and inject 'See also' callouts.
43
+
44
+ Returns the number of pages patched (0 if nothing to do or LLM fails).
45
+ """
46
+ if not self.cfg.get("consistency_pass", True):
47
+ return 0
48
+
49
+ successful = [r for r in results if r.content and not r.error]
50
+ if len(successful) < 2:
51
+ return 0
52
+
53
+ slug_to_title = {r.bucket.slug: r.bucket.title for r in successful}
54
+
55
+ page_summaries = self._build_summaries(successful)
56
+ user_prompt = self._build_prompt(page_summaries)
57
+
58
+ try:
59
+ response = self.llm.complete(CONSISTENCY_SYSTEM, user_prompt)
60
+ except Exception as exc:
61
+ console.print(f"[dim yellow] consistency pass: LLM call failed ({exc})[/dim yellow]")
62
+ return 0
63
+
64
+ cross_links = self._parse_response(response)
65
+ if cross_links is None:
66
+ return 0
67
+
68
+ patched = 0
69
+ for item in cross_links:
70
+ from_slug = item.get("from_slug", "")
71
+ to_slug = item.get("to_slug", "")
72
+ reason = item.get("reason", "")
73
+ if not from_slug or not to_slug or from_slug == to_slug:
74
+ continue
75
+ if to_slug not in slug_to_title:
76
+ continue
77
+ page_path = self.output_dir / f"{from_slug}.md"
78
+ if not page_path.exists():
79
+ continue
80
+ content = page_path.read_text(encoding="utf-8")
81
+ if f"/{to_slug}" in content:
82
+ continue
83
+ to_title = slug_to_title[to_slug]
84
+ callout = f"\n:::note[See also]\n- [{to_title}](/{to_slug}) — {reason}\n:::\n"
85
+ page_path.write_text(content.rstrip() + callout, encoding="utf-8")
86
+ patched += 1
87
+
88
+ return patched
89
+
90
+ # ── helpers ──────────────────────────────────────────────────────────
91
+
92
+ def _build_summaries(self, results: list[GenerationResult]) -> str:
93
+ lines: list[str] = []
94
+ for r in results:
95
+ headings = _H2_RE.findall(r.content or "")[:6]
96
+ heading_str = ", ".join(headings) if headings else "(no sections)"
97
+ lines.append(
98
+ f"- slug={r.bucket.slug} | title={r.bucket.title} "
99
+ f"| type={r.bucket.bucket_type} | sections=[{heading_str}]"
100
+ )
101
+ return "\n".join(lines)
102
+
103
+ def _build_prompt(self, page_summaries: str) -> str:
104
+ return (
105
+ f"You have the following documentation pages ({page_summaries.count(chr(10)) + 1} total).\n"
106
+ "Identify pairs where page A discusses concepts clearly documented on page B "
107
+ "but has no link to it.\n\n"
108
+ f"Pages:\n{page_summaries}\n\n"
109
+ 'Return JSON: {"cross_links": [{"from_slug": "...", "to_slug": "...", "reason": "..."}]}\n\n'
110
+ "Rules:\n"
111
+ "- Only suggest links genuinely useful to a developer reading page A\n"
112
+ "- Do not suggest obvious/redundant links (e.g. intro → everything)\n"
113
+ "- Maximum 20 suggestions total"
114
+ )
115
+
116
+ def _parse_response(self, response: str) -> list[dict[str, str]] | None:
117
+ text = response.strip()
118
+ if text.startswith("```"):
119
+ lines = [ln for ln in text.splitlines() if not ln.strip().startswith("```")]
120
+ text = "\n".join(lines).strip()
121
+ try:
122
+ data = json.loads(text)
123
+ return data.get("cross_links", [])
124
+ except Exception:
125
+ console.print("[dim yellow] consistency pass: could not parse LLM response[/dim yellow]")
126
+ return None
@@ -97,6 +97,13 @@ class FileEvidenceCard:
97
97
  targeted_snippet: str = ""
98
98
 
99
99
 
100
+ def _unowned_ratio(symbols: list[Symbol], owned: set[str]) -> float:
101
+ """Fraction of file symbols not owned by the current bucket."""
102
+ if not symbols:
103
+ return 0.0
104
+ return sum(1 for s in symbols if s.name not in owned) / len(symbols)
105
+
106
+
100
107
  class EvidenceAssembler:
101
108
  """Gathers and formats evidence for a single bucket from the full scan output.
102
109
 
@@ -291,7 +298,17 @@ class EvidenceAssembler:
291
298
 
292
299
  # Choose tier
293
300
  if line_count <= self.large_file_lines:
294
- code = content
301
+ if (
302
+ owned_symbols_set
303
+ and parsed
304
+ and parsed.symbols
305
+ and _unowned_ratio(parsed.symbols, owned_symbols_set) > 0.5
306
+ ):
307
+ code = self._extract_owned_symbol_bodies(
308
+ parsed, content, owned_symbols_set
309
+ )
310
+ else:
311
+ code = content
295
312
  elif line_count <= self.giant_file_lines:
296
313
  code = self._extract_signatures(parsed, content)
297
314
  else:
@@ -1039,6 +1056,60 @@ class EvidenceAssembler:
1039
1056
 
1040
1057
  return header + "\n".join(sig_lines)
1041
1058
 
1059
+ def _extract_owned_symbol_bodies(
1060
+ self,
1061
+ parsed: ParsedFile,
1062
+ content: str,
1063
+ owned_symbols: set[str],
1064
+ ) -> str:
1065
+ """Tier 0.5: file header + full bodies of owned symbols only.
1066
+
1067
+ Activated for Tier 1 files when owned_symbols is set and more than half
1068
+ the file's symbols are unowned — avoids sending irrelevant functions to
1069
+ the LLM.
1070
+ """
1071
+ lines = content.splitlines()
1072
+
1073
+ # File header: everything up to the first def/class/async def (≤60 lines)
1074
+ header_end = 0
1075
+ for i, line in enumerate(lines[:60]):
1076
+ stripped = line.strip()
1077
+ if stripped.startswith(("def ", "class ", "async def ")):
1078
+ header_end = i
1079
+ break
1080
+ header = "\n".join(lines[:header_end]) if header_end else ""
1081
+
1082
+ # Precompute symbol end lines — prefer Symbol.end_line, fall back to
1083
+ # next symbol's start minus 1 (same pattern as _extract_key_sections).
1084
+ def _sym_end(idx: int) -> int:
1085
+ sym = parsed.symbols[idx]
1086
+ if sym.has_known_range():
1087
+ return sym.end_line
1088
+ if idx + 1 < len(parsed.symbols):
1089
+ return parsed.symbols[idx + 1].start_line - 1
1090
+ return len(lines)
1091
+
1092
+ body_parts: list[str] = []
1093
+ for idx, sym in enumerate(parsed.symbols):
1094
+ if sym.name not in owned_symbols:
1095
+ continue
1096
+ start = max(0, sym.start_line - 1)
1097
+ end = _sym_end(idx)
1098
+ body_parts.append("\n".join(lines[start:end]))
1099
+
1100
+ if not body_parts:
1101
+ # owned_symbols listed names that don't match any parsed symbol in
1102
+ # this file — include header + a safe leading chunk so the page
1103
+ # still has some source context.
1104
+ fallback = "\n".join(lines[:min(60, len(lines))])
1105
+ return f"{header}\n\n{fallback}" if header else fallback
1106
+
1107
+ total = len(parsed.symbols)
1108
+ owned_count = len(body_parts)
1109
+ marker = f"# [Owned symbols only — {owned_count} of {total} in file]"
1110
+ joined = "\n\n".join(body_parts)
1111
+ return f"{header}\n\n{marker}\n\n{joined}" if header else f"{marker}\n\n{joined}"
1112
+
1042
1113
  # ── Endpoint detail ──────────────────────────────────────────────────
1043
1114
 
1044
1115
  def _build_endpoints_detail(self, bucket: DocBucket) -> str:
@@ -875,32 +875,42 @@ def fix_bare_mermaid_fences(content: str) -> str:
875
875
 
876
876
 
877
877
  def fix_bare_language_markers(content: str) -> str:
878
- """Repair lines where the LLM appended ':language' instead of opening a fence.
878
+ """Repair lines where the LLM wrote a bare language name instead of opening a fence.
879
879
 
880
- The LLM sometimes writes:
881
- Some description text:typescript
882
- interface Foo { ... }
883
- ```
880
+ Two variants the LLM produces:
884
881
 
885
- instead of:
886
- Some description text
887
- ```typescript
888
- interface Foo { ... }
889
- ```
882
+ 1. Suffix variant — language appended after a colon:
883
+ Some description text:typescript
884
+ interface Foo { ... }
885
+ ```
886
+
887
+ 2. Standalone variant — language on its own line:
888
+ #### Example Usage
889
+ typescript
890
+ <Component ... />
891
+ ```
890
892
 
891
- The bare ':language' suffix leaves the code content in free MDX body,
892
- causing acorn parse errors on any {expression} inside.
893
+ Both leave the code content in free MDX body causing acorn parse errors.
893
894
  """
894
895
  _LANGS = (
895
896
  r"typescript|javascript|python|bash|json|yaml|tsx|jsx"
896
897
  r"|go|rust|java|css|html|sql|sh|text|plaintext|ruby|php|c|cpp|swift"
897
898
  )
898
- return re.sub(
899
+ # Variant 1: text ending in :language
900
+ content = re.sub(
899
901
  rf"^(.*\S):({_LANGS})\s*$",
900
902
  lambda m: f"{m.group(1)}\n```{m.group(2)}",
901
903
  content,
902
904
  flags=re.MULTILINE,
903
905
  )
906
+ # Variant 2: language word alone on its own line (must be preceded by non-code line)
907
+ content = re.sub(
908
+ rf"^({_LANGS})\n",
909
+ lambda m: f"```{m.group(1)}\n",
910
+ content,
911
+ flags=re.MULTILINE,
912
+ )
913
+ return content
904
914
 
905
915
 
906
916
  def fix_leaf_card_directives(content: str) -> str:
@@ -378,6 +378,12 @@ class PipelineV2:
378
378
  gen_results = engine.generate_all(force=force)
379
379
  phase_timings["generate"] = time.perf_counter() - phase_start
380
380
  engine.update_manifest(gen_results)
381
+
382
+ from .generator.consistency import CrossBucketConsistencyPass
383
+ injected = CrossBucketConsistencyPass(self.llm, self.output_dir, self.cfg).run(gen_results)
384
+ if injected:
385
+ console.print(f"[dim] ↳ consistency pass: {injected} cross-link(s) injected[/dim]")
386
+
381
387
  generation_summary = summarize_generation_results(gen_results)
382
388
  stats["pages_generated"] = generation_summary.succeeded
383
389
  stats["pages_failed"] = generation_summary.failed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoc
3
- Version: 2.3.5
3
+ Version: 2.3.6
4
4
  Summary: Auto-generate beautiful docs from any codebase
5
5
  Author: Pranav Kumar
6
6
  License: MIT
@@ -44,6 +44,7 @@ deepdoc/chatbot/source_archive.py
44
44
  deepdoc/chatbot/symbol_index.py
45
45
  deepdoc/chatbot/types.py
46
46
  deepdoc/generator/__init__.py
47
+ deepdoc/generator/consistency.py
47
48
  deepdoc/generator/evidence.py
48
49
  deepdoc/generator/generation.py
49
50
  deepdoc/generator/post_processors.py
@@ -128,6 +129,7 @@ tests/test_classify.py
128
129
  tests/test_cli_generate.py
129
130
  tests/test_cli_serve.py
130
131
  tests/test_cli_update.py
132
+ tests/test_consistency_pass.py
131
133
  tests/test_flow_candidates.py
132
134
  tests/test_framework_fixtures.py
133
135
  tests/test_framework_support.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "deepdoc"
7
- version = "2.3.5"
7
+ version = "2.3.6"
8
8
  description = "Auto-generate beautiful docs from any codebase"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -0,0 +1,112 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from unittest.mock import MagicMock
6
+
7
+ from deepdoc.generator.consistency import CrossBucketConsistencyPass
8
+ from deepdoc.generator.generation import GenerationResult
9
+ from tests.conftest import make_bucket
10
+
11
+
12
+ def _make_result(slug: str, title: str, content: str) -> GenerationResult:
13
+ bucket = make_bucket(title, slug, [])
14
+ return GenerationResult(bucket=bucket, content=content)
15
+
16
+
17
+ def _make_llm(response: str) -> MagicMock:
18
+ llm = MagicMock()
19
+ llm.complete.return_value = response
20
+ return llm
21
+
22
+
23
+ def test_consistency_pass_injects_missing_link(tmp_path):
24
+ """LLM returns a cross-link gap — callout is appended to the source page."""
25
+ output_dir = tmp_path / "docs"
26
+ output_dir.mkdir()
27
+
28
+ orders_content = "# Order Fulfillment\n\n## Overview\n\nplace_order calls charge_card.\n"
29
+ payments_content = "# Payments & Billing\n\n## Overview\n\nStripe integration.\n"
30
+
31
+ (output_dir / "order-fulfillment.md").write_text(orders_content)
32
+ (output_dir / "payments-billing.md").write_text(payments_content)
33
+
34
+ results = [
35
+ _make_result("order-fulfillment", "Order Fulfillment", orders_content),
36
+ _make_result("payments-billing", "Payments & Billing", payments_content),
37
+ ]
38
+
39
+ llm_response = json.dumps({
40
+ "cross_links": [
41
+ {
42
+ "from_slug": "order-fulfillment",
43
+ "to_slug": "payments-billing",
44
+ "reason": "mentions charge_card which is documented here",
45
+ }
46
+ ]
47
+ })
48
+ llm = _make_llm(llm_response)
49
+ cfg = {}
50
+
51
+ injected = CrossBucketConsistencyPass(llm, output_dir, cfg).run(results)
52
+
53
+ assert injected == 1
54
+ patched = (output_dir / "order-fulfillment.md").read_text()
55
+ assert ":::note[See also]" in patched
56
+ assert "/payments-billing" in patched
57
+ assert "charge_card" in patched
58
+ # Payments page untouched
59
+ assert (output_dir / "payments-billing.md").read_text() == payments_content
60
+
61
+
62
+ def test_consistency_pass_skips_existing_link(tmp_path):
63
+ """LLM suggests a link that already exists in the page — no change, returns 0."""
64
+ output_dir = tmp_path / "docs"
65
+ output_dir.mkdir()
66
+
67
+ orders_content = (
68
+ "# Order Fulfillment\n\n"
69
+ "See [Payments & Billing](/payments-billing) for charge details.\n"
70
+ )
71
+ (output_dir / "order-fulfillment.md").write_text(orders_content)
72
+ (output_dir / "payments-billing.md").write_text("# Payments\n")
73
+
74
+ results = [
75
+ _make_result("order-fulfillment", "Order Fulfillment", orders_content),
76
+ _make_result("payments-billing", "Payments & Billing", "# Payments\n"),
77
+ ]
78
+
79
+ llm_response = json.dumps({
80
+ "cross_links": [
81
+ {"from_slug": "order-fulfillment", "to_slug": "payments-billing", "reason": "related"}
82
+ ]
83
+ })
84
+ llm = _make_llm(llm_response)
85
+
86
+ injected = CrossBucketConsistencyPass(llm, output_dir, {}).run(results)
87
+
88
+ assert injected == 0
89
+ # Content unchanged
90
+ assert (output_dir / "order-fulfillment.md").read_text() == orders_content
91
+
92
+
93
+ def test_consistency_pass_handles_llm_failure_gracefully(tmp_path):
94
+ """LLM returns unparseable garbage — pass returns 0 without raising."""
95
+ output_dir = tmp_path / "docs"
96
+ output_dir.mkdir()
97
+
98
+ (output_dir / "page-a.md").write_text("# Page A\n")
99
+ (output_dir / "page-b.md").write_text("# Page B\n")
100
+
101
+ results = [
102
+ _make_result("page-a", "Page A", "# Page A\n"),
103
+ _make_result("page-b", "Page B", "# Page B\n"),
104
+ ]
105
+
106
+ llm = _make_llm("not valid json at all !!!")
107
+
108
+ injected = CrossBucketConsistencyPass(llm, output_dir, {}).run(results)
109
+
110
+ assert injected == 0
111
+ # Files untouched
112
+ assert (output_dir / "page-a.md").read_text() == "# Page A\n"
@@ -2256,3 +2256,175 @@ def test_call_graph_context_prefers_exact_method_symbol_and_counts_extra_context
2256
2256
  assert evidence.total_evidence_chars >= (
2257
2257
  len(evidence.call_graph_context) + len(evidence.config_env_context)
2258
2258
  )
2259
+
2260
+
2261
+ # ── Tier 0.5: symbol-level evidence pack tests ───────────────────────────────
2262
+
2263
+
2264
+ def test_tier1_owned_symbol_bodies_extracts_only_owned(tmp_path):
2265
+ """Tier 1 file with 5 fns, bucket owns 2 → only owned bodies in source_context."""
2266
+ repo_root = tmp_path / "repo"
2267
+ repo_root.mkdir()
2268
+
2269
+ src = "\n".join([
2270
+ "import os",
2271
+ "",
2272
+ "def alpha():",
2273
+ " return 'alpha'",
2274
+ "",
2275
+ "def beta():",
2276
+ " return 'beta'",
2277
+ "",
2278
+ "def gamma():",
2279
+ " return 'gamma'",
2280
+ "",
2281
+ "def delta():",
2282
+ " return 'delta'",
2283
+ "",
2284
+ "def epsilon():",
2285
+ " return 'epsilon'",
2286
+ ])
2287
+ (repo_root / "mod.py").write_text(src)
2288
+
2289
+ symbols = [
2290
+ Symbol(name="alpha", kind="function", signature="def alpha():", start_line=3, end_line=4),
2291
+ Symbol(name="beta", kind="function", signature="def beta():", start_line=6, end_line=7),
2292
+ Symbol(name="gamma", kind="function", signature="def gamma():", start_line=9, end_line=10),
2293
+ Symbol(name="delta", kind="function", signature="def delta():", start_line=12, end_line=13),
2294
+ Symbol(name="epsilon", kind="function", signature="def epsilon():", start_line=15, end_line=16),
2295
+ ]
2296
+ parsed = ParsedFile(path=Path("mod.py"), language="python", symbols=symbols, imports=["os"])
2297
+
2298
+ scan = RepoScan(
2299
+ file_tree={"": ["mod.py"]},
2300
+ file_summaries={"mod.py": "utility module"},
2301
+ file_contents={"mod.py": src},
2302
+ parsed_files={"mod.py": parsed},
2303
+ file_line_counts={"mod.py": len(src.splitlines())},
2304
+ api_endpoints=[],
2305
+ languages={"python": 1},
2306
+ has_openapi=False,
2307
+ openapi_paths=[],
2308
+ total_files=1,
2309
+ frameworks_detected=[],
2310
+ entry_points=[],
2311
+ config_files=[],
2312
+ )
2313
+
2314
+ bucket = make_bucket("Mod", "mod", ["mod.py"])
2315
+ bucket.owned_symbols = ["beta", "delta"]
2316
+ plan = make_plan([bucket])
2317
+
2318
+ evidence = EvidenceAssembler(repo_root, scan, plan, dict(DEFAULT_CONFIG)).assemble(bucket)
2319
+
2320
+ # Owned function bodies present
2321
+ assert "return 'beta'" in evidence.source_context
2322
+ assert "return 'delta'" in evidence.source_context
2323
+ # Unowned function bodies absent
2324
+ assert "return 'alpha'" not in evidence.source_context
2325
+ assert "return 'gamma'" not in evidence.source_context
2326
+ assert "return 'epsilon'" not in evidence.source_context
2327
+ assert "Owned symbols only" in evidence.source_context
2328
+
2329
+
2330
+ def test_tier1_owned_symbol_bodies_falls_through_for_low_unowned_ratio(tmp_path):
2331
+ """Tier 1 file with 4 fns, bucket owns 3 (ratio=0.25) → full source included."""
2332
+ repo_root = tmp_path / "repo"
2333
+ repo_root.mkdir()
2334
+
2335
+ src = "\n".join([
2336
+ "def alpha():",
2337
+ " return 'alpha'",
2338
+ "",
2339
+ "def beta():",
2340
+ " return 'beta'",
2341
+ "",
2342
+ "def gamma():",
2343
+ " return 'gamma'",
2344
+ "",
2345
+ "def delta():",
2346
+ " return 'delta'",
2347
+ ])
2348
+ (repo_root / "mod.py").write_text(src)
2349
+
2350
+ symbols = [
2351
+ Symbol(name="alpha", kind="function", signature="def alpha():", start_line=1, end_line=2),
2352
+ Symbol(name="beta", kind="function", signature="def beta():", start_line=4, end_line=5),
2353
+ Symbol(name="gamma", kind="function", signature="def gamma():", start_line=7, end_line=8),
2354
+ Symbol(name="delta", kind="function", signature="def delta():", start_line=10, end_line=11),
2355
+ ]
2356
+ parsed = ParsedFile(path=Path("mod.py"), language="python", symbols=symbols, imports=[])
2357
+
2358
+ scan = RepoScan(
2359
+ file_tree={"": ["mod.py"]},
2360
+ file_summaries={"mod.py": ""},
2361
+ file_contents={"mod.py": src},
2362
+ parsed_files={"mod.py": parsed},
2363
+ file_line_counts={"mod.py": len(src.splitlines())},
2364
+ api_endpoints=[],
2365
+ languages={"python": 1},
2366
+ has_openapi=False,
2367
+ openapi_paths=[],
2368
+ total_files=1,
2369
+ frameworks_detected=[],
2370
+ entry_points=[],
2371
+ config_files=[],
2372
+ )
2373
+
2374
+ bucket = make_bucket("Mod", "mod", ["mod.py"])
2375
+ bucket.owned_symbols = ["alpha", "beta", "gamma"] # 3 of 4 owned → ratio=0.25
2376
+ plan = make_plan([bucket])
2377
+
2378
+ evidence = EvidenceAssembler(repo_root, scan, plan, dict(DEFAULT_CONFIG)).assemble(bucket)
2379
+
2380
+ # All four function names should appear — full source was included
2381
+ for name in ("alpha", "beta", "gamma", "delta"):
2382
+ assert name in evidence.source_context
2383
+ assert "Owned symbols only" not in evidence.source_context
2384
+
2385
+
2386
+ def test_tier1_owned_symbol_bodies_falls_through_when_owned_symbols_empty(tmp_path):
2387
+ """Tier 1 file with owned_symbols=[] → full source included unchanged."""
2388
+ repo_root = tmp_path / "repo"
2389
+ repo_root.mkdir()
2390
+
2391
+ src = "\n".join([
2392
+ "def alpha():",
2393
+ " return 1",
2394
+ "",
2395
+ "def beta():",
2396
+ " return 2",
2397
+ ])
2398
+ (repo_root / "mod.py").write_text(src)
2399
+
2400
+ symbols = [
2401
+ Symbol(name="alpha", kind="function", signature="def alpha():", start_line=1, end_line=2),
2402
+ Symbol(name="beta", kind="function", signature="def beta():", start_line=4, end_line=5),
2403
+ ]
2404
+ parsed = ParsedFile(path=Path("mod.py"), language="python", symbols=symbols, imports=[])
2405
+
2406
+ scan = RepoScan(
2407
+ file_tree={"": ["mod.py"]},
2408
+ file_summaries={"mod.py": ""},
2409
+ file_contents={"mod.py": src},
2410
+ parsed_files={"mod.py": parsed},
2411
+ file_line_counts={"mod.py": len(src.splitlines())},
2412
+ api_endpoints=[],
2413
+ languages={"python": 1},
2414
+ has_openapi=False,
2415
+ openapi_paths=[],
2416
+ total_files=1,
2417
+ frameworks_detected=[],
2418
+ entry_points=[],
2419
+ config_files=[],
2420
+ )
2421
+
2422
+ bucket = make_bucket("Mod", "mod", ["mod.py"])
2423
+ # owned_symbols defaults to [] — no narrowing should happen
2424
+ plan = make_plan([bucket])
2425
+
2426
+ evidence = EvidenceAssembler(repo_root, scan, plan, dict(DEFAULT_CONFIG)).assemble(bucket)
2427
+
2428
+ assert "alpha" in evidence.source_context
2429
+ assert "beta" in evidence.source_context
2430
+ assert "Owned symbols only" not in evidence.source_context
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes