deepdoc 2.2.0__tar.gz → 2.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepdoc-2.2.0 → deepdoc-2.2.1}/PKG-INFO +13 -1
- {deepdoc-2.2.0 → deepdoc-2.2.1}/README.md +12 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/__init__.py +1 -1
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/cli.py +9 -6
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/generator/evidence.py +17 -8
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/generator/generation.py +83 -29
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/generator/mdx_compile_gate.py +8 -1
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/generator/validation.py +85 -16
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/pipeline_v2.py +29 -21
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/planner/bucket_refinement.py +164 -83
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/planner/common.py +6 -1
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/planner/engine.py +12 -7
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/planner/topology.py +10 -2
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/scanner/artifacts.py +17 -5
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/scanner/clustering.py +6 -2
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/scanner/database.py +9 -2
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/scanner/endpoints.py +3 -3
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/scanner/integrations.py +3 -1
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/scanner/runtime.py +8 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/scanner/utils.py +4 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/smart_update_v2.py +1 -1
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc.egg-info/PKG-INFO +13 -1
- {deepdoc-2.2.0 → deepdoc-2.2.1}/pyproject.toml +1 -1
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_generation_evidence.py +99 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_mdx_compile_gate.py +3 -1
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_parallel_pipeline.py +160 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_planner_granularity.py +104 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/LICENSE +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/__main__.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/_legacy_types.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/benchmark_v2.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/call_graph.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/changelog_writer.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/__init__.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/answer_mixin.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/chunker.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/deep_research.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/docs_summary.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/embeddings.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/indexer.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/linking.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/live_fallback_mixin.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/persistence.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/providers.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/retrieval_mixin.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/routes.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/scaffold.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/service.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/settings.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/source_archive.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/symbol_index.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/chatbot/types.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/config.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/generator/__init__.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/generator/mdx_validator/__init__.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/generator/mdx_validator/package.json +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/generator/mdx_validator/validate.mjs +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/generator/post_processors.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/llm/__init__.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/llm/client.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/llm/json_utils.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/llm/litellm_compat.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/manifest.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/openapi.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/__init__.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/api_detector.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/base.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/go_parser.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/js_ts_parser.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/php_parser.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/python_parser.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/registry.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/__init__.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/base.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/common.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/detector.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/django.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/express.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/falcon.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/fastify.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/go.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/js_shared.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/laravel.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/nestjs.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/python_shared.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/registry.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/routes/repo_resolver.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/parser/vue_parser.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/persistence_v2.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/planner/__init__.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/planner/bucket_injection.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/planner/endpoint_refs.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/planner/flow_candidates.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/planner/heuristics.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/planner/nav_shaping.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/planner/specializations.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/planner/utils.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/prompts/__init__.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/prompts/bucket_types.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/prompts/page_types.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/prompts/selectors.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/prompts/system.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/prompts/update.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/prompts_v2.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/py.typed +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/scanner/__init__.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/scanner/common.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/site/__init__.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/site/builder/__init__.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/site/builder/chatbot_components.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/site/builder/common.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/site/builder/engine.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/site/builder/mdx_utils.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/site/builder/scaffold_files.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/site/builder/templates.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/source_metadata.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/updater_v2.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc/v2_models.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc.egg-info/SOURCES.txt +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc.egg-info/dependency_links.txt +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc.egg-info/entry_points.txt +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc.egg-info/requires.txt +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/deepdoc.egg-info/top_level.txt +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/setup.cfg +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_benchmark_scorecard.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_call_graph.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_changelog.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_chatbot_config.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_chatbot_embeddings.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_chatbot_eval.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_chatbot_index.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_chatbot_persistence.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_chatbot_providers.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_chatbot_query.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_chatbot_relationship.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_chatbot_scaffold.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_chatbot_source_archive.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_classify.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_cli_generate.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_cli_serve.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_cli_update.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_flow_candidates.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_framework_fixtures.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_framework_support.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_fumadocs_builder.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_internal_docs_metadata.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_litellm_compat.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_llm_json_utils.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_parser_ranges.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_planner_consolidation.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_route_registry.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_runtime_scan.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_smart_update.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_stale.py +0 -0
- {deepdoc-2.2.0 → deepdoc-2.2.1}/tests/test_state.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepdoc
|
|
3
|
-
Version: 2.2.0
|
|
3
|
+
Version: 2.2.1
|
|
4
4
|
Summary: Auto-generate beautiful docs from any codebase
|
|
5
5
|
Author: Pranav Kumar
|
|
6
6
|
License: MIT
|
|
@@ -43,6 +43,18 @@ Dynamic: license-file
|
|
|
43
43
|
[](https://pypi.org/project/deepdoc/)
|
|
44
44
|
[](./LICENSE)
|
|
45
45
|
|
|
46
|
+
## Repository Layout
|
|
47
|
+
|
|
48
|
+
| Directory | What it is | Where to start |
|
|
49
|
+
|---|---|---|
|
|
50
|
+
| [`deepdoc/`](./deepdoc/) | The Python package — CLI, pipeline, planner, generator, chatbot, and site builder. This is the core product. | [`deepdoc/README.md`](./deepdoc/README.md) |
|
|
51
|
+
| [`web/`](./web/) | Marketing and changelog site built with Astro 5 + Tailwind. Deployed to the public DeepDoc website. | [`web/README.md`](./web/README.md) |
|
|
52
|
+
| [`vscode-extension/`](./vscode-extension/) | VS Code extension — explains selected code snippets in Fast or Deep mode and inserts AI-generated comments inline. | [`vscode-extension/README.md`](./vscode-extension/README.md) |
|
|
53
|
+
| [`tests/`](./tests/) | pytest test suite for the Python package. | Run `python3 -m pytest -q` from repo root. |
|
|
54
|
+
| [`scripts/`](./scripts/) | One-off release and maintenance scripts. | — |
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
46
58
|
Auto-generate deep engineering documentation from real codebases using AI.
|
|
47
59
|
|
|
48
60
|
DeepDoc scans your repo, builds a bucket-based documentation plan, generates rich MDX pages with Mermaid diagrams, and builds a local-first Fumadocs site with Orama search.
|
|
@@ -4,6 +4,18 @@
|
|
|
4
4
|
[](https://pypi.org/project/deepdoc/)
|
|
5
5
|
[](./LICENSE)
|
|
6
6
|
|
|
7
|
+
## Repository Layout
|
|
8
|
+
|
|
9
|
+
| Directory | What it is | Where to start |
|
|
10
|
+
|---|---|---|
|
|
11
|
+
| [`deepdoc/`](./deepdoc/) | The Python package — CLI, pipeline, planner, generator, chatbot, and site builder. This is the core product. | [`deepdoc/README.md`](./deepdoc/README.md) |
|
|
12
|
+
| [`web/`](./web/) | Marketing and changelog site built with Astro 5 + Tailwind. Deployed to the public DeepDoc website. | [`web/README.md`](./web/README.md) |
|
|
13
|
+
| [`vscode-extension/`](./vscode-extension/) | VS Code extension — explains selected code snippets in Fast or Deep mode and inserts AI-generated comments inline. | [`vscode-extension/README.md`](./vscode-extension/README.md) |
|
|
14
|
+
| [`tests/`](./tests/) | pytest test suite for the Python package. | Run `python3 -m pytest -q` from repo root. |
|
|
15
|
+
| [`scripts/`](./scripts/) | One-off release and maintenance scripts. | — |
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
7
19
|
Auto-generate deep engineering documentation from real codebases using AI.
|
|
8
20
|
|
|
9
21
|
DeepDoc scans your repo, builds a bucket-based documentation plan, generates rich MDX pages with Mermaid diagrams, and builds a local-first Fumadocs site with Orama search.
|
|
@@ -200,8 +200,10 @@ def init(name, description, provider, model, output_dir, with_chatbot):
|
|
|
200
200
|
"openai": ("gpt-4o", "OPENAI_API_KEY"),
|
|
201
201
|
"ollama": ("ollama/llama3.2", None),
|
|
202
202
|
"azure": ("azure/gpt-4o", "AZURE_API_KEY"),
|
|
203
|
+
"google": ("gemini/gemini-1.5-pro", "GEMINI_API_KEY"),
|
|
204
|
+
"gemini": ("gemini/gemini-1.5-pro", "GEMINI_API_KEY"),
|
|
203
205
|
}
|
|
204
|
-
default_model, default_key_env = provider_defaults.get(provider, ("", ""))
|
|
206
|
+
default_model, default_key_env = provider_defaults.get(provider, ("", "DEEPDOC_LLM_API_KEY"))
|
|
205
207
|
resolved_model = model or default_model
|
|
206
208
|
|
|
207
209
|
cfg = dict(DEFAULT_CONFIG)
|
|
@@ -261,14 +263,15 @@ def init(name, description, provider, model, output_dir, with_chatbot):
|
|
|
261
263
|
)
|
|
262
264
|
next_steps.append(" 4. Generate docs: [bold]deepdoc generate[/bold]")
|
|
263
265
|
next_steps.append(" 5. Preview locally: [bold]deepdoc serve[/bold]")
|
|
264
|
-
elif
|
|
265
|
-
next_steps.append(
|
|
266
|
-
f" 2. Set your API key: [bold]export {cfg['llm']['api_key_env']}=...[/bold]"
|
|
267
|
-
)
|
|
266
|
+
elif provider == "ollama":
|
|
267
|
+
next_steps.append(" 2. Make sure Ollama is running locally")
|
|
268
268
|
next_steps.append(" 3. Generate docs: [bold]deepdoc generate[/bold]")
|
|
269
269
|
next_steps.append(" 4. Preview locally: [bold]deepdoc serve[/bold]")
|
|
270
270
|
else:
|
|
271
|
-
|
|
271
|
+
key_env = cfg["llm"].get("api_key_env") or "DEEPDOC_LLM_API_KEY"
|
|
272
|
+
next_steps.append(
|
|
273
|
+
f" 2. Set your API key: [bold]export {key_env}=...[/bold]"
|
|
274
|
+
)
|
|
272
275
|
next_steps.append(" 3. Generate docs: [bold]deepdoc generate[/bold]")
|
|
273
276
|
next_steps.append(" 4. Preview locally: [bold]deepdoc serve[/bold]")
|
|
274
277
|
if with_chatbot:
|
|
@@ -40,6 +40,11 @@ from ..openapi import parse_openapi_spec, spec_to_context_string
|
|
|
40
40
|
|
|
41
41
|
console = Console()
|
|
42
42
|
|
|
43
|
+
_EP_TITLE_RE = re.compile(
|
|
44
|
+
r"^(GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS|CONNECT|TRACE)\s+(/\S*)",
|
|
45
|
+
re.IGNORECASE,
|
|
46
|
+
)
|
|
47
|
+
|
|
43
48
|
# ═════════════════════════════════════════════════════════════════════════════
|
|
44
49
|
# 3.1 Evidence Assembly
|
|
45
50
|
# ═════════════════════════════════════════════════════════════════════════════
|
|
@@ -67,6 +72,7 @@ class AssembledEvidence:
|
|
|
67
72
|
compressed_cards_context: str = ""
|
|
68
73
|
files_included_raw: int = 0
|
|
69
74
|
files_compressed: int = 0
|
|
75
|
+
compressed_file_paths: set[str] = field(default_factory=set)
|
|
70
76
|
coverage_files_total: int = 0
|
|
71
77
|
helper_context: str = "" # resolved helper/utility function bodies
|
|
72
78
|
flow_context: str = "" # call graph + flow evidence (entrypoints, chains, side effects)
|
|
@@ -141,7 +147,7 @@ class EvidenceAssembler:
|
|
|
141
147
|
source_ctx,
|
|
142
148
|
compressed_cards_ctx,
|
|
143
149
|
files_included_raw,
|
|
144
|
-
|
|
150
|
+
compressed_file_paths,
|
|
145
151
|
coverage_total,
|
|
146
152
|
) = self._build_source_context(bucket)
|
|
147
153
|
endpoints_detail = self._build_endpoints_detail(bucket)
|
|
@@ -204,7 +210,8 @@ class EvidenceAssembler:
|
|
|
204
210
|
flow_context=flow_ctx,
|
|
205
211
|
total_evidence_chars=total,
|
|
206
212
|
files_included_raw=files_included_raw,
|
|
207
|
-
files_compressed=
|
|
213
|
+
files_compressed=len(compressed_file_paths),
|
|
214
|
+
compressed_file_paths=compressed_file_paths,
|
|
208
215
|
coverage_files_total=coverage_total,
|
|
209
216
|
evidence_file_paths=evidence_files,
|
|
210
217
|
config_env_context=config_env_ctx,
|
|
@@ -316,11 +323,12 @@ class EvidenceAssembler:
|
|
|
316
323
|
included += 1
|
|
317
324
|
|
|
318
325
|
cards_context = self._format_compressed_cards(compressed_cards)
|
|
326
|
+
compressed_paths = {card.file_path for card in compressed_cards}
|
|
319
327
|
return (
|
|
320
328
|
"\n".join(parts),
|
|
321
329
|
cards_context,
|
|
322
330
|
included,
|
|
323
|
-
|
|
331
|
+
compressed_paths,
|
|
324
332
|
len(ranked_files),
|
|
325
333
|
)
|
|
326
334
|
|
|
@@ -780,7 +788,7 @@ class EvidenceAssembler:
|
|
|
780
788
|
"""Extract actual env var names from source files for grounded config docs."""
|
|
781
789
|
env_vars: dict[str, list[str]] = {} # var_name -> [file_paths]
|
|
782
790
|
|
|
783
|
-
for src_file in bucket.owned_files:
|
|
791
|
+
for src_file in list(bucket.owned_files) + list(bucket.artifact_refs or []):
|
|
784
792
|
src_path = self.repo_root / src_file
|
|
785
793
|
if not src_path.exists():
|
|
786
794
|
continue
|
|
@@ -1052,10 +1060,11 @@ class EvidenceAssembler:
|
|
|
1052
1060
|
|
|
1053
1061
|
# ── endpoint_ref: match specific endpoint, pull deep evidence ─────
|
|
1054
1062
|
if hints.get("is_endpoint_ref"):
|
|
1055
|
-
#
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1063
|
+
# Extract METHOD /path from title via regex — more robust than split(" ", 1)
|
|
1064
|
+
# since titles may not always follow the "GET /path" convention.
|
|
1065
|
+
_m = _EP_TITLE_RE.match(bucket.title)
|
|
1066
|
+
ref_method = _m.group(1).upper() if _m else ""
|
|
1067
|
+
ref_path = _m.group(2) if _m else ""
|
|
1059
1068
|
|
|
1060
1069
|
# Find matching bundle via handler symbol or method+path
|
|
1061
1070
|
matched_bundle = None
|
|
@@ -40,7 +40,7 @@ from ..parser import parse_file, supported_extensions
|
|
|
40
40
|
from ..parser.base import ParsedFile, Symbol
|
|
41
41
|
from ..planner import DocBucket, DocPlan, RepoScan, tracked_bucket_files
|
|
42
42
|
from ..prompts_v2 import SYSTEM_V2, get_prompt_for_bucket
|
|
43
|
-
from ..scanner import _classify_file_role
|
|
43
|
+
from ..scanner import _build_import_lookup, _classify_file_role, _normalize_import
|
|
44
44
|
from ..openapi import parse_openapi_spec, spec_to_context_string
|
|
45
45
|
|
|
46
46
|
console = Console()
|
|
@@ -354,6 +354,7 @@ class BucketGenerationEngine:
|
|
|
354
354
|
self._repo_file_paths = set(self.scan.file_summaries.keys())
|
|
355
355
|
self.coverage_report: dict[str, Any] = {}
|
|
356
356
|
self.local_dev_warnings: list[str] = []
|
|
357
|
+
self._import_lookup = _build_import_lookup(set(self.scan.file_summaries.keys()))
|
|
357
358
|
self._openapi_context = self._precompute_openapi_context()
|
|
358
359
|
self._doc_pages = self._planned_doc_pages()
|
|
359
360
|
(
|
|
@@ -363,17 +364,25 @@ class BucketGenerationEngine:
|
|
|
363
364
|
) = build_internal_doc_link_maps(self._doc_pages)
|
|
364
365
|
|
|
365
366
|
def _precompute_openapi_context(self) -> str:
|
|
366
|
-
"""Parse
|
|
367
|
+
"""Parse all available OpenAPI specs, accumulating up to 6 000 chars."""
|
|
367
368
|
if not self.scan.has_openapi:
|
|
368
369
|
return ""
|
|
370
|
+
spec_count = len(self.scan.openapi_paths)
|
|
371
|
+
per_spec_limit = max(2000, min(4000, 6000 // max(1, spec_count)))
|
|
372
|
+
parts: list[str] = []
|
|
373
|
+
total = 0
|
|
369
374
|
for spec_path in self.scan.openapi_paths:
|
|
370
375
|
spec = parse_openapi_spec(self.repo_root / spec_path)
|
|
371
376
|
if spec:
|
|
372
|
-
|
|
377
|
+
chunk = (
|
|
373
378
|
f"\n## OpenAPI Spec ({spec_path}):\n"
|
|
374
|
-
f"{spec_to_context_string(spec)[:
|
|
379
|
+
f"{spec_to_context_string(spec)[:per_spec_limit]}"
|
|
375
380
|
)
|
|
376
|
-
|
|
381
|
+
parts.append(chunk)
|
|
382
|
+
total += len(chunk)
|
|
383
|
+
if total >= 6000:
|
|
384
|
+
break
|
|
385
|
+
return "\n".join(parts)
|
|
377
386
|
|
|
378
387
|
def generate_all(self, force: bool = False) -> list[GenerationResult]:
|
|
379
388
|
"""Generate all pages. Returns results for each bucket.
|
|
@@ -469,6 +478,9 @@ class BucketGenerationEngine:
|
|
|
469
478
|
f"~{word_count} words{diagrams} · "
|
|
470
479
|
f"{result.elapsed_seconds:.1f}s)[/dim]{warnings}"
|
|
471
480
|
)
|
|
481
|
+
# Save manifest incrementally so a cancelled run
|
|
482
|
+
# can resume from completed pages on next generate.
|
|
483
|
+
self._checkpoint_manifest(_manifest, result)
|
|
472
484
|
except Exception as e:
|
|
473
485
|
failed_count += 1
|
|
474
486
|
results.append(
|
|
@@ -1364,24 +1376,52 @@ Re-run `deepdoc generate` to retry.
|
|
|
1364
1376
|
pages.append((bucket.title, url))
|
|
1365
1377
|
return pages
|
|
1366
1378
|
|
|
1367
|
-
def
|
|
1368
|
-
"""
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1379
|
+
def _bucket_url(self, b: DocBucket) -> str:
|
|
1380
|
+
"""Return the site URL for a bucket, respecting endpoint_ref /api/* routing."""
|
|
1381
|
+
hints = b.generation_hints or {}
|
|
1382
|
+
if hints.get("is_introduction_page"):
|
|
1383
|
+
return "/"
|
|
1384
|
+
if self.scan.has_openapi and (
|
|
1385
|
+
hints.get("is_endpoint_ref")
|
|
1386
|
+
or hints.get("prompt_style") == "endpoint_ref"
|
|
1387
|
+
or b.bucket_type == "endpoint_ref"
|
|
1388
|
+
):
|
|
1389
|
+
return f"/api/{b.slug}"
|
|
1390
|
+
return f"/{b.slug}"
|
|
1373
1391
|
|
|
1392
|
+
def _build_sitemap_for(self, current_slug: str) -> str:
|
|
1393
|
+
"""Build formatted sitemap ordered by nav_structure, excluding current page."""
|
|
1394
|
+
slug_to_bucket = {b.slug: b for b in self.plan.buckets if b.slug != current_slug}
|
|
1374
1395
|
lines: list[str] = []
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1396
|
+
seen: set[str] = set()
|
|
1397
|
+
|
|
1398
|
+
for section, slugs in self.plan.nav_structure.items():
|
|
1399
|
+
section_lines: list[str] = []
|
|
1400
|
+
for slug in slugs:
|
|
1401
|
+
b = slug_to_bucket.get(slug)
|
|
1402
|
+
if not b:
|
|
1403
|
+
continue
|
|
1404
|
+
seen.add(slug)
|
|
1405
|
+
page_path = self._bucket_url(b)
|
|
1379
1406
|
key_files = ", ".join(f"`{f}`" for f in b.owned_files[:4])
|
|
1380
1407
|
if len(b.owned_files) > 4:
|
|
1381
1408
|
key_files += f" +{len(b.owned_files) - 4} more"
|
|
1382
|
-
|
|
1409
|
+
section_lines.append(f"- [{b.title}]({page_path}) — {b.description}")
|
|
1383
1410
|
if key_files:
|
|
1384
|
-
|
|
1411
|
+
section_lines.append(f" *Covers: {key_files}*")
|
|
1412
|
+
if section_lines:
|
|
1413
|
+
lines.append(f"**{section}**")
|
|
1414
|
+
lines.extend(section_lines)
|
|
1415
|
+
|
|
1416
|
+
# Buckets not referenced by nav_structure — group by section
|
|
1417
|
+
orphans_by_section: dict[str, list] = defaultdict(list)
|
|
1418
|
+
for slug, b in slug_to_bucket.items():
|
|
1419
|
+
if slug not in seen:
|
|
1420
|
+
orphans_by_section[b.section or "Other"].append(b)
|
|
1421
|
+
for section, orphan_buckets in orphans_by_section.items():
|
|
1422
|
+
lines.append(f"**{section}**")
|
|
1423
|
+
for b in orphan_buckets:
|
|
1424
|
+
lines.append(f"- [{b.title}]({self._bucket_url(b)}) — {b.description}")
|
|
1385
1425
|
|
|
1386
1426
|
return "\n".join(lines) if lines else "(no other pages)"
|
|
1387
1427
|
|
|
@@ -1396,7 +1436,9 @@ Re-run `deepdoc generate` to retry.
|
|
|
1396
1436
|
if dep_slug in slug_to_bucket and dep_slug != bucket.slug:
|
|
1397
1437
|
related[dep_slug] = slug_to_bucket[dep_slug]
|
|
1398
1438
|
|
|
1399
|
-
# Import-based: find buckets whose files are imported by this bucket's files
|
|
1439
|
+
# Import-based: find buckets whose files are imported by this bucket's files.
|
|
1440
|
+
# Uses the pre-built import lookup (O(imports) per file) instead of scanning
|
|
1441
|
+
# all repo files for each import string.
|
|
1400
1442
|
file_to_buckets: dict[str, list[DocBucket]] = defaultdict(list)
|
|
1401
1443
|
for b in self.plan.buckets:
|
|
1402
1444
|
for f in b.owned_files:
|
|
@@ -1407,18 +1449,17 @@ Re-run `deepdoc generate` to retry.
|
|
|
1407
1449
|
if not parsed or not parsed.imports:
|
|
1408
1450
|
continue
|
|
1409
1451
|
for imp in parsed.imports:
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
for linked_bucket in file_to_buckets.get(
|
|
1452
|
+
for hint in _normalize_import(imp):
|
|
1453
|
+
key = hint.replace(".", "/").lower().strip("/")
|
|
1454
|
+
if not key:
|
|
1455
|
+
continue
|
|
1456
|
+
matched_files = self._import_lookup.get(key, set())
|
|
1457
|
+
if len(matched_files) > 5:
|
|
1458
|
+
continue # ambiguous — too many matches to be useful
|
|
1459
|
+
for matched_file in matched_files:
|
|
1460
|
+
for linked_bucket in file_to_buckets.get(matched_file, []):
|
|
1419
1461
|
if linked_bucket.slug != bucket.slug:
|
|
1420
1462
|
related[linked_bucket.slug] = linked_bucket
|
|
1421
|
-
break
|
|
1422
1463
|
|
|
1423
1464
|
# Strong overlap-based links for database/runtime/interface pages
|
|
1424
1465
|
for candidate in self.plan.buckets:
|
|
@@ -1443,7 +1484,7 @@ Re-run `deepdoc generate` to retry.
|
|
|
1443
1484
|
"**Dependency Links** (pages this module imports from — MUST link to these):"
|
|
1444
1485
|
]
|
|
1445
1486
|
for b in related.values():
|
|
1446
|
-
lines.append(f"- [{b.title}](
|
|
1487
|
+
lines.append(f"- [{b.title}]({self._bucket_url(b)}) — {b.description}")
|
|
1447
1488
|
|
|
1448
1489
|
return "\n".join(lines)
|
|
1449
1490
|
|
|
@@ -1466,6 +1507,19 @@ Re-run `deepdoc generate` to retry.
|
|
|
1466
1507
|
return True
|
|
1467
1508
|
return False
|
|
1468
1509
|
|
|
1510
|
+
def _checkpoint_manifest(self, manifest: Any, result: "GenerationResult") -> None:
|
|
1511
|
+
"""Write the manifest for one completed page so a cancelled run can resume."""
|
|
1512
|
+
from ..manifest import file_hash as compute_hash
|
|
1513
|
+
try:
|
|
1514
|
+
for src_file in tracked_bucket_files(result.bucket):
|
|
1515
|
+
src_path = self.repo_root / src_file
|
|
1516
|
+
if src_path.exists():
|
|
1517
|
+
content = src_path.read_text(encoding="utf-8", errors="replace")
|
|
1518
|
+
manifest.update(src_file, compute_hash(content), result.bucket.slug)
|
|
1519
|
+
manifest.save()
|
|
1520
|
+
except Exception:
|
|
1521
|
+
pass
|
|
1522
|
+
|
|
1469
1523
|
def update_manifest(self, results: list[GenerationResult]):
|
|
1470
1524
|
"""Update the manifest with new file hashes for all successfully generated pages."""
|
|
1471
1525
|
from ..manifest import Manifest, file_hash as compute_hash
|
|
@@ -24,6 +24,7 @@ from typing import Callable
|
|
|
24
24
|
from ..llm import LLMClient
|
|
25
25
|
from ..planner import DocBucket
|
|
26
26
|
from ..prompts_v2 import SYSTEM_V2
|
|
27
|
+
from .post_processors import escape_mdx_route_params, escape_mdx_text_hazards
|
|
27
28
|
from .mdx_validator import (
|
|
28
29
|
MdxCompileError,
|
|
29
30
|
ValidationOutcome,
|
|
@@ -115,13 +116,19 @@ def apply_mdx_compile_gate(
|
|
|
115
116
|
)
|
|
116
117
|
continue
|
|
117
118
|
|
|
119
|
+
# Re-run hazard escaping so LLM fix attempts cannot reintroduce bare
|
|
120
|
+
# {expr} or route params that weren't present before the fix call.
|
|
121
|
+
fixed = escape_mdx_text_hazards(fixed)
|
|
122
|
+
fixed = escape_mdx_route_params(fixed)
|
|
118
123
|
current = fixed
|
|
119
124
|
next_outcome = validate(current)
|
|
120
125
|
if next_outcome.ok:
|
|
121
126
|
return GateOutcome(content=current, retries=retries)
|
|
122
127
|
last_error = next_outcome.error
|
|
123
128
|
|
|
124
|
-
|
|
129
|
+
# Escape hazards one more time before JSX stripping — the retry loop may
|
|
130
|
+
# have left bare {expr} in content that the strip pass won't handle.
|
|
131
|
+
fallback = _strip_jsx_to_markdown(escape_mdx_text_hazards(escape_mdx_route_params(current)))
|
|
125
132
|
fallback_outcome = validate(fallback)
|
|
126
133
|
return GateOutcome(
|
|
127
134
|
content=fallback,
|
|
@@ -102,7 +102,7 @@ class PageValidator:
|
|
|
102
102
|
self._check_sections(content, bucket, result)
|
|
103
103
|
|
|
104
104
|
# 2. Check that owned files are referenced
|
|
105
|
-
self._check_file_refs(content, bucket, result)
|
|
105
|
+
self._check_file_refs(content, bucket, result, evidence)
|
|
106
106
|
|
|
107
107
|
# 3. Check for hallucinated file paths
|
|
108
108
|
self._check_hallucinated_paths(content, bucket, result)
|
|
@@ -114,7 +114,7 @@ class PageValidator:
|
|
|
114
114
|
self._check_hallucinated_symbols(content, bucket, evidence, result)
|
|
115
115
|
|
|
116
116
|
# 6. Check route/path claims for API and operations-heavy pages
|
|
117
|
-
self._check_route_claims(content, bucket, result)
|
|
117
|
+
self._check_route_claims(content, bucket, result, evidence)
|
|
118
118
|
|
|
119
119
|
# 7. Count mermaid diagrams
|
|
120
120
|
result.mermaid_block_count = len(re.findall(r"```mermaid", content))
|
|
@@ -259,21 +259,41 @@ class PageValidator:
|
|
|
259
259
|
]
|
|
260
260
|
|
|
261
261
|
def _check_file_refs(
|
|
262
|
-
self,
|
|
262
|
+
self,
|
|
263
|
+
content: str,
|
|
264
|
+
bucket: DocBucket,
|
|
265
|
+
result: ValidationResult,
|
|
266
|
+
evidence: AssembledEvidence | None = None,
|
|
263
267
|
):
|
|
264
|
-
"""Check that at least some of the bucket's owned files are referenced.
|
|
268
|
+
"""Check that at least some of the bucket's owned files are referenced.
|
|
269
|
+
|
|
270
|
+
Only files the LLM actually received full source for are checked — files
|
|
271
|
+
that were compressed to evidence cards are excluded from the coverage
|
|
272
|
+
threshold because the LLM cannot be expected to cite paths it never saw
|
|
273
|
+
in full.
|
|
274
|
+
"""
|
|
265
275
|
if not bucket.owned_files:
|
|
266
276
|
return
|
|
267
277
|
|
|
278
|
+
# Scope the check to files the LLM actually received full source for.
|
|
279
|
+
compressed_paths: set[str] = (
|
|
280
|
+
evidence.compressed_file_paths
|
|
281
|
+
if evidence is not None and evidence.compressed_file_paths
|
|
282
|
+
else set()
|
|
283
|
+
)
|
|
284
|
+
checkable_files = [f for f in bucket.owned_files if f not in compressed_paths]
|
|
285
|
+
if not checkable_files:
|
|
286
|
+
# All files were compressed — the LLM had no full source to cite from.
|
|
287
|
+
return
|
|
288
|
+
|
|
268
289
|
content_lower = content.lower()
|
|
269
290
|
referenced = 0
|
|
270
|
-
for f in
|
|
271
|
-
# Check if file path appears in the content (case-insensitive)
|
|
291
|
+
for f in checkable_files:
|
|
272
292
|
if f.lower() in content_lower:
|
|
273
293
|
referenced += 1
|
|
274
294
|
|
|
275
|
-
coverage = referenced / len(
|
|
276
|
-
unreferenced = [f for f in
|
|
295
|
+
coverage = referenced / len(checkable_files)
|
|
296
|
+
unreferenced = [f for f in checkable_files if f.lower() not in content_lower]
|
|
277
297
|
|
|
278
298
|
hints = bucket.generation_hints or {}
|
|
279
299
|
is_intro = hints.get("is_introduction_page") or bucket.section == "Start Here"
|
|
@@ -289,12 +309,17 @@ class PageValidator:
|
|
|
289
309
|
|
|
290
310
|
if coverage < threshold and len(unreferenced) > 2:
|
|
291
311
|
result.missing_file_refs = unreferenced[:5]
|
|
312
|
+
compressed_note = (
|
|
313
|
+
f"; {len(compressed_paths)} compressed files excluded"
|
|
314
|
+
if compressed_paths
|
|
315
|
+
else ""
|
|
316
|
+
)
|
|
292
317
|
result.warnings.append(
|
|
293
|
-
f"Low file coverage: {referenced}/{len(
|
|
294
|
-
f"({coverage:.0%}; expected at least {threshold:.0%})"
|
|
318
|
+
f"Low file coverage: {referenced}/{len(checkable_files)} full-source files referenced "
|
|
319
|
+
f"({coverage:.0%}; expected at least {threshold:.0%}{compressed_note})"
|
|
295
320
|
)
|
|
296
321
|
if is_intro:
|
|
297
|
-
if coverage < 0.15 and len(
|
|
322
|
+
if coverage < 0.15 and len(checkable_files) >= 10:
|
|
298
323
|
result.is_valid = False
|
|
299
324
|
else:
|
|
300
325
|
result.is_valid = False
|
|
@@ -438,6 +463,14 @@ class PageValidator:
|
|
|
438
463
|
key = impact.get("key", "") if isinstance(impact, dict) else getattr(impact, "key", "")
|
|
439
464
|
if key:
|
|
440
465
|
symbols.add(str(key))
|
|
466
|
+
# For integration pages, also treat any symbol-like token that appears in
|
|
467
|
+
# the integration context evidence as known-good. These are external SDK
|
|
468
|
+
# symbols (e.g. S3Client, GupshupMessage) that are real but not in the
|
|
469
|
+
# repo's parsed files — they should not be flagged as hallucinations.
|
|
470
|
+
if evidence is not None and evidence.integration_context:
|
|
471
|
+
for token in re.findall(r"\b([A-Za-z_][A-Za-z0-9_]{2,})\b", evidence.integration_context):
|
|
472
|
+
if self._looks_like_symbol_reference(token):
|
|
473
|
+
symbols.add(token)
|
|
441
474
|
return symbols
|
|
442
475
|
|
|
443
476
|
@staticmethod
|
|
@@ -494,7 +527,11 @@ class PageValidator:
|
|
|
494
527
|
return lower in bucket_text
|
|
495
528
|
|
|
496
529
|
def _check_route_claims(
|
|
497
|
-
self,
|
|
530
|
+
self,
|
|
531
|
+
content: str,
|
|
532
|
+
bucket: DocBucket,
|
|
533
|
+
result: ValidationResult,
|
|
534
|
+
evidence: "AssembledEvidence | None" = None,
|
|
498
535
|
) -> None:
|
|
499
536
|
if not self.known_route_paths:
|
|
500
537
|
return
|
|
@@ -512,6 +549,19 @@ class PageValidator:
|
|
|
512
549
|
):
|
|
513
550
|
return
|
|
514
551
|
|
|
552
|
+
# Build the set of valid routes for this page: internal routes + any routes
|
|
553
|
+
# that appear verbatim in the integration context evidence. The second set
|
|
554
|
+
# covers external service API paths (e.g. WhatsApp /messages, AWS /putObject)
|
|
555
|
+
# that the LLM received in evidence and is correct to reference.
|
|
556
|
+
valid_routes = set(self.known_route_paths)
|
|
557
|
+
if evidence is not None and evidence.integration_context:
|
|
558
|
+
for token in re.findall(
|
|
559
|
+
r"(\/[A-Za-z0-9{}_<>\-.:/~]+)", evidence.integration_context
|
|
560
|
+
):
|
|
561
|
+
normalized = self._normalize_route_path(token)
|
|
562
|
+
if normalized and not self._is_markup_path_noise(normalized):
|
|
563
|
+
valid_routes.add(normalized)
|
|
564
|
+
|
|
515
565
|
candidate_tokens: list[str] = []
|
|
516
566
|
for inline in re.findall(r"`([^`]+)`", content):
|
|
517
567
|
candidate_tokens.extend(re.findall(r"(\/[A-Za-z0-9{}_<>\-.:/~]+)", inline))
|
|
@@ -534,7 +584,7 @@ class PageValidator:
|
|
|
534
584
|
candidates.add(route)
|
|
535
585
|
|
|
536
586
|
unmatched = sorted(
|
|
537
|
-
route for route in candidates if route not in
|
|
587
|
+
route for route in candidates if route not in valid_routes
|
|
538
588
|
)
|
|
539
589
|
if unmatched:
|
|
540
590
|
result.unmatched_routes = unmatched[:10]
|
|
@@ -557,9 +607,17 @@ class PageValidator:
|
|
|
557
607
|
return
|
|
558
608
|
|
|
559
609
|
content_lower = content.lower()
|
|
560
|
-
|
|
610
|
+
_flow_terms = (
|
|
611
|
+
"call flow", "execution flow", "request flow",
|
|
612
|
+
"flow diagram", "sequence diagram",
|
|
613
|
+
)
|
|
614
|
+
_effect_terms = (
|
|
615
|
+
"side effect", "downstream effect",
|
|
616
|
+
"triggers", "emits", "dispatches", "publishes",
|
|
617
|
+
)
|
|
618
|
+
if not any(t in content_lower for t in _flow_terms):
|
|
561
619
|
result.missing_flow_edges.append("call_flow")
|
|
562
|
-
if
|
|
620
|
+
if not any(t in content_lower for t in _effect_terms):
|
|
563
621
|
result.missing_flow_entrypoints.append("side_effects")
|
|
564
622
|
|
|
565
623
|
if result.missing_flow_edges or result.missing_flow_entrypoints:
|
|
@@ -903,7 +961,16 @@ class PageValidator:
|
|
|
903
961
|
return
|
|
904
962
|
|
|
905
963
|
content_lower = content.lower()
|
|
906
|
-
|
|
964
|
+
# Use token-based partial matching: an integration name is "covered" if any
|
|
965
|
+
# of its meaningful tokens appear in the content. This handles paraphrasing
|
|
966
|
+
# like "Amazon Web Services" vs "AWS" or "Gupshup WhatsApp" vs "WhatsApp".
|
|
967
|
+
def _is_covered(name: str) -> bool:
|
|
968
|
+
if name.lower() in content_lower:
|
|
969
|
+
return True
|
|
970
|
+
tokens = self._integration_name_tokens(name)
|
|
971
|
+
return bool(tokens) and any(token in content_lower for token in tokens)
|
|
972
|
+
|
|
973
|
+
missing = [name for name in expected if not _is_covered(name)]
|
|
907
974
|
if not missing:
|
|
908
975
|
return
|
|
909
976
|
|
|
@@ -911,6 +978,8 @@ class PageValidator:
|
|
|
911
978
|
result.warnings.append(
|
|
912
979
|
f"Integration context missing named references: {', '.join(result.missing_integrations[:4])}"
|
|
913
980
|
)
|
|
981
|
+
# Only mark invalid when ALL expected integrations are fully absent and we
|
|
982
|
+
# have concrete evidence the LLM had them in context.
|
|
914
983
|
if bucket.bucket_type == "integration" and len(missing) == len(expected):
|
|
915
984
|
result.is_valid = False
|
|
916
985
|
elif (
|
|
@@ -283,7 +283,7 @@ class PipelineV2:
|
|
|
283
283
|
)
|
|
284
284
|
)
|
|
285
285
|
phase_start = time.perf_counter()
|
|
286
|
-
plan = bucket_plan_docs(scan, self.cfg, self.llm)
|
|
286
|
+
plan = bucket_plan_docs(scan, self.cfg, self.llm, repo_root=self.repo_root)
|
|
287
287
|
phase_timings["plan"] = time.perf_counter() - phase_start
|
|
288
288
|
stats["pages_planned"] = len(plan.pages)
|
|
289
289
|
|
|
@@ -347,6 +347,34 @@ class PipelineV2:
|
|
|
347
347
|
stats["playground"] = 0
|
|
348
348
|
phase_timings["openapi"] = 0.0
|
|
349
349
|
|
|
350
|
+
# ── Persist state ──────────────────────────────────────────────
|
|
351
|
+
phase_start = time.perf_counter()
|
|
352
|
+
save_all(plan, scan, gen_results, self.repo_root, self.output_dir)
|
|
353
|
+
stats["llm_usage"] = dict(getattr(self.llm, "usage", {}) or {})
|
|
354
|
+
self._save_quality_report(stats)
|
|
355
|
+
phase_timings["persist"] = time.perf_counter() - phase_start
|
|
356
|
+
|
|
357
|
+
# ── Record changelog after save_all so entries reference persisted pages ──
|
|
358
|
+
try:
|
|
359
|
+
import git as _git
|
|
360
|
+
|
|
361
|
+
_repo_cl = _git.Repo(self.repo_root)
|
|
362
|
+
_head_cl = _repo_cl.head.commit
|
|
363
|
+
_changelog_exists = bool(load_changelog(self.repo_root))
|
|
364
|
+
_record_changelog(
|
|
365
|
+
self.repo_root,
|
|
366
|
+
self.output_dir,
|
|
367
|
+
commit=_head_cl.hexsha,
|
|
368
|
+
commit_message=_head_cl.message.strip().splitlines()[0],
|
|
369
|
+
commit_date=_head_cl.committed_datetime.strftime("%Y-%m-%d"),
|
|
370
|
+
strategy="full_generate",
|
|
371
|
+
pages_updated=[b.slug for b in plan.buckets],
|
|
372
|
+
files_changed=[],
|
|
373
|
+
is_initial=not _changelog_exists,
|
|
374
|
+
)
|
|
375
|
+
except Exception:
|
|
376
|
+
pass # Not a git repo or detached HEAD — skip silently
|
|
377
|
+
|
|
350
378
|
# ── Phase 5: Build site ────────────────────────────────────────
|
|
351
379
|
console.print(
|
|
352
380
|
Panel("[bold]Phase 5/5: Building site[/bold]", border_style="blue")
|
|
@@ -361,13 +389,6 @@ class PipelineV2:
|
|
|
361
389
|
phase_timings["build_site"] = time.perf_counter() - phase_start
|
|
362
390
|
stats["site"] = 1
|
|
363
391
|
|
|
364
|
-
# ── Persist state ──────────────────────────────────────────────
|
|
365
|
-
phase_start = time.perf_counter()
|
|
366
|
-
save_all(plan, scan, gen_results, self.repo_root, self.output_dir)
|
|
367
|
-
stats["llm_usage"] = dict(getattr(self.llm, "usage", {}) or {})
|
|
368
|
-
self._save_quality_report(stats)
|
|
369
|
-
phase_timings["persist"] = time.perf_counter() - phase_start
|
|
370
|
-
|
|
371
392
|
if chatbot_enabled(self.cfg):
|
|
372
393
|
try:
|
|
373
394
|
from .chatbot.indexer import ChatbotIndexer
|
|
@@ -463,19 +484,6 @@ class PipelineV2:
|
|
|
463
484
|
"replanned": True,
|
|
464
485
|
},
|
|
465
486
|
)
|
|
466
|
-
changelog_exists = bool(load_changelog(self.repo_root))
|
|
467
|
-
_commit_obj = _repo.head.commit
|
|
468
|
-
_record_changelog(
|
|
469
|
-
self.repo_root,
|
|
470
|
-
self.output_dir,
|
|
471
|
-
commit=head_sha,
|
|
472
|
-
commit_message=_commit_obj.message.strip().splitlines()[0],
|
|
473
|
-
commit_date=_commit_obj.committed_datetime.strftime("%Y-%m-%d"),
|
|
474
|
-
strategy="full_generate",
|
|
475
|
-
pages_updated=[b.slug for b in plan.buckets],
|
|
476
|
-
files_changed=[],
|
|
477
|
-
is_initial=not changelog_exists,
|
|
478
|
-
)
|
|
479
487
|
except Exception:
|
|
480
488
|
pass # Not a git repo or detached HEAD — skip silently
|
|
481
489
|
|