deepdoc 2.2.1__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepdoc-2.2.1 → deepdoc-2.3.0}/PKG-INFO +1 -1
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/changelog_writer.py +11 -11
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/answer_mixin.py +6 -12
- deepdoc-2.3.0/deepdoc/chatbot/constants.py +27 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/docs_summary.py +2 -2
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/indexer.py +3 -3
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/linking.py +2 -2
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/live_fallback_mixin.py +1 -2
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/retrieval_mixin.py +1 -38
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/service.py +8 -49
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/settings.py +2 -11
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/cli.py +2 -2
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/generator/__init__.py +0 -12
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/generator/evidence.py +1 -1
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/generator/generation.py +64 -90
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/generator/post_processors.py +149 -373
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/generator/validation.py +40 -38
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/persistence_v2.py +32 -6
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/pipeline_v2.py +153 -50
- deepdoc-2.3.0/deepdoc/planner/__init__.py +3 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/planner/bucket_injection.py +147 -298
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/planner/bucket_refinement.py +10 -5
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/planner/common.py +15 -231
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/planner/engine.py +31 -2
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/planner/heuristics.py +13 -150
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/planner/nav_shaping.py +80 -70
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/planner/specializations.py +154 -16
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/planner/topology.py +17 -8
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/planner/utils.py +0 -13
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/prompts/system.py +61 -54
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/site/builder/__init__.py +2 -2
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/site/builder/engine.py +108 -45
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/site/builder/mdx_utils.py +4 -4
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/site/builder/scaffold_files.py +127 -1
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/site/builder/templates.py +1 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/smart_update_v2.py +3 -3
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/updater_v2.py +4 -4
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/v2_models.py +0 -3
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc.egg-info/PKG-INFO +1 -1
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc.egg-info/SOURCES.txt +1 -7
- {deepdoc-2.2.1 → deepdoc-2.3.0}/pyproject.toml +1 -1
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_changelog.py +6 -6
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_chatbot_index.py +11 -11
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_cli_serve.py +2 -2
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_fumadocs_builder.py +26 -405
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_generation_evidence.py +9 -8
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_internal_docs_metadata.py +10 -11
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_llm_json_utils.py +1 -1
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_parallel_pipeline.py +2 -7
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_planner_consolidation.py +2 -7
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_planner_granularity.py +26 -75
- deepdoc-2.2.1/deepdoc/_legacy_types.py +0 -48
- deepdoc-2.2.1/deepdoc/generator/mdx_compile_gate.py +0 -390
- deepdoc-2.2.1/deepdoc/generator/mdx_validator/__init__.py +0 -182
- deepdoc-2.2.1/deepdoc/generator/mdx_validator/package.json +0 -12
- deepdoc-2.2.1/deepdoc/generator/mdx_validator/validate.mjs +0 -53
- deepdoc-2.2.1/deepdoc/planner/__init__.py +0 -8
- deepdoc-2.2.1/deepdoc/prompts_v2.py +0 -43
- deepdoc-2.2.1/tests/test_mdx_compile_gate.py +0 -287
- {deepdoc-2.2.1 → deepdoc-2.3.0}/LICENSE +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/README.md +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/__init__.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/__main__.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/benchmark_v2.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/call_graph.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/__init__.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/chunker.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/deep_research.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/embeddings.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/persistence.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/providers.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/routes.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/scaffold.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/source_archive.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/symbol_index.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/chatbot/types.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/config.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/llm/__init__.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/llm/client.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/llm/json_utils.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/llm/litellm_compat.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/manifest.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/openapi.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/__init__.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/api_detector.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/base.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/go_parser.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/js_ts_parser.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/php_parser.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/python_parser.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/registry.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/__init__.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/base.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/common.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/detector.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/django.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/express.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/falcon.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/fastify.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/go.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/js_shared.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/laravel.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/nestjs.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/python_shared.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/registry.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/routes/repo_resolver.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/parser/vue_parser.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/planner/endpoint_refs.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/planner/flow_candidates.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/prompts/__init__.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/prompts/bucket_types.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/prompts/page_types.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/prompts/selectors.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/prompts/update.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/py.typed +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/scanner/__init__.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/scanner/artifacts.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/scanner/clustering.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/scanner/common.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/scanner/database.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/scanner/endpoints.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/scanner/integrations.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/scanner/runtime.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/scanner/utils.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/site/__init__.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/site/builder/chatbot_components.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/site/builder/common.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc/source_metadata.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc.egg-info/dependency_links.txt +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc.egg-info/entry_points.txt +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc.egg-info/requires.txt +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/deepdoc.egg-info/top_level.txt +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/setup.cfg +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_benchmark_scorecard.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_call_graph.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_chatbot_config.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_chatbot_embeddings.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_chatbot_eval.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_chatbot_persistence.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_chatbot_providers.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_chatbot_query.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_chatbot_relationship.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_chatbot_scaffold.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_chatbot_source_archive.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_classify.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_cli_generate.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_cli_update.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_flow_candidates.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_framework_fixtures.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_framework_support.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_litellm_compat.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_parser_ranges.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_route_registry.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_runtime_scan.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_smart_update.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_stale.py +0 -0
- {deepdoc-2.2.1 → deepdoc-2.3.0}/tests/test_state.py +0 -0
|
@@ -31,7 +31,7 @@ def record_and_write(
|
|
|
31
31
|
files_changed: list[str],
|
|
32
32
|
is_initial: bool = False,
|
|
33
33
|
) -> None:
|
|
34
|
-
"""Append one changelog entry and regenerate whats-changed.mdx."""
|
|
34
|
+
"""Append one changelog entry and regenerate whats-changed.md."""
|
|
35
35
|
entry = {
|
|
36
36
|
"commit": commit[:8],
|
|
37
37
|
"run_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
|
|
@@ -47,15 +47,15 @@ def record_and_write(
|
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
def write_whats_changed_page(repo_root: Path, output_dir: Path) -> None:
|
|
50
|
-
"""Write docs/whats-changed.mdx from .deepdoc/changelog.json."""
|
|
50
|
+
"""Write docs/whats-changed.md from .deepdoc/changelog.json."""
|
|
51
51
|
entries = load_changelog(repo_root)
|
|
52
|
-
|
|
52
|
+
content = _build_md(entries)
|
|
53
53
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
54
|
-
atomic_write_text(output_dir / "whats-changed.mdx", content)
|
|
54
|
+
atomic_write_text(output_dir / "whats-changed.md", content)
|
|
55
55
|
_ensure_in_nav(repo_root)
|
|
56
56
|
|
|
57
57
|
|
|
58
|
-
def _build_mdx(entries: list[dict]) -> str:
|
|
58
|
+
def _build_md(entries: list[dict]) -> str:
|
|
59
59
|
lines = [
|
|
60
60
|
"---",
|
|
61
61
|
'title: "What\'s Changed"',
|
|
@@ -72,11 +72,11 @@ def _build_mdx(entries: list[dict]) -> str:
|
|
|
72
72
|
|
|
73
73
|
if not entries:
|
|
74
74
|
lines.append(
|
|
75
|
-
"
|
|
75
|
+
":::note\nNo changelog entries yet. Run `deepdoc generate` to create the first entry.\n:::"
|
|
76
76
|
)
|
|
77
77
|
return "\n".join(lines)
|
|
78
78
|
|
|
79
|
-
lines.append("
|
|
79
|
+
lines.append(":::accordions")
|
|
80
80
|
for entry in entries:
|
|
81
81
|
date = entry.get("date", "")
|
|
82
82
|
msg = entry.get("commit_message", "update")
|
|
@@ -88,7 +88,7 @@ def _build_mdx(entries: list[dict]) -> str:
|
|
|
88
88
|
strategy_label = _STRATEGY_LABEL.get(strategy, strategy)
|
|
89
89
|
|
|
90
90
|
title = f"{date} — {msg[:72]} ({sha})"
|
|
91
|
-
lines.append(f'
|
|
91
|
+
lines.append(f'::accordion{{title="{title}"}}')
|
|
92
92
|
lines.append("")
|
|
93
93
|
|
|
94
94
|
# Commit metadata row
|
|
@@ -119,7 +119,7 @@ def _build_mdx(entries: list[dict]) -> str:
|
|
|
119
119
|
lines.append(f"- [{_slug_to_title(s)}](/{s})")
|
|
120
120
|
else:
|
|
121
121
|
lines.append(
|
|
122
|
-
"
|
|
122
|
+
":::info\nNo pages were regenerated — only metadata or chatbot corpora were refreshed.\n:::"
|
|
123
123
|
)
|
|
124
124
|
|
|
125
125
|
# Source files that changed
|
|
@@ -148,9 +148,9 @@ def _build_mdx(entries: list[dict]) -> str:
|
|
|
148
148
|
)
|
|
149
149
|
|
|
150
150
|
lines.append("")
|
|
151
|
-
lines.append("
|
|
151
|
+
lines.append("::")
|
|
152
152
|
|
|
153
|
-
lines.append("
|
|
153
|
+
lines.append(":::")
|
|
154
154
|
return "\n".join(lines)
|
|
155
155
|
|
|
156
156
|
|
|
@@ -7,6 +7,11 @@ import re
|
|
|
7
7
|
from typing import Any, Callable
|
|
8
8
|
|
|
9
9
|
from ..source_metadata import classify_source_kind
|
|
10
|
+
from .constants import (
|
|
11
|
+
CODE_WORKSPACE_CONFIG_NAMES,
|
|
12
|
+
CODE_WORKSPACE_CONFIG_SUFFIXES,
|
|
13
|
+
CODE_WORKSPACE_SUFFIXES,
|
|
14
|
+
)
|
|
10
15
|
from .types import (
|
|
11
16
|
EvidenceItem,
|
|
12
17
|
ReferenceItem,
|
|
@@ -14,17 +19,6 @@ from .types import (
|
|
|
14
19
|
RetrievedChunk,
|
|
15
20
|
)
|
|
16
21
|
|
|
17
|
-
CODE_WORKSPACE_SUFFIXES = {
|
|
18
|
-
".py", ".js", ".jsx", ".ts", ".tsx", ".go", ".php", ".java", ".rb",
|
|
19
|
-
".rs", ".vue", ".svelte", ".html", ".css", ".scss", ".sass",
|
|
20
|
-
}
|
|
21
|
-
CODE_WORKSPACE_CONFIG_NAMES = {
|
|
22
|
-
".env", ".env.example", "docker-compose.yml", "docker-compose.yaml",
|
|
23
|
-
"package.json", "pyproject.toml", "requirements.txt", "composer.json",
|
|
24
|
-
"go.mod", "cargo.toml", "gemfile",
|
|
25
|
-
}
|
|
26
|
-
CODE_WORKSPACE_CONFIG_SUFFIXES = {".json", ".toml", ".yaml", ".yml", ".ini", ".cfg"}
|
|
27
|
-
|
|
28
22
|
|
|
29
23
|
class AnswerMixin:
|
|
30
24
|
"""Mixin providing answer generation and evidence contract methods."""
|
|
@@ -1039,7 +1033,7 @@ class AnswerMixin:
|
|
|
1039
1033
|
{
|
|
1040
1034
|
"title": page.title,
|
|
1041
1035
|
"url": url,
|
|
1042
|
-
"doc_path": f"{page.slug}.mdx",
|
|
1036
|
+
"doc_path": f"{page.slug}.md",
|
|
1043
1037
|
},
|
|
1044
1038
|
)
|
|
1045
1039
|
return list(links.values())[:5]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Shared constants for the chatbot module."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
STOPWORD_TOKENS: frozenset[str] = frozenset({
|
|
6
|
+
"a", "an", "and", "any", "are", "can", "does", "first", "for", "from",
|
|
7
|
+
"handle", "handled", "how", "in", "is", "it", "its", "of", "or", "repo",
|
|
8
|
+
"repository", "show", "that", "the", "this", "to", "use", "what", "went",
|
|
9
|
+
"where", "who", "which", "with", "work",
|
|
10
|
+
})
|
|
11
|
+
|
|
12
|
+
DOC_SUFFIXES: frozenset[str] = frozenset({".md", ".mdx", ".txt", ".rst", ".adoc", ".ipynb"})
|
|
13
|
+
|
|
14
|
+
CODE_WORKSPACE_SUFFIXES: frozenset[str] = frozenset({
|
|
15
|
+
".py", ".js", ".jsx", ".ts", ".tsx", ".go", ".php", ".java", ".rb",
|
|
16
|
+
".rs", ".vue", ".svelte", ".html", ".css", ".scss", ".sass",
|
|
17
|
+
})
|
|
18
|
+
|
|
19
|
+
CODE_WORKSPACE_CONFIG_NAMES: frozenset[str] = frozenset({
|
|
20
|
+
".env", ".env.example", "docker-compose.yml", "docker-compose.yaml",
|
|
21
|
+
"package.json", "pyproject.toml", "requirements.txt", "composer.json",
|
|
22
|
+
"go.mod", "cargo.toml", "gemfile",
|
|
23
|
+
})
|
|
24
|
+
|
|
25
|
+
CODE_WORKSPACE_CONFIG_SUFFIXES: frozenset[str] = frozenset({
|
|
26
|
+
".json", ".toml", ".yaml", ".yml", ".ini", ".cfg",
|
|
27
|
+
})
|
|
@@ -548,8 +548,8 @@ def _looks_binary(path: Path) -> bool:
|
|
|
548
548
|
def _doc_path_for_page(output_dir: Path, page: Any) -> Path:
|
|
549
549
|
hints = (page._b.generation_hints or {}) if hasattr(page, "_b") else {}
|
|
550
550
|
if hints.get("is_introduction_page") or page.page_type == "overview":
|
|
551
|
-
return output_dir / "index.mdx"
|
|
552
|
-
return output_dir / f"{page.slug}.mdx"
|
|
551
|
+
return output_dir / "index.md"
|
|
552
|
+
return output_dir / f"{page.slug}.md"
|
|
553
553
|
|
|
554
554
|
|
|
555
555
|
def _doc_url(page: Any, has_openapi: bool) -> str:
|
|
@@ -316,14 +316,14 @@ class ChatbotIndexer:
|
|
|
316
316
|
changed_keys=artifact_targets,
|
|
317
317
|
deleted_keys=deleted_files,
|
|
318
318
|
)
|
|
319
|
-
deleted_doc_paths = [path for path in deleted_files if path.endswith(".mdx")]
|
|
319
|
+
deleted_doc_paths = [path for path in deleted_files if path.endswith((".md", ".mdx"))]
|
|
320
320
|
if rebuild_doc_summary:
|
|
321
321
|
self._save_records("doc_summary", doc_summary_records)
|
|
322
322
|
else:
|
|
323
323
|
self._merge_records(
|
|
324
324
|
"doc_summary",
|
|
325
325
|
doc_summary_records,
|
|
326
|
-
changed_keys=[f"{slug}.mdx" for slug in changed_doc_slugs],
|
|
326
|
+
changed_keys=[f"{slug}.md" for slug in changed_doc_slugs],
|
|
327
327
|
deleted_keys=deleted_doc_paths,
|
|
328
328
|
)
|
|
329
329
|
if rebuild_doc_full:
|
|
@@ -332,7 +332,7 @@ class ChatbotIndexer:
|
|
|
332
332
|
self._merge_records(
|
|
333
333
|
"doc_full",
|
|
334
334
|
doc_full_records,
|
|
335
|
-
changed_keys=[f"{slug}.mdx" for slug in changed_doc_slugs],
|
|
335
|
+
changed_keys=[f"{slug}.md" for slug in changed_doc_slugs],
|
|
336
336
|
deleted_keys=deleted_doc_paths,
|
|
337
337
|
)
|
|
338
338
|
if rebuild_repo_doc:
|
|
@@ -67,8 +67,8 @@ def bucket_doc_path(bucket: DocBucket) -> str:
|
|
|
67
67
|
hints = bucket.generation_hints or {}
|
|
68
68
|
page_type = hints.get("prompt_style", bucket.bucket_type)
|
|
69
69
|
if hints.get("is_introduction_page") or page_type == "overview":
|
|
70
|
-
return "index.mdx"
|
|
71
|
-
return f"{bucket.slug}.mdx"
|
|
70
|
+
return "index.md"
|
|
71
|
+
return f"{bucket.slug}.md"
|
|
72
72
|
|
|
73
73
|
|
|
74
74
|
def bucket_doc_url(bucket: DocBucket, *, has_openapi: bool = False) -> str:
|
|
@@ -8,13 +8,12 @@ from pathlib import Path
|
|
|
8
8
|
from typing import Any
|
|
9
9
|
|
|
10
10
|
from ..source_metadata import classify_source_kind
|
|
11
|
+
from .constants import DOC_SUFFIXES
|
|
11
12
|
from .types import (
|
|
12
13
|
ChunkRecord,
|
|
13
14
|
RetrievedChunk,
|
|
14
15
|
)
|
|
15
16
|
|
|
16
|
-
DOC_SUFFIXES = {".md", ".mdx", ".txt", ".rst", ".adoc", ".ipynb"}
|
|
17
|
-
|
|
18
17
|
|
|
19
18
|
class LiveFallbackMixin:
|
|
20
19
|
"""Mixin providing live-repo fallback search methods."""
|
|
@@ -7,6 +7,7 @@ from pathlib import Path
|
|
|
7
7
|
import re
|
|
8
8
|
from typing import Any
|
|
9
9
|
|
|
10
|
+
from .constants import DOC_SUFFIXES, STOPWORD_TOKENS
|
|
10
11
|
from .persistence import (
|
|
11
12
|
query_lexical_index,
|
|
12
13
|
similarity_search,
|
|
@@ -15,44 +16,6 @@ from .types import (
|
|
|
15
16
|
RetrievedChunk,
|
|
16
17
|
)
|
|
17
18
|
|
|
18
|
-
STOPWORD_TOKENS = {
|
|
19
|
-
"a",
|
|
20
|
-
"an",
|
|
21
|
-
"and",
|
|
22
|
-
"any",
|
|
23
|
-
"are",
|
|
24
|
-
"can",
|
|
25
|
-
"does",
|
|
26
|
-
"first",
|
|
27
|
-
"for",
|
|
28
|
-
"from",
|
|
29
|
-
"handle",
|
|
30
|
-
"handled",
|
|
31
|
-
"how",
|
|
32
|
-
"in",
|
|
33
|
-
"is",
|
|
34
|
-
"it",
|
|
35
|
-
"its",
|
|
36
|
-
"of",
|
|
37
|
-
"or",
|
|
38
|
-
"repo",
|
|
39
|
-
"repository",
|
|
40
|
-
"show",
|
|
41
|
-
"that",
|
|
42
|
-
"the",
|
|
43
|
-
"this",
|
|
44
|
-
"to",
|
|
45
|
-
"use",
|
|
46
|
-
"what",
|
|
47
|
-
"went",
|
|
48
|
-
"where",
|
|
49
|
-
"who",
|
|
50
|
-
"which",
|
|
51
|
-
"with",
|
|
52
|
-
"work",
|
|
53
|
-
}
|
|
54
|
-
DOC_SUFFIXES = {".md", ".mdx", ".txt", ".rst", ".adoc", ".ipynb"}
|
|
55
|
-
|
|
56
19
|
|
|
57
20
|
class RetrievalMixin:
|
|
58
21
|
"""Mixin providing retrieval and search methods for ChatbotQueryService."""
|
|
@@ -39,59 +39,18 @@ from .types import (
|
|
|
39
39
|
SourceCatalogEntry,
|
|
40
40
|
)
|
|
41
41
|
|
|
42
|
-
from .retrieval_mixin import RetrievalMixin
|
|
43
42
|
from .answer_mixin import AnswerMixin
|
|
43
|
+
from .constants import (
|
|
44
|
+
CODE_WORKSPACE_CONFIG_NAMES,
|
|
45
|
+
CODE_WORKSPACE_CONFIG_SUFFIXES,
|
|
46
|
+
CODE_WORKSPACE_SUFFIXES,
|
|
47
|
+
DOC_SUFFIXES,
|
|
48
|
+
STOPWORD_TOKENS,
|
|
49
|
+
)
|
|
44
50
|
from .live_fallback_mixin import LiveFallbackMixin
|
|
51
|
+
from .retrieval_mixin import RetrievalMixin
|
|
45
52
|
from .routes import create_fastapi_app, QueryRequest, DeepResearchRequest, CodeDeepRequest
|
|
46
53
|
|
|
47
|
-
STOPWORD_TOKENS = {
|
|
48
|
-
"a",
|
|
49
|
-
"an",
|
|
50
|
-
"and",
|
|
51
|
-
"any",
|
|
52
|
-
"are",
|
|
53
|
-
"can",
|
|
54
|
-
"does",
|
|
55
|
-
"first",
|
|
56
|
-
"for",
|
|
57
|
-
"from",
|
|
58
|
-
"handle",
|
|
59
|
-
"handled",
|
|
60
|
-
"how",
|
|
61
|
-
"in",
|
|
62
|
-
"is",
|
|
63
|
-
"it",
|
|
64
|
-
"its",
|
|
65
|
-
"of",
|
|
66
|
-
"or",
|
|
67
|
-
"repo",
|
|
68
|
-
"repository",
|
|
69
|
-
"show",
|
|
70
|
-
"that",
|
|
71
|
-
"the",
|
|
72
|
-
"this",
|
|
73
|
-
"to",
|
|
74
|
-
"use",
|
|
75
|
-
"what",
|
|
76
|
-
"went",
|
|
77
|
-
"where",
|
|
78
|
-
"who",
|
|
79
|
-
"which",
|
|
80
|
-
"with",
|
|
81
|
-
"work",
|
|
82
|
-
}
|
|
83
|
-
DOC_SUFFIXES = {".md", ".mdx", ".txt", ".rst", ".adoc", ".ipynb"}
|
|
84
|
-
CODE_WORKSPACE_SUFFIXES = {
|
|
85
|
-
".py", ".js", ".jsx", ".ts", ".tsx", ".go", ".php", ".java", ".rb",
|
|
86
|
-
".rs", ".vue", ".svelte", ".html", ".css", ".scss", ".sass",
|
|
87
|
-
}
|
|
88
|
-
CODE_WORKSPACE_CONFIG_NAMES = {
|
|
89
|
-
".env", ".env.example", "docker-compose.yml", "docker-compose.yaml",
|
|
90
|
-
"package.json", "pyproject.toml", "requirements.txt", "composer.json",
|
|
91
|
-
"go.mod", "cargo.toml", "gemfile",
|
|
92
|
-
}
|
|
93
|
-
CODE_WORKSPACE_CONFIG_SUFFIXES = {".json", ".toml", ".yaml", ".yml", ".ini", ".cfg"}
|
|
94
|
-
|
|
95
54
|
|
|
96
55
|
class ChatbotQueryService(RetrievalMixin, AnswerMixin, LiveFallbackMixin):
|
|
97
56
|
"""Query all chatbot corpora and answer with grounded citations."""
|
|
@@ -2,13 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from copy import deepcopy
|
|
6
5
|
import os
|
|
7
6
|
from pathlib import Path
|
|
8
7
|
from typing import Any
|
|
9
8
|
from urllib.parse import urlparse
|
|
10
9
|
import zlib
|
|
11
10
|
|
|
11
|
+
from ..config import _deep_merge
|
|
12
|
+
|
|
12
13
|
DEFAULT_CHATBOT_CONFIG: dict[str, Any] = {
|
|
13
14
|
"enabled": False,
|
|
14
15
|
"index_dir": ".deepdoc/chatbot",
|
|
@@ -113,16 +114,6 @@ DEFAULT_CHATBOT_CONFIG: dict[str, Any] = {
|
|
|
113
114
|
}
|
|
114
115
|
|
|
115
116
|
|
|
116
|
-
def _deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
|
|
117
|
-
merged = deepcopy(base)
|
|
118
|
-
for key, value in override.items():
|
|
119
|
-
if isinstance(value, dict) and isinstance(merged.get(key), dict):
|
|
120
|
-
merged[key] = _deep_merge(merged[key], value)
|
|
121
|
-
else:
|
|
122
|
-
merged[key] = value
|
|
123
|
-
return merged
|
|
124
|
-
|
|
125
|
-
|
|
126
117
|
def get_chatbot_cfg(cfg: dict[str, Any]) -> dict[str, Any]:
|
|
127
118
|
return _deep_merge(DEFAULT_CHATBOT_CONFIG, cfg.get("chatbot", {}))
|
|
128
119
|
|
|
@@ -1323,7 +1323,7 @@ def _detect_generated_deepdoc_version(repo_root: Path, output_dir: Path) -> str
|
|
|
1323
1323
|
for directory in candidates:
|
|
1324
1324
|
if not directory.exists():
|
|
1325
1325
|
continue
|
|
1326
|
-
for doc_path in sorted(directory.rglob("*.mdx")):
|
|
1326
|
+
for doc_path in sorted(directory.rglob("*.md")):
|
|
1327
1327
|
try:
|
|
1328
1328
|
content = doc_path.read_text(encoding="utf-8", errors="replace")
|
|
1329
1329
|
except OSError:
|
|
@@ -1459,7 +1459,7 @@ def _deployment_quality_blockers(repo_root: Path, output_dir: Path) -> list[str]
|
|
|
1459
1459
|
if output_dir.exists():
|
|
1460
1460
|
invalid_pages: list[str] = []
|
|
1461
1461
|
stub_pages: list[str] = []
|
|
1462
|
-
for doc_path in sorted(output_dir.rglob("*.mdx")):
|
|
1462
|
+
for doc_path in sorted(output_dir.rglob("*.md")):
|
|
1463
1463
|
try:
|
|
1464
1464
|
content = doc_path.read_text(encoding="utf-8")
|
|
1465
1465
|
except Exception:
|
|
@@ -6,28 +6,16 @@ from .generation import (
|
|
|
6
6
|
summarize_generation_results,
|
|
7
7
|
BucketGenerationEngine,
|
|
8
8
|
)
|
|
9
|
-
from .mdx_compile_gate import GateOutcome, apply_mdx_compile_gate
|
|
10
|
-
from .mdx_validator import (
|
|
11
|
-
MdxCompileError,
|
|
12
|
-
ValidationOutcome,
|
|
13
|
-
bootstrap_validator,
|
|
14
|
-
ensure_node_available,
|
|
15
|
-
validate_mdx,
|
|
16
|
-
)
|
|
17
9
|
from .validation import ValidationResult, PageValidator
|
|
18
10
|
from .post_processors import (
|
|
19
11
|
_fix_mermaid_diagram,
|
|
20
12
|
build_internal_doc_link_maps,
|
|
21
|
-
escape_mdx_route_params,
|
|
22
|
-
escape_mdx_text_hazards,
|
|
23
13
|
fix_file_references,
|
|
24
14
|
fix_mermaid_diagrams,
|
|
25
15
|
normalize_code_fence_languages,
|
|
26
16
|
normalize_explanatory_lines_outside_fences,
|
|
27
17
|
normalize_html_code_blocks,
|
|
28
|
-
normalize_mdx_steps,
|
|
29
18
|
repair_internal_doc_links,
|
|
30
|
-
repair_mdx_component_blocks,
|
|
31
19
|
repair_split_object_code_fences,
|
|
32
20
|
repair_dangling_plain_fences,
|
|
33
21
|
repair_unbalanced_code_fences,
|
|
@@ -34,7 +34,7 @@ from ..llm import LLMClient
|
|
|
34
34
|
from ..parser import parse_file, supported_extensions
|
|
35
35
|
from ..parser.base import ParsedFile, Symbol
|
|
36
36
|
from ..planner import DocBucket, DocPlan, RepoScan, tracked_bucket_files
|
|
37
|
-
from ..
|
|
37
|
+
from ..prompts import SYSTEM_V2, get_prompt_for_bucket
|
|
38
38
|
from ..scanner import _classify_file_role
|
|
39
39
|
from ..openapi import parse_openapi_spec, spec_to_context_string
|
|
40
40
|
|