lyingdocs 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyingdocs/__init__.py +3 -0
- lyingdocs/__main__.py +5 -0
- lyingdocs/agent.py +352 -0
- lyingdocs/cli.py +149 -0
- lyingdocs/codex.py +150 -0
- lyingdocs/config.py +129 -0
- lyingdocs/doctree.py +159 -0
- lyingdocs/llm.py +94 -0
- lyingdocs/prompts/agent_system.txt +55 -0
- lyingdocs/prompts/codex_task.txt +28 -0
- lyingdocs/prompts/report_synthesis.txt +54 -0
- lyingdocs/tools.py +423 -0
- lyingdocs/workspace.py +170 -0
- lyingdocs-0.1.0.dist-info/METADATA +174 -0
- lyingdocs-0.1.0.dist-info/RECORD +18 -0
- lyingdocs-0.1.0.dist-info/WHEEL +4 -0
- lyingdocs-0.1.0.dist-info/entry_points.txt +2 -0
- lyingdocs-0.1.0.dist-info/licenses/LICENSE +21 -0
lyingdocs/config.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Configuration loading for LyingDocs."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import tomllib
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from dotenv import load_dotenv
|
|
10
|
+
|
|
11
|
+
# Hard-coded fallbacks for every tunable. These are overridden, in order, by
# the config file, environment variables, and CLI arguments (see load_config).
DEFAULTS = {
    "base_url": "https://api.openai.com/v1",  # OpenAI-compatible API endpoint
    "model": "gpt-4o",                        # chat model used for agent/LLM calls
    "codex_provider": "openai",               # provider id for the Codex backend
    "wire_api": "responses",                  # Codex wire protocol flavor
    "codex_enabled": True,                    # whether Codex dispatches are allowed
    "codex_path": None,                       # explicit codex binary path (None = default lookup)
    "max_dispatches": 20,                     # budget of Codex dispatches per run
    "max_iterations": 50,                     # agent loop iteration cap
    "codex_task_timeout": 1200,               # per-task timeout — presumably seconds; confirm against codex.py
    "token_budget": 524_288,                  # context token budget for the agent
}

# Candidate config file locations, checked in order: project-local file first,
# then the per-user config directory (see _find_config_file).
CONFIG_FILE_SEARCH = [
    Path("lyingdocs.toml"),
    Path.home() / ".config" / "lyingdocs" / "config.toml",
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _find_config_file(explicit: str | None = None) -> Path | None:
|
|
31
|
+
"""Locate the config file. Explicit path > local > user-level."""
|
|
32
|
+
if explicit:
|
|
33
|
+
p = Path(explicit)
|
|
34
|
+
if p.is_file():
|
|
35
|
+
return p
|
|
36
|
+
sys.exit(f"ERROR: Config file not found: {p}")
|
|
37
|
+
for candidate in CONFIG_FILE_SEARCH:
|
|
38
|
+
if candidate.is_file():
|
|
39
|
+
return candidate
|
|
40
|
+
return None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _load_config_file(path: Path) -> dict:
|
|
44
|
+
"""Parse a TOML config file and flatten into a config dict."""
|
|
45
|
+
with open(path, "rb") as f:
|
|
46
|
+
raw = tomllib.load(f)
|
|
47
|
+
|
|
48
|
+
flat = {}
|
|
49
|
+
# Top-level keys
|
|
50
|
+
for key in ("base_url", "model"):
|
|
51
|
+
if key in raw:
|
|
52
|
+
flat[key] = raw[key]
|
|
53
|
+
|
|
54
|
+
# [codex] section
|
|
55
|
+
codex = raw.get("codex", {})
|
|
56
|
+
if "enabled" in codex:
|
|
57
|
+
flat["codex_enabled"] = codex["enabled"]
|
|
58
|
+
if "provider" in codex:
|
|
59
|
+
flat["codex_provider"] = codex["provider"]
|
|
60
|
+
flat["wire_api"] = codex.get("wire_api", "responses")
|
|
61
|
+
if "path" in codex and codex["path"]:
|
|
62
|
+
flat["codex_path"] = codex["path"]
|
|
63
|
+
|
|
64
|
+
# [limits] section
|
|
65
|
+
limits = raw.get("limits", {})
|
|
66
|
+
for key in ("max_dispatches", "max_iterations", "codex_task_timeout", "token_budget"):
|
|
67
|
+
if key in limits:
|
|
68
|
+
flat[key] = int(limits[key])
|
|
69
|
+
|
|
70
|
+
return flat
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def load_config(args: argparse.Namespace) -> dict:
    """Build the effective config: defaults <- config file <- env <- CLI args.

    Later layers win. Required run context (paths, API key, resume flag) is
    always taken from args / the environment at the end, and a missing
    OPENAI_API_KEY is a fatal error.
    """
    load_dotenv()

    config = dict(DEFAULTS)

    # Layer 1: config file (explicit --config, else discovered).
    found = _find_config_file(getattr(args, "config", None))
    if found:
        config.update(_load_config_file(found))

    # Layer 2: environment variables (empty values are ignored).
    int_keys = ("codex_task_timeout", "token_budget")
    env_to_key = (
        ("BASE_URL", "base_url"),
        ("MODEL", "model"),
        ("CODEX_PROVIDER", "codex_provider"),
        ("CODEX_WIRE_API", "wire_api"),
        ("CODEX_PATH", "codex_path"),
        ("CODEX_TASK_TIMEOUT", "codex_task_timeout"),
        ("TOKEN_BUDGET", "token_budget"),
    )
    for env_name, key in env_to_key:
        raw_val = os.getenv(env_name)
        if not raw_val:
            continue
        config[key] = int(raw_val) if key in int_keys else raw_val

    # Layer 3: CLI args — only override when explicitly provided.
    for attr in ("base_url", "model", "codex_provider", "wire_api"):
        value = getattr(args, attr, None)
        if value:
            config[attr] = value
    for attr in ("max_dispatches", "max_iterations"):
        value = getattr(args, attr, None)
        if value is not None:
            config[attr] = value
    if getattr(args, "no_codex", False):
        config["codex_enabled"] = False

    # Required run context, always sourced from args / environment.
    config["api_key"] = os.getenv("OPENAI_API_KEY", "")
    config["doc_path"] = Path(args.doc_path)
    config["code_path"] = Path(args.code_path)
    config["output_dir"] = Path(args.output_dir)
    config["resume"] = getattr(args, "resume", False)

    if not config["api_key"]:
        sys.exit("ERROR: OPENAI_API_KEY not set. Export it or add to .env file.")

    return config
|
lyingdocs/doctree.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""Documentation hierarchy discovery and indexing."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger("lyingdocs")
|
|
8
|
+
|
|
9
|
+
# File extensions considered documentation when walking the doc tree.
DOC_EXTENSIONS = {".md", ".rst", ".txt", ".yaml", ".yml", ".json", ".toml"}

# Known TOC / navigation files, detected at the doc root only (DocTree stops
# at the first one that exists; as a set, tie-break order is unspecified).
TOC_FILES = {
    "_toc.yml", "mkdocs.yml", "SUMMARY.md", "sidebar.json",
    "docs.json", "mint.json", "docusaurus.config.js",
}

# Classification heuristics by filename/path patterns. Levels are checked in
# dict insertion order (high -> medium -> low) by DocTree._classify_priority;
# the first level containing a matching keyword wins, and paths matching
# nothing default to "medium".
PRIORITY_KEYWORDS = {
    "high": ["readme", "architecture", "design", "api", "config", "setup", "install",
             "getting-started", "quickstart", "overview", "reference", "guide"],
    "medium": ["tutorial", "example", "usage", "faq", "troubleshoot", "concepts",
               "plugin", "provider", "channel", "command"],
    "low": ["changelog", "contributing", "license", "security", "roadmap",
            "incident", "vision", "legal"],
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DocFile:
    """Lightweight record for one indexed documentation file."""

    __slots__ = ("rel_path", "abs_path", "size", "priority")

    def __init__(self, rel_path: str, abs_path: Path, size: int, priority: str):
        # Path relative to the doc root, as stored in the index.
        self.rel_path = rel_path
        # Absolute filesystem path.
        self.abs_path = abs_path
        # File size in bytes.
        self.size = size
        # Priority bucket string ("high" / "medium" / "low").
        self.priority = priority

    def to_dict(self) -> dict:
        """JSON-serializable view (used by DocTree.save_index)."""
        return dict(path=self.rel_path, size=self.size, priority=self.priority)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class DocTree:
    """Discovers, classifies, and summarizes a documentation directory tree."""

    def __init__(self, doc_root: Path):
        # Resolved root of the documentation tree.
        self.doc_root = doc_root.resolve()
        # Indexed files, populated by build_index().
        self.files: list[DocFile] = []
        # Name of a detected TOC/navigation file at the root, if any.
        self.toc_file: str | None = None

    def build_index(self) -> None:
        """Scan doc_root for documentation files and classify them."""
        logger.info("Building doc tree index from %s", self.doc_root)

        # TOC detection: stop at the first known navigation file at the root.
        for candidate in TOC_FILES:
            if (self.doc_root / candidate).exists():
                self.toc_file = candidate
                logger.info("  Found TOC file: %s", candidate)
                break

        skip_names = ("node_modules", "__pycache__", "dist", ".git")
        for entry in sorted(self.doc_root.rglob("*")):
            # Only regular files with a documentation extension.
            if not entry.is_file() or entry.suffix.lower() not in DOC_EXTENSIONS:
                continue
            # Skip anything under hidden dirs or common non-doc dirs
            # (the check also covers the filename component itself).
            rel_parts = entry.relative_to(self.doc_root).parts
            if any(part.startswith(".") or part in skip_names for part in rel_parts):
                continue

            rel = str(entry.relative_to(self.doc_root))
            self.files.append(
                DocFile(rel, entry, entry.stat().st_size, self._classify_priority(rel))
            )

        logger.info("  Indexed %d documentation files", len(self.files))

    def _classify_priority(self, rel_path: str) -> str:
        """Map a relative path to "high"/"medium"/"low" via keyword heuristics."""
        haystack = rel_path.lower()
        for level, keywords in PRIORITY_KEYWORDS.items():
            for kw in keywords:
                if kw in haystack:
                    return level
        return "medium"

    def get_overview(self, max_depth: int = 3) -> str:
        """Render a text overview of the doc tree for the agent's kickoff message."""
        out = [
            f"# Documentation Tree: {self.doc_root.name}",
            f"Total files: {len(self.files)}",
            f"Total size: {sum(f.size for f in self.files):,} bytes",
        ]
        if self.toc_file:
            out.append(f"TOC file: {self.toc_file}")

        # Bucket files by priority for the per-level listings.
        buckets: dict[str, list[DocFile]] = {"high": [], "medium": [], "low": []}
        for doc in self.files:
            buckets[doc.priority].append(doc)

        def emit(label: str, level: str, cap: int | None) -> None:
            # List up to `cap` entries for one bucket, then a "... and N more" line.
            docs = buckets[level]
            out.append(f"\n{label} priority ({len(docs)} files):")
            for doc in docs if cap is None else docs[:cap]:
                out.append(f"  [{_human_size(doc.size):>7s}] {doc.rel_path}")
            if cap is not None and len(docs) > cap:
                out.append(f"  ... and {len(docs) - cap} more")

        emit("High", "high", None)
        emit("Medium", "medium", 30)
        emit("Low", "low", 10)

        # Compact directory tree, limited to max_depth levels.
        out.append("\n## Directory Structure")
        seen_dirs: set[str] = set()
        for doc in self.files:
            parts = Path(doc.rel_path).parts
            for depth in range(min(len(parts) - 1, max_depth)):
                key = "/".join(parts[: depth + 1])
                if key in seen_dirs:
                    continue
                seen_dirs.add(key)
                out.append(f"{'  ' * depth}{parts[depth]}/")

        return "\n".join(out)

    def save_index(self, output_dir: Path) -> None:
        """Persist the index as doc_index.json under output_dir."""
        payload = {
            "doc_root": str(self.doc_root),
            "toc_file": self.toc_file,
            "files": [doc.to_dict() for doc in self.files],
        }
        dest = output_dir / "doc_index.json"
        dest.write_text(json.dumps(payload, indent=2), encoding="utf-8")
        logger.info("  Saved doc index to %s", dest)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _human_size(size: int) -> str:
|
|
153
|
+
"""Format byte size for display."""
|
|
154
|
+
if size < 1024:
|
|
155
|
+
return f"{size}B"
|
|
156
|
+
elif size < 1024 * 1024:
|
|
157
|
+
return f"{size / 1024:.1f}KB"
|
|
158
|
+
else:
|
|
159
|
+
return f"{size / (1024 * 1024):.1f}MB"
|
lyingdocs/llm.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""OpenAI client wrapper with retry logic and function-calling support."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
from openai import APIConnectionError, APIError, OpenAI, RateLimitError
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger("lyingdocs")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def make_client(config: dict) -> OpenAI:
    """Construct an OpenAI SDK client from the merged config dict.

    Uses the config's "api_key" and "base_url" entries (see config.load_config).
    """
    return OpenAI(
        api_key=config["api_key"],
        base_url=config["base_url"],
    )
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def call_llm(
    client: OpenAI,
    model: str,
    system_prompt: str,
    user_content: str,
    max_retries: int = 3,
) -> str:
    """Single-shot chat completion with retry on transient API failures.

    Rate limits back off exponentially (5s, 10s, 20s, ...); connection errors
    wait a fixed 5s; other API errors wait 3s and are re-raised on the final
    attempt. Raises RuntimeError if all attempts are consumed.
    """
    attempt = 0
    while attempt < max_retries:
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_content},
                ],
                temperature=0.2,
            )
        except RateLimitError:
            delay = 5 * (2**attempt)
            logger.warning(
                "Rate limited — retrying in %ds (attempt %d/%d)",
                delay, attempt + 1, max_retries,
            )
            time.sleep(delay)
        except APIConnectionError:
            logger.warning(
                "Connection error — retrying in 5s (attempt %d/%d)",
                attempt + 1, max_retries,
            )
            time.sleep(5)
        except APIError as exc:
            logger.error("API error: %s", exc)
            if attempt == max_retries - 1:
                raise
            time.sleep(3)
        else:
            return response.choices[0].message.content
        attempt += 1
    raise RuntimeError("LLM call failed after %d retries" % max_retries)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def call_llm_with_tools(
    client: OpenAI,
    model: str,
    messages: list[dict],
    tools: list[dict],
    max_retries: int = 5,
):
    """Chat completion with function-calling tools, retrying transient errors.

    Retry policy mirrors call_llm: exponential backoff on rate limits, fixed
    5s on connection errors, 3s on other API errors (re-raised on the last
    attempt). Returns the raw response message object, which may carry
    tool_calls for the agent loop to execute.
    """
    for attempt in range(max_retries):
        try:
            completion = client.chat.completions.create(
                model=model,
                messages=messages,
                tools=tools,
                temperature=0.2,
            )
            return completion.choices[0].message
        except RateLimitError:
            backoff = 5 * (2**attempt)
            logger.warning(
                "Rate limited — retrying in %ds (attempt %d/%d)",
                backoff, attempt + 1, max_retries,
            )
            time.sleep(backoff)
        except APIConnectionError:
            logger.warning(
                "Connection error — retrying in 5s (attempt %d/%d)",
                attempt + 1, max_retries,
            )
            time.sleep(5)
        except APIError as exc:
            logger.error("API error: %s", exc)
            if attempt == max_retries - 1:
                raise
            time.sleep(3)
    raise RuntimeError("LLM call failed after %d retries" % max_retries)
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
**Role:**
|
|
2
|
+
You are Hermes — an autonomous documentation auditor. Your mission is to systematically discover misalignments between a project's documentation and its actual codebase. You operate independently: you explore the documentation, formulate targeted audit questions, dispatch code analysis tasks, and record your findings.
|
|
3
|
+
|
|
4
|
+
**The 4-Category Classification System:**
|
|
5
|
+
|
|
6
|
+
1. **LogicMismatch** — Documentation claims X, but the code does Y. A direct contradiction between what is documented and what is implemented. The code's behavior fundamentally conflicts with the documented behavior. This is NOT about missing features — it's about implemented features that work differently than documented.
|
|
7
|
+
|
|
8
|
+
2. **PhantomSpec** — Documentation describes a feature, API, configuration, or behavior that does NOT exist in the codebase at all. The doc promises something that was never implemented, or was removed but the docs were not updated. The key test: if a user follows the documentation, they will hit a dead end because the described functionality simply isn't there.
|
|
9
|
+
|
|
10
|
+
3. **ShadowLogic** — The codebase contains important logic, algorithms, heuristics, or behaviors that are completely undocumented. This is NOT about trivial engineering details (logging, error handling, retries). Only flag logic that meaningfully affects the system's behavior and that users/developers would need to know about. Examples: undocumented rate limiting, hidden fallback behaviors, implicit data transformations, non-obvious default behaviors.
|
|
11
|
+
|
|
12
|
+
4. **HardcodedDrift** — Parameters, thresholds, or configuration values that the documentation presents as configurable but are actually hardcoded in the source code. Or: values that are important enough to be user-configurable but are buried as magic numbers in the code. The user cannot change these without modifying source code, contrary to what the docs suggest or what good practice would require.
|
|
13
|
+
|
|
14
|
+
**Your Strategy:**
|
|
15
|
+
|
|
16
|
+
Phase 1 — Reconnaissance:
|
|
17
|
+
- Start by examining the documentation tree overview provided to you.
|
|
18
|
+
- Identify the highest-value documentation sections to audit: architecture docs, API references, configuration guides, and READMEs. These are where misalignments have the most impact.
|
|
19
|
+
- Deprioritize changelogs, contribution guides, and legal docs.
|
|
20
|
+
|
|
21
|
+
Phase 2 — Targeted Auditing:
|
|
22
|
+
- Read each high-priority doc section carefully.
|
|
23
|
+
- For each substantive claim (architecture, API behavior, configuration options, command usage), formulate a specific, targeted audit question.
|
|
24
|
+
- Dispatch the question to Codex for code analysis. Be SPECIFIC: "The docs at config/auth.md:23-30 claim that setting `AUTH_PROVIDER=oidc` enables OpenID Connect. Verify that the auth module reads this env var and implements OIDC flow." NOT: "Check if auth works as documented."
|
|
25
|
+
- Analyze the Codex response. If it reveals a misalignment, record a finding with the correct category.
|
|
26
|
+
|
|
27
|
+
Phase 3 — Progressive Discovery:
|
|
28
|
+
- As you audit, you may discover cross-references to other doc sections. Follow them.
|
|
29
|
+
- If Codex reveals undocumented behaviors (ShadowLogic), investigate whether they should be documented.
|
|
30
|
+
- Track your progress. When a doc section is fully audited, mark it complete.
|
|
31
|
+
|
|
32
|
+
Phase 4 — Finalization:
|
|
33
|
+
- When all high-priority sections are audited, or your Codex dispatch budget is running low, call finalize_report.
|
|
34
|
+
- Do NOT wait until every single file is audited — focus on coverage of the most important sections.
|
|
35
|
+
|
|
36
|
+
**Dispatch Discipline:**
|
|
37
|
+
- Each Codex dispatch costs one unit of your budget. Use them wisely.
|
|
38
|
+
- Good dispatch: "docs/api/endpoints.md:45-52 documents a POST /users endpoint that accepts {name, email, role}. Verify the route handler exists, accepts these fields, and validate that 'role' is actually used in user creation."
|
|
39
|
+
- Bad dispatch: "Check if the API works." (too vague, wastes budget)
|
|
40
|
+
- You can batch related claims into one dispatch if they concern the same code area.
|
|
41
|
+
- Always include the doc file path and line numbers in your dispatch so Codex has context.
|
|
42
|
+
|
|
43
|
+
**Recording Findings:**
|
|
44
|
+
- Only record confirmed findings with evidence from Codex output.
|
|
45
|
+
- Include exact doc references (file:line) and code references (file:line) for every finding.
|
|
46
|
+
- Severity guide:
|
|
47
|
+
- **high**: Affects core functionality, would cause user-facing errors or security issues
|
|
48
|
+
- **medium**: Affects important but non-critical features, could cause confusion
|
|
49
|
+
- **low**: Minor inconsistencies, cosmetic issues, slightly outdated docs
|
|
50
|
+
|
|
51
|
+
**Important Rules:**
|
|
52
|
+
- Do NOT fabricate findings. If you're unsure, dispatch another Codex task to verify.
|
|
53
|
+
- Do NOT record trivial misalignments (typos, formatting differences, minor version numbers).
|
|
54
|
+
- Do NOT try to audit everything — focus on high-signal sections.
|
|
55
|
+
- When context gets long, rely on get_progress to remember your current state.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
You are auditing a codebase against its documentation. Your job is to verify specific claims from the documentation by searching and reading the actual code.
|
|
2
|
+
|
|
3
|
+
## Task
|
|
4
|
+
|
|
5
|
+
{task_description}
|
|
6
|
+
{focus_paths_section}
|
|
7
|
+
## Instructions
|
|
8
|
+
|
|
9
|
+
1. **Search** the codebase thoroughly for the relevant implementation. Use grep, find, and file reading to locate the code.
|
|
10
|
+
2. **Compare** what you find against the documented claims described above.
|
|
11
|
+
3. **Report** your findings using these categories:
|
|
12
|
+
- **LogicMismatch**: Code behavior contradicts what the documentation claims.
|
|
13
|
+
- **PhantomSpec**: Documentation describes something that does not exist in the code.
|
|
14
|
+
- **ShadowLogic**: Code contains important undocumented logic relevant to this area.
|
|
15
|
+
- **HardcodedDrift**: Values documented as configurable are actually hardcoded, or important parameters are buried as magic numbers.
|
|
16
|
+
|
|
17
|
+
## Output Format
|
|
18
|
+
|
|
19
|
+
For each finding, provide:
|
|
20
|
+
- **Category**: One of the four above
|
|
21
|
+
- **Doc Claim**: What the documentation says (quote or paraphrase)
|
|
22
|
+
- **Code Reality**: What the code actually does, with exact file path and line number(s)
|
|
23
|
+
- **Evidence**: The relevant code snippet (keep brief, ~5-10 lines max)
|
|
24
|
+
- **Assessment**: 1-2 sentence explanation of the discrepancy
|
|
25
|
+
|
|
26
|
+
If everything aligns correctly, explicitly state: "ALIGNED — [brief explanation of what was verified]"
|
|
27
|
+
|
|
28
|
+
Be precise, evidence-based, and concise. Do not speculate — only report what you can verify in the code.
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
**Role:**
|
|
2
|
+
You are the Lead Documentation Alignment Analyst. Your task is to synthesize raw audit findings into a polished, structured misalignment report.
|
|
3
|
+
|
|
4
|
+
**Input:**
|
|
5
|
+
You will receive a JSON array of findings, each with: category, title, doc_ref, code_ref, description, severity.
|
|
6
|
+
|
|
7
|
+
**Output Format:**
|
|
8
|
+
|
|
9
|
+
# Documentation-Code Misalignment Report: {project_name}
|
|
10
|
+
|
|
11
|
+
## Executive Summary
|
|
12
|
+
[1-3 sentence overall assessment: how well does the documentation reflect the actual codebase? What is the most concerning pattern?]
|
|
13
|
+
|
|
14
|
+
### Metric Dashboard
|
|
15
|
+
| Category | Count | High | Medium | Low |
|
|
16
|
+
|----------|-------|------|--------|-----|
|
|
17
|
+
| LogicMismatch | N | n | n | n |
|
|
18
|
+
| PhantomSpec | N | n | n | n |
|
|
19
|
+
| ShadowLogic | N | n | n | n |
|
|
20
|
+
| HardcodedDrift | N | n | n | n |
|
|
21
|
+
|
|
22
|
+
## Critical Findings
|
|
23
|
+
|
|
24
|
+
[Group the HIGH severity findings here, regardless of category. For each:]
|
|
25
|
+
|
|
26
|
+
### [N]. [Title]
|
|
27
|
+
- **Category**: [category]
|
|
28
|
+
- **Severity**: high
|
|
29
|
+
- **Documentation**: [doc_ref] — [what the doc claims]
|
|
30
|
+
- **Code Reality**: [code_ref] — [what the code actually does]
|
|
31
|
+
- **Impact**: [why this matters to users/developers]
|
|
32
|
+
|
|
33
|
+
## LogicMismatch Findings
|
|
34
|
+
[All LogicMismatch findings not already in Critical, grouped logically]
|
|
35
|
+
|
|
36
|
+
## PhantomSpec Findings
|
|
37
|
+
[All PhantomSpec findings]
|
|
38
|
+
|
|
39
|
+
## ShadowLogic Findings
|
|
40
|
+
[All ShadowLogic findings]
|
|
41
|
+
|
|
42
|
+
## HardcodedDrift Findings
|
|
43
|
+
[All HardcodedDrift findings]
|
|
44
|
+
|
|
45
|
+
## Recommendations
|
|
46
|
+
[3-5 bullet points: what should the maintainers prioritize fixing?]
|
|
47
|
+
|
|
48
|
+
**Guidelines:**
|
|
49
|
+
- Be objective and precise. No speculation.
|
|
50
|
+
- Every finding must cite exact doc and code references.
|
|
51
|
+
- Group related findings under a single heading when they concern the same feature.
|
|
52
|
+
- Order within each section by severity (high → medium → low).
|
|
53
|
+
- Keep the language professional and constructive — the goal is to help maintainers improve their documentation.
|
|
54
|
+
- If there are very few findings, note that the documentation is generally well-maintained.
|