iil-docs-agent 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
docs_agent/__init__.py ADDED
@@ -0,0 +1,7 @@
1
"""Docs Agent — AI-assisted documentation quality tool.

Provides AST-based docstring coverage scanning, DIATAXIS classification,
and LLM-powered docstring generation with non-destructive code insertion.
"""

# Single source of truth for the package version; keep in sync with the
# distribution metadata (the 0.2.0 wheel this module ships in).
__version__ = "0.2.0"
@@ -0,0 +1 @@
1
+ """Analyzer components — AST scanner and DIATAXIS classifier."""
@@ -0,0 +1,231 @@
1
+ """AST-based docstring coverage scanner.
2
+
3
+ Scans Python modules via the `ast` module and reports undocumented items:
4
+ classes, functions, methods. No LLM required — pure static analysis.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import ast
10
+ import logging
11
+ from pathlib import Path
12
+
13
+ from docs_agent.models import CodeItem, ItemKind, ModuleCoverage, RepoCoverage
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ SKIP_DIRS: set[str] = {
18
+ "__pycache__",
19
+ ".git",
20
+ ".venv",
21
+ "venv",
22
+ "node_modules",
23
+ "migrations",
24
+ "_build",
25
+ "_archive",
26
+ "static",
27
+ "staticfiles",
28
+ ".tox",
29
+ ".mypy_cache",
30
+ ".pytest_cache",
31
+ "dist",
32
+ "build",
33
+ "eggs",
34
+ "*.egg-info",
35
+ }
36
+
37
+ SKIP_FILES: set[str] = {
38
+ "__init__.py",
39
+ "conftest.py",
40
+ "manage.py",
41
+ "wsgi.py",
42
+ "asgi.py",
43
+ }
44
+
45
+
46
+ def _should_skip_dir(dirname: str) -> bool:
47
+ """Check if directory should be skipped."""
48
+ return dirname in SKIP_DIRS or dirname.startswith(".")
49
+
50
+
51
+ def _should_skip_file(filename: str) -> bool:
52
+ """Check if file should be skipped."""
53
+ return filename in SKIP_FILES or filename.startswith("test_")
54
+
55
+
56
+ def _has_docstring(node: ast.AST) -> bool:
57
+ """Check if an AST node has a docstring."""
58
+ if not hasattr(node, "body") or not node.body:
59
+ return False
60
+ first = node.body[0]
61
+ if isinstance(first, ast.Expr) and isinstance(first.value, ast.Constant):
62
+ return isinstance(first.value.value, str)
63
+ return False
64
+
65
+
66
+ def _is_private(name: str) -> bool:
67
+ """Check if name is private (single underscore prefix, not dunder)."""
68
+ return name.startswith("_") and not name.startswith("__")
69
+
70
+
71
+ def _is_dunder(name: str) -> bool:
72
+ """Check if name is a dunder method."""
73
+ return name.startswith("__") and name.endswith("__")
74
+
75
+
76
# Dunder methods whose purpose is self-evident from the protocol they
# implement; scan_module() exempts these from docstring coverage.
# Dunders NOT listed here (e.g. __add__) remain documentable items.
SKIP_DUNDERS: set[str] = {
    "__init__",
    "__str__",
    "__repr__",
    "__eq__",
    "__hash__",
    "__lt__",
    "__le__",
    "__gt__",
    "__ge__",
    "__len__",
    "__bool__",
    "__contains__",
    "__iter__",
    "__next__",
    "__enter__",
    "__exit__",
    "__call__",
    "__getattr__",
    "__setattr__",
    "__delattr__",
    "__getitem__",
    "__setitem__",
    "__delitem__",
}
101
+
102
+
103
def scan_module(file_path: Path) -> ModuleCoverage:
    """Scan a single Python module for docstring coverage.

    Args:
        file_path: Path to the Python file.

    Returns:
        ModuleCoverage with all documentable items and their status.
        On read or parse failure the coverage is returned empty
        (a warning is logged).
    """
    coverage = ModuleCoverage(file_path=file_path)

    try:
        source = file_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        logger.warning("Cannot read %s: %s", file_path, exc)
        return coverage

    try:
        tree = ast.parse(source, filename=str(file_path))
    except SyntaxError as exc:
        logger.warning("Syntax error in %s: %s", file_path, exc)
        return coverage

    def _record(name: str, kind: ItemKind, line: int, has_doc: bool) -> None:
        # Append one documentable item and update the aggregate counters.
        coverage.items.append(
            CodeItem(
                name=name,
                kind=kind,
                line=line,
                has_docstring=has_doc,
                file_path=file_path,
            )
        )
        coverage.total_items += 1
        coverage.documented_items += int(has_doc)

    # Module-level docstring.
    _record(file_path.stem, ItemKind.MODULE, 1, _has_docstring(tree))

    # Precompute which nodes sit directly inside a class body so the
    # method-vs-function decision below is O(1) per node. The previous
    # implementation re-walked the entire tree for every function node,
    # making the scan accidentally O(n^2). AST nodes compare by identity,
    # so an id() set is equivalent to the old `node in parent.body` test.
    method_ids: set[int] = {
        id(child)
        for klass in ast.walk(tree)
        if isinstance(klass, ast.ClassDef)
        for child in klass.body
    }

    for node in ast.walk(tree):
        if isinstance(node, ast.ClassDef):
            _record(node.name, ItemKind.CLASS, node.lineno, _has_docstring(node))

        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            name = node.name

            # Well-known dunders and private helpers are exempt from coverage.
            if _is_dunder(name) and name in SKIP_DUNDERS:
                continue
            if _is_private(name):
                continue

            kind = ItemKind.METHOD if id(node) in method_ids else ItemKind.FUNCTION
            _record(name, kind, node.lineno, _has_docstring(node))

    return coverage
186
+
187
+
188
def scan_repo(
    repo_path: Path,
    *,
    apps_only: bool = False,
) -> RepoCoverage:
    """Scan an entire repository for docstring coverage.

    Args:
        repo_path: Root path of the repository.
        apps_only: If True, only scan the `apps/` (or `src/apps/`)
            subdirectory when it exists.

    Returns:
        RepoCoverage with per-module and aggregate statistics.
    """
    repo_path = repo_path.resolve()
    result = RepoCoverage(repo_path=repo_path)

    # Narrow the search root when apps_only is requested; the first
    # existing candidate wins, otherwise fall back to the repo root.
    search_root = repo_path
    if apps_only:
        for candidate in (repo_path / "apps", repo_path / "src" / "apps"):
            if candidate.is_dir():
                search_root = candidate
                break

    for py_file in sorted(search_root.rglob("*.py")):
        # Drop files that live under an excluded directory anywhere in
        # their repo-relative path, then drop excluded file names.
        rel_parts = py_file.relative_to(repo_path).parts
        if any(_should_skip_dir(part) for part in rel_parts):
            continue
        if _should_skip_file(py_file.name):
            continue

        module_cov = scan_module(py_file)
        # Modules that yielded no documentable items are omitted entirely.
        if module_cov.total_items:
            result.modules.append(module_cov)

    return result
@@ -0,0 +1,206 @@
1
+ """DIATAXIS heuristic classifier for documentation files.
2
+
3
+ Classifies Markdown/RST files into DIATAXIS quadrants using
4
+ trigger-word pattern matching. No LLM required for the heuristic pass.
5
+
6
+ LLM refinement (confidence < 0.7) is deferred to Phase 4.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import re
13
+ from pathlib import Path
14
+
15
+ from docs_agent.models import DiaxisClassification, DiaxisQuadrant
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
# Regex trigger words per DIATAXIS quadrant, matched case-insensitively by
# _count_triggers(). Quadrant order matters downstream: classify_file()
# breaks score ties by dict insertion order (TUTORIAL wins over GUIDE, etc.).
TRIGGER_PATTERNS: dict[DiaxisQuadrant, list[str]] = {
    # Learning-oriented: step-by-step, "we will", prerequisites.
    DiaxisQuadrant.TUTORIAL: [
        r"step\s+\d",
        r"getting\s+started",
        r"\blearn\b",
        r"we\s+will",
        r"in\s+this\s+tutorial",
        r"let[\u2018\u2019']s\s+",  # matches curly and straight apostrophes
        r"follow\s+along",
        r"by\s+the\s+end\b",
        r"prerequisites",
    ],
    # Task-oriented: how-to, install/deploy/troubleshoot verbs.
    DiaxisQuadrant.GUIDE: [
        r"how\s+to\b",
        r"\bconfigure\b",
        r"\bdeploy\b",
        r"\bfix\b",
        r"troubleshoot",
        r"set\s+up\b",
        r"install\b",
        r"upgrade\b",
        r"migrat",  # stem: migrate / migration / migrating
        r"recipe",
    ],
    # Information-oriented: API surface, parameters, schemas.
    DiaxisQuadrant.REFERENCE: [
        r"\bAPI\b",
        r"\bparameter[s]?\b",
        r"\breturn[s]?\b",
        r"type\s*:",
        r"automodule",  # Sphinx autodoc directive
        r"endpoint[s]?\b",
        r"\bschema\b",
        r"class\s+\w+",
        r"field[s]?\b",
        r"\bargs\b",
        r"\bkwargs\b",
    ],
    # Understanding-oriented: rationale, architecture, trade-offs.
    DiaxisQuadrant.EXPLANATION: [
        r"\bwhy\b",
        r"architecture",
        r"\bdesign\b",
        r"rationale",
        r"background",
        r"concept[s]?\b",
        r"overview",
        r"philosophy",
        r"trade-?off",
        r"decision",
    ],
}
69
+
70
# File extensions treated as documentation by classify_repo().
DOC_EXTENSIONS: set[str] = {".md", ".rst", ".txt"}

# Directory names under docs/ excluded from classification (archives,
# build output, vendored code, virtualenvs).
# NOTE(review): "source" presumably targets a generated Sphinx source tree;
# confirm it does not unintentionally exclude a hand-written docs/source/.
SKIP_DIRS: set[str] = {
    "_archive",
    "_build",
    "source",
    "node_modules",
    ".git",
    ".venv",
    "venv",
}
81
+
82
+
83
+ def _count_triggers(
84
+ text: str,
85
+ patterns: list[str],
86
+ ) -> tuple[int, list[str]]:
87
+ """Count trigger pattern matches in text.
88
+
89
+ Args:
90
+ text: Document text to scan.
91
+ patterns: Regex patterns to match.
92
+
93
+ Returns:
94
+ Tuple of (match_count, list_of_matched_patterns).
95
+ """
96
+ count = 0
97
+ matched: list[str] = []
98
+ for pattern in patterns:
99
+ hits = len(re.findall(pattern, text, re.IGNORECASE))
100
+ if hits > 0:
101
+ count += hits
102
+ matched.append(pattern)
103
+ return count, matched
104
+
105
+
106
def classify_file(file_path: Path) -> DiaxisClassification:
    """Classify a single document file into a DIATAXIS quadrant.

    Args:
        file_path: Path to the document file.

    Returns:
        DiaxisClassification with quadrant, confidence, and triggers.
        Unreadable files and files with no trigger hits classify as
        UNKNOWN with confidence 0.0.
    """
    try:
        # errors="replace" keeps mis-encoded docs classifiable instead
        # of failing the whole scan on a decode error.
        text = file_path.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        logger.warning("Cannot read %s: %s", file_path, exc)
        return DiaxisClassification(
            file_path=file_path,
            quadrant=DiaxisQuadrant.UNKNOWN,
            confidence=0.0,
        )

    # Raw trigger counts per quadrant, plus which patterns actually hit.
    scores: dict[DiaxisQuadrant, int] = {}
    all_triggers: dict[DiaxisQuadrant, list[str]] = {}

    for quadrant, patterns in TRIGGER_PATTERNS.items():
        count, matched = _count_triggers(text, patterns)
        scores[quadrant] = count
        all_triggers[quadrant] = matched

    total = sum(scores.values())
    if total == 0:
        # No trigger matched anywhere: nothing to base a decision on.
        return DiaxisClassification(
            file_path=file_path,
            quadrant=DiaxisQuadrant.UNKNOWN,
            confidence=0.0,
        )

    # max() resolves ties to the quadrant listed first in TRIGGER_PATTERNS
    # (dict insertion order): TUTORIAL beats GUIDE beats REFERENCE, etc.
    best_quadrant = max(scores, key=lambda q: scores[q])
    best_score = scores[best_quadrant]
    # Confidence = the winner's share of all trigger hits across quadrants.
    confidence = best_score / total

    # Boost confidence if file path hints at quadrant
    # NOTE(review): these are plain substring tests on the full lowered path,
    # so e.g. "api" also matches inside longer words — confirm acceptable.
    path_lower = str(file_path).lower()
    path_hints: dict[str, DiaxisQuadrant] = {
        "tutorial": DiaxisQuadrant.TUTORIAL,
        "getting-started": DiaxisQuadrant.TUTORIAL,
        "guide": DiaxisQuadrant.GUIDE,
        "howto": DiaxisQuadrant.GUIDE,
        "how-to": DiaxisQuadrant.GUIDE,
        "reference": DiaxisQuadrant.REFERENCE,
        "api": DiaxisQuadrant.REFERENCE,
        "explanation": DiaxisQuadrant.EXPLANATION,
        "adr": DiaxisQuadrant.EXPLANATION,
        "architecture": DiaxisQuadrant.EXPLANATION,
    }
    for hint, quadrant in path_hints.items():
        if hint in path_lower:
            if quadrant == best_quadrant:
                # Path agrees with the content-based winner: flat boost,
                # capped at 1.0.
                confidence = min(confidence + 0.15, 1.0)
                # NOTE(review): the break only fires after a boost, so a
                # hint that disagrees with the winner keeps scanning —
                # confirm this (vs. stopping at the first hint) is intended.
                break

    return DiaxisClassification(
        file_path=file_path,
        quadrant=best_quadrant,
        confidence=round(confidence, 3),
        triggers=all_triggers[best_quadrant],
    )
171
+
172
+
173
def classify_repo(
    repo_path: Path,
) -> list[DiaxisClassification]:
    """Classify all documentation files in a repository.

    Only files under the repository's `docs/` directory are considered.

    Args:
        repo_path: Root path of the repository.

    Returns:
        List of DiaxisClassification results (empty when docs/ is absent).
    """
    repo_path = repo_path.resolve()

    docs_dir = repo_path / "docs"
    if not docs_dir.is_dir():
        logger.info("No docs/ directory in %s", repo_path)
        return []

    results: list[DiaxisClassification] = []
    for candidate in sorted(docs_dir.rglob("*")):
        # Keep only real files with a documentation extension.
        if not candidate.is_file() or candidate.suffix not in DOC_EXTENSIONS:
            continue
        # Skip anything living under an excluded directory.
        rel_parts = candidate.relative_to(repo_path).parts
        if any(part in SKIP_DIRS for part in rel_parts):
            continue
        results.append(classify_file(candidate))

    return results
@@ -0,0 +1,149 @@
1
+ """LLM-based DIATAXIS classifier for low-confidence documents.
2
+
3
+ Re-classifies documents where the heuristic classifier has
4
+ confidence < threshold, using the llm_mcp HTTP gateway or
5
+ direct OpenAI API.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
+ from docs_agent.llm_client import LLMConfig, generate
16
+ from docs_agent.models import DiaxisClassification, DiaxisQuadrant
17
+ from docs_agent.prompts import PROMPT_DIATAXIS_CLASSIFY, SYSTEM_DIATAXIS
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
# Heuristic classifications at or above this confidence are kept as-is;
# anything below is re-classified by the LLM.
DEFAULT_THRESHOLD = 0.7
# Number of leading characters of the document sent to the LLM as context.
PREVIEW_CHARS = 500
23
+
24
+
25
async def reclassify_low_confidence(
    classifications: list[DiaxisClassification],
    *,
    threshold: float = DEFAULT_THRESHOLD,
    config: Optional[LLMConfig] = None,
) -> list[DiaxisClassification]:
    """Re-classify documents with confidence below threshold via LLM.

    Entries at or above the threshold pass through untouched; for the
    rest the LLM result replaces the heuristic one, falling back to the
    original when the LLM call or parsing fails.

    Args:
        classifications: Heuristic classification results.
        threshold: Confidence threshold for re-classification.
        config: LLM configuration.

    Returns:
        Updated list with LLM-refined classifications replacing
        low-confidence entries.
    """
    refined_results: list[DiaxisClassification] = []

    for original in classifications:
        chosen = original
        if original.confidence < threshold:
            llm_result = await _classify_with_llm(
                original.file_path, config=config
            )
            if llm_result is not None:
                chosen = llm_result
        refined_results.append(chosen)

    return refined_results
58
+
59
+
60
async def _classify_with_llm(
    file_path: Path,
    *,
    config: Optional[LLMConfig] = None,
) -> Optional[DiaxisClassification]:
    """Classify a single document via LLM.

    Args:
        file_path: Path to the document.
        config: LLM configuration.

    Returns:
        DiaxisClassification or None if the file is unreadable or the
        LLM call fails.
    """
    try:
        document_text = file_path.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        logger.warning("Cannot read %s: %s", file_path, exc)
        return None

    # Human-readable title derived from the file name; only a bounded
    # preview of the body is sent to the model.
    display_title = file_path.stem.replace("-", " ").replace("_", " ")
    prompt = PROMPT_DIATAXIS_CLASSIFY.format(
        title=display_title,
        preview=document_text[:PREVIEW_CHARS],
    )

    response = await generate(
        prompt,
        system_prompt=SYSTEM_DIATAXIS,
        config=config,
    )
    if response.success:
        return _parse_classification_response(response.content, file_path)

    logger.warning(
        "LLM classification failed for %s: %s",
        file_path, response.error,
    )
    return None
102
+
103
+
104
+ def _parse_classification_response(
105
+ content: str | dict | list | None,
106
+ file_path: Path,
107
+ ) -> Optional[DiaxisClassification]:
108
+ """Parse LLM classification response.
109
+
110
+ Args:
111
+ content: LLM response content.
112
+ file_path: Path to the classified document.
113
+
114
+ Returns:
115
+ DiaxisClassification or None on parse failure.
116
+ """
117
+ if isinstance(content, str):
118
+ try:
119
+ content = json.loads(content)
120
+ except json.JSONDecodeError:
121
+ logger.warning("Failed to parse JSON from LLM response")
122
+ return None
123
+
124
+ if not isinstance(content, dict):
125
+ return None
126
+
127
+ quadrant_str = content.get("quadrant", "").lower().strip()
128
+ confidence = float(content.get("confidence", 0.5))
129
+
130
+ quadrant_map = {
131
+ "tutorial": DiaxisQuadrant.TUTORIAL,
132
+ "guide": DiaxisQuadrant.GUIDE,
133
+ "reference": DiaxisQuadrant.REFERENCE,
134
+ "explanation": DiaxisQuadrant.EXPLANATION,
135
+ }
136
+
137
+ quadrant = quadrant_map.get(quadrant_str)
138
+ if quadrant is None:
139
+ logger.warning(
140
+ "Unknown quadrant from LLM: %s", quadrant_str
141
+ )
142
+ return None
143
+
144
+ return DiaxisClassification(
145
+ file_path=file_path,
146
+ quadrant=quadrant,
147
+ confidence=round(confidence, 3),
148
+ triggers=[f"llm: {content.get('reasoning', '')}"],
149
+ )