ai-docs-gen 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ai_docs/generator.py ADDED
@@ -0,0 +1,959 @@
1
+ import json
2
+ from datetime import datetime
3
+ import os
4
+ import shutil
5
+ import subprocess
6
+ from pathlib import Path
7
+ from typing import Dict, List, Optional, Set, Tuple
8
+
9
+ import tomli
10
+ import yaml
11
+
12
+ from concurrent.futures import ThreadPoolExecutor, as_completed
13
+
14
+ from .cache import CacheManager
15
+ from .changes import format_changes_md
16
+ from .domain import is_infra
17
+ from .llm import LLMClient
18
+ from .mkdocs import build_mkdocs_yaml, write_docs_files
19
+ from .summary import summarize_file, write_summary
20
+ from .tokenizer import count_tokens, chunk_text
21
+ from .utils import ensure_dir, read_text_file, sha256_text, safe_slug
22
+
23
+
24
# Core documentation sections: output file stem -> page title.
# Each key becomes "<key>.md"; dict order is the order sections are processed in.
SECTION_TITLES: Dict[str, str] = {
    "architecture": "Архитектура",
    "runtime": "Запуск и окружение",
    "dependencies": "Зависимости",
    "testing": "Тестирование",
    "conventions": "Соглашения",
    "glossary": "Глоссарий",
}
32
+
33
# Infrastructure domains: domain id -> page title.
# A domain id is matched against each file's "domains" list, and its page is
# written to "configs/<domain>.md".
DOMAIN_TITLES: Dict[str, str] = {
    "kubernetes": "Kubernetes",
    "helm": "Helm",
    "terraform": "Terraform",
    "ansible": "Ansible",
    "docker": "Docker",
    "ci": "CI/CD",
    "observability": "Observability",
    "service_mesh": "Service Mesh / Ingress",
    "data_storage": "Data / Storage",
}
44
+
45
+ def _is_test_path(path: str) -> bool:
46
+ parts = Path(path).parts
47
+ if any(part in {"test", "tests", "__tests__"} for part in parts):
48
+ return True
49
+ name = Path(path).name
50
+ return name.startswith("test_") or name.endswith("_test.py")
51
+
52
+
53
+ def _collect_dependencies(files: Dict[str, Dict]) -> List[str]:
54
+ deps: List[str] = []
55
+ for path, meta in files.items():
56
+ if path.endswith("pyproject.toml"):
57
+ try:
58
+ data = tomli.loads(meta["content"])
59
+ deps_map = data.get("tool", {}).get("poetry", {}).get("dependencies", {})
60
+ deps.extend([f"{k} {v}" for k, v in deps_map.items()])
61
+ except Exception:
62
+ continue
63
+ if path.endswith("requirements.txt"):
64
+ lines = [line.strip() for line in meta["content"].splitlines() if line.strip() and not line.strip().startswith("#")]
65
+ deps.extend(lines)
66
+ if path.endswith("package.json"):
67
+ try:
68
+ data = json.loads(meta["content"])
69
+ for section in ("dependencies", "devDependencies"):
70
+ for k, v in data.get(section, {}).items():
71
+ deps.append(f"{k} {v}")
72
+ except Exception:
73
+ continue
74
+ return sorted(set(deps))
75
+
76
+
77
+ def _collect_test_info(files: Dict[str, Dict]) -> Tuple[List[str], List[str]]:
78
+ test_paths = sorted([path for path in files if _is_test_path(path)])
79
+ commands: List[str] = []
80
+
81
+ has_pytest_config = any(
82
+ path.endswith("pytest.ini") or path.endswith("pyproject.toml")
83
+ for path in files.keys()
84
+ )
85
+ has_pytest_dep = False
86
+ for path, meta in files.items():
87
+ if path.endswith(("requirements.txt", "pyproject.toml")):
88
+ content = meta.get("content", "")
89
+ if "pytest" in content:
90
+ has_pytest_dep = True
91
+ break
92
+
93
+ if test_paths and (has_pytest_config or has_pytest_dep or any(p.endswith(".py") for p in test_paths)):
94
+ commands.append("python -m pytest")
95
+
96
+ for path, meta in files.items():
97
+ if path.endswith("package.json"):
98
+ try:
99
+ data = json.loads(meta.get("content", ""))
100
+ scripts = data.get("scripts", {})
101
+ if "test" in scripts:
102
+ commands.append("npm test")
103
+ except Exception:
104
+ continue
105
+
106
+ return test_paths, sorted(set(commands))
107
+
108
+
109
+ def _render_testing_section(test_paths: List[str], commands: List[str]) -> str:
110
+ if not test_paths:
111
+ return "Тесты не обнаружены."
112
+ tests_md = "\n".join(f"- `{p}`" for p in test_paths)
113
+ commands_md = "\n".join(f"- `{c}`" for c in commands) if commands else "- (команда запуска не определена)"
114
+ return (
115
+ "## Найденные тесты\n\n"
116
+ f"{tests_md}\n\n"
117
+ "## Как запускать\n\n"
118
+ f"{commands_md}\n"
119
+ )
120
+
121
+
122
+ def _render_project_configs_index(config_nav_paths: List[str]) -> str:
123
+ if not config_nav_paths:
124
+ return "Конфигурационные файлы не обнаружены."
125
+ toc_lines = "\n".join(
126
+ [
127
+ f"- [{Path(p).with_suffix('').as_posix()}]({Path(p).as_posix()[len('configs/'):] if p.startswith('configs/') else p})"
128
+ for p in sorted(config_nav_paths)
129
+ ]
130
+ )
131
+ return f"## Файлы конфигурации\n\n{toc_lines}\n"
132
+
133
+
134
def _generate_section(llm: LLMClient, llm_cache: Dict[str, str], title: str, context: str, language: str) -> str:
    """Ask the LLM to write one Markdown documentation section.

    Args:
        llm: Client whose ``chat(messages, cache=...)`` returns the reply text.
        llm_cache: Cache object forwarded to ``llm.chat`` unchanged.
        title: Section title; the architecture title additionally requests a
            Mermaid diagram at the top of the section.
        context: Text sent as the user message.
        language: Output language named in the system prompt.

    Returns:
        The stripped reply, with a leading heading removed when it merely
        repeats *title*.
    """
    instructions = [
        "Ты опытный технический писатель. Сгенерируй раздел документации в Markdown. ",
        f"Язык: {language}. Раздел: {title}. ",
        "Используй предоставленный контекст. Избегай воды, дай практические детали.",
    ]
    wants_diagram = title.lower() == "архитектура"
    if wants_diagram:
        instructions.extend(
            [
                " В начале раздела обязательно вставь Mermaid-диаграмму архитектуры. ",
                "Используй блок:\n```mermaid\n...\n```.\n",
                "Схема должна отражать основные компоненты и потоки данных проекта. ",
                "Используй `-->` для связей. Запрещено использовать `>`. ",
                "Внутри блока Mermaid запрещены круглые скобки `(` и `)` в любых строках. ",
                "Для подписей используй квадратные скобки.",
            ]
        )

    system_message = {"role": "system", "content": "".join(instructions)}
    user_message = {"role": "user", "content": context}
    reply = llm.chat([system_message, user_message], cache=llm_cache)
    return _strip_duplicate_heading(reply.strip(), title)
155
+
156
+
157
+ def _strip_duplicate_heading(content: str, title: str) -> str:
158
+ lines = content.splitlines()
159
+ if not lines:
160
+ return content
161
+ first = lines[0].strip()
162
+ if first.startswith("#") and first.lstrip("#").strip().lower() == title.strip().lower():
163
+ return "\n".join(lines[1:]).lstrip()
164
+ return content
165
+
166
+
167
def _generate_readme(llm: LLMClient, llm_cache: Dict[str, str], project_name: str, overview_context: str, language: str) -> str:
    """Ask the LLM to produce README.md content for the project.

    Args:
        llm: Client whose ``chat(messages, cache=...)`` returns the reply text.
        llm_cache: Cache object forwarded to ``llm.chat`` unchanged.
        project_name: Accepted but not used when building the prompt.
        overview_context: Text sent as the user message.
        language: Output language appended to the system prompt.

    Returns:
        The reply text with surrounding whitespace stripped.
    """
    system_prompt = "".join(
        [
            "Сформируй README.md для проекта. ",
            "Структура: Обзор, Быстрый старт, Архитектура (кратко), Ссылки на docs. ",
            "Текст должен быть кратким и полезным. Язык: ",
            language,
        ]
    )
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": overview_context},
    ]
    reply = llm.chat(conversation, cache=llm_cache)
    return reply.strip()
178
+
179
+
180
def _truncate_context(context: str, model: str, max_tokens: int) -> str:
    """Limit *context* to at most *max_tokens* tokens for *model*.

    Returns *context* unchanged when ``count_tokens`` reports it within the
    budget; otherwise returns the first element produced by ``chunk_text``
    for the same model and budget.
    """
    within_budget = count_tokens(context, model) <= max_tokens
    if not within_budget:
        pieces = chunk_text(context, model=model, max_tokens=max_tokens)
        return pieces[0]
    return context
185
+
186
+
187
+ def _first_paragraph(text: str) -> str:
188
+ lines: List[str] = []
189
+ for raw in text.splitlines():
190
+ line = raw.strip()
191
+ if not line:
192
+ if lines:
193
+ break
194
+ continue
195
+ if line.startswith("#") or line.startswith("```"):
196
+ continue
197
+ lines.append(line)
198
+ if len(lines) >= 2:
199
+ break
200
+ return " ".join(lines).strip()
201
+
202
+
203
def _build_docs_index(
    output_root: Path,
    docs_dir: Path,
    docs_files: Dict[str, str],
    file_map: Dict[str, Dict],
    module_pages: Dict[str, str],
    config_pages: Dict[str, str],
) -> Dict[str, object]:
    """Build the navigation index describing the generated docs.

    Args:
        output_root: Accepted for interface compatibility; not used here.
        docs_dir: Docs directory; Markdown files already present under it are
            listed alongside the ones about to be written.
        docs_files: Relative doc path -> content for files being written.
        file_map: Source path -> metadata. ``module_summary_path`` /
            ``config_summary_path`` entries point at summary files whose first
            paragraph becomes the entry's ``summary``.
        module_pages: Accepted for interface compatibility; not used here.
        config_pages: Accepted for interface compatibility; not used here.

    Returns:
        A JSON-serialisable dict with ``generated_at`` (UTC, ISO format with a
        trailing ``Z``), static routing ``rules``, and the ``sections``,
        ``modules``, ``configs`` and ``files`` listings.
    """
    # Local import: only ``datetime`` itself is imported at module level.
    from datetime import timezone

    existing_files: Set[str] = set()
    if docs_dir.exists():
        for md_path in docs_dir.rglob("*.md"):
            try:
                existing_files.add(md_path.relative_to(docs_dir).as_posix())
            except Exception:
                continue

    sections = []
    for key, title in SECTION_TITLES.items():
        section_path = f"{key}.md"
        if section_path in docs_files or section_path in existing_files:
            sections.append({"id": key, "title": title, "path": section_path})
    if "configs/index.md" in docs_files or "configs/index.md" in existing_files:
        sections.append({"id": "configs", "title": "Конфигурация проекта", "path": "configs/index.md"})

    modules = []
    for path, meta in file_map.items():
        if _is_test_path(path):
            continue
        summary_path = meta.get("module_summary_path")
        if not summary_path:
            continue
        # Must match the page path generate_docs writes for a module
        # ("." replaced by "__", then ".md"); otherwise the index would point
        # at files that are never created.
        module_rel_str = (Path("modules") / Path(path)).as_posix().replace(".", "__") + ".md"
        summary_text = read_text_file(Path(summary_path))
        modules.append(
            {
                "name": Path(path).with_suffix("").as_posix(),
                "path": module_rel_str,
                "source_path": path,
                "summary": _first_paragraph(summary_text),
            }
        )

    configs = []
    for path, meta in file_map.items():
        if meta.get("type") != "config":
            continue
        summary_path = meta.get("config_summary_path")
        if not summary_path:
            continue
        config_rel_str = (Path("configs/files") / Path(path)).as_posix().replace(".", "__") + ".md"
        summary_text = read_text_file(Path(summary_path))
        configs.append(
            {
                "name": Path(path).as_posix(),
                "path": config_rel_str,
                "source_path": path,
                "summary": _first_paragraph(summary_text),
            }
        )

    # datetime.utcnow() is deprecated; take an aware UTC timestamp and drop the
    # tzinfo so the serialized form stays "<naive ISO timestamp>Z".
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    return {
        "generated_at": generated_at,
        "docs_dir": ".ai-docs",
        "rules": {
            "priority": [
                "modules/index.md",
                "modules/*",
                "configs/index.md",
                "configs/files/*",
                "index.md",
                "architecture.md",
                "conventions.md",
            ],
            "ranking": "keyword frequency + file priority",
            "usage": "use this index to choose a narrow route before reading full docs",
        },
        "sections": sections,
        "modules": modules,
        "configs": configs,
        "files": sorted(set(docs_files.keys()) | existing_files | {"_index.json"}),
    }
285
+
286
+
287
+ def generate_docs(
288
+ files: List[Dict],
289
+ output_root: Path,
290
+ cache_dir: Path,
291
+ llm: LLMClient,
292
+ language: str,
293
+ write_readme: bool,
294
+ write_mkdocs: bool,
295
+ use_cache: bool = True,
296
+ threads: int = 1,
297
+ local_site: bool = False,
298
+ force: bool = False,
299
+ ) -> None:
300
+ cache = CacheManager(cache_dir)
301
+ llm_cache = cache.load_llm_cache() if use_cache else None
302
+ index_data = cache.load_index()
303
+ prev_files = index_data.get("files", {})
304
+ errors: List[str] = []
305
+
306
+ def _save_cache_snapshot() -> None:
307
+ snapshot = {
308
+ "files": {path: {k: v for k, v in meta.items() if k != "content"} for path, meta in file_map.items()},
309
+ "sections": index_data.get("sections", {}),
310
+ }
311
+ cache.save_index(snapshot)
312
+ if use_cache and llm_cache is not None:
313
+ cache.save_llm_cache(llm_cache)
314
+
315
+ file_map: Dict[str, Dict] = {}
316
+ for f in files:
317
+ file_map[f["path"]] = {
318
+ "hash": sha256_text(f["content"]),
319
+ "size": f["size"],
320
+ "type": f["type"],
321
+ "domains": f["domains"],
322
+ "content": f["content"],
323
+ }
324
+
325
+ added, modified, deleted, unchanged = cache.diff_files(file_map)
326
+ print(f"[ai-docs] diff: added={len(added)} modified={len(modified)} deleted={len(deleted)} unchanged={len(unchanged)}")
327
+
328
+ summaries_dir = cache_dir / "intermediate" / "files"
329
+ module_summaries_dir = cache_dir / "intermediate" / "modules"
330
+ ensure_dir(summaries_dir)
331
+ ensure_dir(module_summaries_dir)
332
+
333
+ # Summaries for changed files (parallel if threads > 1)
334
+ to_summarize: List[Tuple[str, Dict]] = list({**added, **modified}.items())
335
+ if to_summarize:
336
+ print(f"[ai-docs] summarize: {len(to_summarize)} changed files (threads={threads})")
337
+ if threads > 1 and to_summarize:
338
+ with ThreadPoolExecutor(max_workers=threads) as executor:
339
+ futures = {}
340
+ print(f"[ai-docs] summarize: queued {len(to_summarize)} tasks (workers={threads})")
341
+ for path, meta in to_summarize:
342
+ print(f"[ai-docs] summarize start: {path}")
343
+ futures[
344
+ executor.submit(
345
+ summarize_file,
346
+ meta["content"],
347
+ meta["type"],
348
+ meta["domains"],
349
+ llm,
350
+ llm_cache,
351
+ llm.model,
352
+ False,
353
+ )
354
+ ] = (path, meta)
355
+ total = len(futures)
356
+ done = 0
357
+ for future in as_completed(futures):
358
+ path, meta = futures[future]
359
+ try:
360
+ summary = future.result()
361
+ except Exception as exc:
362
+ msg = f"summarize: {path} -> {exc}"
363
+ print(f"[ai-docs] summarize error: {path} ({exc})")
364
+ errors.append(msg)
365
+ continue
366
+ summary_path = write_summary(summaries_dir, path, summary)
367
+ meta["summary_path"] = str(summary_path)
368
+ done += 1
369
+ print(f"[ai-docs] summarize done: {path} ({done}/{total})")
370
+ else:
371
+ total = len(to_summarize)
372
+ done = 0
373
+ for path, meta in to_summarize:
374
+ print(f"[ai-docs] summarize start: {path}")
375
+ try:
376
+ summary = summarize_file(meta["content"], meta["type"], meta["domains"], llm, llm_cache, llm.model, False)
377
+ summary_path = write_summary(summaries_dir, path, summary)
378
+ meta["summary_path"] = str(summary_path)
379
+ done += 1
380
+ print(f"[ai-docs] summarize done: {path} ({done}/{total})")
381
+ except Exception as exc:
382
+ msg = f"summarize: {path} -> {exc}"
383
+ print(f"[ai-docs] summarize error: {path} ({exc})")
384
+ errors.append(msg)
385
+ if to_summarize:
386
+ _save_cache_snapshot()
387
+
388
+ # Detailed module summaries for changed files (code only)
389
+ module_candidates = [
390
+ (path, meta)
391
+ for path, meta in to_summarize
392
+ if meta.get("type") == "code" and not _is_test_path(path)
393
+ ]
394
+ if module_candidates:
395
+ print(f"[ai-docs] summarize modules: {len(module_candidates)} changed code files (threads={threads})")
396
+ if threads > 1 and module_candidates:
397
+ with ThreadPoolExecutor(max_workers=threads) as executor:
398
+ futures = {}
399
+ print(f"[ai-docs] summarize modules: queued {len(module_candidates)} tasks (workers={threads})")
400
+ for path, meta in module_candidates:
401
+ print(f"[ai-docs] summarize module start: {path}")
402
+ futures[
403
+ executor.submit(
404
+ summarize_file,
405
+ meta["content"],
406
+ meta["type"],
407
+ meta["domains"],
408
+ llm,
409
+ llm_cache,
410
+ llm.model,
411
+ True,
412
+ )
413
+ ] = (path, meta)
414
+ total = len(futures)
415
+ done = 0
416
+ for future in as_completed(futures):
417
+ path, meta = futures[future]
418
+ try:
419
+ summary = future.result()
420
+ except Exception as exc:
421
+ msg = f"summarize module: {path} -> {exc}"
422
+ print(f"[ai-docs] summarize module error: {path} ({exc})")
423
+ errors.append(msg)
424
+ continue
425
+ summary_path = write_summary(module_summaries_dir, path, summary)
426
+ meta["module_summary_path"] = str(summary_path)
427
+ done += 1
428
+ print(f"[ai-docs] summarize module done: {path} ({done}/{total})")
429
+ else:
430
+ total = len(module_candidates)
431
+ done = 0
432
+ for path, meta in module_candidates:
433
+ print(f"[ai-docs] summarize module start: {path}")
434
+ try:
435
+ summary = summarize_file(meta["content"], meta["type"], meta["domains"], llm, llm_cache, llm.model, True)
436
+ summary_path = write_summary(module_summaries_dir, path, summary)
437
+ meta["module_summary_path"] = str(summary_path)
438
+ done += 1
439
+ print(f"[ai-docs] summarize module done: {path} ({done}/{total})")
440
+ except Exception as exc:
441
+ msg = f"summarize module: {path} -> {exc}"
442
+ print(f"[ai-docs] summarize module error: {path} ({exc})")
443
+ errors.append(msg)
444
+ if module_candidates:
445
+ _save_cache_snapshot()
446
+
447
+ # Detailed config summaries for changed files (config only)
448
+ config_candidates = [
449
+ (path, meta)
450
+ for path, meta in to_summarize
451
+ if meta.get("type") == "config"
452
+ ]
453
+ config_summaries_dir = cache_dir / "intermediate" / "configs"
454
+ if config_candidates:
455
+ print(f"[ai-docs] summarize configs: {len(config_candidates)} changed config files (threads={threads})")
456
+ if threads > 1 and config_candidates:
457
+ with ThreadPoolExecutor(max_workers=threads) as executor:
458
+ futures = {}
459
+ print(f"[ai-docs] summarize configs: queued {len(config_candidates)} tasks (workers={threads})")
460
+ for path, meta in config_candidates:
461
+ print(f"[ai-docs] summarize config start: {path}")
462
+ futures[
463
+ executor.submit(
464
+ summarize_file,
465
+ meta["content"],
466
+ meta["type"],
467
+ meta["domains"],
468
+ llm,
469
+ llm_cache,
470
+ llm.model,
471
+ True,
472
+ )
473
+ ] = (path, meta)
474
+ total = len(futures)
475
+ done = 0
476
+ for future in as_completed(futures):
477
+ path, meta = futures[future]
478
+ try:
479
+ summary = future.result()
480
+ except Exception as exc:
481
+ msg = f"summarize config: {path} -> {exc}"
482
+ print(f"[ai-docs] summarize config error: {path} ({exc})")
483
+ errors.append(msg)
484
+ continue
485
+ summary_path = write_summary(config_summaries_dir, path, summary)
486
+ meta["config_summary_path"] = str(summary_path)
487
+ done += 1
488
+ print(f"[ai-docs] summarize config done: {path} ({done}/{total})")
489
+ else:
490
+ total = len(config_candidates)
491
+ done = 0
492
+ for path, meta in config_candidates:
493
+ print(f"[ai-docs] summarize config start: {path}")
494
+ try:
495
+ summary = summarize_file(meta["content"], meta["type"], meta["domains"], llm, llm_cache, llm.model, True)
496
+ summary_path = write_summary(config_summaries_dir, path, summary)
497
+ meta["config_summary_path"] = str(summary_path)
498
+ done += 1
499
+ print(f"[ai-docs] summarize config done: {path} ({done}/{total})")
500
+ except Exception as exc:
501
+ msg = f"summarize config: {path} -> {exc}"
502
+ print(f"[ai-docs] summarize config error: {path} ({exc})")
503
+ errors.append(msg)
504
+ if config_candidates:
505
+ _save_cache_snapshot()
506
+
507
+ # Carry summaries for unchanged files (recreate if missing)
508
+ missing_summaries: List[Tuple[str, Dict]] = []
509
+ missing_module_summaries: List[Tuple[str, Dict]] = []
510
+ missing_config_summaries: List[Tuple[str, Dict]] = []
511
+ for path, meta in unchanged.items():
512
+ prev = prev_files.get(path, {})
513
+ summary_path = prev.get("summary_path")
514
+ if summary_path and Path(summary_path).exists():
515
+ meta["summary_path"] = summary_path
516
+ else:
517
+ if summary_path:
518
+ print(f"[ai-docs] summarize missing: {path} ({summary_path})")
519
+ else:
520
+ print(f"[ai-docs] summarize missing: {path}")
521
+ missing_summaries.append((path, meta))
522
+ module_summary_path = prev.get("module_summary_path")
523
+ if meta.get("type") == "code" and not _is_test_path(path):
524
+ if module_summary_path and Path(module_summary_path).exists():
525
+ meta["module_summary_path"] = module_summary_path
526
+ else:
527
+ if module_summary_path:
528
+ print(f"[ai-docs] summarize module missing: {path} ({module_summary_path})")
529
+ else:
530
+ print(f"[ai-docs] summarize module missing: {path}")
531
+ missing_module_summaries.append((path, meta))
532
+ config_summary_path = prev.get("config_summary_path")
533
+ if meta.get("type") == "config":
534
+ if config_summary_path and Path(config_summary_path).exists():
535
+ meta["config_summary_path"] = config_summary_path
536
+ else:
537
+ if config_summary_path:
538
+ print(f"[ai-docs] summarize config missing: {path} ({config_summary_path})")
539
+ else:
540
+ print(f"[ai-docs] summarize config missing: {path}")
541
+ missing_config_summaries.append((path, meta))
542
+
543
+ if missing_summaries:
544
+ print(f"[ai-docs] summarize: {len(missing_summaries)} missing summaries")
545
+ if threads > 1:
546
+ with ThreadPoolExecutor(max_workers=threads) as executor:
547
+ futures = {}
548
+ print(f"[ai-docs] summarize: queued {len(missing_summaries)} tasks (workers={threads})")
549
+ for path, meta in missing_summaries:
550
+ print(f"[ai-docs] summarize start: {path}")
551
+ futures[
552
+ executor.submit(
553
+ summarize_file,
554
+ meta["content"],
555
+ meta["type"],
556
+ meta["domains"],
557
+ llm,
558
+ llm_cache,
559
+ llm.model,
560
+ )
561
+ ] = (path, meta)
562
+ total = len(futures)
563
+ done = 0
564
+ for future in as_completed(futures):
565
+ path, meta = futures[future]
566
+ try:
567
+ summary = future.result()
568
+ except Exception as exc:
569
+ msg = f"summarize: {path} -> {exc}"
570
+ print(f"[ai-docs] summarize error: {path} ({exc})")
571
+ errors.append(msg)
572
+ continue
573
+ summary_path = write_summary(summaries_dir, path, summary)
574
+ meta["summary_path"] = str(summary_path)
575
+ done += 1
576
+ print(f"[ai-docs] summarize done: {path} ({done}/{total})")
577
+ else:
578
+ total = len(missing_summaries)
579
+ done = 0
580
+ for path, meta in missing_summaries:
581
+ print(f"[ai-docs] summarize start: {path}")
582
+ try:
583
+ summary = summarize_file(meta["content"], meta["type"], meta["domains"], llm, llm_cache, llm.model, False)
584
+ summary_path = write_summary(summaries_dir, path, summary)
585
+ meta["summary_path"] = str(summary_path)
586
+ done += 1
587
+ print(f"[ai-docs] summarize done: {path} ({done}/{total})")
588
+ except Exception as exc:
589
+ msg = f"summarize: {path} -> {exc}"
590
+ print(f"[ai-docs] summarize error: {path} ({exc})")
591
+ errors.append(msg)
592
+ _save_cache_snapshot()
593
+
594
+ if missing_module_summaries:
595
+ print(f"[ai-docs] summarize modules: {len(missing_module_summaries)} missing module summaries")
596
+ if threads > 1:
597
+ with ThreadPoolExecutor(max_workers=threads) as executor:
598
+ futures = {}
599
+ print(f"[ai-docs] summarize modules: queued {len(missing_module_summaries)} tasks (workers={threads})")
600
+ for path, meta in missing_module_summaries:
601
+ print(f"[ai-docs] summarize module start: {path}")
602
+ futures[
603
+ executor.submit(
604
+ summarize_file,
605
+ meta["content"],
606
+ meta["type"],
607
+ meta["domains"],
608
+ llm,
609
+ llm_cache,
610
+ llm.model,
611
+ True,
612
+ )
613
+ ] = (path, meta)
614
+ total = len(futures)
615
+ done = 0
616
+ for future in as_completed(futures):
617
+ path, meta = futures[future]
618
+ try:
619
+ summary = future.result()
620
+ except Exception as exc:
621
+ msg = f"summarize module: {path} -> {exc}"
622
+ print(f"[ai-docs] summarize module error: {path} ({exc})")
623
+ errors.append(msg)
624
+ continue
625
+ summary_path = write_summary(module_summaries_dir, path, summary)
626
+ meta["module_summary_path"] = str(summary_path)
627
+ done += 1
628
+ print(f"[ai-docs] summarize module done: {path} ({done}/{total})")
629
+ else:
630
+ total = len(missing_module_summaries)
631
+ done = 0
632
+ for path, meta in missing_module_summaries:
633
+ print(f"[ai-docs] summarize module start: {path}")
634
+ try:
635
+ summary = summarize_file(meta["content"], meta["type"], meta["domains"], llm, llm_cache, llm.model, True)
636
+ summary_path = write_summary(module_summaries_dir, path, summary)
637
+ meta["module_summary_path"] = str(summary_path)
638
+ done += 1
639
+ print(f"[ai-docs] summarize module done: {path} ({done}/{total})")
640
+ except Exception as exc:
641
+ msg = f"summarize module: {path} -> {exc}"
642
+ print(f"[ai-docs] summarize module error: {path} ({exc})")
643
+ errors.append(msg)
644
+ _save_cache_snapshot()
645
+
646
+ if missing_config_summaries:
647
+ print(f"[ai-docs] summarize configs: {len(missing_config_summaries)} missing config summaries")
648
+ if threads > 1:
649
+ with ThreadPoolExecutor(max_workers=threads) as executor:
650
+ futures = {}
651
+ print(f"[ai-docs] summarize configs: queued {len(missing_config_summaries)} tasks (workers={threads})")
652
+ for path, meta in missing_config_summaries:
653
+ print(f"[ai-docs] summarize config start: {path}")
654
+ futures[
655
+ executor.submit(
656
+ summarize_file,
657
+ meta["content"],
658
+ meta["type"],
659
+ meta["domains"],
660
+ llm,
661
+ llm_cache,
662
+ llm.model,
663
+ True,
664
+ )
665
+ ] = (path, meta)
666
+ total = len(futures)
667
+ done = 0
668
+ for future in as_completed(futures):
669
+ path, meta = futures[future]
670
+ try:
671
+ summary = future.result()
672
+ except Exception as exc:
673
+ msg = f"summarize config: {path} -> {exc}"
674
+ print(f"[ai-docs] summarize config error: {path} ({exc})")
675
+ errors.append(msg)
676
+ continue
677
+ summary_path = write_summary(config_summaries_dir, path, summary)
678
+ meta["config_summary_path"] = str(summary_path)
679
+ done += 1
680
+ print(f"[ai-docs] summarize config done: {path} ({done}/{total})")
681
+ else:
682
+ total = len(missing_config_summaries)
683
+ done = 0
684
+ for path, meta in missing_config_summaries:
685
+ print(f"[ai-docs] summarize config start: {path}")
686
+ try:
687
+ summary = summarize_file(meta["content"], meta["type"], meta["domains"], llm, llm_cache, llm.model, True)
688
+ summary_path = write_summary(config_summaries_dir, path, summary)
689
+ meta["config_summary_path"] = str(summary_path)
690
+ done += 1
691
+ print(f"[ai-docs] summarize config done: {path} ({done}/{total})")
692
+ except Exception as exc:
693
+ msg = f"summarize config: {path} -> {exc}"
694
+ print(f"[ai-docs] summarize config error: {path} ({exc})")
695
+ errors.append(msg)
696
+ _save_cache_snapshot()
697
+
698
+ # Remove summaries for deleted files
699
+ if deleted:
700
+ print(f"[ai-docs] cleanup: removing {len(deleted)} deleted summaries")
701
+ for path, meta in deleted.items():
702
+ prev_meta = prev_files.get(path, {})
703
+ summary_path = prev_meta.get("summary_path")
704
+ if summary_path:
705
+ try:
706
+ Path(summary_path).unlink()
707
+ except FileNotFoundError:
708
+ pass
709
+ module_summary_path = prev_meta.get("module_summary_path")
710
+ if module_summary_path:
711
+ try:
712
+ Path(module_summary_path).unlink()
713
+ except FileNotFoundError:
714
+ pass
715
+ prev_files.pop(path, None)
716
+
717
+ input_budget = max(512, llm.context_limit - llm.max_tokens - 200)
718
+
719
+ # Domains changed
720
+ changed_domains: Set[str] = set()
721
+ for path, meta in {**added, **modified, **deleted}.items():
722
+ changed_domains.update(meta.get("domains", []))
723
+
724
+ # Prepare domain contexts
725
+ domain_contexts: Dict[str, str] = {}
726
+ for domain in DOMAIN_TITLES.keys():
727
+ domain_files = [m for m in file_map.values() if domain in m.get("domains", [])]
728
+ if not domain_files:
729
+ continue
730
+ summaries = []
731
+ for m in domain_files:
732
+ summary_path = m.get("summary_path")
733
+ if summary_path:
734
+ summaries.append(read_text_file(Path(summary_path)))
735
+ if summaries:
736
+ domain_contexts[domain] = _truncate_context("\n\n".join(summaries), llm.model, input_budget)
737
+
738
+ test_paths, test_commands = _collect_test_info(file_map)
739
+
740
+ # Base context for overview sections
741
+ overview_context = "\n\n".join(
742
+ [read_text_file(Path(m["summary_path"])) for m in file_map.values() if m.get("summary_path")]
743
+ )
744
+ overview_context = _truncate_context(overview_context, llm.model, input_budget)
745
+
746
+ # Sections to regenerate
747
+ regenerated_sections: List[str] = []
748
+ docs_files: Dict[str, str] = {}
749
+ docs_dir = output_root / ".ai-docs"
750
+ module_pages: Dict[str, str] = {}
751
+ section_workers = min(threads, 4) if threads > 1 else 1
752
+
753
+ # Core + domain sections (+ index) in parallel (bounded)
754
+ configs_written: Dict[str, str] = {}
755
+ section_futures = {}
756
+ if section_workers > 1:
757
+ executor = ThreadPoolExecutor(max_workers=section_workers)
758
+ else:
759
+ executor = None
760
+
761
+ def _submit_section(out_path: str, title: str, context: str) -> None:
762
+ if executor:
763
+ section_futures[executor.submit(_generate_section, llm, llm_cache, title, context, language)] = (out_path, title)
764
+ else:
765
+ content = _generate_section(llm, llm_cache, title, context, language)
766
+ docs_files[out_path] = f"# {title}\n\n{content}\n"
767
+ regenerated_sections.append(title)
768
+
769
+ # Core sections
770
+ for key, title in SECTION_TITLES.items():
771
+ if added or modified or deleted or not (docs_dir / f"{key}.md").exists():
772
+ print(f"[ai-docs] generate section: {title}")
773
+ if key == "testing":
774
+ docs_files["testing.md"] = f"# {title}\n\n{_render_testing_section(test_paths, test_commands)}\n"
775
+ regenerated_sections.append(title)
776
+ continue
777
+ _submit_section(f"{key}.md", title, overview_context)
778
+
779
+ # Domain sections
780
+ for domain, title in DOMAIN_TITLES.items():
781
+ if domain not in domain_contexts:
782
+ continue
783
+ filename = f"{domain}.md"
784
+ if domain in changed_domains or not (docs_dir / "configs" / filename).exists():
785
+ print(f"[ai-docs] generate domain: {title}")
786
+ _submit_section(f"configs/{filename}", title, domain_contexts[domain])
787
+ configs_written[domain] = filename
788
+
789
+ # Index
790
+ index_title = "Документация проекта"
791
+ if added or modified or deleted or not (docs_dir / "index.md").exists():
792
+ print("[ai-docs] generate index")
793
+ _submit_section("index.md", index_title, overview_context)
794
+
795
+ # Modules (detailed summaries -> per-module pages + index)
796
+ module_summaries = []
797
+ module_nav_paths: List[str] = []
798
+ for path, meta in file_map.items():
799
+ if _is_test_path(path):
800
+ continue
801
+ summary_path = meta.get("module_summary_path")
802
+ if not summary_path:
803
+ continue
804
+ module_rel = Path("modules") / Path(path)
805
+ module_rel_str = module_rel.as_posix().replace(".", "__") + ".md"
806
+ module_title = Path(path).with_suffix("").as_posix()
807
+ summary = read_text_file(Path(summary_path))
808
+ module_pages[module_rel_str] = f"# {module_title}\n\n{summary}\n"
809
+ module_nav_paths.append(module_rel_str)
810
+ module_summaries.append(summary)
811
+ if module_summaries:
812
+ modules_title = "Модули"
813
+ modules_context = _truncate_context("\n\n".join(module_summaries), llm.model, input_budget)
814
+ print("[ai-docs] generate modules")
815
+ intro = _generate_section(llm, llm_cache, modules_title, modules_context, language)
816
+ toc_lines = "\n".join(
817
+ [
818
+ f"- [{Path(p).with_suffix('').as_posix()}]({Path(p).as_posix()[len('modules/'):] if p.startswith('modules/') else p})"
819
+ for p in sorted(module_nav_paths)
820
+ ]
821
+ )
822
+ docs_files["modules/index.md"] = f"# {modules_title}\n\n{intro}\n\n## Список модулей\n\n{toc_lines}\n"
823
+ regenerated_sections.append(modules_title)
824
+ docs_files.update(module_pages)
825
+
826
+ # Project configs (detailed summaries -> per-config pages + index)
827
+ config_pages: Dict[str, str] = {}
828
+ config_nav_paths: List[str] = []
829
+ for path, meta in file_map.items():
830
+ if meta.get("type") != "config":
831
+ continue
832
+ summary_path = meta.get("config_summary_path")
833
+ if not summary_path:
834
+ continue
835
+ config_rel = Path("configs/files") / Path(path)
836
+ config_rel_str = config_rel.as_posix().replace(".", "__") + ".md"
837
+ config_title = Path(path).as_posix()
838
+ summary = read_text_file(Path(summary_path))
839
+ config_pages[config_rel_str] = f"# {config_title}\n\n{summary}\n"
840
+ config_nav_paths.append(config_rel_str)
841
+ if config_nav_paths:
842
+ configs_title = "Конфигурация проекта"
843
+ docs_files["configs/index.md"] = f"# {configs_title}\n\n{_render_project_configs_index(config_nav_paths)}"
844
+ regenerated_sections.append(configs_title)
845
+ docs_files.update(config_pages)
846
+
847
+ if executor:
848
+ for future in as_completed(section_futures):
849
+ out_path, title = section_futures[future]
850
+ content = future.result()
851
+ docs_files[out_path] = f"# {title}\n\n{content}\n"
852
+ regenerated_sections.append(title)
853
+ executor.shutdown(wait=True)
854
+
855
+ # Remove stale config docs if domain no longer present
856
+ configs_dir = docs_dir / "configs"
857
+ if configs_dir.exists():
858
+ for domain, title in DOMAIN_TITLES.items():
859
+ if domain not in domain_contexts:
860
+ stale_path = configs_dir / f"{domain}.md"
861
+ if stale_path.exists():
862
+ stale_path.unlink()
863
+
864
+ # Dependencies (inject list if found)
865
+ deps = _collect_dependencies(file_map)
866
+ if deps:
867
+ deps_md = "\n".join([f"- {d}" for d in deps])
868
+ docs_files["dependencies.md"] = docs_files.get("dependencies.md", f"# {SECTION_TITLES['dependencies']}\n\n") + f"\n## Выявленные зависимости\n\n{deps_md}\n"
869
+
870
+ # Glossary placeholder if missing
871
+ if "glossary.md" not in docs_files and not (docs_dir / "glossary.md").exists():
872
+ docs_files["glossary.md"] = "# Глоссарий\n\n- TBD\n"
873
+ regenerated_sections.append("Глоссарий")
874
+
875
+ # Changes summary
876
+ if added or modified or deleted:
877
+ print("[ai-docs] generate changes")
878
+ changes_context = "\n\n".join(
879
+ [read_text_file(Path(meta["summary_path"])) for meta in {**added, **modified}.values() if meta.get("summary_path")]
880
+ )
881
+ changes_context = _truncate_context(changes_context, llm.model, input_budget)
882
+ summary = _generate_section(llm, llm_cache, "Краткое резюме изменений", changes_context, language)
883
+ else:
884
+ summary = "Изменений нет."
885
+
886
+ changes_md = format_changes_md(added, modified, deleted, regenerated_sections, summary)
887
+ docs_files["changes.md"] = changes_md
888
+
889
+ # Docs index for navigation
890
+ docs_index = _build_docs_index(output_root, docs_dir, docs_files, file_map, module_pages, config_pages)
891
+ docs_files["_index.json"] = json.dumps(docs_index, ensure_ascii=False, indent=2) + "\n"
892
+
893
+ # Mermaid JS asset for offline rendering (ship inside package)
894
+ mermaid_asset = Path(__file__).parent / "assets" / "mermaid.min.js"
895
+ if mermaid_asset.exists():
896
+ docs_files["js/mermaid.min.js"] = mermaid_asset.read_text(encoding="utf-8")
897
+ else:
898
+ print(f"[ai-docs] warning: mermaid asset not found: {mermaid_asset}")
899
+
900
+ write_docs_files(docs_dir, docs_files)
901
+
902
+ if write_readme:
903
+ print("[ai-docs] write README")
904
+ readme_path = output_root / "README.md"
905
+ if readme_path.exists() and not force:
906
+ print("[ai-docs] skip README: already exists (use --force to overwrite)")
907
+ else:
908
+ readme = _generate_readme(llm, llm_cache, output_root.name, overview_context, language)
909
+ readme_path.write_text(readme + "\n", encoding="utf-8")
910
+
911
+ # Remove orphan docs (keep .ai-docs/plans).
912
+ # Only cleanup when actual source changes occurred.
913
+ has_changes = bool(added or modified or deleted)
914
+ if docs_dir.exists() and docs_files and has_changes:
915
+ print("[ai-docs] cleanup docs: removing orphan files")
916
+ keep_files = {docs_dir / rel for rel in docs_files.keys()}
917
+ keep_dirs = {docs_dir / "plans"}
918
+ for path in docs_dir.rglob("*"):
919
+ if path.is_dir():
920
+ continue
921
+ if any(str(path).startswith(str(keep_dir)) for keep_dir in keep_dirs):
922
+ continue
923
+ if path in keep_files:
924
+ continue
925
+ path.unlink()
926
+ elif docs_dir.exists():
927
+ print("[ai-docs] cleanup docs: skipped (no source changes)")
928
+
929
+ if write_mkdocs:
930
+ print("[ai-docs] mkdocs: build")
931
+ mkdocs_yaml = build_mkdocs_yaml(
932
+ site_name=output_root.name,
933
+ sections=SECTION_TITLES,
934
+ configs=configs_written,
935
+ has_modules=bool(module_summaries),
936
+ module_nav_paths=module_nav_paths if module_summaries else None,
937
+ project_config_nav_paths=config_nav_paths if config_nav_paths else None,
938
+ local_site=local_site,
939
+ )
940
+ (output_root / "mkdocs.yml").write_text(mkdocs_yaml, encoding="utf-8")
941
+ mkdocs_bin = shutil.which("mkdocs")
942
+ if not mkdocs_bin:
943
+ raise RuntimeError("mkdocs is not installed or not on PATH")
944
+ subprocess.check_call([mkdocs_bin, "build", "-f", "mkdocs.yml"], cwd=output_root)
945
+ print("[ai-docs] mkdocs: done")
946
+
947
+ # Save cache
948
+ new_index = {
949
+ "files": {path: {k: v for k, v in meta.items() if k != "content"} for path, meta in file_map.items()},
950
+ "sections": {"regenerated": regenerated_sections},
951
+ }
952
+ cache.save_index(new_index)
953
+ if use_cache and llm_cache is not None:
954
+ cache.save_llm_cache(llm_cache)
955
+
956
+ if errors:
957
+ print("[ai-docs] errors summary:")
958
+ for item in errors:
959
+ print(f"[ai-docs] error: {item}")