ai_docs_gen-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_docs/__init__.py +0 -0
- ai_docs/__main__.py +22 -0
- ai_docs/assets/mermaid.min.js +1642 -0
- ai_docs/cache.py +52 -0
- ai_docs/changes.py +25 -0
- ai_docs/cli.py +84 -0
- ai_docs/domain.py +206 -0
- ai_docs/generator.py +959 -0
- ai_docs/llm.py +82 -0
- ai_docs/mkdocs.py +161 -0
- ai_docs/scanner.py +237 -0
- ai_docs/summary.py +238 -0
- ai_docs/tokenizer.py +26 -0
- ai_docs/utils.py +43 -0
- ai_docs_gen-0.1.2.dist-info/METADATA +197 -0
- ai_docs_gen-0.1.2.dist-info/RECORD +19 -0
- ai_docs_gen-0.1.2.dist-info/WHEEL +5 -0
- ai_docs_gen-0.1.2.dist-info/entry_points.txt +2 -0
- ai_docs_gen-0.1.2.dist-info/top_level.txt +1 -0
ai_docs/cache.py
ADDED
@@ -0,0 +1,52 @@

```python
import json
from pathlib import Path
from typing import Dict, Tuple

from .utils import ensure_dir


class CacheManager:
    def __init__(self, cache_dir: Path):
        self.cache_dir = cache_dir
        ensure_dir(self.cache_dir)
        self.index_path = self.cache_dir / "index.json"
        self.llm_cache_path = self.cache_dir / "llm_cache.json"

    def load_index(self) -> Dict:
        if not self.index_path.exists():
            return {"files": {}, "sections": {}}
        return json.loads(self.index_path.read_text(encoding="utf-8", errors="ignore"))

    def save_index(self, data: Dict) -> None:
        self.index_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")

    def load_llm_cache(self) -> Dict[str, str]:
        if not self.llm_cache_path.exists():
            return {}
        return json.loads(self.llm_cache_path.read_text(encoding="utf-8", errors="ignore"))

    def save_llm_cache(self, data: Dict[str, str]) -> None:
        self.llm_cache_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")

    def diff_files(self, current_files: Dict[str, Dict]) -> Tuple[Dict, Dict, Dict, Dict]:
        prev = self.load_index().get("files", {})
        added = {}
        modified = {}
        deleted = {}
        unchanged = {}

        for path, meta in current_files.items():
            if path not in prev:
                added[path] = meta
                continue
            if prev[path].get("hash") != meta.get("hash"):
                modified[path] = meta
            else:
                unchanged[path] = meta

        for path, meta in prev.items():
            if path not in current_files:
                deleted[path] = meta

        return added, modified, deleted, unchanged
```
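`CacheManager` persists a file index and an LLM-response cache under one directory, and `diff_files` splits a fresh scan into added, modified, deleted, and unchanged sets by comparing content hashes. A minimal usage sketch; the `current_files` shape (path mapped to metadata with a `hash` key) is inferred from `diff_files`, and the paths and hashes here are hypothetical:

```python
from pathlib import Path
from ai_docs.cache import CacheManager

# Hypothetical scan result: path -> metadata carrying a content hash.
current_files = {
    "src/app.py": {"hash": "a1b2c3"},
    "src/new_module.py": {"hash": "d4e5f6"},
}

cache = CacheManager(Path(".ai_docs_cache"))
added, modified, deleted, unchanged = cache.diff_files(current_files)

# Persist the new state so the next run diffs against it.
cache.save_index({"files": current_files, "sections": {}})
```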
ai_docs/changes.py
ADDED
@@ -0,0 +1,25 @@

```python
from typing import Dict, List


def format_changes_md(added: Dict, modified: Dict, deleted: Dict, regenerated_sections: List[str], summary: str) -> str:
    def _fmt_list(title: str, items: Dict) -> str:
        if not items:
            return f"## {title}\n\n- нет\n"  # "нет" = "none"
        lines = "\n".join([f"- {path}" for path in sorted(items.keys())])
        return f"## {title}\n\n{lines}\n"

    md = "# Изменения с последней генерации\n\n"  # "Changes since the last generation"
    md += _fmt_list("Добавленные файлы", added)  # "Added files"
    md += _fmt_list("Изменённые файлы", modified)  # "Modified files"
    md += _fmt_list("Удалённые файлы", deleted)  # "Deleted files"

    md += "## Перегенерированные разделы\n\n"  # "Regenerated sections"
    if regenerated_sections:
        md += "\n".join([f"- {name}" for name in regenerated_sections]) + "\n"
    else:
        md += "- нет\n"

    md += "\n## Краткое резюме\n\n"  # "Brief summary"
    md += summary.strip() + "\n"
    return md
```
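`format_changes_md` renders the output of a diff (such as `CacheManager.diff_files`) as a Markdown change report with Russian headings. A minimal sketch with hypothetical inputs:

```python
from ai_docs.changes import format_changes_md

# Hypothetical values shaped like CacheManager.diff_files() output.
report = format_changes_md(
    added={"src/new_module.py": {"hash": "d4e5f6"}},
    modified={},
    deleted={},
    regenerated_sections=["overview"],
    summary="One module was added; the overview section was regenerated.",
)
print(report)  # "# Изменения с последней генерации" followed by per-category lists
```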
ai_docs/cli.py
ADDED
@@ -0,0 +1,84 @@

```python
import argparse
import os
from pathlib import Path
from typing import Optional, Set

import shutil

from .generator import generate_docs
from .llm import from_env
from .scanner import scan_source
from .utils import is_url
from dotenv import load_dotenv


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Generate README + MkDocs documentation for a code/config repository.")
    parser.add_argument("--source", required=True, help="Path to local folder or git URL")
    parser.add_argument("--output", help="Output root directory. Defaults to source for local paths, or ./output/<repo> for URLs")
    parser.add_argument("--readme", action="store_true", help="Generate README.md")
    parser.add_argument("--mkdocs", action="store_true", help="Generate MkDocs docs site")
    parser.add_argument("--language", default="ru", help="Language for generated docs (ru|en)")
    parser.add_argument("--include", nargs="*", help="Include patterns (glob)")
    parser.add_argument("--exclude", nargs="*", help="Exclude patterns (glob)")
    parser.add_argument("--max-size", type=int, default=200_000, help="Max file size in bytes")
    parser.add_argument("--cache-dir", default=".ai_docs_cache", help="Cache directory")
    parser.add_argument("--no-cache", action="store_true", help="Disable LLM cache")
    parser.add_argument("--threads", type=int, default=None, help="Number of parallel LLM workers")
    parser.add_argument("--local-site", action="store_true", help="Generate MkDocs config for local run")
    parser.add_argument("--force", action="store_true", help="Overwrite README.md if it already exists")
    return parser.parse_args()


def resolve_output(source: str, output: Optional[str], repo_name: str) -> Path:
    if output:
        return Path(output).expanduser().resolve()
    source_path = Path(source).expanduser().resolve()
    if source_path.exists():
        return source_path
    return Path("output") / repo_name


def main() -> None:
    load_dotenv()
    args = parse_args()
    include: Optional[Set[str]] = set(args.include) if args.include else None
    exclude: Optional[Set[str]] = set(args.exclude) if args.exclude else None

    scan_result = scan_source(args.source, include=include, exclude=exclude, max_size=args.max_size)
    root = scan_result.root
    repo_name = scan_result.repo_name
    print(f"[ai-docs] scan complete: {len(scan_result.files)} files")

    output_root = resolve_output(args.source, args.output, repo_name)
    output_root.mkdir(parents=True, exist_ok=True)

    llm = from_env()
    print(f"[ai-docs] llm: model={llm.model} context={llm.context_limit} max_tokens={llm.max_tokens}")

    env_threads = int(os.getenv("AI_DOCS_THREADS", "1"))
    env_local_site = os.getenv("AI_DOCS_LOCAL_SITE", "false").strip().lower() in {"1", "true", "yes", "y"}
    threads = args.threads if args.threads is not None else env_threads
    local_site = args.local_site or env_local_site

    print(f"[ai-docs] generate: readme={args.readme or not args.mkdocs} mkdocs={args.mkdocs or not args.readme}")
    generate_docs(
        files=scan_result.files,
        output_root=output_root,
        cache_dir=output_root / args.cache_dir,
        llm=llm,
        language=args.language,
        write_readme=(args.readme or not args.mkdocs),
        write_mkdocs=(args.mkdocs or not args.readme),
        use_cache=not args.no_cache,
        threads=max(1, threads),
        local_site=local_site,
        force=args.force,
    )

    if is_url(args.source):
        shutil.rmtree(scan_result.root, ignore_errors=True)


if __name__ == "__main__":
    main()
```
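End to end, `main()` scans the source, resolves the output root, builds an LLM client from environment settings, and calls `generate_docs`; omitting both `--readme` and `--mkdocs` enables both outputs. A hedged sketch of an equivalent programmatic invocation (the `ai-docs` argv[0] is cosmetic, the real console-script name is defined in `entry_points.txt`, and `from_env()` presumably needs LLM credentials in the environment or a `.env` file):

```python
import sys
from ai_docs.cli import main

# Roughly equivalent to running the console script with these flags.
# --threads falls back to the AI_DOCS_THREADS env var when omitted.
sys.argv = ["ai-docs", "--source", ".", "--readme", "--language", "en", "--threads", "4"]
main()
```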
ai_docs/domain.py
ADDED
@@ -0,0 +1,206 @@

```python
from pathlib import Path
from typing import Set


CODE_EXTENSION_DESCRIPTIONS = {
    ".py": "Python",
    ".pyi": "Python (типизация)",  # "типизация" = "typing" (stub files)
    ".pyx": "Cython",
    ".js": "JavaScript",
    ".jsx": "JavaScript (JSX)",
    ".ts": "TypeScript",
    ".tsx": "TypeScript (TSX)",
    ".go": "Go",
    ".java": "Java",
    ".c": "C",
    ".cc": "C++",
    ".cpp": "C++",
    ".h": "C/C++ Header",
    ".hpp": "C++ Header",
    ".rs": "Rust",
    ".rb": "Ruby",
    ".php": "PHP",
    ".cs": "C#",
    ".kt": "Kotlin",
    ".kts": "Kotlin (Script)",
    ".swift": "Swift",
    ".m": "Objective-C",
    ".mm": "Objective-C++",
    ".vb": "Visual Basic",
    ".bas": "BASIC",
    ".sql": "SQL",
    ".pas": "Pascal",
    ".dpr": "Delphi/Pascal",
    ".pp": "Pascal",
    ".r": "R",
    ".pl": "Perl",
    ".pm": "Perl Module",
    ".f": "Fortran",
    ".for": "Fortran",
    ".f90": "Fortran",
    ".f95": "Fortran",
    ".f03": "Fortran",
    ".f08": "Fortran",
    ".sb3": "Scratch",
    ".adb": "Ada",
    ".ads": "Ada (Spec)",
    ".asm": "Assembly",
    ".s": "Assembly",
    ".ino": "Arduino",
    ".htm": "HTML",
    ".html": "HTML",
    ".css": "CSS",
}

DOC_EXTENSION_DESCRIPTIONS = {
    ".md": "Markdown",
    ".rst": "reStructuredText",
    ".adoc": "AsciiDoc",
    ".txt": "Text",
}

CONFIG_EXTENSION_DESCRIPTIONS = {
    ".yml": "YAML",
    ".yaml": "YAML",
    ".json": "JSON",
    ".toml": "TOML",
    ".ini": "INI",
    ".cfg": "Config",
    ".conf": "Config",
    ".env": "Environment",
    ".properties": "Properties",
}

CODE_EXTENSIONS = set(CODE_EXTENSION_DESCRIPTIONS)
DOC_EXTENSIONS = set(DOC_EXTENSION_DESCRIPTIONS)
CONFIG_EXTENSIONS = set(CONFIG_EXTENSION_DESCRIPTIONS)

DATA_EXTENSIONS = {".csv", ".tsv", ".parquet", ".avro", ".jsonl"}


K8S_FILENAMES = {
    "deployment.yaml", "deployment.yml", "service.yaml", "service.yml",
    "ingress.yaml", "ingress.yml", "kustomization.yaml", "kustomization.yml",
}

CI_FILENAMES = {".gitlab-ci.yml", "Jenkinsfile", "azure-pipelines.yml"}
CI_PATH_MARKERS = {".github/workflows", ".circleci", ".buildkite"}
CI_FILENAMES_EXTRA = {"bitbucket-pipelines.yml", "buildkite.yml", "pipeline.yml"}


HELM_FILENAMES = {"Chart.yaml", "Chart.yml", "values.yaml", "values.yml"}


DOCKER_FILENAMES = {"Dockerfile", "docker-compose.yml", "docker-compose.yaml", "compose.yaml", "compose.yml"}

OBSERVABILITY_FILENAMES = {
    "prometheus.yml", "prometheus.yaml", "alertmanager.yml", "alertmanager.yaml",
    "loki.yml", "loki.yaml", "promtail.yml", "promtail.yaml", "tempo.yml", "tempo.yaml",
    "otel-collector.yml", "otel-collector.yaml", "opentelemetry-collector.yml", "opentelemetry-collector.yaml",
    "jaeger.yml", "jaeger.yaml", "zipkin.yml", "zipkin.yaml",
}
OBSERVABILITY_PATH_MARKERS = {
    "prometheus", "grafana", "loki", "tempo", "otel", "opentelemetry",
    "jaeger", "zipkin", "logstash", "fluentd", "fluent-bit",
}
SERVICE_MESH_MARKERS = {
    "istio", "linkerd", "consul", "cilium", "envoy", "traefik", "nginx-ingress",
    "service-mesh", "servicemesh", "ingress", "gateway",
}
DATA_STORAGE_MARKERS = {
    "postgres", "mysql", "mariadb", "redis", "mongo", "mongodb", "cassandra",
    "clickhouse", "elasticsearch", "opensearch", "kafka", "minio", "s3",
}

TERRAFORM_EXTENSIONS = {".tf", ".tfvars"}


def classify_type(path: Path) -> str:
    name = path.name
    suffix = path.suffix.lower()
    if name in DOCKER_FILENAMES or name.startswith("Dockerfile"):
        return "infra"
    if name in CI_FILENAMES or name in CI_FILENAMES_EXTRA or any(marker in path.as_posix() for marker in CI_PATH_MARKERS):
        return "ci"
    if suffix in TERRAFORM_EXTENSIONS:
        return "infra"
    if suffix in CODE_EXTENSIONS:
        return "code"
    if suffix in DOC_EXTENSIONS:
        return "docs"
    if suffix in CONFIG_EXTENSIONS:
        return "config"
    if suffix in DATA_EXTENSIONS:
        return "data"
    return "other"


def detect_domains(path: Path, content_snippet: str) -> Set[str]:
    domains: Set[str] = set()
    posix_path = path.as_posix()
    name = path.name
    suffix = path.suffix.lower()
    content = content_snippet or ""

    if name in DOCKER_FILENAMES or name.startswith("Dockerfile"):
        domains.add("docker")

    if "docker" in posix_path and (suffix in {".yml", ".yaml"} or name.startswith("Dockerfile")):
        domains.add("docker")

    if name in CI_FILENAMES or name in CI_FILENAMES_EXTRA or any(marker in posix_path for marker in CI_PATH_MARKERS):
        domains.add("ci")

    if name in HELM_FILENAMES or "charts/" in posix_path or "/templates/" in posix_path:
        domains.add("helm")

    if suffix in TERRAFORM_EXTENSIONS or "terraform" in posix_path:
        domains.add("terraform")

    if "ansible" in posix_path or "/roles/" in posix_path or "/tasks/" in posix_path:
        domains.add("ansible")

    if name in K8S_FILENAMES or "k8s" in posix_path or "kubernetes" in posix_path:
        domains.add("kubernetes")

    if suffix in {".yml", ".yaml"}:
        if "apiVersion" in content and "kind" in content:
            domains.add("kubernetes")

    if name in OBSERVABILITY_FILENAMES or any(marker in posix_path for marker in OBSERVABILITY_PATH_MARKERS):
        domains.add("observability")

    if any(marker in posix_path for marker in SERVICE_MESH_MARKERS):
        domains.add("service_mesh")
    if "ingress" in posix_path:
        domains.add("kubernetes")

    if suffix in {".yml", ".yaml"}:
        if "kind: Ingress" in content or "kind: Gateway" in content:
            domains.add("service_mesh")
            domains.add("kubernetes")
        if "VirtualService" in content or "DestinationRule" in content or "ServiceEntry" in content:
            domains.add("service_mesh")

    if any(marker in posix_path for marker in DATA_STORAGE_MARKERS):
        domains.add("data_storage")

    return domains


def is_infra(domains: Set[str]) -> bool:
    return bool(
        domains.intersection(
            {
                "kubernetes",
                "helm",
                "terraform",
                "ansible",
                "docker",
                "ci",
                "observability",
                "service_mesh",
                "data_storage",
            }
        )
    )
```
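`classify_type` buckets a path into coarse categories (`code`, `docs`, `config`, `infra`, `ci`, `data`, `other`), `detect_domains` tags infrastructure domains from the path plus a content snippet, and `is_infra` collapses those tags to a boolean. A small sketch with hypothetical paths:

```python
from pathlib import Path
from ai_docs.domain import classify_type, detect_domains, is_infra

print(classify_type(Path("src/app.py")))      # "code"  (.py extension)
print(classify_type(Path("deploy/main.tf")))  # "infra" (.tf extension)

# Filename, the "k8s" path marker, and apiVersion/kind in the content all
# point the same way here, so a single domain is detected.
snippet = "apiVersion: apps/v1\nkind: Deployment\n"
domains = detect_domains(Path("k8s/deployment.yaml"), snippet)
print(domains)            # {"kubernetes"}
print(is_infra(domains))  # True
```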