ai_docs_gen-0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ai_docs/cache.py ADDED
@@ -0,0 +1,52 @@
+ import json
+ from pathlib import Path
+ from typing import Dict, Tuple
+
+ from .utils import ensure_dir
+
+
+ class CacheManager:
+     def __init__(self, cache_dir: Path):
+         self.cache_dir = cache_dir
+         ensure_dir(self.cache_dir)
+         self.index_path = self.cache_dir / "index.json"
+         self.llm_cache_path = self.cache_dir / "llm_cache.json"
+
+     def load_index(self) -> Dict:
+         if not self.index_path.exists():
+             return {"files": {}, "sections": {}}
+         return json.loads(self.index_path.read_text(encoding="utf-8", errors="ignore"))
+
+     def save_index(self, data: Dict) -> None:
+         self.index_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
+
+     def load_llm_cache(self) -> Dict[str, str]:
+         if not self.llm_cache_path.exists():
+             return {}
+         return json.loads(self.llm_cache_path.read_text(encoding="utf-8", errors="ignore"))
+
+     def save_llm_cache(self, data: Dict[str, str]) -> None:
+         self.llm_cache_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
+
+     def diff_files(self, current_files: Dict[str, Dict]) -> Tuple[Dict, Dict, Dict, Dict]:
+         prev = self.load_index().get("files", {})
+         added = {}
+         modified = {}
+         deleted = {}
+         unchanged = {}
+
+         for path, meta in current_files.items():
+             if path not in prev:
+                 added[path] = meta
+                 continue
+             if prev[path].get("hash") != meta.get("hash"):
+                 modified[path] = meta
+             else:
+                 unchanged[path] = meta
+
+         for path, meta in prev.items():
+             if path not in current_files:
+                 deleted[path] = meta
+
+         return added, modified, deleted, unchanged
+
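For orientation, a minimal usage sketch of the cache above. The cache directory name and the file-metadata shape here are illustrative assumptions; the only requirement diff_files actually imposes is that each metadata dict carries a "hash" key:

from pathlib import Path
from ai_docs.cache import CacheManager

cache = CacheManager(Path(".ai_docs_cache"))
# Hypothetical scan output: path -> metadata with a content hash.
current = {
    "src/app.py": {"hash": "abc123"},
    "README.md": {"hash": "def456"},
}
added, modified, deleted, unchanged = cache.diff_files(current)
# Persist the new state so the next run diffs against it.
cache.save_index({"files": current, "sections": {}})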
ai_docs/changes.py ADDED
@@ -0,0 +1,25 @@
+ from typing import Dict, List
+
+
+ def format_changes_md(added: Dict, modified: Dict, deleted: Dict, regenerated_sections: List[str], summary: str) -> str:
+     def _fmt_list(title: str, items: Dict) -> str:
+         if not items:
+             return f"## {title}\n\n- none\n"
+         lines = "\n".join([f"- {path}" for path in sorted(items.keys())])
+         return f"## {title}\n\n{lines}\n"
+
+     md = "# Changes since the last generation\n\n"
+     md += _fmt_list("Added files", added)
+     md += _fmt_list("Modified files", modified)
+     md += _fmt_list("Deleted files", deleted)
+
+     md += "## Regenerated sections\n\n"
+     if regenerated_sections:
+         md += "\n".join([f"- {name}" for name in regenerated_sections]) + "\n"
+     else:
+         md += "- none\n"
+
+     md += "\n## Brief summary\n\n"
+     md += summary.strip() + "\n"
+     return md
+
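A quick sketch of the formatter above with invented inputs (the file name, section name, and summary text are illustrative only):

from ai_docs.changes import format_changes_md

md = format_changes_md(
    added={"src/new_module.py": {}},
    modified={},
    deleted={},
    regenerated_sections=["Architecture"],
    summary="One module was added; the architecture section was regenerated.",
)
print(md)  # "# Changes since the last generation" followed by the per-category lists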
ai_docs/cli.py ADDED
@@ -0,0 +1,84 @@
+ import argparse
+ import os
+ import shutil
+ from pathlib import Path
+ from typing import Optional, Set
+
+ from dotenv import load_dotenv
+
+ from .generator import generate_docs
+ from .llm import from_env
+ from .scanner import scan_source
+ from .utils import is_url
+
+
+ def parse_args() -> argparse.Namespace:
+     parser = argparse.ArgumentParser(description="Generate README + MkDocs documentation for a code/config repository.")
+     parser.add_argument("--source", required=True, help="Path to local folder or git URL")
+     parser.add_argument("--output", help="Output root directory. Defaults to source for local paths, or ./output/<repo> for URLs")
+     parser.add_argument("--readme", action="store_true", help="Generate README.md")
+     parser.add_argument("--mkdocs", action="store_true", help="Generate MkDocs docs site")
+     parser.add_argument("--language", default="ru", help="Language for generated docs (ru|en)")
+     parser.add_argument("--include", nargs="*", help="Include patterns (glob)")
+     parser.add_argument("--exclude", nargs="*", help="Exclude patterns (glob)")
+     parser.add_argument("--max-size", type=int, default=200_000, help="Max file size in bytes")
+     parser.add_argument("--cache-dir", default=".ai_docs_cache", help="Cache directory")
+     parser.add_argument("--no-cache", action="store_true", help="Disable LLM cache")
+     parser.add_argument("--threads", type=int, default=None, help="Number of parallel LLM workers")
+     parser.add_argument("--local-site", action="store_true", help="Generate MkDocs config for local run")
+     parser.add_argument("--force", action="store_true", help="Overwrite README.md if it already exists")
+     return parser.parse_args()
+
+
+ def resolve_output(source: str, output: Optional[str], repo_name: str) -> Path:
+     if output:
+         return Path(output).expanduser().resolve()
+     source_path = Path(source).expanduser().resolve()
+     if source_path.exists():
+         return source_path
+     return Path("output") / repo_name
+
+
+ def main() -> None:
+     load_dotenv()
+     args = parse_args()
+     include: Optional[Set[str]] = set(args.include) if args.include else None
+     exclude: Optional[Set[str]] = set(args.exclude) if args.exclude else None
+
+     scan_result = scan_source(args.source, include=include, exclude=exclude, max_size=args.max_size)
+     root = scan_result.root
+     repo_name = scan_result.repo_name
+     print(f"[ai-docs] scan complete: {len(scan_result.files)} files")
+
+     output_root = resolve_output(args.source, args.output, repo_name)
+     output_root.mkdir(parents=True, exist_ok=True)
+
+     llm = from_env()
+     print(f"[ai-docs] llm: model={llm.model} context={llm.context_limit} max_tokens={llm.max_tokens}")
+
+     env_threads = int(os.getenv("AI_DOCS_THREADS", "1"))
+     env_local_site = os.getenv("AI_DOCS_LOCAL_SITE", "false").strip().lower() in {"1", "true", "yes", "y"}
+     threads = args.threads if args.threads is not None else env_threads
+     local_site = args.local_site or env_local_site
+
+     print(f"[ai-docs] generate: readme={args.readme or not args.mkdocs} mkdocs={args.mkdocs or not args.readme}")
+     generate_docs(
+         files=scan_result.files,
+         output_root=output_root,
+         cache_dir=output_root / args.cache_dir,
+         llm=llm,
+         language=args.language,
+         write_readme=(args.readme or not args.mkdocs),
+         write_mkdocs=(args.mkdocs or not args.readme),
+         use_cache=not args.no_cache,
+         threads=max(1, threads),
+         local_site=local_site,
+         force=args.force,
+     )
+
+     if is_url(args.source):
+         shutil.rmtree(root, ignore_errors=True)
+
+
+ if __name__ == "__main__":
+     main()
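The output-resolution rule in resolve_output is easy to misread, so here is a small sketch of its three branches (paths are illustrative, and the URL case assumes the string does not exist as a local path):

from pathlib import Path
from ai_docs.cli import resolve_output

# 1. An explicit --output always wins.
assert resolve_output("/tmp/repo", "out", "repo") == Path("out").resolve()
# 2. An existing local source directory is documented in place,
#    i.e. resolve_output("/tmp/repo", None, "repo") returns /tmp/repo itself.
# 3. Otherwise (e.g. a git URL), docs land under ./output/<repo>.
assert resolve_output("https://example.com/org/repo.git", None, "repo") == Path("output") / "repo"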
ai_docs/domain.py ADDED
@@ -0,0 +1,206 @@
+ from pathlib import Path
+ from typing import Set
+
+
+ CODE_EXTENSION_DESCRIPTIONS = {
+     ".py": "Python",
+     ".pyi": "Python (typing)",
+     ".pyx": "Cython",
+     ".js": "JavaScript",
+     ".jsx": "JavaScript (JSX)",
+     ".ts": "TypeScript",
+     ".tsx": "TypeScript (TSX)",
+     ".go": "Go",
+     ".java": "Java",
+     ".c": "C",
+     ".cc": "C++",
+     ".cpp": "C++",
+     ".h": "C/C++ Header",
+     ".hpp": "C++ Header",
+     ".rs": "Rust",
+     ".rb": "Ruby",
+     ".php": "PHP",
+     ".cs": "C#",
+     ".kt": "Kotlin",
+     ".kts": "Kotlin (Script)",
+     ".swift": "Swift",
+     ".m": "Objective-C",
+     ".mm": "Objective-C++",
+     ".vb": "Visual Basic",
+     ".bas": "BASIC",
+     ".sql": "SQL",
+     ".pas": "Pascal",
+     ".dpr": "Delphi/Pascal",
+     ".pp": "Pascal",
+     ".r": "R",
+     ".pl": "Perl",
+     ".pm": "Perl Module",
+     ".f": "Fortran",
+     ".for": "Fortran",
+     ".f90": "Fortran",
+     ".f95": "Fortran",
+     ".f03": "Fortran",
+     ".f08": "Fortran",
+     ".sb3": "Scratch",
+     ".adb": "Ada",
+     ".ads": "Ada (Spec)",
+     ".asm": "Assembly",
+     ".s": "Assembly",
+     ".ino": "Arduino",
+     ".htm": "HTML",
+     ".html": "HTML",
+     ".css": "CSS",
+ }
+
+ DOC_EXTENSION_DESCRIPTIONS = {
+     ".md": "Markdown",
+     ".rst": "reStructuredText",
+     ".adoc": "AsciiDoc",
+     ".txt": "Text",
+ }
+
+ CONFIG_EXTENSION_DESCRIPTIONS = {
+     ".yml": "YAML",
+     ".yaml": "YAML",
+     ".json": "JSON",
+     ".toml": "TOML",
+     ".ini": "INI",
+     ".cfg": "Config",
+     ".conf": "Config",
+     ".env": "Environment",
+     ".properties": "Properties",
+ }
+
+ CODE_EXTENSIONS = set(CODE_EXTENSION_DESCRIPTIONS)
+ DOC_EXTENSIONS = set(DOC_EXTENSION_DESCRIPTIONS)
+ CONFIG_EXTENSIONS = set(CONFIG_EXTENSION_DESCRIPTIONS)
+
+ DATA_EXTENSIONS = {".csv", ".tsv", ".parquet", ".avro", ".jsonl"}
+
+
+ K8S_FILENAMES = {
+     "deployment.yaml", "deployment.yml", "service.yaml", "service.yml",
+     "ingress.yaml", "ingress.yml", "kustomization.yaml", "kustomization.yml",
+ }
+
+ CI_FILENAMES = {".gitlab-ci.yml", "Jenkinsfile", "azure-pipelines.yml"}
+ CI_PATH_MARKERS = {".github/workflows", ".circleci", ".buildkite"}
+ CI_FILENAMES_EXTRA = {"bitbucket-pipelines.yml", "buildkite.yml", "pipeline.yml"}
+
+
+ HELM_FILENAMES = {"Chart.yaml", "Chart.yml", "values.yaml", "values.yml"}
+
+
+ DOCKER_FILENAMES = {"Dockerfile", "docker-compose.yml", "docker-compose.yaml", "compose.yaml", "compose.yml"}
+
+ OBSERVABILITY_FILENAMES = {
+     "prometheus.yml", "prometheus.yaml", "alertmanager.yml", "alertmanager.yaml",
+     "loki.yml", "loki.yaml", "promtail.yml", "promtail.yaml", "tempo.yml", "tempo.yaml",
+     "otel-collector.yml", "otel-collector.yaml", "opentelemetry-collector.yml", "opentelemetry-collector.yaml",
+     "jaeger.yml", "jaeger.yaml", "zipkin.yml", "zipkin.yaml",
+ }
+ OBSERVABILITY_PATH_MARKERS = {
+     "prometheus", "grafana", "loki", "tempo", "otel", "opentelemetry",
+     "jaeger", "zipkin", "logstash", "fluentd", "fluent-bit",
+ }
+ SERVICE_MESH_MARKERS = {
+     "istio", "linkerd", "consul", "cilium", "envoy", "traefik", "nginx-ingress",
+     "service-mesh", "servicemesh", "ingress", "gateway",
+ }
+ DATA_STORAGE_MARKERS = {
+     "postgres", "mysql", "mariadb", "redis", "mongo", "mongodb", "cassandra",
+     "clickhouse", "elasticsearch", "opensearch", "kafka", "minio", "s3",
+ }
+
+ TERRAFORM_EXTENSIONS = {".tf", ".tfvars"}
+
+
+ def classify_type(path: Path) -> str:
+     name = path.name
+     suffix = path.suffix.lower()
+     if name in DOCKER_FILENAMES or name.startswith("Dockerfile"):
+         return "infra"
+     if name in CI_FILENAMES or name in CI_FILENAMES_EXTRA or any(marker in path.as_posix() for marker in CI_PATH_MARKERS):
+         return "ci"
+     if suffix in TERRAFORM_EXTENSIONS:
+         return "infra"
+     if suffix in CODE_EXTENSIONS:
+         return "code"
+     if suffix in DOC_EXTENSIONS:
+         return "docs"
+     if suffix in CONFIG_EXTENSIONS:
+         return "config"
+     if suffix in DATA_EXTENSIONS:
+         return "data"
+     return "other"
+
+
+ def detect_domains(path: Path, content_snippet: str) -> Set[str]:
+     domains: Set[str] = set()
+     posix_path = path.as_posix()
+     name = path.name
+     suffix = path.suffix.lower()
+     content = content_snippet or ""
+
+     if name in DOCKER_FILENAMES or name.startswith("Dockerfile"):
+         domains.add("docker")
+
+     if "docker" in posix_path and (suffix in {".yml", ".yaml"} or name.startswith("Dockerfile")):
+         domains.add("docker")
+
+     if name in CI_FILENAMES or name in CI_FILENAMES_EXTRA or any(marker in posix_path for marker in CI_PATH_MARKERS):
+         domains.add("ci")
+
+     if name in HELM_FILENAMES or "charts/" in posix_path or "/templates/" in posix_path:
+         domains.add("helm")
+
+     if suffix in TERRAFORM_EXTENSIONS or "terraform" in posix_path:
+         domains.add("terraform")
+
+     if "ansible" in posix_path or "/roles/" in posix_path or "/tasks/" in posix_path:
+         domains.add("ansible")
+
+     if name in K8S_FILENAMES or "k8s" in posix_path or "kubernetes" in posix_path:
+         domains.add("kubernetes")
+
+     if suffix in {".yml", ".yaml"}:
+         if "apiVersion" in content and "kind" in content:
+             domains.add("kubernetes")
+
+     if name in OBSERVABILITY_FILENAMES or any(marker in posix_path for marker in OBSERVABILITY_PATH_MARKERS):
+         domains.add("observability")
+
+     if any(marker in posix_path for marker in SERVICE_MESH_MARKERS):
+         domains.add("service_mesh")
+     if "ingress" in posix_path:
+         domains.add("kubernetes")
+
+     if suffix in {".yml", ".yaml"}:
+         if "kind: Ingress" in content or "kind: Gateway" in content:
+             domains.add("service_mesh")
+             domains.add("kubernetes")
+         if "VirtualService" in content or "DestinationRule" in content or "ServiceEntry" in content:
+             domains.add("service_mesh")
+
+     if any(marker in posix_path for marker in DATA_STORAGE_MARKERS):
+         domains.add("data_storage")
+
+     return domains
+
+
+ def is_infra(domains: Set[str]) -> bool:
+     return bool(
+         domains.intersection(
+             {
+                 "kubernetes",
+                 "helm",
+                 "terraform",
+                 "ansible",
+                 "docker",
+                 "ci",
+                 "observability",
+                 "service_mesh",
+                 "data_storage",
+             }
+         )
+     )
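A short sketch of how the classifiers above compose (the paths and the YAML snippet are invented for the example):

from pathlib import Path
from ai_docs.domain import classify_type, detect_domains, is_infra

print(classify_type(Path("main.tf")))        # "infra" via TERRAFORM_EXTENSIONS
print(classify_type(Path("app/server.py")))  # "code"

snippet = "apiVersion: apps/v1\nkind: Deployment\n"
domains = detect_domains(Path("deploy/deployment.yaml"), snippet)
print(domains)            # {"kubernetes"}: matched by K8S_FILENAMES and the apiVersion/kind probe
print(is_infra(domains))  # True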