ai-docs-gen 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
ai_docs/llm.py ADDED
@@ -0,0 +1,82 @@
+ import json
+ import os
+ import threading
+ from typing import Dict, List, Optional
+
+ import requests
+
+ from .utils import sha256_text
+
+
+ class LLMClient:
+     def __init__(
+         self,
+         api_key: str,
+         base_url: str,
+         model: str,
+         temperature: float = 0.2,
+         max_tokens: int = 1200,
+         context_limit: int = 8192,
+     ):
+         self.api_key = api_key
+         self.base_url = base_url.rstrip("/")
+         self.model = model
+         self.temperature = temperature
+         self.max_tokens = max_tokens
+         self.context_limit = context_limit
+         self._cache_lock = threading.Lock()
+
+     def _cache_key(self, payload: Dict) -> str:
+         # A stable hash of the full request payload identifies a cached response.
+         return sha256_text(json.dumps(payload, sort_keys=True))
+
+     def chat(self, messages: List[Dict[str, str]], cache: Optional[Dict[str, str]] = None) -> str:
+         payload = {
+             "model": self.model,
+             "messages": messages,
+             "temperature": self.temperature,
+             "max_tokens": self.max_tokens,
+         }
+         key = self._cache_key(payload)
+         if cache is not None:
+             with self._cache_lock:
+                 if key in cache:
+                     return cache[key]
+
+         # Accept base URLs given with or without the /v1 suffix.
+         if self.base_url.endswith("/v1"):
+             url = f"{self.base_url}/chat/completions"
+         else:
+             url = f"{self.base_url}/v1/chat/completions"
+         headers = {
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {self.api_key}",
+         }
+         # (connect, read) timeouts in seconds.
+         response = requests.post(url, headers=headers, json=payload, timeout=(120, 480))
+         response.raise_for_status()
+         data = response.json()
+         try:
+             content = data["choices"][0]["message"]["content"]
+         except Exception as exc:
+             raise RuntimeError(f"LLM response missing content: {data}") from exc
+         if cache is not None:
+             with self._cache_lock:
+                 cache[key] = content
+         return content
+
+
+ def from_env() -> LLMClient:
+     api_key = os.getenv("OPENAI_API_KEY")
+     if not api_key:
+         raise RuntimeError("OPENAI_API_KEY is not set")
+     base_url = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
+     model = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
+     temperature = float(os.getenv("OPENAI_TEMPERATURE", "0.2"))
+     max_tokens = int(os.getenv("OPENAI_MAX_TOKENS", "1200"))
+     context_limit = int(os.getenv("OPENAI_CONTEXT_TOKENS", "8192"))
+     return LLMClient(
+         api_key=api_key,
+         base_url=base_url,
+         model=model,
+         temperature=temperature,
+         max_tokens=max_tokens,
+         context_limit=context_limit,
+     )
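
For orientation, a minimal usage sketch of the client above. The values are hypothetical; it assumes OPENAI_API_KEY is exported and that a plain in-memory dict is an acceptable cache:

from ai_docs.llm import from_env

client = from_env()
cache: dict = {}  # identical payloads resolve from here on repeat calls

reply = client.chat(
    [
        {"role": "system", "content": "You are a documentation assistant."},
        {"role": "user", "content": "Summarize this module in one paragraph."},
    ],
    cache=cache,
)
print(reply)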
ai_docs/mkdocs.py ADDED
@@ -0,0 +1,161 @@
+ from pathlib import Path
+ from typing import Dict, List
+
+ import yaml
+
+
+ class _YamlPythonName(str):
+     pass
+
+
+ class _YamlSafeDumper(yaml.SafeDumper):
+     pass
+
+
+ def _python_name_representer(dumper: yaml.Dumper, data: _YamlPythonName) -> yaml.nodes.ScalarNode:
+     # Emit the value as a !!python/name: tagged scalar so MkDocs/pymdownx can
+     # resolve it to a Python callable when the config is loaded.
+     return dumper.represent_scalar(f"tag:yaml.org,2002:python/name:{data}", "")
+
+
+ _YamlSafeDumper.add_representer(_YamlPythonName, _python_name_representer)
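
A quick sketch of what the representer emits, reusing _YamlPythonName and _YamlSafeDumper from this module. The marker string becomes PyYAML's !!python/name: tag shorthand (the exact quoting of the empty scalar may vary by PyYAML version):

import yaml

print(yaml.dump({"format": _YamlPythonName("mermaid2.fence_mermaid")}, Dumper=_YamlSafeDumper))
# format: !!python/name:mermaid2.fence_mermaid ''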
+
+
+ def build_mkdocs_yaml(
+     site_name: str,
+     sections: Dict[str, str],
+     configs: Dict[str, str],
+     local_site: bool = False,
+     has_modules: bool = False,
+     module_nav_paths: List[str] | None = None,
+     project_config_nav_paths: List[str] | None = None,
+ ) -> str:
+     # Nav labels are in Russian: the generated documentation site is Russian-language.
+     nav = [
+         {"Главная": "index.md"},
+     ]
+     if "architecture" in sections:
+         nav.append({"Архитектура": "architecture.md"})
+     if "runtime" in sections:
+         nav.append({"Запуск": "runtime.md"})
+     if "dependencies" in sections:
+         nav.append({"Зависимости": "dependencies.md"})
+     if "testing" in sections:
+         nav.append({"Тестирование": "testing.md"})
+     if "conventions" in sections:
+         nav.append({"Соглашения": "conventions.md"})
+     if "glossary" in sections:
+         nav.append({"Глоссарий": "glossary.md"})
+
+     if configs:
+         cfg_nav: List[Dict[str, str]] = []
+         for key, filename in configs.items():
+             title = {
+                 "kubernetes": "Kubernetes",
+                 "helm": "Helm",
+                 "terraform": "Terraform",
+                 "ansible": "Ansible",
+                 "docker": "Docker",
+                 "ci": "CI/CD",
+             }.get(key, key)
+             cfg_nav.append({title: f"configs/{filename}"})
+         nav.append({"Конфиги": cfg_nav})
+
+     if project_config_nav_paths:
+         project_cfg_nav: List[Dict[str, object]] = [{"Обзор": "configs/index.md"}]
+         project_cfg_nav.extend(_build_tree_nav(project_config_nav_paths, "configs/files/"))
+         nav.append({"Конфигурация проекта": project_cfg_nav})
+
+     if has_modules:
+         modules_nav: List[Dict[str, object]] = [{"Обзор": "modules/index.md"}]
+         if module_nav_paths:
+             modules_nav.extend(_build_tree_nav(module_nav_paths, "modules/"))
+         nav.append({"Модули": modules_nav})
+
+     nav.append({"Изменения": "changes.md"})
+
+     data = {
+         "site_name": site_name,
+         "docs_dir": ".ai-docs",
+         "site_dir": "ai_docs_site",
+         "plugins": [
+             "search",
+             {"mermaid2": {"javascript": "js/mermaid.min.js"}},
+         ],
+         "markdown_extensions": [
+             "tables",
+             "sane_lists",
+             "attr_list",
+             "def_list",
+             "footnotes",
+             "admonition",
+             "fenced_code",
+             {
+                 "pymdownx.superfences": {
+                     "custom_fences": [
+                         {
+                             "name": "mermaid",
+                             "class": "mermaid",
+                             "format": _YamlPythonName("mermaid2.fence_mermaid"),
+                         }
+                     ]
+                 }
+             },
+         ],
+         "nav": nav,
+     }
+     if local_site:
+         # An empty site_url plus use_directory_urls=False yields a site that
+         # can be browsed directly from the filesystem via file:// links.
+         data["site_url"] = ""
+         data["use_directory_urls"] = False
+     return yaml.dump(data, allow_unicode=True, sort_keys=False, Dumper=_YamlSafeDumper)
+
+
+ def _build_tree_nav(paths: List[str], strip_prefix: str) -> List[Dict[str, object]]:
+     tree: Dict[str, object] = {}
+
+     for rel_path in paths:
+         rel = Path(rel_path).as_posix()
+         if rel.startswith(strip_prefix):
+             rel = rel[len(strip_prefix) :]
+         parts = rel.split("/")
+         if parts:
+             # Flattened doc names encode the original extension as "name__ext";
+             # restore the dot for display.
+             last = Path(parts[-1]).with_suffix("").name
+             sep = last.rfind("__")
+             if sep != -1 and sep + 2 < len(last):
+                 base = last[:sep]
+                 ext = last[sep + 2 :]
+                 parts[-1] = f"{base}.{ext}"
+             else:
+                 parts[-1] = last
+         _insert_nav_node(tree, parts, rel_path)
+
+     return _tree_to_nav(tree)
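
To make the path handling concrete, a small sketch with hypothetical inputs (traced by hand against the code above, not taken from the package's tests):

paths = ["modules/api/client__py.md", "modules/readme.md"]
print(_build_tree_nav(paths, "modules/"))
# [{'/api': [{'client.py': 'modules/api/client__py.md'}]},
#  {'readme': 'modules/readme.md'}]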
+
+
+ def _insert_nav_node(tree: Dict[str, object], parts: List[str], rel_path: str) -> None:
+     key = parts[0]
+     if len(parts) == 1:
+         tree[key] = rel_path
+         return
+     node = tree.get(key)
+     if not isinstance(node, dict):
+         node = {}
+         tree[key] = node
+     _insert_nav_node(node, parts[1:], rel_path)
+
+
+ def _tree_to_nav(tree: Dict[str, object]) -> List[Dict[str, object]]:
+     nav: List[Dict[str, object]] = []
+     # Directories (dict nodes) sort before files, then case-insensitively by name.
+     for key in sorted(tree.keys(), key=lambda k: (not isinstance(tree[k], dict), k.lower())):
+         value = tree[key]
+         if isinstance(value, dict):
+             label = key if key.startswith("/") else f"/{key}"
+             nav.append({label: _tree_to_nav(value)})
+         else:
+             nav.append({key: value})
+     return nav
+
+
+ def write_docs_files(docs_dir: Path, files: Dict[str, str]) -> None:
+     docs_dir.mkdir(parents=True, exist_ok=True)
+     for rel_path, content in files.items():
+         out_path = docs_dir / rel_path
+         out_path.parent.mkdir(parents=True, exist_ok=True)
+         out_path.write_text(content, encoding="utf-8")
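
A minimal end-to-end sketch of the two public helpers in this module; the section keys, config names, and file contents are illustrative only:

from pathlib import Path
from ai_docs.mkdocs import build_mkdocs_yaml, write_docs_files

yaml_text = build_mkdocs_yaml(
    site_name="demo-project",
    sections={"architecture": "architecture.md", "testing": "testing.md"},
    configs={"docker": "docker.md"},
    local_site=True,  # file://-browsable output
)
Path("mkdocs.yml").write_text(yaml_text, encoding="utf-8")
write_docs_files(Path(".ai-docs"), {"index.md": "# demo-project\n"})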
ai_docs/scanner.py ADDED
@@ -0,0 +1,237 @@
+ import os
+ import shutil
+ import subprocess
+ import tempfile
+ from pathlib import Path
+ from typing import Dict, List, Optional, Set, Tuple
+
+ import pathspec
+ import yaml
+
+ from .domain import (
+     CODE_EXTENSION_DESCRIPTIONS,
+     CONFIG_EXTENSION_DESCRIPTIONS,
+     DOC_EXTENSION_DESCRIPTIONS,
+     classify_type,
+     detect_domains,
+     is_infra,
+ )
+ from .utils import is_binary_file, is_url, read_text_file, to_posix
+
+
+ FIXED_INCLUDE_PATTERNS = {
+     "*.tf", "*.tfvars",
+     "Dockerfile*", "docker-compose*.yml", "docker-compose*.yaml", "compose.yml", "compose.yaml",
+     "Jenkinsfile", ".gitlab-ci.yml", "azure-pipelines.yml",
+     "requirements.txt", "pyproject.toml", "package.json", "package-lock.json",
+ }
+
+ DEFAULT_EXCLUDE_PATTERNS = {
+     ".git/*", "**/.git/*",
+     ".venv/*", ".venv/**", "**/.venv/*", "**/.venv/**",
+     "venv/*", "venv/**", "**/venv/*", "**/venv/**",
+     "**/node_modules/*",
+     "**/dist/*", "**/build/*",
+     "**/.idea/*", "**/.vscode/*", "**/__pycache__/*",
+     "**/.pytest_cache/*", "**/.mypy_cache/*",
+     "**/.ai_docs_cache/*", "**/.ai_docs_cache/**", ".ai_docs_cache/**", ".ai_docs_cache/*",
+     "**/ai_docs_site/*", "**/ai_docs_site/**", "ai_docs_site/**", "ai_docs_site/*",
+     ".ai-docs/*", ".ai-docs/**", "**/.ai-docs/*", "**/.ai-docs/**",
+     ".github/*", ".github/**", "**/.github/*", "**/.github/**",
+     "mkdocs.yml", "**/mkdocs.yml", "mkdocs_yml.md", "**/mkdocs_yml.md",
+     ".ai-docs.yaml", "**/.ai-docs.yaml",
+ }
+
+
+ class ScanResult:
+     def __init__(self, root: Path, files: List[Dict], source: str, repo_name: str):
+         self.root = root
+         self.files = files
+         self.source = source
+         self.repo_name = repo_name
+
+
+ def _normalize_extensions(raw: object, defaults: Dict[str, str]) -> Dict[str, str]:
+     # Accept either a mapping of extension -> description or a bare list of
+     # extensions; fall back to the default description when none is given.
+     normalized: Dict[str, str] = {}
+     if isinstance(raw, dict):
+         for key, value in raw.items():
+             ext = str(key).strip()
+             if not ext:
+                 continue
+             if not ext.startswith("."):
+                 ext = f".{ext}"
+             desc = value if isinstance(value, str) and value.strip() else defaults.get(ext, "")
+             normalized[ext] = desc
+     elif isinstance(raw, list):
+         for item in raw:
+             ext = str(item).strip()
+             if not ext:
+                 continue
+             if not ext.startswith("."):
+                 ext = f".{ext}"
+             normalized[ext] = defaults.get(ext, "")
+     return normalized or defaults.copy()
+
+
+ def _normalize_excludes(raw: object) -> Set[str]:
+     if not isinstance(raw, list):
+         return set()
+     return {str(item).strip() for item in raw if str(item).strip()}
+
+
+ def _load_extension_config(root: Path) -> Dict[str, object]:
+     config_path = root / ".ai-docs.yaml"
+     defaults = {
+         "code_extensions": CODE_EXTENSION_DESCRIPTIONS,
+         "doc_extensions": DOC_EXTENSION_DESCRIPTIONS,
+         "config_extensions": CONFIG_EXTENSION_DESCRIPTIONS,
+     }
+
+     if not config_path.exists():
+         # Seed the project with an editable config pre-filled with the defaults.
+         payload = {
+             "code_extensions": defaults["code_extensions"],
+             "doc_extensions": defaults["doc_extensions"],
+             "config_extensions": defaults["config_extensions"],
+         }
+         config_path.write_text(
+             yaml.safe_dump(payload, allow_unicode=True, sort_keys=False),
+             encoding="utf-8",
+         )
+         return {**{key: value.copy() for key, value in defaults.items()}, "exclude": set()}
+
+     try:
+         raw = yaml.safe_load(config_path.read_text(encoding="utf-8", errors="ignore")) or {}
+     except yaml.YAMLError:
+         return {**{key: value.copy() for key, value in defaults.items()}, "exclude": set()}
+
+     if not isinstance(raw, dict):
+         return {**{key: value.copy() for key, value in defaults.items()}, "exclude": set()}
+
+     code_raw = raw.get("code_extensions") or {}
+     doc_raw = raw.get("doc_extensions") or {}
+     config_raw = raw.get("config_extensions") or {}
+     exclude_raw = raw.get("exclude") or []
+
+     return {
+         "code_extensions": _normalize_extensions(code_raw, defaults["code_extensions"]),
+         "doc_extensions": _normalize_extensions(doc_raw, defaults["doc_extensions"]),
+         "config_extensions": _normalize_extensions(config_raw, defaults["config_extensions"]),
+         "exclude": _normalize_excludes(exclude_raw),
+     }
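
The two accepted shapes for the extension maps, traced against _normalize_extensions above (hypothetical defaults; the real ones come from ai_docs.domain):

defaults = {".py": "Python source"}

# Mapping form: blank descriptions fall back to the default for that extension.
_normalize_extensions({"py": "", ".rs": "Rust"}, defaults)
# -> {'.py': 'Python source', '.rs': 'Rust'}

# List form: bare extensions, described only where a default exists.
_normalize_extensions(["py", "toml"], defaults)
# -> {'.py': 'Python source', '.toml': ''}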
+
+
+ def _build_default_include_patterns(extension_config: Dict[str, object]) -> Set[str]:
+     extensions: Set[str] = set()
+     for key in ("code_extensions", "doc_extensions", "config_extensions"):
+         extensions.update(extension_config.get(key, {}).keys())
+     return {f"*{ext}" for ext in extensions} | FIXED_INCLUDE_PATTERNS
+
+
+ def _load_ignore_specs(root: Path) -> List[pathspec.PathSpec]:
+     specs: List[pathspec.PathSpec] = []
+     for name in (".gitignore", ".build_ignore"):
+         ignore_file = root / name
+         if not ignore_file.exists():
+             continue
+         patterns = ignore_file.read_text(encoding="utf-8", errors="ignore").splitlines()
+         specs.append(pathspec.PathSpec.from_lines("gitignore", patterns))
+     return specs
+
+
+ def _should_include(rel_path: str, include: Optional[Set[str]], exclude: Optional[Set[str]], ignore_specs: List[pathspec.PathSpec]) -> bool:
+     # Precedence: ignore files first, then explicit excludes, then includes.
+     for spec in ignore_specs:
+         if spec.match_file(rel_path):
+             return False
+     if exclude:
+         for pattern in exclude:
+             if pathspec.PathSpec.from_lines("gitignore", [pattern]).match_file(rel_path):
+                 return False
+     if not include:
+         return True
+     for pattern in include:
+         if pathspec.PathSpec.from_lines("gitignore", [pattern]).match_file(rel_path):
+             return True
+     return False
+
+
+ def _scan_directory(root: Path, include: Optional[Set[str]], exclude: Optional[Set[str]], max_size: int) -> List[Dict]:
+     files: List[Dict] = []
+     ignore_specs = _load_ignore_specs(root)
+
+     for dirpath, dirnames, filenames in os.walk(root):
+         # Prune .git in place so os.walk never descends into it.
+         dirnames[:] = [d for d in dirnames if d != ".git"]
+         for filename in filenames:
+             abs_path = Path(dirpath) / filename
+             rel_path = abs_path.relative_to(root)
+             rel_path_str = to_posix(rel_path)
+
+             if not _should_include(rel_path_str, include, exclude, ignore_specs):
+                 continue
+
+             if abs_path.is_symlink():
+                 continue
+
+             try:
+                 size = abs_path.stat().st_size
+             except OSError:
+                 continue
+
+             if max_size and size > max_size:
+                 continue
+
+             if is_binary_file(abs_path):
+                 continue
+
+             content = read_text_file(abs_path)
+             # Domain detection only needs a snippet, not the whole file.
+             content_snippet = content[:4000]
+             file_type = classify_type(abs_path)
+             domains = detect_domains(abs_path, content_snippet)
+             if is_infra(domains):
+                 file_type = "infra"
+
+             files.append(
+                 {
+                     "path": rel_path_str,
+                     "abs_path": abs_path,
+                     "size": size,
+                     "content": content,
+                     "type": file_type,
+                     "domains": sorted(domains),
+                 }
+             )
+
+     return files
+
+
+ def _clone_repo(repo_url: str) -> Tuple[Path, str]:
+     tmpdir = Path(tempfile.mkdtemp(prefix="ai_docs_"))
+     try:
+         subprocess.check_call(["git", "clone", "--depth", "1", repo_url, str(tmpdir)])
+     except Exception as exc:
+         shutil.rmtree(tmpdir, ignore_errors=True)
+         raise RuntimeError(f"Failed to clone repo: {exc}") from exc
+     # removesuffix (rather than replace) avoids mangling repo names that merely
+     # contain ".git" somewhere in the middle.
+     repo_name = repo_url.rstrip("/").split("/")[-1].removesuffix(".git")
+     return tmpdir, repo_name
+
+
+ def scan_source(source: str, include: Optional[Set[str]] = None, exclude: Optional[Set[str]] = None, max_size: int = 200_000) -> ScanResult:
+     exclude = exclude or DEFAULT_EXCLUDE_PATTERNS
+
+     if is_url(source):
+         root, repo_name = _clone_repo(source)
+         extension_config = _load_extension_config(root)
+         include = include or _build_default_include_patterns(extension_config)
+         exclude = set(exclude) | set(extension_config.get("exclude", set()))
+         files = _scan_directory(root, include, exclude, max_size)
+         return ScanResult(root=root, files=files, source=source, repo_name=repo_name)
+
+     root = Path(source).expanduser().resolve()
+     if not root.exists():
+         raise FileNotFoundError(f"Source path not found: {root}")
+     extension_config = _load_extension_config(root)
+     include = include or _build_default_include_patterns(extension_config)
+     exclude = set(exclude) | set(extension_config.get("exclude", set()))
+     files = _scan_directory(root, include, exclude, max_size)
+     return ScanResult(root=root, files=files, source=str(root), repo_name=root.name)
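
Finally, a short usage sketch for the scanner (hypothetical paths and output):

from ai_docs.scanner import scan_source

result = scan_source(".")  # or a git URL for a shallow clone
print(result.repo_name, len(result.files))
for f in result.files[:3]:
    print(f["type"], f["path"], f["size"])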