aion-evolve 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aion/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ """aion package."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
# Only the version string is exported from the package root.
__all__ = ["__version__"]

# Read the version from the installed "aion-evolve" distribution metadata.
# When that distribution is not installed, fall back to the "0.0.0"
# placeholder instead of failing at import time.
try:
    __version__ = version("aion-evolve")
except PackageNotFoundError:
    __version__ = "0.0.0"
aion/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .cli import app
2
+
3
+
4
# Invoke the `app` object imported from `.cli` only when this module is run
# as a script, not when it is merely imported.
if __name__ == "__main__":
    app()
aion/cli.py ADDED
@@ -0,0 +1,304 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import subprocess
6
+ from enum import Enum
7
+ from pathlib import Path
8
+
9
+ import typer
10
+ from rich.console import Console
11
+ from rich.panel import Panel
12
+ from rich.table import Table
13
+
14
+ from .config import AppConfig, ConfigError, load_app_config
15
+ from .context_extractor import ContextExtractor
16
+ from .llm_analyzer import LLMAnalyzer, LLMAnalyzerError
17
+ from .models import Finding, ProjectScanSummary, ScanReport, normalize_path
18
+ from .risk_heuristics import fallback_reasons
19
+ from .semgrep_runner import SemgrepError, SemgrepRunner, semgrep_available
20
+
21
# Root Typer application; `no_args_is_help=True` shows the help text when the
# program is started without arguments.
app = typer.Typer(help="AION: The Self-Evolving Code Engine. Code Once, Live Forever.", no_args_is_help=True)
# Warnings and verbose diagnostics are written through the stderr console,
# scan results through the stdout console.
stderr_console = Console(stderr=True)
stdout_console = Console()

# Lower-case substrings searched for in the (lower-cased) first 2000
# characters of a file by `_has_ai_marker`.
AI_MARKERS = (
    "generated by",
    "co-authored-by: github-copilot",
    "github-copilot[bot]",
    "@cursor",
    "cursor ai",
)
32
+
33
+
34
# Closed set of supported LLM providers. Mixing in `str` makes each member
# compare equal to its plain string value.
class Provider(str, Enum):
    anthropic = "anthropic"
    openai = "openai"
37
+
38
+
39
# Registered as the callback of `app`; it has no body beyond its docstring.
# The docstring text is left unchanged because Typer surfaces it as help text.
@app.callback()
def main() -> None:
    """AION: The Self-Evolving Code Engine. Code Once, Live Forever."""
42
+
43
+
44
# NOTE: this command is documented with comments rather than a docstring so
# that the help output Typer derives from the function is not altered.
#
# Flow: load `.aion.yaml` from the scan root, resolve provider / API key /
# model, build the project context profile, pick the files to scan, then for
# each file run semgrep (when available) and, if warranted, the LLM analyzer.
# The function always finishes through `_exit_with_summary`.
@app.command()
def scan(
    target: Path = typer.Argument(..., exists=True, readable=True, resolve_path=True),
    ai_generated: list[Path] | None = typer.Option(
        None,
        "--ai-generated",
        help="Explicit files or directories to treat as AI-generated. Can be repeated.",
    ),
    provider: Provider | None = typer.Option(None, "--provider", help="LLM provider: anthropic or openai."),
    model: str | None = typer.Option(None, help="Model name. Defaults depend on provider."),
    output: str = typer.Option("text", "--output", help="text or json"),
    verbose: bool = typer.Option(False, "--verbose", help="Print context and raw prompts to stderr."),
) -> None:
    # The project root is the target directory itself, or the directory
    # containing a single-file target.
    root = target if target.is_dir() else target.parent
    try:
        config = load_app_config(root)
    except ConfigError as exc:
        raise typer.BadParameter(str(exc)) from exc

    # Precedence: command-line option, then config file, then anthropic.
    resolved_provider = provider or _provider_from_config(config) or Provider.anthropic

    api_key = _resolve_api_key(resolved_provider)
    if not api_key:
        raise typer.BadParameter(
            _missing_api_key_message(resolved_provider)
        )
    # Precedence: command-line option, then config file, then provider default.
    resolved_model = model or config.model or _default_model_for_provider(resolved_provider)

    extractor = ContextExtractor(root=root, extra_ignore_patterns=config.ignore_paths)
    context_profile = extractor.extract()

    if verbose:
        stderr_console.print("[bold]Context profile[/bold]")
        stderr_console.print_json(json.dumps(context_profile.summary_payload(), ensure_ascii=False))

    candidate_files = _resolve_target_files(target, config.ignore_paths)
    if not candidate_files:
        # `_exit_with_summary` raises `typer.Exit` here because
        # `files_scanned` keeps its default on this summary.
        # NOTE(review): assumes the default of `files_scanned` is 0 — confirm in models.
        _exit_with_summary(ProjectScanSummary(target=normalize_path(target), warnings=["No Python files found."]), output)

    explicit_targets = _expand_explicit_targets(ai_generated or [])
    files_to_scan, detection_warnings = _detect_ai_generated_files(candidate_files, explicit_targets, root)
    summary = ProjectScanSummary(
        target=normalize_path(target),
        files_scanned=len(files_to_scan),
        warnings=detection_warnings,
    )

    runner = SemgrepRunner()
    has_semgrep = semgrep_available()
    if not has_semgrep:
        summary.warnings.append("semgrep is not installed; falling back to LLM-only mode.")

    analyzer = LLMAnalyzer(api_key=api_key, model=resolved_model, provider=resolved_provider.value, verbose=verbose)

    for file_path in files_to_scan:
        report = ScanReport(file=normalize_path(file_path), ai_generated=True)
        semgrep_findings = []
        if has_semgrep:
            try:
                semgrep_findings = runner.run(file_path)
            except SemgrepError as exc:
                # A semgrep failure is downgraded to a warning; the file is
                # still processed with an empty semgrep result.
                summary.warnings.append(f"semgrep failed for {file_path.name}: {exc}")
        report.semgrep_findings = semgrep_findings

        if verbose:
            stderr_console.print(f"[bold]Estimated token cost[/bold] {file_path}: {analyzer.estimate_tokens(file_path, context_profile)}")
            # NOTE(review): the diff rendering lost indentation; this block is
            # assumed to be nested under `if verbose:` — confirm against the
            # packaged source.
            if semgrep_findings:
                stderr_console.print("[bold]Semgrep findings[/bold]")
                stderr_console.print_json(
                    json.dumps([finding.model_dump() for finding in semgrep_findings], ensure_ascii=False)
                )

        try:
            reasons = fallback_reasons(file_path, context_profile)
            if verbose and reasons:
                stderr_console.print(f"[bold]Fallback reasons[/bold] {file_path}: {', '.join(reasons)}")

            # The LLM is skipped only when semgrep ran, reported nothing, and
            # no fallback risk signal was raised for the file.
            should_run_llm = (not has_semgrep) or bool(semgrep_findings) or bool(reasons)
            if not should_run_llm:
                report.mode = "semgrep-only"
                report.findings = []
            else:
                report.findings = analyzer.analyze(
                    file_path,
                    context_profile,
                    semgrep_findings,
                    fallback_signals=reasons,
                    console=stderr_console,
                )
                report.mode = "llm-only" if not has_semgrep else "semgrep+llm"
        except LLMAnalyzerError as exc:
            # An LLM failure becomes a warning; the report is still recorded.
            summary.warnings.append(f"LLM analysis failed for {file_path.name}: {exc}")
            report.mode = "semgrep-only" if has_semgrep else "skipped"

        summary.reports.append(report)

    _exit_with_summary(summary, output)
141
+
142
+
143
+ def _resolve_target_files(target: Path, extra_ignore_patterns: list[str] | None = None) -> list[Path]:
144
+ extra_ignore_patterns = extra_ignore_patterns or []
145
+ if target.is_file():
146
+ return [target] if target.suffix == ".py" else []
147
+ return sorted(
148
+ path
149
+ for path in target.rglob("*.py")
150
+ if not any(part in {".git", ".venv", "venv", "node_modules", "__pycache__"} for part in path.parts)
151
+ if not _matches_any_pattern(path, target, extra_ignore_patterns)
152
+ )
153
+
154
+
155
+ def _matches_any_pattern(path: Path, root: Path, patterns: list[str]) -> bool:
156
+ try:
157
+ relative = path.relative_to(root if root.is_dir() else root.parent).as_posix()
158
+ except ValueError:
159
+ relative = path.as_posix()
160
+ for pattern in patterns:
161
+ if Path(relative).match(pattern) or Path(path.name).match(pattern):
162
+ return True
163
+ return False
164
+
165
+
166
def _resolve_api_key(provider: Provider) -> str | None:
    """Look up the API key of *provider* in the environment.

    Returns the value of ``ANTHROPIC_API_KEY`` or ``OPENAI_API_KEY`` (which
    may be ``None`` when unset), or ``None`` for any other provider.
    """
    env_var_by_provider = (
        (Provider.anthropic, "ANTHROPIC_API_KEY"),
        (Provider.openai, "OPENAI_API_KEY"),
    )
    for known_provider, env_var in env_var_by_provider:
        if provider == known_provider:
            return os.getenv(env_var)
    return None
172
+
173
+
174
def _missing_api_key_message(provider: Provider) -> str:
    """Return the error text shown when the API key of *provider* is unset."""
    messages = (
        (
            Provider.anthropic,
            "ANTHROPIC_API_KEY is not set. Export it before running, for example: export ANTHROPIC_API_KEY=your_key",
        ),
        (
            Provider.openai,
            "OPENAI_API_KEY is not set. Export it before running, for example: export OPENAI_API_KEY=your_key",
        ),
    )
    for known_provider, message in messages:
        if provider == known_provider:
            return message
    # Generic wording for a provider without a dedicated message.
    return "Provider API key is not set."
180
+
181
+
182
def _default_model_for_provider(provider: Provider) -> str:
    """Return the model name used when neither CLI nor config names one.

    Raises:
        ValueError: If *provider* is neither anthropic nor openai.
    """
    defaults = (
        (Provider.anthropic, "claude-3-5-sonnet-latest"),
        (Provider.openai, "gpt-4.1"),
    )
    for known_provider, model_name in defaults:
        if provider == known_provider:
            return model_name
    raise ValueError(f"unsupported provider: {provider}")
188
+
189
+
190
+ def _provider_from_config(config: AppConfig) -> Provider | None:
191
+ if config.provider is None:
192
+ return None
193
+ try:
194
+ return Provider(config.provider)
195
+ except ValueError as exc:
196
+ raise typer.BadParameter(f"unsupported provider in .aion.yaml: {config.provider}") from exc
197
+
198
+
199
+ def _expand_explicit_targets(targets: list[Path]) -> set[str]:
200
+ expanded: set[str] = set()
201
+ for target in targets:
202
+ if target.is_dir():
203
+ for path in target.rglob("*.py"):
204
+ expanded.add(normalize_path(path))
205
+ elif target.suffix == ".py":
206
+ expanded.add(normalize_path(target))
207
+ return expanded
208
+
209
+
210
+ def _detect_ai_generated_files(
211
+ candidates: list[Path],
212
+ explicit_targets: set[str],
213
+ root: Path,
214
+ ) -> tuple[list[Path], list[str]]:
215
+ warnings: list[str] = []
216
+ if explicit_targets:
217
+ selected = [path for path in candidates if normalize_path(path) in explicit_targets]
218
+ if not selected:
219
+ warnings.append("No Python files matched --ai-generated targets.")
220
+ return selected, warnings
221
+
222
+ selected = [path for path in candidates if _has_ai_marker(path) or _git_history_has_ai_marker(path, root)]
223
+ if selected:
224
+ return selected, warnings
225
+
226
+ warnings.append("No AI-generated markers found; scanning all Python files.")
227
+ return candidates, warnings
228
+
229
+
230
+ def _has_ai_marker(path: Path) -> bool:
231
+ try:
232
+ content = path.read_text(encoding="utf-8", errors="ignore")[:2000].lower()
233
+ except OSError:
234
+ return False
235
+ return any(marker in content for marker in AI_MARKERS)
236
+
237
+
238
+ def _git_history_has_ai_marker(path: Path, root: Path) -> bool:
239
+ command = [
240
+ "git",
241
+ "-C",
242
+ str(root),
243
+ "log",
244
+ "--format=%an%n%B",
245
+ "--",
246
+ str(path),
247
+ ]
248
+ try:
249
+ result = subprocess.run(command, capture_output=True, text=True, check=False)
250
+ except OSError:
251
+ return False
252
+ if result.returncode != 0:
253
+ return False
254
+ history = result.stdout.lower()
255
+ return "github-copilot" in history or "cursor" in history
256
+
257
+
258
def _exit_with_summary(summary: ProjectScanSummary, output: str) -> None:
    """Render *summary* and, where applicable, terminate the command.

    * ``output == "json"``: print the summary as JSON and raise
      ``typer.Exit(code=0)``.
    * otherwise: print warnings to stderr; when no file was scanned print a
      notice and raise ``typer.Exit(code=0)``; else print an overview panel
      followed by one findings table per report that has findings.

    Raises:
        typer.Exit: In JSON mode and when ``summary.files_scanned`` is 0.
    """
    if output == "json":
        stdout_console.print_json(summary.model_dump_json())
        raise typer.Exit(code=0)

    for warning in summary.warnings:
        stderr_console.print(f"[yellow]warning:[/yellow] {warning}")

    if summary.files_scanned == 0:
        stdout_console.print(Panel("No Python files were scanned.", title="AION"))
        raise typer.Exit(code=0)

    overview = f"Target: {summary.target}\nFiles scanned: {summary.files_scanned}\nFindings: {summary.finding_count}"
    stdout_console.print(Panel(overview, title="AION"))

    reports_with_findings = [report for report in summary.sorted_reports() if report.findings]
    for report in reports_with_findings:
        table = Table(title=report.file)
        table.add_column("Severity")
        table.add_column("Line", justify="right")
        table.add_column("Issue")
        table.add_column("Context Gap")
        table.add_column("Fix")
        # Rows are ordered by severity rank, then by line number.
        for finding in sorted(report.findings, key=_severity_sort_key):
            row = (
                finding.severity,
                str(finding.line),
                finding.issue,
                finding.context_gap,
                finding.fix,
            )
            table.add_row(*row)
        stdout_console.print(table)

    if not reports_with_findings:
        stdout_console.print("[green]No findings reported.[/green]")
300
+
301
+
302
+ def _severity_sort_key(finding: Finding) -> tuple[int, int]:
303
+ order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
304
+ return order.get(finding.severity, 4), finding.line
aion/config.py ADDED
@@ -0,0 +1,83 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+
7
+
8
@dataclass
class AppConfig:
    """Settings read from a project's ``.aion.yaml``; every field is optional."""

    # Provider name exactly as written in the config file (not validated here).
    provider: str | None = None
    # Model name as written in the config file.
    model: str | None = None
    # Extra glob patterns of paths to leave out; a fresh list per instance.
    ignore_paths: list[str] = field(default_factory=list)
13
+
14
+
15
class ConfigError(RuntimeError):
    """Raised when the ``.aion.yaml`` content cannot be interpreted."""

    pass
17
+
18
+
19
def load_app_config(root: Path) -> AppConfig:
    """Load ``<root>/.aion.yaml``, or return defaults when the file is absent."""
    config_file = root / ".aion.yaml"
    if config_file.exists():
        return _parse_config(config_file)
    return AppConfig()
25
+
26
+
27
def _parse_config(path: Path) -> AppConfig:
    """Parse the minimal YAML subset accepted in ``.aion.yaml``.

    Supported syntax: blank lines, ``#`` comment lines, top-level
    ``key: value`` scalars, and ``key:`` followed by indented ``- item`` lines.

    Args:
        path: Location of the config file.

    Returns:
        The populated ``AppConfig``. ``provider`` and ``model`` are ``None``
        when the key is missing or written without a value.

    Raises:
        ConfigError: If the file cannot be read or decoded, a line is
            unexpectedly indented, a line has no ``:``, a list item is
            malformed, ``ignore_paths`` is not a list, or ``provider`` /
            ``model`` is given as a list of values.
    """
    try:
        lines = path.read_text(encoding="utf-8").splitlines()
    except (OSError, UnicodeDecodeError) as exc:
        # Surface I/O problems through the error type callers already handle.
        raise ConfigError(f"failed to read {path}: {exc}") from exc

    data: dict[str, object] = {}
    index = 0

    while index < len(lines):
        raw_line = lines[index]
        stripped = raw_line.strip()
        index += 1

        if not stripped or stripped.startswith("#"):
            continue
        if raw_line.startswith((" ", "\t")):
            # Indented lines are only legal as list items, which are consumed
            # by the inner loop below.
            raise ConfigError(f"unexpected indentation in {path}")
        if ":" not in raw_line:
            raise ConfigError(f"invalid config line: {raw_line}")

        key, _, value = raw_line.partition(":")
        key = key.strip()
        value = value.strip()

        if value:
            data[key] = _parse_scalar(value)
            continue

        # "key:" with nothing after it: gather the indented "- item" lines.
        items: list[str] = []
        while index < len(lines):
            nested_raw = lines[index]
            nested = nested_raw.strip()
            if not nested or nested.startswith("#"):
                index += 1
                continue
            if not nested_raw.startswith((" ", "\t")):
                break  # next top-level key
            if not nested.startswith("- "):
                raise ConfigError(f"invalid list item in {path}: {nested_raw}")
            items.append(_parse_scalar(nested[2:].strip()))
            index += 1
        data[key] = items

    ignore_paths = data.get("ignore_paths", [])
    if not isinstance(ignore_paths, list):
        raise ConfigError("ignore_paths must be a list")

    def scalar_setting(name: str) -> str | None:
        """Return the scalar stored under *name*, or None when unset/empty."""
        stored = data.get(name)
        if stored is None:
            return None
        if isinstance(stored, list):
            if stored:
                raise ConfigError(f"{name} must be a single value, not a list")
            # "name:" without a value is recorded as an empty list above;
            # treat it as "not configured" rather than the text "[]".
            return None
        return str(stored)

    return AppConfig(
        provider=scalar_setting("provider"),
        model=scalar_setting("model"),
        ignore_paths=[str(item) for item in ignore_paths],
    )
78
+
79
+
80
+ def _parse_scalar(value: str) -> str:
81
+ if (value.startswith("'") and value.endswith("'")) or (value.startswith('"') and value.endswith('"')):
82
+ return ast.literal_eval(value)
83
+ return value
aion/context_extractor.py ADDED
@@ -0,0 +1,243 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ import hashlib
5
+ import json
6
+ import random
7
+ from dataclasses import dataclass
8
+ from fnmatch import fnmatch
9
+ from pathlib import Path
10
+
11
+ from .models import ContextProfile, normalize_path
12
+
13
# Directory names whose contents are never collected.
DEFAULT_EXCLUDES = {
    ".git",
    ".hg",
    ".svn",
    ".venv",
    "venv",
    "node_modules",
    "__pycache__",
}
# Module-name prefixes used to classify imports (compared against the
# lower-cased module name with `str.startswith`).
ORM_IMPORTS = ("sqlalchemy", "django.db", "peewee", "tortoise", "pony", "ormar")
HTTP_IMPORTS = ("httpx", "requests", "aiohttp", "urllib3")
LOW_LEVEL_DB_IMPORTS = ("sqlite3", "pymysql", "psycopg2", "mysql.connector", "MySQLdb")
# Dotted call targets recognized as database access, mapped to the label
# recorded in the profile.
DB_CALL_PATTERNS = {
    "session.query": "session.query()",
    "session.execute": "session.execute()",
    "db.execute": "db.execute()",
    "cursor.execute": "cursor.execute()",
    "Model.objects": "Model.objects",
}
32
+
33
+
34
@dataclass
class ExtractedFileData:
    """Facts extracted from one Python file; also the shape of a cache entry's data."""

    # Imported module names (`import x` names and `from x import ...` modules).
    imports: list[str]
    # Rendered decorator names of functions, each prefixed with "@".
    decorators: list[str]
    # Labels from DB_CALL_PATTERNS for the matching calls that were found.
    db_patterns: list[str]
    # Names of all (async) function definitions.
    function_names: list[str]
    # ORM / HTTP client / low-level DB driver names detected among the imports.
    orm_candidates: list[str]
    http_candidates: list[str]
    low_level_db_imports: list[str]
43
+
44
+
45
class ContextExtractor:
    """Build a ``ContextProfile`` describing the conventions of a project.

    Python files below ``root`` are parsed with ``ast``; imports, decorators,
    function names and database call patterns are aggregated. Per-file results
    are cached on disk keyed by normalized path and content hash.
    """

    def __init__(
        self,
        root: Path,
        max_files: int = 500,
        cache_path: Path | None = None,
        extra_ignore_patterns: list[str] | None = None,
    ):
        """Prepare an extractor for *root*.

        Args:
            root: Project directory; stored resolved.
            max_files: Upper bound on parsed files; larger projects are sampled.
            cache_path: Cache file; defaults to ``~/.aion-context.json``.
            extra_ignore_patterns: Globs added to those read from ``.gitignore``.
        """
        self.root = root.resolve()
        self.max_files = max_files
        self.cache_path = cache_path or Path.home() / ".aion-context.json"
        self._cache = self._load_cache()
        self._ignore_patterns = self._load_gitignore_patterns()
        self._ignore_patterns.extend(extra_ignore_patterns or [])

    def extract(self) -> ContextProfile:
        """Scan the project and return the aggregated profile.

        Files that cannot be read or parsed are listed in
        ``profile.skipped_files``. The cache is written back before returning.
        """
        python_files = self._collect_python_files()
        sampled = False
        if len(python_files) > self.max_files:
            sampled = True
            # Fixed seed: the same project always yields the same sample.
            randomizer = random.Random(42)
            python_files = sorted(randomizer.sample(python_files, self.max_files))

        profile = ContextProfile(scanned_files=len(python_files), sampled=sampled)
        orm_votes: dict[str, int] = {}
        http_votes: dict[str, int] = {}
        import_set: set[str] = set()
        decorator_set: set[str] = set()
        db_pattern_set: set[str] = set()
        function_set: set[str] = set()
        low_level_set: set[str] = set()

        for file_path in python_files:
            extracted = self._extract_with_cache(file_path)
            if extracted is None:
                profile.skipped_files.append(normalize_path(file_path))
                continue
            import_set.update(extracted.imports)
            decorator_set.update(extracted.decorators)
            db_pattern_set.update(extracted.db_patterns)
            function_set.update(extracted.function_names)
            low_level_set.update(extracted.low_level_db_imports)
            # One vote per file that imports the library.
            for orm in extracted.orm_candidates:
                orm_votes[orm] = orm_votes.get(orm, 0) + 1
            for client in extracted.http_candidates:
                http_votes[client] = http_votes.get(client, 0) + 1

        profile.imports = sorted(import_set)[:50]
        profile.auth_decorators = sorted(decorator_set)[:20]
        profile.db_patterns = sorted(db_pattern_set)[:20]
        profile.function_names = sorted(function_set)[:30]
        profile.low_level_db_imports = sorted(low_level_set)[:20]
        profile.orm = self._pick_top_vote(orm_votes)
        profile.http_client = self._pick_top_vote(http_votes)
        self._write_cache()
        return profile

    def _collect_python_files(self) -> list[Path]:
        """Return the sorted ``*.py`` files below the root that are not excluded."""
        files: list[Path] = []
        for path in self.root.rglob("*.py"):
            relative = path.relative_to(self.root)
            # Only components below the root are checked; an ancestor of the
            # project that happens to be called "venv" must not hide everything.
            if any(part in DEFAULT_EXCLUDES for part in relative.parts):
                continue
            if self._is_ignored(relative.as_posix()):
                continue
            files.append(path)
        return sorted(files)

    def _is_ignored(self, relative_path: str) -> bool:
        """Tell whether the relative path or its file name matches an ignore glob."""
        file_name = Path(relative_path).name
        return any(
            fnmatch(relative_path, pattern) or fnmatch(file_name, pattern)
            for pattern in self._ignore_patterns
        )

    def _load_gitignore_patterns(self) -> list[str]:
        """Translate ``<root>/.gitignore`` into fnmatch patterns.

        Comments, blank lines and negations (``!``) are skipped. A leading
        ``/`` is dropped because patterns are matched against root-relative
        paths. Directory patterns (trailing ``/``) additionally yield a
        ``dir/*`` pattern. A missing or unreadable file yields no patterns.
        """
        gitignore = self.root / ".gitignore"
        try:
            text = gitignore.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            return []

        patterns: list[str] = []
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line or line.startswith(("#", "!")):
                continue
            anchored = line.lstrip("/")
            if not anchored:
                continue  # the line consisted of slashes only
            if anchored.endswith("/"):
                patterns.append(f"{anchored}*")
            patterns.append(anchored)
        return patterns

    def _extract_with_cache(self, file_path: Path) -> ExtractedFileData | None:
        """Return cached data for *file_path* when its hash matches, else parse it.

        Returns ``None`` when the file cannot be read or parsed.
        """
        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            return None  # reported by the caller as a skipped file

        digest = hashlib.sha256(content.encode("utf-8")).hexdigest()
        cache_key = normalize_path(file_path)
        cached = self._cache.get(cache_key)
        if isinstance(cached, dict) and cached.get("sha256") == digest:
            try:
                return ExtractedFileData(**cached["data"])
            except (KeyError, TypeError):
                # Entry written by another version or damaged: re-parse below.
                pass

        parsed = self._extract_file(file_path, content)
        if parsed is None:
            return None
        self._cache[cache_key] = {"sha256": digest, "data": parsed.__dict__}
        return parsed

    def _extract_file(self, file_path: Path, content: str) -> ExtractedFileData | None:
        """Parse *content* and collect imports, decorators, functions and DB calls.

        Returns ``None`` when the source is not parseable.
        """
        try:
            tree = ast.parse(content, filename=str(file_path))
        except (SyntaxError, ValueError):
            # ValueError: e.g. null bytes in the source on older interpreters.
            return None

        imports: set[str] = set()
        decorators: set[str] = set()
        db_patterns: set[str] = set()
        function_names: set[str] = set()
        orm_candidates: set[str] = set()
        http_candidates: set[str] = set()
        low_level_db_imports: set[str] = set()

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    imports.add(alias.name)
                    self._classify_import(alias.name, orm_candidates, http_candidates, low_level_db_imports)
            elif isinstance(node, ast.ImportFrom):
                module = node.module or ""  # None for "from . import x"
                if module:
                    imports.add(module)
                    self._classify_import(module, orm_candidates, http_candidates, low_level_db_imports)
            elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                function_names.add(node.name)
                for decorator in node.decorator_list:
                    rendered = self._render_name(decorator)
                    if rendered:
                        decorators.add(f"@{rendered}")
            elif isinstance(node, ast.Call):
                rendered = self._render_name(node.func)
                if rendered and rendered in DB_CALL_PATTERNS:
                    db_patterns.add(DB_CALL_PATTERNS[rendered])

        return ExtractedFileData(
            imports=sorted(imports),
            decorators=sorted(decorators),
            db_patterns=sorted(db_patterns),
            function_names=sorted(function_names),
            orm_candidates=sorted(orm_candidates),
            http_candidates=sorted(http_candidates),
            low_level_db_imports=sorted(low_level_db_imports),
        )

    def _classify_import(
        self,
        module_name: str,
        orm_candidates: set[str],
        http_candidates: set[str],
        low_level_db_imports: set[str],
    ) -> None:
        """Add *module_name* to the candidate sets whose known prefixes it starts with."""
        lowered = module_name.lower()
        for orm in ORM_IMPORTS:
            if lowered.startswith(orm):
                # Record the top-level package only ("django.db" -> "django").
                orm_candidates.add(orm.split(".")[0])
        for client in HTTP_IMPORTS:
            if lowered.startswith(client):
                http_candidates.add(client)
        for db_import in LOW_LEVEL_DB_IMPORTS:
            if lowered.startswith(db_import.lower()):
                low_level_db_imports.add(db_import)

    def _render_name(self, node: ast.AST) -> str | None:
        """Render a name, attribute chain or call target as dotted text.

        Returns ``None`` for any other node type.
        """
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            base = self._render_name(node.value)
            return f"{base}.{node.attr}" if base else node.attr
        if isinstance(node, ast.Call):
            return self._render_name(node.func)
        return None

    def _pick_top_vote(self, votes: dict[str, int]) -> str | None:
        """Return the key with the most votes (ties: alphabetically first), or None."""
        if not votes:
            return None
        winner, _count = min(votes.items(), key=lambda item: (-item[1], item[0]))
        return winner

    def _load_cache(self) -> dict[str, dict[str, object]]:
        """Read the cache file; any unreadable or malformed content yields ``{}``."""
        try:
            loaded = json.loads(self.cache_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both invalid JSON and undecodable bytes.
            return {}
        # Anything but a JSON object cannot be used as the cache mapping.
        return loaded if isinstance(loaded, dict) else {}

    def _write_cache(self) -> None:
        """Persist the cache; write failures are ignored (the cache is best-effort)."""
        try:
            self.cache_path.write_text(
                json.dumps(self._cache, indent=2, sort_keys=True),
                encoding="utf-8",
            )
        except OSError:
            pass
aion/evaluation.py ADDED
@@ -0,0 +1,142 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+ from .llm_analyzer import LLMAnalyzer, LLMAnalyzerError
8
+ from .models import ContextProfile, Finding, SemgrepFinding
9
+ from .risk_heuristics import fallback_reasons
10
+ from .semgrep_runner import SemgrepError, SemgrepRunner, semgrep_available
11
+
12
+
13
@dataclass(frozen=True)
class FixtureCase:
    """One labelled evaluation fixture, as described by an entry of ``labels.json``."""

    # Key of the entry in labels.json, relative to the fixtures root.
    relative_path: str
    # Source file to analyze (fixtures root joined with relative_path).
    source_path: Path
    # Sibling "<prefix>_context.json" file holding the context profile.
    context_path: Path
    # Ground-truth label: whether the fixture contains a vulnerability.
    has_vuln: bool
    # Expected context-gap text taken from the label metadata.
    expected_context_gap: str
20
+
21
+
22
@dataclass(frozen=True)
class FixturePrediction:
    """Outcome of analyzing a single fixture case."""

    case: FixtureCase
    # Findings returned by the analyzer (empty when it was skipped or failed).
    findings: list[Finding]
    semgrep_findings: list[SemgrepFinding]
    # Whether semgrep was available during the evaluation run.
    used_semgrep: bool

    @property
    def predicted_vulnerable(self) -> bool:
        """True when at least one finding was produced."""
        return bool(self.findings)
32
+
33
+
34
@dataclass(frozen=True)
class EvalMetrics:
    """Confusion-matrix counts with derived precision and recall."""

    true_positive: int
    false_positive: int
    true_negative: int
    false_negative: int

    @property
    def precision(self) -> float:
        """TP / (TP + FP); defined as 1.0 when nothing was predicted positive."""
        predicted_positive = self.true_positive + self.false_positive
        if not predicted_positive:
            return 1.0
        return self.true_positive / predicted_positive

    @property
    def recall(self) -> float:
        """TP / (TP + FN); defined as 1.0 when there is no actual positive."""
        actual_positive = self.true_positive + self.false_negative
        if not actual_positive:
            return 1.0
        return self.true_positive / actual_positive
50
+
51
+
52
def load_fixture_cases(fixtures_root: Path) -> list[FixtureCase]:
    """Build the fixture cases listed in ``<fixtures_root>/labels.json``.

    Cases are returned in sorted order of their label keys. The context file
    of a case sits next to its source and is named after the part of the
    source file name before the first underscore, plus ``_context.json``.
    """
    labels_text = (fixtures_root / "labels.json").read_text(encoding="utf-8")
    labels = json.loads(labels_text)

    def build_case(relative_path, metadata):
        source_path = fixtures_root / relative_path
        prefix, _, _ = source_path.name.partition("_")
        return FixtureCase(
            relative_path=relative_path,
            source_path=source_path,
            context_path=source_path.with_name(f"{prefix}_context.json"),
            has_vuln=bool(metadata["has_vuln"]),
            expected_context_gap=str(metadata["expected_context_gap"]),
        )

    return [build_case(relative_path, metadata) for relative_path, metadata in sorted(labels.items())]
69
+
70
+
71
def load_context_profile(context_path: Path) -> ContextProfile:
    """Read a JSON object from *context_path* and build a ``ContextProfile`` from its keys."""
    raw_text = context_path.read_text(encoding="utf-8")
    fields = json.loads(raw_text)
    return ContextProfile(**fields)
74
+
75
+
76
def evaluate_cases(
    cases: list[FixtureCase],
    api_key: str,
    model: str = "claude-3-5-sonnet-latest",
    provider: str = "anthropic",
    ignore_llm_errors: bool = True,
) -> list[FixturePrediction]:
    """Run the scan pipeline over fixture cases and collect predictions.

    For each case semgrep is run when available (failures count as "no
    findings"). The LLM analyzer is skipped only when semgrep is available,
    found nothing, and no fallback risk signal exists.

    Args:
        cases: Fixtures to evaluate.
        api_key: Key passed to the analyzer.
        model: Model name passed to the analyzer.
        provider: Provider name passed to the analyzer.
        ignore_llm_errors: When true, an analyzer failure yields an empty
            findings list; when false the error propagates.

    Raises:
        LLMAnalyzerError: Only when *ignore_llm_errors* is false.
    """
    analyzer = LLMAnalyzer(api_key=api_key, model=model, provider=provider)
    runner = SemgrepRunner()
    use_semgrep = semgrep_available()
    predictions: list[FixturePrediction] = []

    for case in cases:
        context_profile = load_context_profile(case.context_path)

        semgrep_findings: list[SemgrepFinding] = []
        if use_semgrep:
            try:
                semgrep_findings = runner.run(case.source_path)
            except SemgrepError:
                pass  # keep the empty list

        try:
            reasons = fallback_reasons(case.source_path, context_profile)
            skip_llm = use_semgrep and not semgrep_findings and not reasons
            if skip_llm:
                findings = []
            else:
                findings = analyzer.analyze(
                    case.source_path,
                    context_profile,
                    semgrep_findings,
                    fallback_signals=reasons,
                )
        except LLMAnalyzerError:
            if not ignore_llm_errors:
                raise
            findings = []

        prediction = FixturePrediction(
            case=case,
            findings=findings,
            semgrep_findings=semgrep_findings,
            used_semgrep=use_semgrep,
        )
        predictions.append(prediction)

    return predictions
122
+
123
+
124
def compute_metrics(predictions: list[FixturePrediction]) -> EvalMetrics:
    """Tally predictions against their labels into confusion-matrix counts."""
    # Keys are (actually vulnerable, predicted vulnerable).
    tally = {
        (True, True): 0,
        (True, False): 0,
        (False, True): 0,
        (False, False): 0,
    }
    for prediction in predictions:
        outcome = (bool(prediction.case.has_vuln), bool(prediction.predicted_vulnerable))
        tally[outcome] += 1

    return EvalMetrics(
        true_positive=tally[(True, True)],
        false_positive=tally[(False, True)],
        true_negative=tally[(False, False)],
        false_negative=tally[(True, False)],
    )
aion/llm_analyzer.py ADDED
@@ -0,0 +1,219 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Literal
6
+
7
+ from .models import ContextProfile, Finding, LLMScanResponse, SemgrepFinding
8
+
9
# Provider names accepted by LLMAnalyzer.
LLMProvider = Literal["anthropic", "openai"]
10
+
11
+
12
class LLMAnalyzerError(RuntimeError):
    """Raised by LLMAnalyzer for unreadable targets, missing SDKs, unsupported providers and failed completions."""

    pass
14
+
15
+
16
class LLMAnalyzer:
    """Review a Python file with an LLM, chunk by chunk, and return findings."""

    def __init__(
        self,
        api_key: str,
        model: str,
        provider: LLMProvider = "anthropic",
        max_chunk_lines: int = 200,
        overlap_lines: int = 50,
        verbose: bool = False,
    ):
        """Store the analyzer settings; no client is created here.

        Args:
            api_key: Key handed to the provider SDK.
            model: Model name sent with every request.
            provider: ``"anthropic"`` or ``"openai"``.
            max_chunk_lines: Maximum number of source lines per request.
            overlap_lines: Lines shared between consecutive chunks.
            verbose: When true, prompts are printed to the console passed to
                ``analyze``.
        """
        self.api_key = api_key
        self.model = model
        self.provider = provider
        self.max_chunk_lines = max_chunk_lines
        self.overlap_lines = overlap_lines
        self.verbose = verbose

    def analyze(
        self,
        target: Path,
        context_profile: ContextProfile,
        semgrep_findings: list[SemgrepFinding],
        fallback_signals: list[str] | None = None,
        console=None,
    ) -> list[Finding]:
        """Analyze *target* and return de-duplicated findings with file-level line numbers.

        Args:
            target: File to review.
            context_profile: Project context embedded in every prompt.
            semgrep_findings: Semgrep results; each chunk's prompt lists those
                located inside that chunk.
            fallback_signals: Extra risk hints embedded in the prompt.
            console: Object with a ``print`` method used for verbose output.

        Raises:
            LLMAnalyzerError: If the file cannot be read, the client cannot be
                created, or a completion request fails.
        """
        try:
            source = target.read_text(encoding="utf-8", errors="ignore")
        except OSError as exc:
            raise LLMAnalyzerError(f"failed to read {target}: {exc}") from exc

        client = self._create_client()
        chunks = self._chunk_source(source)
        findings: list[Finding] = []
        for chunk in chunks:
            prompt = self._build_prompt(
                target=target,
                chunk_text=chunk["text"],
                start_line=chunk["start_line"],
                end_line=chunk["end_line"],
                context_profile=context_profile,
                semgrep_findings=semgrep_findings,
                fallback_signals=fallback_signals or [],
            )
            if self.verbose and console is not None:
                console.print("[bold]LLM prompt[/bold]")
                console.print(prompt)
            try:
                response = self._create_completion(client, prompt)
            except Exception as exc:  # noqa: BLE001
                # Any SDK failure is reported through the analyzer's own error type.
                raise LLMAnalyzerError(str(exc)) from exc

            for finding in response.findings:
                findings.append(
                    Finding(
                        issue=finding.issue,
                        severity=finding.severity,
                        # The model reports chunk-relative lines (see prompt rules).
                        line=self._remap_line(chunk["start_line"], finding.line),
                        context_gap=finding.context_gap,
                        fix=finding.fix,
                        semgrep_rule=finding.semgrep_rule,
                    )
                )
        return self._deduplicate(findings)

    def _create_client(self):
        """Create the instructor-wrapped client for the configured provider.

        SDKs are imported lazily so that only the one in use must be installed.

        Raises:
            LLMAnalyzerError: If a required package is missing or the provider
                is not supported.
        """
        try:
            import instructor
        except ImportError as exc:
            raise LLMAnalyzerError("instructor is not installed") from exc

        if self.provider == "anthropic":
            try:
                from anthropic import Anthropic
            except ImportError as exc:
                raise LLMAnalyzerError("anthropic is not installed") from exc
            return instructor.from_anthropic(Anthropic(api_key=self.api_key))

        if self.provider == "openai":
            try:
                from openai import OpenAI
            except ImportError as exc:
                raise LLMAnalyzerError("openai is not installed") from exc
            return instructor.from_openai(OpenAI(api_key=self.api_key))

        raise LLMAnalyzerError(f"unsupported provider: {self.provider}")

    def _create_completion(self, client, prompt: str) -> LLMScanResponse:
        """Send *prompt* as a single user message and return the structured response.

        Raises:
            LLMAnalyzerError: If the provider is not supported.
        """
        messages = [
            {
                "role": "user",
                "content": prompt,
            }
        ]
        if self.provider == "anthropic":
            return client.messages.create(
                model=self.model,
                max_tokens=1800,
                temperature=0,
                response_model=LLMScanResponse,
                messages=messages,
            )

        if self.provider == "openai":
            return client.chat.completions.create(
                model=self.model,
                max_completion_tokens=1800,
                response_model=LLMScanResponse,
                messages=messages,
            )

        raise LLMAnalyzerError(f"unsupported provider: {self.provider}")

    def estimate_tokens(self, target: Path, context_profile: ContextProfile) -> int:
        """Estimate the token cost as (source chars + context JSON chars) // 4, at least 1."""
        source = target.read_text(encoding="utf-8", errors="ignore")
        payload = json.dumps(context_profile.summary_payload(), ensure_ascii=False)
        total_chars = len(source) + len(payload)
        return max(total_chars // 4, 1)

    def _build_prompt(
        self,
        target: Path,
        chunk_text: str,
        start_line: int,
        end_line: int,
        context_profile: ContextProfile,
        semgrep_findings: list[SemgrepFinding],
        fallback_signals: list[str],
    ) -> str:
        """Compose the review prompt for one chunk.

        Only semgrep findings whose line lies within ``start_line..end_line``
        are included.
        """
        semgrep_summary = [
            {
                "rule": finding.check_id,
                "line": finding.line,
                "severity": finding.severity,
                "message": finding.message,
            }
            for finding in semgrep_findings
            if start_line <= finding.line <= end_line
        ]
        return (
            "You are reviewing AI-generated Python code for security issues.\n"
            "Focus on context-blindness: places where the code ignores established project patterns.\n"
            "Return only structured findings matching the response schema.\n\n"
            f"Target file: {target}\n"
            f"Chunk line range: {start_line}-{end_line}\n"
            f"Project context summary: {json.dumps(context_profile.summary_payload(), ensure_ascii=False)}\n"
            f"Semgrep findings in this chunk: {json.dumps(semgrep_summary, ensure_ascii=False)}\n\n"
            f"Fallback risk signals: {json.dumps(fallback_signals, ensure_ascii=False)}\n\n"
            "Rules:\n"
            "- Report only concrete security issues.\n"
            "- line must be relative to this chunk, not the whole file.\n"
            "- context_gap must explain what the AI likely did not know about the project.\n"
            "- If project context implies ORM/auth/rate-limit usage, mention the exact project pattern being bypassed.\n"
            "- If fallback risk signals are present, investigate them directly even if Semgrep found nothing.\n"
            "- Do not treat an empty Semgrep result as evidence that the code is safe.\n"
            "- Keep fixes actionable and specific.\n\n"
            "Code chunk:\n"
            f"{chunk_text}"
        )

    def _chunk_source(self, source: str) -> list[dict[str, object]]:
        """Split *source* into overlapping line windows.

        Returns a list of dicts with ``text``, 1-based ``start_line`` and
        inclusive ``end_line``. A source that fits into one window is returned
        as a single chunk holding the original text.
        """
        lines = source.splitlines()
        if len(lines) <= self.max_chunk_lines:
            return [
                {
                    "text": source,
                    "start_line": 1,
                    "end_line": max(len(lines), 1),
                }
            ]

        # Guard against degenerate settings: a window of at least one line and
        # a strictly positive step. With overlap >= window size the step would
        # be <= 0 and the loop below would never reach the end of the file.
        window = max(self.max_chunk_lines, 1)
        step = max(window - self.overlap_lines, 1)

        chunks: list[dict[str, object]] = []
        start = 0
        while start < len(lines):
            end = min(start + window, len(lines))
            chunks.append(
                {
                    "text": "\n".join(lines[start:end]),
                    "start_line": start + 1,
                    "end_line": end,
                }
            )
            if end >= len(lines):
                break
            start += step
        return chunks

    def _remap_line(self, chunk_start_line: int, reported_line: int) -> int:
        """Convert a chunk-relative line into a file line, never below 1."""
        return max(chunk_start_line + reported_line - 1, 1)

    def _deduplicate(self, findings: list[Finding]) -> list[Finding]:
        """Drop findings repeating an earlier (line, issue, severity) triple.

        The result is sorted by that triple; overlapping chunks are the
        expected source of duplicates.
        """
        seen: set[tuple[int, str, str]] = set()
        unique: list[Finding] = []
        for finding in sorted(findings, key=lambda item: (item.line, item.issue, item.severity)):
            key = (finding.line, finding.issue, finding.severity)
            if key in seen:
                continue
            seen.add(key)
            unique.append(finding)
        return unique
aion/models.py ADDED
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Literal
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
# Closed set of severity labels a finding may carry.
Severity = Literal["critical", "high", "medium", "low"]
10
+
11
+
12
# Profile of a scanned project: detected ORM, auth decorators, database
# patterns, HTTP client, imports and function names, plus scan bookkeeping.
# (Documented with comments rather than a docstring so the generated model
# schema is unchanged.)
class ContextProfile(BaseModel):
    orm: str | None = None
    auth_decorators: list[str] = Field(default_factory=list)
    db_patterns: list[str] = Field(default_factory=list)
    low_level_db_imports: list[str] = Field(default_factory=list)
    http_client: str | None = None
    imports: list[str] = Field(default_factory=list)
    function_names: list[str] = Field(default_factory=list)
    scanned_files: int = 0
    sampled: bool = False
    skipped_files: list[str] = Field(default_factory=list)

    def summary_payload(self) -> dict[str, object]:
        """Return a size-capped dictionary view of the profile.

        List fields are truncated: 20 entries for ``auth_decorators``,
        ``db_patterns`` and ``low_level_db_imports``, 50 for ``imports`` and
        30 for ``function_names``. ``skipped_files`` is not included.
        """
        payload: dict[str, object] = {}
        payload["orm"] = self.orm
        payload["auth_decorators"] = self.auth_decorators[:20]
        payload["db_patterns"] = self.db_patterns[:20]
        payload["low_level_db_imports"] = self.low_level_db_imports[:20]
        payload["http_client"] = self.http_client
        payload["imports"] = self.imports[:50]
        payload["function_names"] = self.function_names[:30]
        payload["scanned_files"] = self.scanned_files
        payload["sampled"] = self.sampled
        return payload
36
+
37
+
38
# One result reported by Semgrep for a file.
class SemgrepFinding(BaseModel):
    check_id: str  # identifier of the Semgrep rule that matched
    path: str
    line: int  # first line of the match
    end_line: int | None = None  # last line of the match, when reported
    severity: str = "INFO"
    message: str
    code: str | None = None  # matched source lines, when reported
    metadata: dict[str, object] = Field(default_factory=dict)
47
+
48
+
49
# One security issue reported for a file.
# Documented with comments rather than a docstring so the generated model
# schema is unchanged.
class Finding(BaseModel):
    issue: str
    severity: Severity
    line: int
    context_gap: str  # what the code's author likely did not know about the project
    fix: str
    semgrep_rule: str | None = None
56
+
57
+
58
# Result of scanning a single file: its findings, the Semgrep results,
# whether the file was treated as AI-generated, and which analysis mode ran.
class ScanReport(BaseModel):
    file: str
    findings: list[Finding] = Field(default_factory=list)
    semgrep_findings: list[SemgrepFinding] = Field(default_factory=list)
    ai_generated: bool = False
    # Defaults to "skipped" until a scan mode is recorded.
    mode: Literal["semgrep+llm", "llm-only", "semgrep-only", "skipped"] = "skipped"
64
+
65
+
66
# Container for a list of findings; empty by default.
# Documented with comments rather than a docstring so the generated model
# schema is unchanged.
class LLMScanResponse(BaseModel):
    findings: list[Finding] = Field(default_factory=list)
68
+
69
+
70
# Aggregate result for one scan target: per-file reports plus warnings.
class ProjectScanSummary(BaseModel):
    target: str
    files_scanned: int = 0
    reports: list[ScanReport] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)

    @property
    def finding_count(self) -> int:
        """Total number of findings across all reports."""
        total = 0
        for report in self.reports:
            total += len(report.findings)
        return total

    def sorted_reports(self) -> list[ScanReport]:
        """Return the reports ordered by worst severity, then by file name.

        Reports whose most severe finding is ``critical`` come first, then
        ``high``, ``medium`` and ``low``. Reports with no findings sort last.
        """
        severity_rank = {"critical": 0, "high": 1, "medium": 2, "low": 3}

        def sort_key(report: ScanReport) -> tuple[int, str]:
            ranks = [severity_rank[finding.severity] for finding in report.findings]
            # 4 ranks a report without findings below every real severity.
            worst = min(ranks, default=4)
            return (worst, report.file)

        return sorted(self.reports, key=sort_key)
94
+
95
+
96
def normalize_path(path: Path) -> str:
    """Return ``path`` as an absolute, symlink-resolved string.

    Falls back to the plain string form of ``path`` when it cannot be
    resolved.
    """
    try:
        return str(path.resolve())
    except (OSError, RuntimeError):
        # Before Python 3.13, Path.resolve() reports a symlink loop as
        # RuntimeError rather than OSError; treat both as "cannot resolve".
        return str(path)
@@ -0,0 +1,90 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from pathlib import Path
5
+
6
+ from .models import ContextProfile
7
+
8
# Dotted decorator names treated as HTTP route registrations: every
# combination of the `app` / `router` objects with the common HTTP verbs.
ROUTE_DECORATOR_NAMES = {
    f"{owner}.{verb}"
    for owner in ("app", "router")
    for verb in ("get", "post", "put", "delete", "patch")
}
# Substrings that mark an assignment target as secret-like.
SECRET_NAME_MARKERS = ("key", "secret", "token", "password")
# Lower-cased module names counted as low-level database access.
LOW_LEVEL_DB_IMPORTS = {"sqlite3", "pymysql", "psycopg2", "mysql.connector", "mysqldb"}
22
+
23
+
24
def fallback_reasons(target: Path, context_profile: ContextProfile) -> list[str]:
    """Return heuristic risk signals for ``target``.

    The file is parsed with ``ast`` and checked for three patterns:
    low-level database imports when the project profile names an ORM,
    secret-like names assigned string literals, and route handlers lacking
    the project's auth decorators.

    Returns an empty list when the file cannot be read or parsed.
    """
    try:
        source = target.read_text(encoding="utf-8", errors="ignore")
        tree = ast.parse(source, filename=str(target))
    except (OSError, SyntaxError, ValueError, RecursionError):
        # ValueError: ast.parse rejects NUL bytes this way before Python 3.12.
        # RecursionError: pathologically nested source can exhaust the parser.
        # Either way the file is unparseable, which must not abort the scan.
        return []

    reasons: list[str] = []
    if context_profile.orm and _imports_low_level_db(tree):
        reasons.append("low-level database access bypasses the project's ORM pattern")
    if _has_hardcoded_secret(tree):
        reasons.append("hardcoded secret-like assignment detected")
    if context_profile.auth_decorators and _has_route_without_auth(tree, context_profile.auth_decorators):
        reasons.append("route handler is missing the project's auth decorators")
    return reasons
39
+
40
+
41
def _imports_low_level_db(tree: ast.AST) -> bool:
    """Return True when ``tree`` imports a low-level database module.

    A module matches when its lower-cased dotted name equals an entry of
    ``LOW_LEVEL_DB_IMPORTS`` or is a submodule of one. This covers
    ``import psycopg2.extras`` as well as ``import psycopg2``. For
    ``from X import Y`` both ``X`` and ``X.Y`` are checked, so
    ``from mysql import connector`` is recognised too.
    """

    def is_low_level(dotted_name: str) -> bool:
        lowered = dotted_name.lower()
        return any(
            lowered == driver or lowered.startswith(f"{driver}.")
            for driver in LOW_LEVEL_DB_IMPORTS
        )

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            if any(is_low_level(alias.name) for alias in node.names):
                return True
        elif isinstance(node, ast.ImportFrom):
            module = node.module or ""
            if is_low_level(module):
                return True
            # `from pkg import sub` may name a driver split across the two parts.
            if module and any(is_low_level(f"{module}.{alias.name}") for alias in node.names):
                return True
    return False
52
+
53
+
54
def _has_hardcoded_secret(tree: ast.AST) -> bool:
    """Return True when a secret-like name is assigned a string literal.

    Both plain assignments (``token = "..."``) and annotated assignments
    (``token: str = "..."``) are inspected. A target counts as secret-like
    when its lower-cased rendered name contains any entry of
    ``SECRET_NAME_MARKERS``.
    """
    for node in ast.walk(tree):
        if isinstance(node, ast.Assign):
            targets = node.targets
        elif isinstance(node, ast.AnnAssign):
            # An annotated assignment has a single target; its value is None
            # for a bare annotation, which the literal check below skips.
            targets = [node.target]
        else:
            continue

        value = node.value
        if not (isinstance(value, ast.Constant) and isinstance(value.value, str)):
            continue

        for target in targets:
            name = _render_name(target).lower()
            if any(marker in name for marker in SECRET_NAME_MARKERS):
                return True
    return False
65
+
66
+
67
def _has_route_without_auth(tree: ast.AST, auth_decorators: list[str]) -> bool:
    """Return True when a route handler carries none of the auth decorators.

    A function is a route handler when one of its decorators renders to a
    name in ``ROUTE_DECORATOR_NAMES``. Auth decorators are compared by their
    last dotted component, after stripping any leading ``@``.
    """
    allowed_suffixes: set[str] = set()
    for decorator in auth_decorators:
        allowed_suffixes.add(decorator.lstrip("@").split(".")[-1])

    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            rendered = {_render_name(decorator) for decorator in node.decorator_list}
            # Decorators that cannot be rendered yield "" and never count.
            rendered.discard("")
            if rendered.isdisjoint(ROUTE_DECORATOR_NAMES):
                continue
            suffixes = {name.split(".")[-1] for name in rendered}
            if suffixes.isdisjoint(allowed_suffixes):
                return True
    return False
80
+
81
+
82
+ def _render_name(node: ast.AST) -> str:
83
+ if isinstance(node, ast.Name):
84
+ return node.id
85
+ if isinstance(node, ast.Attribute):
86
+ base = _render_name(node.value)
87
+ return f"{base}.{node.attr}" if base else node.attr
88
+ if isinstance(node, ast.Call):
89
+ return _render_name(node.func)
90
+ return ""
aion/semgrep_runner.py ADDED
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import shutil
5
+ import subprocess
6
+ from pathlib import Path
7
+
8
+ from .models import SemgrepFinding, normalize_path
9
+
10
+
11
class SemgrepError(RuntimeError):
    """Raised when Semgrep fails or returns output that cannot be parsed."""
13
+
14
+
15
def semgrep_available() -> bool:
    """Report whether a ``semgrep`` executable can be found on ``PATH``."""
    located = shutil.which("semgrep")
    return located is not None
17
+
18
+
19
class SemgrepRunner:
    """Run the ``semgrep`` CLI and convert its JSON output into findings."""

    def __init__(self, config: str = "p/python"):
        """Store the value passed to ``semgrep --config``."""
        self.config = config

    def run(self, target: Path) -> list[SemgrepFinding]:
        """Scan ``target`` with Semgrep and return its results.

        Raises:
            SemgrepError: if the ``semgrep`` executable cannot be started,
                exits with a code other than 0 or 1, or prints output that
                is not a JSON object.
        """
        command = ["semgrep", "--json", "--config", self.config, str(target)]
        try:
            result = subprocess.run(command, capture_output=True, text=True, check=False)
        except OSError as exc:
            # A missing or non-executable binary surfaces as OSError; report
            # it through the error type callers already handle.
            raise SemgrepError(f"could not execute semgrep: {exc}") from exc
        if result.returncode not in (0, 1):
            raise SemgrepError(result.stderr.strip() or "semgrep failed")
        try:
            payload = json.loads(result.stdout or "{}")
        except json.JSONDecodeError as exc:
            raise SemgrepError("semgrep returned malformed JSON") from exc
        if not isinstance(payload, dict):
            # Valid JSON that is not an object has no "results" to read.
            raise SemgrepError("semgrep returned malformed JSON")

        findings: list[SemgrepFinding] = []
        for item in payload.get("results", []):
            start = item.get("start", {})
            end = item.get("end", {})
            extra = item.get("extra", {})
            findings.append(
                SemgrepFinding(
                    check_id=item.get("check_id", "unknown"),
                    path=normalize_path(Path(item.get("path", str(target)))),
                    line=start.get("line", 1),
                    end_line=end.get("line"),
                    severity=extra.get("severity", "INFO"),
                    # Fall back to the rule id when Semgrep gives no message.
                    message=extra.get("message", "").strip() or item.get("check_id", "Semgrep finding"),
                    code=extra.get("lines"),
                    metadata=extra.get("metadata", {}),
                )
            )
        return findings
@@ -0,0 +1,89 @@
1
+ Metadata-Version: 2.4
2
+ Name: aion-evolve
3
+ Version: 0.2.1
4
+ Summary: AION: The Self-Evolving Code Engine. Code Once, Live Forever.
5
+ Project-URL: Homepage, https://github.com/shenxianpeng/aion
6
+ Project-URL: Documentation, https://shenxianpeng.github.io/aion/
7
+ Project-URL: Bug Tracker, https://github.com/shenxianpeng/aion/issues
8
+ Requires-Python: >=3.11
9
+ Description-Content-Type: text/markdown
10
+ Requires-Dist: anthropic>=0.86.0
11
+ Requires-Dist: instructor>=1.14.5
12
+ Requires-Dist: openai>=2.30.0
13
+ Requires-Dist: pydantic>=2.11.0
14
+ Requires-Dist: rich>=14.3.3
15
+ Requires-Dist: semgrep>=1.156.0
16
+ Requires-Dist: typer>=0.23.1
17
+
18
+ # AION
19
+
20
+ [![Docs](https://img.shields.io/badge/docs-github%20pages-blue)](https://shenxianpeng.github.io/aion/)
21
+
22
+ > **Code Once, Live Forever.**
23
+
24
+ `AION` is The Self-Evolving Code Engine — designed to end technical debt and keep your codebase in a perpetual state of health.
25
+
26
+ AI scans your code continuously, automatically rewrites outdated syntax and risky logic, and delivers an evolved codebase every day. Instead of treating every file in isolation, it builds a lightweight profile of the existing repository, runs `semgrep` as a fast first pass, and only asks the LLM to investigate files that have concrete risk signals or meaningful context gaps. The main differentiator is context-gap reporting, for example: "this file uses `sqlite3`, but the rest of the project uses `sqlalchemy` sessions."
27
+
28
+ ## Current MVP
29
+
30
+ - Python-only scanning
31
+ - Project context extraction via `ast`
32
+ - `semgrep --config p/python` integration
33
+ - LLM-backed structured findings
34
+ - Anthropic and OpenAI providers
35
+ - AI-generated file detection via file markers, git history, or explicit `--ai-generated`
36
+ - Rich terminal output and JSON output
37
+
38
+ ## Install
39
+
40
+ ```bash
41
+ uv sync
42
+ ```
43
+
44
+ ## Usage
45
+
46
+ ```bash
47
+ export ANTHROPIC_API_KEY=your_key
48
+ uv run aion scan ./path/to/project
49
+ uv run aion scan ./path/to/project --ai-generated ./path/to/project/generated_file.py
50
+ uv run aion scan ./path/to/project --output json
51
+ export OPENAI_API_KEY=your_key
52
+ uv run aion scan ./path/to/project --provider openai
53
+ ```
54
+
55
+ ## Config File
56
+
57
+ Create `.aion.yaml` in the project root:
58
+
59
+ ```yaml
60
+ provider: openai
61
+ model: gpt-4.1
62
+ ignore_paths:
63
+ - tests/*
64
+ - scripts/generated_*.py
65
+ ```
66
+
67
+ CLI flags still override config values.
68
+
69
+ ## Notes
70
+
71
+ - If `semgrep` is unavailable, the tool degrades to LLM-only mode and prints a warning.
72
+ - If no AI-generated markers are found, the tool scans all Python files and prints a warning.
73
+ - Context extraction cache is stored at `~/.aion-context.json`.
74
+ - Provider-specific defaults: Anthropic uses `claude-3-5-sonnet-latest`; OpenAI uses `gpt-4.1` unless `--model` is set.
75
+
76
+ ## Tests
77
+
78
+ ```bash
79
+ uv run pytest tests/unit
80
+ uv run pytest -m eval tests/eval
81
+ ```
82
+
83
+ ## Documentation
84
+
85
+ Full documentation is published with GitHub Pages:
86
+
87
+ - English: `docs/en/`
88
+ - 中文: `docs/zh/`
89
+ - Site URL: `https://shenxianpeng.github.io/aion/`
@@ -0,0 +1,15 @@
1
+ aion/__init__.py,sha256=N0j6iIy_RoA1JMNfbMwNPS4LNJOf8cGQ-JvM1FsqQ78,211
2
+ aion/__main__.py,sha256=nwyadK0B67PgQNG60KYhqotybP8f0KvfyJ0S5Z7VSsE,60
3
+ aion/cli.py,sha256=1bEbm7Te2SwVAkmm6Z0RwA9YRwd5IT1nPhsS31Fn2OE,10923
4
+ aion/config.py,sha256=kctVKvaiRY065k8drU79LN5-2X-B2Yi7ObBv7D1oi3E,2475
5
+ aion/context_extractor.py,sha256=uUOZLAuDFiJ52NPX_aerft5I4yHFLBFiKiZsPIb0GkM,9065
6
+ aion/evaluation.py,sha256=nsx0zScje1iTVZrHCk6xC12HFvyRtyMJeX-y8Wen02s,4447
7
+ aion/llm_analyzer.py,sha256=Ys6ySkDVrPJvFG71yR-_HFKFArTBSlxeEpzy1knEmt8,8171
8
+ aion/models.py,sha256=vwEfjH0nY9660va7fnEaNxjoDboAY1FTLIrE5aZnUIo,2882
9
+ aion/risk_heuristics.py,sha256=eZYE5jgXbO-SPjvwxWGnSucUNhtwOURpS1sOoKrbebw,3158
10
+ aion/semgrep_runner.py,sha256=aFYSN4rXw5spbziSsLfISl3WXO0ELJD_3t9WBVtKypY,1745
11
+ aion_evolve-0.2.1.dist-info/METADATA,sha256=IRWjfRfubZzuEeCHFFTGMkaiTEsCrQG8DTAbjEEZsZU,2890
12
+ aion_evolve-0.2.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
13
+ aion_evolve-0.2.1.dist-info/entry_points.txt,sha256=ESe7xV_00Z1lHI7R4HnQSxbovqXvXtuZqI-k0mN4-TM,38
14
+ aion_evolve-0.2.1.dist-info/top_level.txt,sha256=Di9luRhzG-5XgrNQOA4Iw__XvBajwuEa8zFANbQyIPQ,5
15
+ aion_evolve-0.2.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ aion = aion.cli:app
@@ -0,0 +1 @@
1
+ aion