AtherisLiteLLM 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. atherislitellm-0.2.5/AtherisLiteLLM.egg-info/PKG-INFO +50 -0
  2. atherislitellm-0.2.5/AtherisLiteLLM.egg-info/SOURCES.txt +24 -0
  3. atherislitellm-0.2.5/AtherisLiteLLM.egg-info/dependency_links.txt +1 -0
  4. atherislitellm-0.2.5/AtherisLiteLLM.egg-info/entry_points.txt +2 -0
  5. atherislitellm-0.2.5/AtherisLiteLLM.egg-info/requires.txt +5 -0
  6. atherislitellm-0.2.5/AtherisLiteLLM.egg-info/top_level.txt +1 -0
  7. atherislitellm-0.2.5/LICENSE +21 -0
  8. atherislitellm-0.2.5/PKG-INFO +50 -0
  9. atherislitellm-0.2.5/README.md +32 -0
  10. atherislitellm-0.2.5/ai_fuzzer/__init__.py +0 -0
  11. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/fetch/__init__.py +0 -0
  12. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/fetch/fetch_docs.py +71 -0
  13. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/llm/__init__.py +0 -0
  14. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/llm/llm_requests.py +61 -0
  15. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/logger/__init__.py +0 -0
  16. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/logger/logs.py +50 -0
  17. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/parsing/__init__.py +0 -0
  18. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/parsing/function_parser.py +83 -0
  19. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/run.py +141 -0
  20. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/sandbox/__init__.py +0 -0
  21. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/sandbox/sandbox.py +26 -0
  22. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/smell/__init__.py +0 -0
  23. atherislitellm-0.2.5/ai_fuzzer/atherislitellm/smell/smell.py +18 -0
  24. atherislitellm-0.2.5/ai_fuzzer/cli.py +125 -0
  25. atherislitellm-0.2.5/pyproject.toml +29 -0
  26. atherislitellm-0.2.5/setup.cfg +4 -0
@@ -0,0 +1,50 @@
1
+ Metadata-Version: 2.4
2
+ Name: AtherisLiteLLM
3
+ Version: 0.2.5
4
+ Summary: AI-powered Python fuzzer using LiteLLM and Atheris to automatically generate and execute fuzzing harnesses.
5
+ Author-email: Mario Marku <mariomarku7@gmail.com>
6
+ License-Expression: GPL-3.0-or-later
7
+ Project-URL: Homepage, https://github.com/mariobx
8
+ Project-URL: Repository, https://github.com/mariobx/AtherisLiteLLM
9
+ Requires-Python: >=3.11
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: litellm>=1.0.0
13
+ Requires-Dist: pyyaml>=6.0
14
+ Requires-Dist: atheris>=2.3.0
15
+ Requires-Dist: radon>=5.0.0
16
+ Requires-Dist: requests>=2.31.0
17
+ Dynamic: license-file
18
+
19
+ ## AtherisLiteLLM:
20
+ This project creates a LLM-assisted Python fuzzing harness generator designed to leverage large language models via LiteLLM to automatically build fuzzing harnesses for target Python functions and classes. It uses Google’s Atheris fuzzing engine to dynamically generate and test code, with the aim of uncovering bugs or vulnerabilities in software.
21
+
22
+ # Usage:
23
+ atherislitellm \
24
+ --src-dir /path/to/code \
25
+ --output-dir /path/to/logs \
26
+ --model gemini/gemini-1.5-flash \
27
+ --prompts-path /path/to/prompts.yaml \
28
+ --prompt base \
29
+ --api-key your_api_key_here (optional if env var is set) \
30
+ --extra-model-prompts project=my-project \
31
+ --debug \
32
+ --smell
33
+
34
+ # Arguments:
35
+ - `-s`, `--src-dir`: Path to the Python source directory to fuzz.
36
+ - `-o`, `--output-dir`: Where to store crash logs and generated harnesses.
37
+ - `-m`, `--model`: LiteLLM model string (e.g., `gemini/gemini-1.5-flash`, `openai/gpt-4`).
38
+ - `-pp`, `--prompts-path`: Path to `prompts.yaml` config file.
39
+ - `-p`, `--prompt`: Prompt ID from `prompts.yaml` to use (default: `base`).
40
+ - `-k`, `--api-key`: API key string (optional if environment variable is set).
41
+ - `-e`, `--extra-model-prompts`: Extra vendor-specific parameters as `key=value` pairs.
42
+ - `-d`, `--debug`: Enable debug/verbose mode.
43
+ - `-sm`, `--smell`: Enable code smell filtering via Radon.
44
+
45
+ # Workflow:
46
+ 1. Resolve API key (environment variable or raw string) and verify model via LiteLLM.
47
+ 2. Discover .py files; parse target functions and classes.
48
+ 3. (Optional) Filter by maintainability index using Radon.
49
+ 4. Build prompt with Atheris docs + target code; send to the LLM via LiteLLM.
50
+ 5. Save generated harnesses into a timestamped run directory.
@@ -0,0 +1,24 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ AtherisLiteLLM.egg-info/PKG-INFO
5
+ AtherisLiteLLM.egg-info/SOURCES.txt
6
+ AtherisLiteLLM.egg-info/dependency_links.txt
7
+ AtherisLiteLLM.egg-info/entry_points.txt
8
+ AtherisLiteLLM.egg-info/requires.txt
9
+ AtherisLiteLLM.egg-info/top_level.txt
10
+ ai_fuzzer/__init__.py
11
+ ai_fuzzer/cli.py
12
+ ai_fuzzer/atherislitellm/run.py
13
+ ai_fuzzer/atherislitellm/fetch/__init__.py
14
+ ai_fuzzer/atherislitellm/fetch/fetch_docs.py
15
+ ai_fuzzer/atherislitellm/llm/__init__.py
16
+ ai_fuzzer/atherislitellm/llm/llm_requests.py
17
+ ai_fuzzer/atherislitellm/logger/__init__.py
18
+ ai_fuzzer/atherislitellm/logger/logs.py
19
+ ai_fuzzer/atherislitellm/parsing/__init__.py
20
+ ai_fuzzer/atherislitellm/parsing/function_parser.py
21
+ ai_fuzzer/atherislitellm/sandbox/__init__.py
22
+ ai_fuzzer/atherislitellm/sandbox/sandbox.py
23
+ ai_fuzzer/atherislitellm/smell/__init__.py
24
+ ai_fuzzer/atherislitellm/smell/smell.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ atherislitellm = ai_fuzzer.cli:main
@@ -0,0 +1,5 @@
1
+ litellm>=1.0.0
2
+ pyyaml>=6.0
3
+ atheris>=2.3.0
4
+ radon>=5.0.0
5
+ requests>=2.31.0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Mario Marku
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,50 @@
1
+ Metadata-Version: 2.4
2
+ Name: AtherisLiteLLM
3
+ Version: 0.2.5
4
+ Summary: AI-powered Python fuzzer using LiteLLM and Atheris to automatically generate and execute fuzzing harnesses.
5
+ Author-email: Mario Marku <mariomarku7@gmail.com>
6
+ License-Expression: GPL-3.0-or-later
7
+ Project-URL: Homepage, https://github.com/mariobx
8
+ Project-URL: Repository, https://github.com/mariobx/AtherisLiteLLM
9
+ Requires-Python: >=3.11
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: litellm>=1.0.0
13
+ Requires-Dist: pyyaml>=6.0
14
+ Requires-Dist: atheris>=2.3.0
15
+ Requires-Dist: radon>=5.0.0
16
+ Requires-Dist: requests>=2.31.0
17
+ Dynamic: license-file
18
+
19
+ ## AtherisLiteLLM:
20
+ This project creates a LLM-assisted Python fuzzing harness generator designed to leverage large language models via LiteLLM to automatically build fuzzing harnesses for target Python functions and classes. It uses Google’s Atheris fuzzing engine to dynamically generate and test code, with the aim of uncovering bugs or vulnerabilities in software.
21
+
22
+ # Usage:
23
+ atherislitellm \
24
+ --src-dir /path/to/code \
25
+ --output-dir /path/to/logs \
26
+ --model gemini/gemini-1.5-flash \
27
+ --prompts-path /path/to/prompts.yaml \
28
+ --prompt base \
29
+ --api-key your_api_key_here (optional if env var is set) \
30
+ --extra-model-prompts project=my-project \
31
+ --debug \
32
+ --smell
33
+
34
+ # Arguments:
35
+ - `-s`, `--src-dir`: Path to the Python source directory to fuzz.
36
+ - `-o`, `--output-dir`: Where to store crash logs and generated harnesses.
37
+ - `-m`, `--model`: LiteLLM model string (e.g., `gemini/gemini-1.5-flash`, `openai/gpt-4`).
38
+ - `-pp`, `--prompts-path`: Path to `prompts.yaml` config file.
39
+ - `-p`, `--prompt`: Prompt ID from `prompts.yaml` to use (default: `base`).
40
+ - `-k`, `--api-key`: API key string (optional if environment variable is set).
41
+ - `-e`, `--extra-model-prompts`: Extra vendor-specific parameters as `key=value` pairs.
42
+ - `-d`, `--debug`: Enable debug/verbose mode.
43
+ - `-sm`, `--smell`: Enable code smell filtering via Radon.
44
+
45
+ # Workflow:
46
+ 1. Resolve API key (environment variable or raw string) and verify model via LiteLLM.
47
+ 2. Discover .py files; parse target functions and classes.
48
+ 3. (Optional) Filter by maintainability index using Radon.
49
+ 4. Build prompt with Atheris docs + target code; send to the LLM via LiteLLM.
50
+ 5. Save generated harnesses into a timestamped run directory.
@@ -0,0 +1,32 @@
1
+ ## AtherisLiteLLM:
2
+ This project creates a LLM-assisted Python fuzzing harness generator designed to leverage large language models via LiteLLM to automatically build fuzzing harnesses for target Python functions and classes. It uses Google’s Atheris fuzzing engine to dynamically generate and test code, with the aim of uncovering bugs or vulnerabilities in software.
3
+
4
+ # Usage:
5
+ atherislitellm \
6
+ --src-dir /path/to/code \
7
+ --output-dir /path/to/logs \
8
+ --model gemini/gemini-1.5-flash \
9
+ --prompts-path /path/to/prompts.yaml \
10
+ --prompt base \
11
+ --api-key your_api_key_here (optional if env var is set) \
12
+ --extra-model-prompts project=my-project \
13
+ --debug \
14
+ --smell
15
+
16
+ # Arguments:
17
+ - `-s`, `--src-dir`: Path to the Python source directory to fuzz.
18
+ - `-o`, `--output-dir`: Where to store crash logs and generated harnesses.
19
+ - `-m`, `--model`: LiteLLM model string (e.g., `gemini/gemini-1.5-flash`, `openai/gpt-4`).
20
+ - `-pp`, `--prompts-path`: Path to `prompts.yaml` config file.
21
+ - `-p`, `--prompt`: Prompt ID from `prompts.yaml` to use (default: `base`).
22
+ - `-k`, `--api-key`: API key string (optional if environment variable is set).
23
+ - `-e`, `--extra-model-prompts`: Extra vendor-specific parameters as `key=value` pairs.
24
+ - `-d`, `--debug`: Enable debug/verbose mode.
25
+ - `-sm`, `--smell`: Enable code smell filtering via Radon.
26
+
27
+ # Workflow:
28
+ 1. Resolve API key (environment variable or raw string) and verify model via LiteLLM.
29
+ 2. Discover .py files; parse target functions and classes.
30
+ 3. (Optional) Filter by maintainability index using Radon.
31
+ 4. Build prompt with Atheris docs + target code; send to the LLM via LiteLLM.
32
+ 5. Save generated harnesses into a timestamped run directory.
File without changes
@@ -0,0 +1,71 @@
1
+ import re
2
+ import requests
3
+ import time
4
+ from ai_fuzzer.atherislitellm.logger.logs import log
5
+
6
+ cache = {}
7
+
8
def fetch_with_retry(url: str, max_tries: int = 5, debug: bool = False) -> str:
    """Fetch ``url`` and return its body text, retrying on failure.

    Up to ``max_tries`` GET requests are issued with a 10-second timeout.
    An attempt counts as failed when the request raises, the response has
    an HTTP error status, or the body is empty. After every failed attempt
    except the last, the function sleeps ``2 ** i`` seconds, where ``i`` is
    the zero-based attempt index.

    Args:
        url: Address to fetch.
        max_tries: Maximum number of attempts.
        debug: Passed to ``log`` as its second argument.

    Returns:
        The non-empty response text.

    Raises:
        requests.exceptions.RequestException: If no attempt produced a
            non-empty body. The most recent request error, when there was
            one, is attached as ``__cause__`` so the root failure is not lost.
    """
    last_error = None
    for i in range(max_tries):
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            content = response.text
            if content:
                return content
            log(f"Empty response from {url}, retrying...", debug)
        except requests.exceptions.RequestException as e:
            # Timeout is a subclass of RequestException, so this single
            # clause covers everything the original tuple caught.
            last_error = e
            log(f"Error fetching {url}: {e}. Retrying ({i+1}/{max_tries})...", debug)
        if i < max_tries - 1:
            time.sleep(2 ** i)  # Exponential backoff

    raise requests.exceptions.RequestException(
        f"Failed to fetch {url} after {max_tries} attempts"
    ) from last_error
24
+
25
def fetch_atheris_readme(debug: bool = False) -> str:
    """Return the Atheris README, cleaned and wrapped in START/END marker lines.

    The formatted text is stored in the module-level ``cache`` under the
    key ``"readme"`` and returned from there on later calls. Cleaning
    removes Markdown image tags, removes Markdown links whose target starts
    with ``http://`` or ``https://`` (link text included), and collapses
    runs of three or more newlines into two.
    """
    try:
        return cache["readme"]
    except KeyError:
        pass

    raw = fetch_with_retry(
        "https://raw.githubusercontent.com/google/atheris/master/README.md",
        debug=debug,
    )

    # Applied in order: images, http(s) links, then surplus blank lines.
    substitutions = (
        (r'!\[.*?\]\(.*?\)', ''),
        (r'\[.*?\]\(https?:\/\/.*?\)', ''),
        (r'\n{3,}', '\n\n'),
    )
    content = raw
    for pattern, replacement in substitutions:
        content = re.sub(pattern, replacement, content)

    formatted = f"""
==== START OF ATHERIS DOCUMENTATION ====

This is the official README documentation for Google's Atheris fuzzing framework for Python.

{content}

==== END OF ATHERIS DOCUMENTATION ====
"""
    cache["readme"] = formatted
    log("fetched atheris readme", debug)
    return formatted
48
+
49
def fetch_atheris_hooking_docs(debug: bool = False) -> str:
    """Return Atheris' ``hooking.md``, cleaned and wrapped in START/END marker lines.

    The formatted text is stored in the module-level ``cache`` under the
    key ``"hooking"`` and returned from there on later calls. Cleaning
    removes Markdown image tags, removes Markdown links whose target starts
    with ``http://`` or ``https://`` (link text included), and collapses
    runs of three or more newlines into two.

    Args:
        debug: Passed through to ``fetch_with_retry`` and ``log``.

    Returns:
        The wrapped documentation text.
    """
    if "hooking" in cache:
        return cache["hooking"]

    url = "https://raw.githubusercontent.com/google/atheris/refs/heads/master/hooking.md"
    content = fetch_with_retry(url, debug=debug)

    content = re.sub(r'!\[.*?\]\(.*?\)', '', content)
    content = re.sub(r'\[.*?\]\(https?:\/\/.*?\)', '', content)
    content = re.sub(r'\n{3,}', '\n\n', content)
    # The description line previously claimed this was the README (copied
    # from fetch_atheris_readme); it now names the document actually fetched.
    formatted = f"""
==== START OF ATHERIS' HOOKING DOCUMENTATION ====

This is the official hooking documentation for Google's Atheris fuzzing framework for Python.

{content}

==== END OF ATHERIS' HOOKING DOCUMENTATION ====
"""
    cache["hooking"] = formatted
    log("fetched atheris hooking documentation", debug)
    return formatted
@@ -0,0 +1,61 @@
1
+ from pathlib import Path
2
+ from typing import Optional, Tuple
3
+ import yaml
4
+ from ai_fuzzer.atherislitellm.fetch import fetch_docs
5
+ import re
6
+ from ai_fuzzer.atherislitellm.logger.logs import log
7
+ import litellm
8
+
9
def extract_code_blocks(text):
    """Return the contents of every triple-backtick fence in ``text``.

    A fence may carry a language tag made of word characters, ``+`` or
    ``-``. The inner contents of all fences are joined with two newlines.
    Falsy input, or text without any fence, yields an empty string.
    """
    if not text:
        return ""
    fence = re.compile(r'```(?:[\w+-]*)\s*\n([\s\S]*?)```')
    return '\n\n'.join(found.group(1) for found in fence.finditer(text))
21
+
22
def load_prompt_data(prompt_id: str, yaml_path: Path, debug=False) -> Tuple[float, str, str]:
    """Load one prompt entry from a YAML file.

    Args:
        prompt_id: Top-level key of the entry to read.
        yaml_path: Path of the YAML file, read as UTF-8.
        debug: Unused here; kept for signature compatibility.

    Returns:
        ``(temperature, description, template)`` with the temperature
        converted to ``float``.

    Raises:
        KeyError: If the document is not a mapping (an empty file parses
            to ``None``), if ``prompt_id`` is absent, or if the entry is
            missing any of ``temperature``, ``description``, ``template``.
    """
    with open(yaml_path, "r", encoding="utf-8") as f:
        all_prompts = yaml.safe_load(f)

    # An empty or non-mapping document used to fail with a TypeError on the
    # "in" test or the lookup; report it as "prompt not found" instead.
    if not isinstance(all_prompts, dict) or prompt_id not in all_prompts:
        raise KeyError(f"Prompt ID '{prompt_id}' not found in {yaml_path}")

    entry = all_prompts[prompt_id]
    required = ("temperature", "description", "template")
    if not isinstance(entry, dict):
        missing = list(required)
    else:
        missing = [field for field in required if field not in entry]
    if missing:
        raise KeyError(
            f"Prompt ID '{prompt_id}' in {yaml_path} is missing field(s): {', '.join(missing)}"
        )
    return float(entry["temperature"]), entry["description"], entry["template"]
30
+
31
def format_prompt(template: str, target_func: str, debug=False) -> str:
    """Substitute the ``{{CODE}}`` and ``{{DOCS}}`` placeholders in ``template``.

    ``{{CODE}}`` becomes ``target_func``; ``{{DOCS}}`` becomes the Atheris
    README followed by the hooking docs, separated by a blank line. Both
    documents are fetched whether or not the template uses ``{{DOCS}}``.
    """
    readme = fetch_docs.fetch_atheris_readme(debug)
    hooking = fetch_docs.fetch_atheris_hooking_docs(debug)
    doc_block = f"{readme}\n\n{hooking}"
    with_code = template.replace("{{CODE}}", target_func)
    return with_code.replace("{{DOCS}}", doc_block)
35
+
36
def get_response(client: dict, prompt_id: str, target_func: str, yaml_path: Path, debug: bool = False, **kwargs) -> str | None:
    """Build the prompt, send it through ``litellm.completion``, return the reply text.

    ``client`` supplies ``"model"`` and ``"api_key"``; extra keyword
    arguments are forwarded to ``litellm.completion`` unchanged. Any
    exception raised while loading the prompt, fetching docs or calling the
    model is logged and turned into a ``None`` return value.
    """
    log("Preparing prompt and making a LiteLLM call...", debug)

    try:
        temperature, _, template = load_prompt_data(prompt_id, yaml_path, debug)
        full_prompt = format_prompt(template, target_func, debug)
        user_message = {"role": "user", "content": full_prompt}

        # LiteLLM handles retries via num_retries
        response = litellm.completion(
            model=client["model"],
            messages=[user_message],
            api_key=client["api_key"],
            temperature=temperature,
            num_retries=5,
            **kwargs
        )

        # Walk response.choices[0].message.content, tolerating missing
        # attributes at each step (an empty choices list still raises
        # IndexError, which the handler below reports).
        choices = getattr(response, "choices", [None])
        first_choice = choices[0]
        message = getattr(first_choice, "message", None)
        content = getattr(message, "content", None)
        if not content:
            log("Warning: Received empty content from model response.", True)
        return content

    except Exception as e:
        log(f"Error during LLM completion: {e}", True)
        return None
@@ -0,0 +1,50 @@
1
+ import os
2
+ import sys
3
+ import traceback
4
+ import inspect
5
+ from pathlib import Path
6
+ from datetime import datetime
7
+
8
+ _LOG_BASE: Path | None = None
9
+ _LOG_FILE: Path | None = None
10
+
11
def init_logger(base_path: str) -> None:
    """Point the module-level logger at ``<base_path>/logs/log.log``.

    Creates the ``logs`` directory (and any missing parents) and records
    the base path and log file path in ``_LOG_BASE`` and ``_LOG_FILE``.
    The log file itself is not created here.
    """
    global _LOG_BASE, _LOG_FILE
    _LOG_BASE = Path(base_path)
    logs_folder = _LOG_BASE.joinpath("logs")
    logs_folder.mkdir(parents=True, exist_ok=True)
    _LOG_FILE = logs_folder.joinpath("log.log")
18
+
19
def log(msg: str, echo: bool = False) -> None:
    """Append a timestamped line for ``msg`` to the log file.

    The line names the file and line number of the immediate caller. When
    an exception other than ``SyntaxError`` is currently being handled, its
    formatted traceback is appended. With ``echo`` true the same text is
    also written to stdout. Failures while writing are reported on stderr
    rather than raised.

    Raises:
        RuntimeError: If ``init_logger`` has not set the log file yet.
    """
    if _LOG_FILE is None:
        raise RuntimeError("Logger not initialized. Call init_logger(path) first.")

    # Identify the caller; this must stay in this function so that f_back
    # is the frame that invoked log().
    here = inspect.currentframe()
    caller = None if here is None else here.f_back
    if caller is None:
        filename, lineno = "<unknown>", 0
    else:
        filename = Path(caller.f_code.co_filename).name
        lineno = caller.f_lineno

    stamp = datetime.now().strftime("%m/%d/%y %I:%M:%S%p")
    pieces = [f"{stamp} --- DEBUG --- {filename}:{lineno} - {msg}\n"]

    # Inside an exception handler, attach the active traceback.
    exc_type, exc, tb = sys.exc_info()
    if not (exc is None or isinstance(exc, SyntaxError)):
        pieces.append("".join(traceback.format_exception(exc_type, exc, tb)) + "\n")
    line = "".join(pieces)

    try:
        with _LOG_FILE.open("a", encoding="utf-8") as f:
            f.write(line)
        if echo:
            sys.stdout.write(line)
            sys.stdout.flush()
    except Exception as e:
        sys.stderr.write(f"[log_debug ERROR] {e}\n")
        sys.stderr.flush()
@@ -0,0 +1,83 @@
1
+ import os
2
+ import ast
3
+ from typing import List
4
+ from pathlib import Path
5
+ from ai_fuzzer.atherislitellm.logger.logs import log
6
+
7
def is_virtualenv_dir(path, debug=False):
    """
    Report whether ``path`` looks like a Python virtual environment.

    A directory qualifies when it contains ``pyvenv.cfg`` and at least one
    of ``bin/python`` or ``Scripts/python.exe``. A match is logged.
    """
    if not os.path.isfile(os.path.join(path, "pyvenv.cfg")):
        return False

    interpreters = (
        os.path.join(path, "bin", "python"),
        os.path.join(path, "Scripts", "python.exe"),
    )
    if not any(os.path.isfile(candidate) for candidate in interpreters):
        return False

    log(f"Found virtualenv at {path}, which we will ignore", debug)
    return True
19
+
20
def get_python_file_paths(directory_path, debug=False):
    """
    Walk ``directory_path`` and return the paths of all ``.py`` files found.

    Sub-directories recognised by ``is_virtualenv_dir`` are pruned from the
    walk, so nothing beneath them is visited.
    """
    log(f"Walking directory {directory_path} (type: {type(directory_path)})", debug)
    python_files: List[str] = []
    for root, dirs, files in os.walk(directory_path):
        # Assign through the slice so os.walk sees the pruned list.
        kept = []
        for d in dirs:
            if is_virtualenv_dir(os.path.join(root, d), debug):
                continue
            kept.append(d)
        dirs[:] = kept

        for file in files:
            if not file.endswith(".py"):
                continue
            full_path = os.path.join(root, file)
            python_files.append(full_path)
            log(f"Found Python file: {full_path} (type: {type(full_path)})", debug)
    return python_files
34
+
35
def extract_functions(path: str | Path, debug=False) -> dict[str, str]:
    """
    Return ``{name: source}`` for each top-level function in a Python file.

    Only direct children of the module are considered, so methods and
    nested functions are excluded. Both ``def`` and ``async def`` count.
    A path that is not a file yields an empty dict.
    """
    path = Path(path)
    if not path.is_file():
        return {}

    source_code = path.read_text(encoding="utf-8")
    tree = ast.parse(source_code, filename=str(path))

    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    functions = {
        node.name: segment
        for node in tree.body
        if isinstance(node, function_types)
        and (segment := ast.get_source_segment(source_code, node))
    }

    log(f"Extracted {len(functions)} top-level function(s) from {path}", debug)
    return functions
57
+
58
def extract_classes(
    path: str | Path, debug=False
) -> tuple[dict[str, str | None], dict[str, list[tuple[str, str | None]]]]:
    """
    Parse a Python file and extract its top-level classes and their methods.

    Args:
        path: File to parse, read as UTF-8.
        debug: Passed to ``log`` as its second argument.

    Returns:
        A pair ``(classes_in_file, functions_inside_classes)``:
        ``classes_in_file`` maps each class name to the class source text;
        ``functions_inside_classes`` maps each class name to a list of
        ``(method_name, method_source)`` tuples for the ``def`` and
        ``async def`` statements directly in the class body. Both are
        empty when ``path`` is not a file.

    Raises:
        SyntaxError: If the file cannot be parsed.
    """
    path = Path(path)
    if not path.is_file():
        return {}, {}

    source_code = path.read_text(encoding="utf-8")
    # Pass the filename, as extract_functions does, so a SyntaxError names
    # the offending file instead of "<unknown>".
    tree = ast.parse(source_code, filename=str(path))

    classes_in_file = {}
    functions_inside_classes = {}

    for node in tree.body:
        if not isinstance(node, ast.ClassDef):
            continue
        cls_name = node.name
        classes_in_file[cls_name] = ast.get_source_segment(source_code, node)
        functions_inside_classes[cls_name] = [
            (item.name, ast.get_source_segment(source_code, item))
            for item in node.body
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef))
        ]

    log(f"Extracted {len(classes_in_file)} class(es) from {path}", debug)
    return classes_in_file, functions_inside_classes
@@ -0,0 +1,141 @@
1
+ from pathlib import Path
2
+ from datetime import datetime
3
+ from typing import Sequence, Dict
4
+ import os
5
+ from ai_fuzzer.atherislitellm.llm import llm_requests as atherisai
6
+ from ai_fuzzer.atherislitellm.sandbox import sandbox
7
+ from ai_fuzzer.atherislitellm.parsing import function_parser
8
+ from ai_fuzzer.atherislitellm.smell.smell import code_smells
9
+ from ai_fuzzer.atherislitellm.logger.logs import log
10
+
11
def on_crash(output_dir: Path, data: list, debug: bool = False) -> None:
    """Dump the given harness texts into ``crash_report.txt`` under ``output_dir``.

    Each entry is written under a numbered ``HARNESS n`` heading. The file
    is opened in write mode, so an earlier report is replaced. Any failure
    is logged instead of raised.
    """
    try:
        log(f"Crash occurred, output directory: {output_dir}", debug)
        report_path = output_dir / "crash_report.txt"
        with open(report_path, "w", encoding="utf-8") as f:
            for number, contents in enumerate(data, start=1):
                f.write(f"HARNESS {number}\n\n----\n\n{contents}\n\n----\n\n")
    except Exception as e:
        log(f"Failed to write crash report: {e}", debug)
21
+
22
def make_run_dir(base: Path, debug=False) -> Path:
    """Create ``<base>/run-<timestamp>`` and return its path.

    The timestamp has one-second resolution; the directory must not
    already exist (``mkdir`` is called with ``exist_ok=False``).
    """
    stamp = datetime.now().strftime("%m-%d-%y_%I-%M-%S%p").lower()
    run_dir = base / ("run-" + stamp)
    run_dir.mkdir(parents=True, exist_ok=False)
    log(f"Created run directory at: {run_dir}", debug)
    return run_dir
30
+
31
def retrieve_function_candidates(client: dict, path: Path, prompt_id: str, prompt_yaml_path: Path, output_dir: Path, run_dir: Path, debug: bool = False, smell: bool = False, **kwargs) -> dict[str, str]:
    """Generate a harness for every top-level function found under ``path``.

    Args:
        client: Mapping with the ``"model"`` and ``"api_key"`` used for the LLM call.
        path: Directory searched for ``.py`` files.
        prompt_id: Prompt entry to use from ``prompt_yaml_path``.
        prompt_yaml_path: YAML file holding the prompt definitions.
        output_dir: Where a crash report is written if a file fails.
        run_dir: Directory that receives the generated harness files.
        debug: Passed through to helpers and ``log``.
        smell: When true, only functions for which ``code_smells`` returns
            true are sent to the model.
        **kwargs: Forwarded to the LLM request.

    Returns:
        Mapping of function name to extracted harness code. A function
        whose response contained no code is recorded with an empty string.
    """
    func_tests = {}
    pyfiles = function_parser.get_python_file_paths(path, debug=debug)
    if pyfiles:
        log(f"Retrieved {len(pyfiles)} Python files from: {path}", debug)

    for pyfile in pyfiles:
        try:
            funcs = function_parser.extract_functions(pyfile, debug=debug)
            log(f"Found {len(funcs)} functions in {pyfile}", debug)
            for func_name, func_body in funcs.items():
                if smell and not code_smells(python_code=func_body, debug=debug):
                    continue
                response = atherisai.get_response(
                    client=client,
                    prompt_id=prompt_id,
                    target_func=func_body,
                    yaml_path=prompt_yaml_path,
                    debug=debug,
                    **kwargs
                )
                block = atherisai.extract_code_blocks(response)
                func_tests[func_name] = block

                # A failed or fence-less response yields "". Do not write an
                # empty harness file or report it as generated; this matches
                # save_harnesses, which also skips empty code.
                if not block:
                    log(f"No harness code returned for function: {func_name}; nothing saved", debug)
                    continue
                log(f"Generated test for function: {func_name}", debug)

                # Save immediately
                sandbox.save_to_file(func_name, block, run_dir, debug=debug)
        except Exception as e:
            log(f"Error processing file: {e}", debug)
            on_crash(output_dir, list(func_tests.values()), debug=debug)
    return func_tests
64
+
65
def retrieve_class_candidates(client: dict, path: Path, prompt_id: str, prompt_yaml_path: Path, output_dir: Path, run_dir: Path, debug: bool = False, smell: bool = False, **kwargs) -> dict[str, str]:
    """Generate a harness for every method of every top-level class under ``path``.

    For each method the model receives the whole class source followed by
    a "FUZZING FOCUS" section naming the method and repeating its body.

    Args:
        client: Mapping with the ``"model"`` and ``"api_key"`` used for the LLM call.
        path: Directory searched for ``.py`` files.
        prompt_id: Prompt entry to use from ``prompt_yaml_path``.
        prompt_yaml_path: YAML file holding the prompt definitions.
        output_dir: Where a crash report is written if a file fails.
        run_dir: Directory that receives the generated harness files.
        debug: Passed through to helpers and ``log``.
        smell: When true, only classes for which ``code_smells`` returns
            true are processed.
        **kwargs: Forwarded to the LLM request.

    Returns:
        Mapping of ``"ClassName.method"`` to extracted harness code. A
        method whose response contained no code is recorded with an empty
        string.
    """
    class_tests = {}
    pyfiles = function_parser.get_python_file_paths(path, debug=debug)
    if pyfiles:
        log(f"Retrieved {len(pyfiles)} Python files from: {path}", debug)

    for pyfile in pyfiles:
        try:
            # Parsing sits inside the try so that a file that cannot be read
            # or parsed is reported and skipped, as in the function pass,
            # instead of aborting the whole class pass.
            classes_in_file, functions_inside_classes = function_parser.extract_classes(pyfile, debug=debug)
            log(f"Found {len(classes_in_file)} classes in {pyfile}", debug)
            for class_name, class_body in classes_in_file.items():
                if smell and not code_smells(python_code=class_body, debug=debug):
                    continue
                methods = functions_inside_classes.get(class_name, [])
                for function_name, function_body in methods:
                    customized_target_prompt = (
                        f"\n\n{class_body}\n\n"
                        f"**FUZZING FOCUS**\n"
                        f"Method Name: {function_name}\n"
                        f"Method Body:\n{function_body}"
                    )
                    response = atherisai.get_response(
                        client=client,
                        prompt_id=prompt_id,
                        target_func=customized_target_prompt,
                        yaml_path=prompt_yaml_path,
                        debug=debug,
                        **kwargs
                    )
                    block = atherisai.extract_code_blocks(response)
                    key = f"{class_name}.{function_name}"
                    class_tests[key] = block

                    # A failed or fence-less response yields "". Do not write
                    # an empty harness file or report it as generated.
                    if not block:
                        log(f"No harness code returned for class method: {key}; nothing saved", debug)
                        continue
                    log(f"Generated test for class method: {key}", debug)

                    # Save immediately
                    sandbox.save_to_file(key, block, run_dir, debug=debug)
        except Exception as e:
            log(f"Error processing class: {e}", debug)
            on_crash(output_dir, list(class_tests.values()), debug=debug)
    return class_tests
107
+
108
def save_harnesses(code_snippets: dict[str, str], run_dir: Path, debug: bool):
    """Write every non-empty snippet to ``run_dir`` via ``sandbox.save_to_file``.

    Kept for compatibility; does nothing when ``code_snippets`` is empty.
    """
    if not code_snippets:
        return
    log(f"Saving {len(code_snippets)} harnesses to {run_dir}", debug)
    non_empty = ((name, code) for name, code in code_snippets.items() if code)
    for name, code in non_empty:
        sandbox.save_to_file(name, code, run_dir, debug=debug)
116
+
117
def run(
    source_dir: Path, output_dir: Path, prompt_id: str, prompt_yaml_path: Path, model: str, api: str, debug: bool, smell: bool, **kwargs
) -> None:
    """Drive one generation run: functions first, then classes.

    Builds the client mapping from ``model`` and ``api`` (both stripped; a
    falsy ``api`` becomes ``None``), creates ``output_dir`` and a fresh run
    directory inside it, then runs the function pass and the class pass.
    Harnesses are saved by those passes as they are produced.
    """
    log(f"run() called with source_dir={source_dir}, output_dir={output_dir}, model={model}, prompt_id={prompt_id}", debug)

    stripped_model = model.strip() if model else ""
    stripped_key = api.strip() if api else None
    client = {"model": stripped_model, "api_key": stripped_key}

    # Ensure output_dir exists
    output_dir.mkdir(parents=True, exist_ok=True)
    run_dir = make_run_dir(output_dir, debug=debug)

    # Keyword arguments common to both passes.
    shared = dict(output_dir=output_dir, run_dir=run_dir, debug=debug, smell=smell)

    log(f"Starting candidate retrieval for functions from {source_dir}", debug)
    function_code_snippets = retrieve_function_candidates(
        client, source_dir, prompt_id, prompt_yaml_path, **shared, **kwargs
    )
    log(f"Found {len(function_code_snippets)} function snippets", debug)

    log(f"Starting candidate retrieval for classes from {source_dir}", debug)
    class_code_snippets = retrieve_class_candidates(
        client, source_dir, prompt_id, prompt_yaml_path, **shared, **kwargs
    )
    log(f"Found {len(class_code_snippets)} class snippets", debug)

    log(f"Run completed. All harnesses saved in {run_dir}", debug)
@@ -0,0 +1,26 @@
1
+ import subprocess
2
+ from pathlib import Path
3
+ import tempfile
4
+ import os
5
+ from textwrap import dedent
6
+ import venv
7
+ from ai_fuzzer.atherislitellm.logger.logs import log
8
+
9
def save_to_file(name=None, text=None, path=None, debug=False):
    """Write ``text`` to ``<path>/<name>/atheris_harness_for_((<name>)).py``.

    The ``<name>`` sub-directory is created when missing. ``None`` text is
    written as an empty file. Raises ``ValueError`` when ``path`` is None.
    """
    if path is None:
        raise ValueError("The 'path' argument must not be None.")

    # One sub-directory per harness: <path>/<name>/
    target_dir = os.path.join(path, str(name))
    os.makedirs(target_dir, exist_ok=True)

    harness_path = os.path.join(target_dir, f'atheris_harness_for_(({name})).py')
    payload = "" if text is None else text
    with open(harness_path, 'w', encoding='utf-8') as f:
        f.write(payload)

    log(f"Text length: {len(text) if text else 0}", debug)
26
+
@@ -0,0 +1,18 @@
1
+ from radon.metrics import mi_visit
2
+ from ai_fuzzer.atherislitellm.logger.logs import log
3
+
4
def code_smells(python_code: str, threshold: float = 65.0, debug: bool = False) -> bool:
    """
    Decide whether ``python_code`` smells, using radon's Maintainability Index (MI).

    Args:
        python_code: Source text to score.
        threshold: Code scoring strictly below this value is considered smelly.
        debug: Passed to ``log`` as its second argument.

    Returns:
        True if the MI score is below ``threshold`` (the code will be
        fuzzed), False otherwise or when no score is available.
    """
    # The second positional argument is True, as before (radon's ``multi``
    # flag; see the radon documentation for its exact effect).
    results = mi_visit(python_code, True)
    # Compare against None explicitly: a score of 0.0 is falsy, and it must
    # still be compared with the threshold rather than treated as
    # "no result" and skipped.
    if results is None:
        return False

    decision = results < threshold

    action = "will fuzz" if decision else "will skip fuzzing"
    log(f"MI score = {results} (threshold = {threshold}) → {action}", debug)

    return decision
@@ -0,0 +1,125 @@
1
+ from pathlib import Path
2
+ import argparse
3
+ import os
4
+ import sys
5
+ import litellm
6
+ from ai_fuzzer.atherislitellm.run import run
7
+ from ai_fuzzer.atherislitellm.logger.logs import log, init_logger
8
+
9
def resolve_api_key(arg_val: str | None, model: str, debug: bool = False) -> str | None:
    """
    Work out which API key to use for ``model``.

    A key given on the command line wins and is returned stripped.
    Otherwise LiteLLM is asked whether the environment already satisfies
    the model: if so, None is returned and LiteLLM reads the key itself.
    If LiteLLM reports missing variables, instructions are printed and the
    process exits with status 1. Validation errors are logged and yield None.
    """
    if arg_val:
        log("Using API key provided via CLI", debug)
        return arg_val.strip()

    # Ask LiteLLM what the model needs from the environment.
    try:
        check = litellm.validate_environment(model)
        if check.get("keys_in_environment"):
            log(f"Environment is valid for model '{model}'", debug)
            return None  # LiteLLM will pick it up from env

        missing = check.get("missing_keys", [])
        if missing:
            print(f"Error: Missing API key for model '{model}'.")
            print("Please provide it via --api-key or set the following environment variable(s):")
            # Shell syntax differs between Windows and everything else.
            if sys.platform == "win32":
                hint = " set {}=your_api_key_here"
            else:
                hint = " export {}=your_api_key_here"
            for key in missing:
                print(hint.format(key))
            sys.exit(1)
    except Exception as e:
        log(f"Error validating environment with LiteLLM: {e}", debug)

    return None
40
+
41
class ParseKwargs(argparse.Action):
    """argparse action that collects ``key=value`` tokens into a dict.

    Each token is split on its first ``=``; the part before it is the key
    and the remainder (which may itself contain ``=``) is the value. The
    resulting dict is stored on the namespace under the option's ``dest``.
    Tokens without ``=`` are ignored with a warning on stderr.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        parsed = {}
        for value in values:
            key, separator, val = value.partition('=')
            if separator:
                parsed[key] = val
            else:
                # This runs inside parse_args(), before init_logger() has
                # been called, and log() raises RuntimeError in that state.
                # Warn on stderr so a malformed pair is ignored as intended.
                sys.stderr.write(f"Warning: Ignoring malformed extra prompt: {value}\n")
        setattr(namespace, self.dest, parsed)
50
+
51
def main():
    """Parse CLI arguments and run the fuzzer.

    Steps: parse arguments, initialise the logger under the output
    directory, check that LiteLLM recognises the model string, resolve the
    API key, then call ``run``. Exits with status 1 when the model is not
    recognised or when ``run`` raises.
    """
    parser = argparse.ArgumentParser(description="AI-powered Python fuzzer with AtherisLiteLLM (LiteLLM + Atheris).")

    parser.add_argument("-s", "--src-dir", type=Path, required=True,
                        help="Path to the Python source directory to fuzz.")

    parser.add_argument("-o", "--output-dir", type=Path, required=True,
                        help="Where to store crash logs and generated harnesses.")

    parser.add_argument("-pp", "--prompts-path", type=Path, required=True,
                        help="Path to prompts.yaml config file.")

    # Not required: with required=True the documented default of 'base'
    # could never take effect.
    parser.add_argument("-p", "--prompt", default="base",
                        help="Prompt ID from prompts.yaml to use (default: 'base')")

    parser.add_argument("-m", "--model", type=str, required=True,
                        help="LiteLLM model string (e.g., 'gemini/gemini-1.5-flash', 'openai/gpt-4').")

    parser.add_argument("-k", "--api-key", type=str,
                        help="API key string. If not provided, the tool will check environment variables.")

    parser.add_argument("-e", "--extra-model-prompts", nargs='*', action=ParseKwargs,
                        help="Extra vendor-specific parameters as key=value pairs (e.g., project=my-project).")

    # The first long option, --verbose, determines the attribute name args.verbose.
    parser.add_argument("-d", "--verbose", "-v", "--debug", action="store_true",
                        help="Enable debug/verbose mode.")

    parser.add_argument("-sm", "--smell", action="store_true",
                        help="Enable code smell filtering via Radon (Maintainability Index).")

    args = parser.parse_args()

    # Initialize logger early
    init_logger(args.output_dir)

    # Validate model
    try:
        litellm.get_llm_provider(args.model)
    except Exception as e:
        print(f"Error: '{args.model}' is not a recognized LiteLLM model. {str(e)}")
        sys.exit(1)

    # Resolve API Key. resolve_api_key returns None when the key is already
    # available in the environment, and exits by itself when LiteLLM
    # reports missing variables, so None must not be treated as an error
    # here; doing so made environment-variable keys unusable.
    api_key = resolve_api_key(args.api_key, args.model, args.verbose)
    if not api_key:
        log("No API key given on the command line; relying on environment variables.", args.verbose)

    # Prepare extra parameters
    extra_params = getattr(args, 'extra_model_prompts', {}) or {}
    if extra_params:
        log(f"Using extra model parameters: {list(extra_params.keys())}", args.verbose)

    try:
        run(
            source_dir=args.src_dir,
            output_dir=args.output_dir,
            prompt_id=args.prompt,
            prompt_yaml_path=args.prompts_path,
            model=args.model,
            api=api_key,
            debug=args.verbose,
            smell=args.smell,
            **extra_params
        )
    except Exception as e:
        import traceback
        print(f"ERROR: {e}")
        if args.verbose:
            traceback.print_exc()
        sys.exit(1)
123
+
124
+ if __name__ == "__main__":
125
+ main()
@@ -0,0 +1,29 @@
1
+ [project]
2
+ name = "AtherisLiteLLM"
3
+ version = "0.2.005"
4
+ description = "AI-powered Python fuzzer using LiteLLM and Atheris to automatically generate and execute fuzzing harnesses."
5
+ authors = [
6
+ { name="Mario Marku", email="mariomarku7@gmail.com" }
7
+ ]
8
+ readme = "README.md"
9
+ license = "GPL-3.0-or-later"
10
+ requires-python = ">=3.11,"
11
+
12
+ dependencies = [
13
+ "litellm>=1.0.0",
14
+ "pyyaml>=6.0",
15
+ "atheris>=2.3.0",
16
+ "radon>=5.0.0",
17
+ "requests>=2.31.0",
18
+ ]
19
+
20
+ [project.urls]
21
+ Homepage = "https://github.com/mariobx"
22
+ Repository = "https://github.com/mariobx/AtherisLiteLLM"
23
+
24
+ [project.scripts]
25
+ atherislitellm = "ai_fuzzer.cli:main"
26
+
27
+ [build-system]
28
+ requires = ["setuptools>=61", "wheel"]
29
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+