permi 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
permi-0.1.1/PKG-INFO ADDED
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: permi
3
+ Version: 0.1.1
4
+ Summary: AI-powered vulnerability scanner for Nigerian developers
5
+ Requires-Python: >=3.9
6
+ Requires-Dist: python-dotenv
7
+ Requires-Dist: colorama
8
+ Requires-Dist: click
9
+ Requires-Dist: requests
permi-0.1.1/README.md ADDED
@@ -0,0 +1,87 @@
1
+ # Permi
2
+
3
+ **AI-powered vulnerability scanner for Nigerian developers and global SMBs.**
4
+
5
+ Permi scans your code for security vulnerabilities and uses AI to filter out
6
+ false positives — so you only see findings that actually matter.
7
+
8
+ Built in Nigeria. For Nigeria. Then for the world.
9
+
10
+ ---
11
+
12
+ ## What Permi detects
13
+
14
+ - SQL Injection (string concatenation, f-strings, % formatting)
15
+ - Cross-Site Scripting (innerHTML, document.write, Jinja2 |safe)
16
+ - Hardcoded secrets (passwords, API keys, AWS keys, Paystack/Flutterwave keys)
17
+ - Insecure practices (eval/exec, pickle.loads, SSL verification disabled, debug mode)
18
+ - USSD vulnerabilities (Nigerian-specific — unvalidated sessionId, phoneNumber, serviceCode)
19
+
20
+ ---
21
+
22
+ ## Installation
23
+ ```bash
24
+ pip install permi
25
+ ```
26
+
27
+ Requires Python 3.9+
28
+
29
+ ---
30
+
31
+ ## Usage
32
+
33
+ **Scan a local project:**
34
+ ```bash
35
+ permi scan --path ./myapp
36
+ ```
37
+
38
+ **Scan a GitHub repository:**
39
+ ```bash
40
+ permi scan --path https://github.com/user/repo
41
+ ```
42
+
43
+ **Show only high severity findings:**
44
+ ```bash
45
+ permi scan --path ./myapp --severity high
46
+ ```
47
+
48
+ **Output as JSON (for CI/CD pipelines):**
49
+ ```bash
50
+ permi scan --path ./myapp --output json
51
+ ```
52
+
53
+ **Skip AI filter (offline mode):**
54
+ ```bash
55
+ permi scan --path ./myapp --offline
56
+ ```
57
+
58
+ ---
59
+
60
+ ## Setup
61
+
62
+ Permi uses [OpenRouter](https://openrouter.ai) for AI-powered false positive
63
+ filtering. Create a free account, generate an API key, and add it to a `.env`
64
+ file in your project root:
65
+ OPENROUTER_API_KEY=sk-or-your-key-here
66
+
67
+ No API key? Use `--offline` mode. All findings are shown unfiltered.
68
+
69
+ ---
70
+
71
+ ## Example output
72
+ [1] [HIGH] SQL001 SQL Injection — string concatenation
73
+ File : app/auth.py
74
+ Line : 42
75
+ Code : cursor.execute("SELECT * FROM users WHERE name = " + username)
76
+ Why : Raw string concatenation used to build a SQL query.
77
+ AI : REAL User input is directly embedded into a SQL query with no sanitisation.
78
+
79
+ ---
80
+
81
+ ## Built by
82
+
83
+ Peter N. D. — Cybersecurity student, University of Jos, Nigeria.
84
+
85
+ ---
86
+
87
+ *Permi is in active development. Feedback and contributions welcome.*
File without changes
@@ -0,0 +1,71 @@
1
+ # ai_filter/filter.py
2
+ # Takes a list of raw findings, runs each through the LLM,
3
+ # saves the verdict back to the database, and returns only
4
+ # the findings the LLM marked as REAL.
5
+
6
+ from ai_filter.llm_client import analyse
7
+ from db.database import get_connection
8
+
9
+
10
+ def _update_finding_verdict(conn, finding_id: int, verdict: str, explanation: str) -> None:
11
+ """Write the AI verdict back to the findings table."""
12
+ conn.execute("""
13
+ UPDATE findings
14
+ SET ai_verdict = ?,
15
+ ai_explanation = ?
16
+ WHERE id = ?
17
+ """, (verdict, explanation, finding_id))
18
+ conn.commit()
19
+
20
+
21
def run_filter(findings: list[dict], offline: bool = False) -> list[dict]:
    """
    Run the AI filter over a list of findings.

    Args:
        findings: Raw findings list from the scanner.
        offline: If True, skip all API calls and return everything as REAL.

    Returns:
        Only the findings the LLM (or offline fallback) marked as REAL.
    """
    if not findings:
        return []

    if offline:
        print("[Permi] Offline mode — AI filter skipped, showing all findings.\n")
        return findings

    print(f"[Permi] Running AI filter on {len(findings)} finding(s)...\n")

    conn = get_connection()
    real = []
    fp_count = 0

    # FIX: close the DB connection even when analyse() or a DB write raises
    # mid-loop — previously an exception here leaked the open connection.
    try:
        for i, finding in enumerate(findings, start=1):
            print(f" [{i}/{len(findings)}] {finding['rule_id']} "
                  f"line {finding['line_number']} — ", end="", flush=True)

            # Send to LLM — analyse() mutates and returns the same dict.
            result = analyse(finding)

            verdict = result["ai_verdict"]
            explanation = result["ai_explanation"]

            print(f"{verdict} {explanation}")

            # Save verdict back to DB only when the finding carries a DB id.
            if "id" in finding:
                _update_finding_verdict(conn, finding["id"], verdict, explanation)

            if verdict == "REAL":
                real.append(result)
            else:
                fp_count += 1
    finally:
        conn.close()

    print(f"\n[Permi] Filter complete — "
          f"{len(real)} real | {fp_count} false positive(s) removed\n")

    return real
@@ -0,0 +1,123 @@
1
+ # ai_filter/llm_client.py
2
+ # The only module in Permi that makes network requests.
3
+ # Sends a finding to OpenRouter and returns a verdict:
4
+ # REAL — this is a genuine vulnerability
5
+ # FP — this is a false positive, ignore it
6
+
7
+ import os
8
+ import json
9
+ import requests
10
+ from dotenv import load_dotenv
11
+
12
+ load_dotenv()
13
+
14
+ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
15
+ OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
16
+
17
+ # We use DeepSeek V3 — fast, cheap, and very good at code analysis.
18
+ # You can swap this for any model on openrouter.ai/models
19
+ MODEL = "deepseek/deepseek-chat"
20
+
21
+ # How many seconds to wait for the API before giving up
22
+ TIMEOUT = 30
23
+
24
+
25
def _build_prompt(finding: dict) -> str:
    """
    Build the prompt we send to the LLM for a single finding.

    The prompt is structured so the model returns a predictable format
    we can parse reliably: verdict word (REAL/FP) on line one, a short
    explanation on line two.

    Expects the finding dict to carry rule_id, rule_name, severity,
    file, line_number, line_content and description — a missing key
    raises KeyError here.
    """
    # The prompt body is left-aligned on purpose: it lives inside a
    # triple-quoted f-string, so indentation would be sent to the model.
    return f"""You are a senior application security engineer reviewing automated scan results.

A static analysis tool flagged the following finding. Your job is to decide if this is a REAL vulnerability or a FALSE POSITIVE (FP).

--- FINDING ---
Rule : {finding['rule_id']} — {finding['rule_name']}
Severity : {finding['severity']}
File : {finding['file']}
Line : {finding['line_number']}
Code : {finding['line_content']}
Detail : {finding['description']}
---------------

Instructions:
- Answer with exactly one word on the first line: REAL or FP
- On the second line, write one short sentence (max 20 words) explaining your verdict
- Do not write anything else

Example response:
REAL
The string concatenation directly embeds user input into a SQL query with no sanitisation.

Your verdict:"""
+
55
+
56
def analyse(finding: dict) -> dict:
    """
    Send one finding to the LLM and return the same finding dict
    updated in place with ``ai_verdict`` ("REAL" or "FP") and
    ``ai_explanation``.

    Fail-open design: if the API call fails for any reason (no key,
    network error, bad response), we default to REAL so nothing gets
    silently dropped.
    """
    # If no API key is configured, skip the filter entirely
    if not OPENROUTER_API_KEY:
        finding["ai_verdict"] = "REAL"
        finding["ai_explanation"] = "No API key — AI filter skipped."
        return finding

    prompt = _build_prompt(finding)

    try:
        response = requests.post(
            OPENROUTER_URL,
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
                "HTTP-Referer": "https://github.com/permi",  # required by OpenRouter
                "X-Title": "Permi Security Scanner",
            },
            json={
                "model": MODEL,
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0,  # we want deterministic, not creative
                "max_tokens": 60,  # verdict + one sentence is plenty
            },
            timeout=TIMEOUT,
        )
        response.raise_for_status()

    except requests.exceptions.Timeout:
        finding["ai_verdict"] = "REAL"
        finding["ai_explanation"] = "API timeout — defaulting to REAL."
        return finding

    except requests.exceptions.RequestException as e:
        finding["ai_verdict"] = "REAL"
        finding["ai_explanation"] = f"API error — defaulting to REAL. ({e})"
        return finding

    # ── Parse the response ────────────────────────────────────────────────────
    try:
        content = response.json()["choices"][0]["message"]["content"].strip()
        lines = content.splitlines()

        verdict = lines[0].strip().upper()
        explanation = lines[1].strip() if len(lines) > 1 else "No explanation provided."

        # Normalise — if the model returns anything unexpected, treat as REAL.
        # BUG FIX: build the message from the *raw* verdict before overwriting
        # it; previously verdict was set to "REAL" first, so the explanation
        # always read "Unexpected verdict 'REAL'" regardless of the model output.
        if verdict not in ("REAL", "FP"):
            explanation = f"Unexpected verdict '{verdict}' — defaulting to REAL."
            verdict = "REAL"

        finding["ai_verdict"] = verdict
        finding["ai_explanation"] = explanation

    except (KeyError, IndexError, json.JSONDecodeError) as e:
        finding["ai_verdict"] = "REAL"
        finding["ai_explanation"] = f"Parse error — defaulting to REAL. ({e})"

    return finding
File without changes
@@ -0,0 +1,123 @@
1
+ # cli/formatter.py
2
+ # Handles all terminal output formatting.
3
+ # Keeps colour logic completely separate from scan logic.
4
+
5
+ from colorama import init, Fore, Style
6
+
7
+ # Initialise colorama — required on Windows for ANSI colours to work
8
+ init(autoreset=True)
9
+
10
+ # Severity colours
11
+ SEVERITY_COLOUR = {
12
+ "high": Fore.RED,
13
+ "medium": Fore.YELLOW,
14
+ "low": Fore.CYAN,
15
+ }
16
+
17
+ VERDICT_COLOUR = {
18
+ "REAL": Fore.RED,
19
+ "FP": Fore.GREEN,
20
+ }
21
+
22
+
23
def _divider(char="─", width=72, colour=Fore.WHITE):
    """Print a horizontal rule of *width* copies of *char* in *colour*."""
    rule = char * width
    print(f"{colour}{rule}{Style.RESET_ALL}")
25
+
26
+
27
def print_banner():
    """Print the Permi header banner."""
    banner = (
        "┌─────────────────────────────────────────┐",
        "│ Permi — Security Scanner │",
        "│ Built in Nigeria. For the World. │",
        "└─────────────────────────────────────────┘",
    )
    print()
    for row in banner:
        print(Fore.CYAN + Style.BRIGHT + row)
    print()
35
+
36
+
37
def print_finding(finding: dict, index: int) -> None:
    """
    Print a single finding as a formatted block.

    Each finding gets a numbered header, severity badge,
    file location, code snippet, AI verdict, and explanation.
    Every key is read with .get() and a safe default, so a
    partially-populated finding never crashes the printer.

    Args:
        finding: One finding dict (see scanner/engine.py for the shape).
        index: 1-based position shown in the header, e.g. "[3]".
    """
    # Unknown severities fall back to white so the colour lookup can't fail.
    sev = finding.get("severity", "low")
    colour = SEVERITY_COLOUR.get(sev, Fore.WHITE)

    _divider()

    # ── Header line ───────────────────────────────────────────────────────────
    print(
        Fore.WHITE + Style.BRIGHT + f" [{index}] " +
        colour + Style.BRIGHT + f"[{sev.upper()}] " +
        Fore.WHITE + Style.BRIGHT + finding.get("rule_id", "") +
        Style.RESET_ALL + " " +
        finding.get("rule_name", "")
    )

    print()

    # ── File and line ─────────────────────────────────────────────────────────
    print(
        Fore.WHITE + " File : " + Style.RESET_ALL +
        finding.get("file", "unknown")
    )
    print(
        Fore.WHITE + " Line : " + Style.RESET_ALL +
        str(finding.get("line_number", "?"))
    )

    # ── Code snippet ──────────────────────────────────────────────────────────
    print(
        Fore.WHITE + " Code : " + Style.RESET_ALL +
        Fore.YELLOW + finding.get("line_content", "") + Style.RESET_ALL
    )

    # ── Description ───────────────────────────────────────────────────────────
    print(
        Fore.WHITE + " Why : " + Style.RESET_ALL +
        finding.get("description", "")
    )

    # ── AI verdict ────────────────────────────────────────────────────────────
    # Only shown when a verdict is present (it is None until the AI filter runs).
    verdict = finding.get("ai_verdict")
    if verdict:
        v_colour = VERDICT_COLOUR.get(verdict, Fore.WHITE)
        print(
            Fore.WHITE + " AI : " +
            v_colour + Style.BRIGHT + verdict + Style.RESET_ALL +
            " " + finding.get("ai_explanation", "")
        )

    print()
92
+
93
+
94
def print_results_human(findings: list[dict]) -> None:
    """Print all findings in human-readable coloured format."""
    if not findings:
        print(Fore.GREEN + Style.BRIGHT + "\n ✅ No real vulnerabilities found.\n")
        return

    index = 1
    for finding in findings:
        print_finding(finding, index)
        index += 1

    _divider()
104
+
105
+
106
def print_summary(findings: list[dict], raw_count: int) -> None:
    """
    Print the final summary block.

    Args:
        findings: The findings that were displayed (post-filtering).
        raw_count: Finding count before AI filtering; the difference is
            reported as the number of false positives removed.
    """
    # FIX: read severity with the same .get("severity", "low") fallback as
    # print_finding, so a finding missing the key can no longer crash the
    # summary (previously f["severity"] raised KeyError).
    severities = [f.get("severity", "low") for f in findings]
    high = severities.count("high")
    medium = severities.count("medium")
    low = severities.count("low")
    fp = raw_count - len(findings)

    print()
    _divider("═")
    print(Fore.WHITE + Style.BRIGHT + " SCAN SUMMARY")
    _divider("═")
    print(f" Total findings : {Style.BRIGHT}{len(findings)}{Style.RESET_ALL} "
          f"(filtered {fp} false positive(s))")
    print(f" {Fore.RED}High : {high}{Style.RESET_ALL}")
    print(f" {Fore.YELLOW}Medium : {medium}{Style.RESET_ALL}")
    print(f" {Fore.CYAN}Low : {low}{Style.RESET_ALL}")
    _divider("═")
    print()
@@ -0,0 +1,133 @@
1
+ # cli/main.py
2
+ import json
3
+ import sys
4
+ import click
5
+ from colorama import Fore, Style, init
6
+
7
+ init(autoreset=True)
8
+
9
+ from cli.formatter import print_banner, print_results_human, print_summary
10
+ from scanner.scan import scan
11
+
12
+
13
@click.group()
def cli():
    # NOTE: click renders this docstring as the top-level --help text, so its
    # wording is user-facing output — treat it like a runtime string.
    """
    Permi — AI-powered vulnerability scanner.

    Scans code for vulnerabilities and uses AI to filter out false
    positives so you only see findings that actually matter.

    Built in Nigeria. For Nigeria. Then for the world.
    """
    pass
24
+
25
+
26
@cli.command()
@click.option("--path", "-p", required=True,
              help="Local directory path or GitHub URL to scan.")
@click.option("--output", "-o",
              type=click.Choice(["human", "json"], case_sensitive=False),
              default="human", show_default=True,
              help="Output format.")
@click.option("--severity", "-s",
              type=click.Choice(["high", "medium", "low", "all"], case_sensitive=False),
              default="all", show_default=True,
              help="Minimum severity level to display.")
@click.option("--offline", is_flag=True, default=False,
              help="Skip AI filter and show all raw findings.")
@click.option("--project", default=None,
              help="Project name to store in the database.")
def scan_cmd(path, output, severity, offline, project):
    # NOTE: this docstring is rendered verbatim by click as `permi scan --help`
    # (the \b markers disable click's paragraph re-wrapping), so it is
    # user-facing output — keep its wording stable.
    """
    Scan a local directory or GitHub repo for vulnerabilities.

    Permi detects SQL injection, XSS, hardcoded secrets, insecure
    practices, and USSD vulnerabilities. An AI filter then removes
    false positives so only real issues are shown.

    \b
    EXAMPLES

    Scan a local project:
    permi scan --path ./myapp

    Scan a GitHub repo:
    permi scan --path https://github.com/user/repo

    High severity only:
    permi scan --path ./myapp --severity high

    Export as JSON for CI/CD:
    permi scan --path ./myapp --output json

    Skip AI filter (no API key needed):
    permi scan --path ./myapp --offline

    Name your project in the database:
    permi scan --path ./myapp --project my-api

    \b
    SEVERITY LEVELS

    high — SQL injection, hardcoded secrets, eval(), XSS, SSL disabled
    medium — debug mode, USSD input issues
    low — informational findings
    all — everything (default)

    \b
    EXIT CODES

    0 No high severity findings
    1 At least one high severity finding (useful for CI/CD pipelines)
    """
    # Banner is suppressed for JSON output so stdout stays machine-parseable.
    if output == "human":
        print_banner()

    try:
        # Run the full scan pipeline
        findings, raw_count = scan(
            path=path,
            project_name=project,
            offline=offline,
        )

        # ── Severity filter ───────────────────────────────────────────────────
        # Lower number = more severe; "--severity medium" keeps high + medium.
        order = {"high": 1, "medium": 2, "low": 3}

        if severity != "all":
            level = order[severity]
            findings = [
                f for f in findings
                if isinstance(f, dict) and order.get(f.get("severity", "low"), 99) <= level
            ]

        # ── Output ────────────────────────────────────────────────────────────
        if output == "json":
            # Drop None-valued keys (e.g. unset AI fields) so the JSON stays
            # compact for CI consumers.
            clean = [
                {k: v for k, v in f.items() if v is not None}
                for f in findings
                if isinstance(f, dict)
            ]
            click.echo(json.dumps(clean, indent=2))

        else:
            print_results_human(findings)
            print_summary(findings, raw_count=raw_count)

        # Exit code 1 if any high severity findings — used by GitHub Action
        if any(f.get("severity") == "high" for f in findings if isinstance(f, dict)):
            sys.exit(1)

    except FileNotFoundError as e:
        click.echo(Fore.RED + f"\n[Error] {e}\n")
        sys.exit(1)
    except NotADirectoryError as e:
        click.echo(Fore.RED + f"\n[Error] {e}\n")
        sys.exit(1)
    except Exception as e:
        # Catch-all boundary for the CLI: report and exit non-zero rather
        # than dumping a traceback at the user.
        click.echo(Fore.RED + f"\n[Unexpected error] {e}\n")
        sys.exit(1)
131
+
132
+
133
+ cli.add_command(scan_cmd, name="scan")
File without changes
@@ -0,0 +1,95 @@
1
+ # db/database.py
2
+ # Handles all database creation and connection logic.
3
+ # The database file (permi.db) is created automatically in the
4
+ # project root the first time this module is imported.
5
+
6
+ import sqlite3
7
+ from pathlib import Path
8
+
9
+ # The database lives in the project root folder
10
+ DB_PATH = Path(__file__).parent.parent / "permi.db"
11
+
12
+
13
def get_connection() -> sqlite3.Connection:
    """
    Open and return a connection to the local SQLite database at DB_PATH.

    row_factory is set to sqlite3.Row so columns can be read by name
    (row['severity']) as well as by index, and foreign-key enforcement
    is switched on for this connection.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    # SQLite leaves foreign keys OFF per-connection by default; enable them
    # so the schema's REFERENCES clauses are actually enforced.
    connection.execute("PRAGMA foreign_keys = ON")
    return connection
23
+
24
+
25
def init_db() -> None:
    """
    Create all tables if they don't already exist.

    Safe to call every time the app starts — CREATE TABLE IF NOT EXISTS
    won't overwrite existing data. Prints the database location when done.
    """
    conn = get_connection()

    # "with conn" commits the whole DDL batch on success and rolls back
    # if any statement raises (sqlite3 connection context-manager semantics).
    with conn:

        # ── projects ──────────────────────────────────────────────────────────
        # One row per codebase you want to scan repeatedly.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS projects (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                path TEXT NOT NULL,
                created_at TEXT NOT NULL DEFAULT (datetime('now')),
                last_scan TEXT
            )
        """)

        # ── scan_results ──────────────────────────────────────────────────────
        # One row per scan run. Links back to the project that was scanned.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS scan_results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                project_id INTEGER NOT NULL REFERENCES projects(id),
                started_at TEXT NOT NULL DEFAULT (datetime('now')),
                finished_at TEXT,
                total_files INTEGER DEFAULT 0,
                total_findings INTEGER DEFAULT 0,
                status TEXT DEFAULT 'running'
            )
        """)

        # ── findings ──────────────────────────────────────────────────────────
        # One row per vulnerability found in a scan.
        # ai_verdict and ai_explanation are NULL until the AI filter fills them in.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS findings (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                scan_id INTEGER NOT NULL REFERENCES scan_results(id),
                rule_id TEXT NOT NULL,
                rule_name TEXT NOT NULL,
                severity TEXT NOT NULL,
                description TEXT,
                file TEXT NOT NULL,
                line_number INTEGER NOT NULL,
                line_content TEXT,
                ai_verdict TEXT,
                ai_explanation TEXT,
                fix_suggestion TEXT,
                created_at TEXT NOT NULL DEFAULT (datetime('now'))
            )
        """)

        # ── feedback ──────────────────────────────────────────────────────────
        # Stores manual corrections from the user.
        # 'confirmed' = real vulnerability, 'false_positive' = not a real issue.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS feedback (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                finding_id INTEGER NOT NULL REFERENCES findings(id),
                verdict TEXT NOT NULL CHECK(verdict IN ('confirmed', 'false_positive')),
                note TEXT,
                created_at TEXT NOT NULL DEFAULT (datetime('now'))
            )
        """)

    conn.close()
    print(f"Database ready: {DB_PATH}")
@@ -0,0 +1,112 @@
1
+ # db/queries.py
2
+ # All database read/write functions used by the scanner.
3
+ # Every function takes a connection as its first argument
4
+ # so the caller controls when to open and close it.
5
+
6
+ from datetime import datetime
7
+
8
+
9
def create_project(conn, name: str, path: str) -> int:
    """
    Return the ID of the project registered at *path*.

    If a project with the same path already exists its ID is returned;
    otherwise a new row is inserted and that row's ID is returned.
    """
    row = conn.execute(
        "SELECT id FROM projects WHERE path = ?", (path,)
    ).fetchone()

    if row is not None:
        return row["id"]

    inserted = conn.execute(
        "INSERT INTO projects (name, path) VALUES (?, ?)",
        (name, path)
    )
    conn.commit()
    return inserted.lastrowid
27
+
28
+
29
def start_scan(conn, project_id: int) -> int:
    """Insert a scan_results row with status 'running' and return its ID."""
    cur = conn.execute(
        "INSERT INTO scan_results (project_id, status) VALUES (?, 'running')",
        (project_id,),
    )
    conn.commit()
    return cur.lastrowid
40
+
41
+
42
def finish_scan(conn, scan_id: int, total_files: int, total_findings: int) -> None:
    """Mark *scan_id* as completed and record its final counts."""
    sql = """
        UPDATE scan_results
        SET status = 'completed',
            finished_at = datetime('now'),
            total_files = ?,
            total_findings = ?
        WHERE id = ?
    """
    conn.execute(sql, (total_files, total_findings, scan_id))
    conn.commit()
+
57
+
58
def save_finding(conn, scan_id: int, finding: dict) -> int:
    """
    Insert one finding row for *scan_id* and return its new ID.

    The AI fields are optional — absent keys are stored as NULL.
    """
    row = (
        scan_id,
        finding["rule_id"],
        finding["rule_name"],
        finding["severity"],
        finding["description"],
        finding["file"],
        finding["line_number"],
        finding["line_content"],
        finding.get("ai_verdict"),
        finding.get("ai_explanation"),
    )
    cur = conn.execute("""
        INSERT INTO findings (
            scan_id, rule_id, rule_name, severity, description,
            file, line_number, line_content, ai_verdict, ai_explanation
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """, row)
    conn.commit()
    return cur.lastrowid
82
+
83
+
84
def get_findings_for_scan(conn, scan_id: int) -> list:
    """
    Return all findings for *scan_id* as plain dicts,
    ordered high → medium → low, then by file and line number.
    """
    sql = """
        SELECT * FROM findings
        WHERE scan_id = ?
        ORDER BY
            CASE severity
                WHEN 'high' THEN 1
                WHEN 'medium' THEN 2
                WHEN 'low' THEN 3
                ELSE 4
            END,
            file, line_number
    """
    cursor = conn.execute(sql, (scan_id,))
    return [dict(record) for record in cursor.fetchall()]
102
+
103
+
104
def update_last_scan(conn, project_id: int) -> None:
    """Stamp *project_id*'s last_scan column with the current time."""
    sql = "UPDATE projects SET last_scan = datetime('now') WHERE id = ?"
    conn.execute(sql, (project_id,))
    conn.commit()
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: permi
3
+ Version: 0.1.1
4
+ Summary: AI-powered vulnerability scanner for Nigerian developers
5
+ Requires-Python: >=3.9
6
+ Requires-Dist: python-dotenv
7
+ Requires-Dist: colorama
8
+ Requires-Dist: click
9
+ Requires-Dist: requests
@@ -0,0 +1,21 @@
1
+ README.md
2
+ pyproject.toml
3
+ ai_filter/__init__.py
4
+ ai_filter/filter.py
5
+ ai_filter/llm_client.py
6
+ cli/__init__.py
7
+ cli/formatter.py
8
+ cli/main.py
9
+ db/__init__.py
10
+ db/database.py
11
+ db/queries.py
12
+ permi.egg-info/PKG-INFO
13
+ permi.egg-info/SOURCES.txt
14
+ permi.egg-info/dependency_links.txt
15
+ permi.egg-info/entry_points.txt
16
+ permi.egg-info/requires.txt
17
+ permi.egg-info/top_level.txt
18
+ scanner/__init__.py
19
+ scanner/engine.py
20
+ scanner/rules.py
21
+ scanner/scan.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ permi = cli.main:cli
@@ -0,0 +1,4 @@
1
+ python-dotenv
2
+ colorama
3
+ click
4
+ requests
@@ -0,0 +1,4 @@
1
+ ai_filter
2
+ cli
3
+ db
4
+ scanner
@@ -0,0 +1,21 @@
1
+ [build-system]
2
+ requires = ["setuptools>=42"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "permi"
7
+ version = "0.1.1"
8
+ description = "AI-powered vulnerability scanner for Nigerian developers"
9
+ requires-python = ">=3.9"
10
+ dependencies = [
11
+ "python-dotenv",
12
+ "colorama",
13
+ "click",
14
+ "requests",
15
+ ]
16
+
17
+ [project.scripts]
18
+ permi = "cli.main:cli"
19
+
20
+ [tool.setuptools.packages.find]
21
+ include = ["scanner*", "ai_filter*", "cli*", "db*"]
File without changes
@@ -0,0 +1,68 @@
1
+ # scanner/engine.py
2
+ # The engine takes a file path, reads it line by line,
3
+ # and tests every line against every rule in RULES.
4
+ # Returns a list of finding dictionaries.
5
+
6
+ from pathlib import Path
7
+ from .rules import RULES, SCANNABLE_EXTENSIONS, SKIP_DIRS
8
+
9
+
10
def scan_file(file_path: Path) -> list[dict]:
    """
    Scan a single file against all rules.
    Returns a list of findings (may be empty).
    """
    findings: list[dict] = []

    # FIX: pathlib gives dotfiles an empty suffix — Path(".env").suffix is "" —
    # so the ".env" entry in SCANNABLE_EXTENSIONS never matched and .env files
    # were silently skipped. Fall back to the full file name for dotfiles.
    key = file_path.suffix.lower() or file_path.name.lower()
    if key not in SCANNABLE_EXTENSIONS:
        return findings

    try:
        content = file_path.read_text(encoding="utf-8", errors="ignore")
    except Exception:
        # If the file can't be read for any reason (permissions, etc.),
        # skip it silently.
        return findings

    for line_number, line in enumerate(content.splitlines(), start=1):
        for rule in RULES:
            if rule["pattern"].search(line):
                findings.append({
                    "rule_id": rule["id"],
                    "rule_name": rule["name"],
                    "severity": rule["severity"],
                    "description": rule["description"],
                    "file": str(file_path),
                    "line_number": line_number,
                    "line_content": line.strip(),
                    "ai_verdict": None,       # filled in by the AI filter
                    "ai_explanation": None,   # filled in by the AI filter
                })

    return findings
45
+
46
+
47
def scan_directory(directory: Path) -> list[dict]:
    """
    Recursively walk *directory*, scan every eligible file,
    and return the combined findings from all of them.
    """
    root = Path(directory).resolve()
    collected: list[dict] = []

    for candidate in root.rglob("*"):
        # Skip anything that lives under a blocked directory
        # (node_modules, .git, build artifacts, ...).
        if any(part in SKIP_DIRS for part in candidate.parts):
            continue

        # Only files are scanned; rglob also yields directories.
        if candidate.is_file():
            collected.extend(scan_file(candidate))

    return collected
@@ -0,0 +1,268 @@
1
+ # scanner/rules.py
2
+ # Each rule has: id, name, pattern (regex), severity, description
3
+ # Patterns are intentionally broad — the AI filter in Phase 1 will
4
+ # remove false positives. Better to over-catch than under-catch here.
5
+
6
+ import re
7
+
8
+ RULES = [
9
+
10
+ # ── SQL INJECTION ─────────────────────────────────────────────────────────
11
+
12
+ {
13
+ "id": "SQL001",
14
+ "name": "SQL Injection — string concatenation",
15
+ "severity": "high",
16
+ "description": (
17
+ "Raw string concatenation used to build a SQL query. "
18
+ "An attacker can inject arbitrary SQL through user input."
19
+ ),
20
+ "pattern": re.compile(
21
+ r'(execute|query|cursor\.execute)\s*\(\s*["\'].*\+',
22
+ re.IGNORECASE
23
+ ),
24
+ },
25
+ {
26
+ "id": "SQL002",
27
+ "name": "SQL Injection — f-string or format() in query",
28
+ "severity": "high",
29
+ "description": (
30
+ "An f-string or .format() call is used inside a SQL query. "
31
+ "User-controlled variables embedded this way are injectable."
32
+ ),
33
+ "pattern": re.compile(
34
+ r'(execute|query|cursor\.execute)\s*\(\s*f["\']',
35
+ re.IGNORECASE
36
+ ),
37
+ },
38
+ {
39
+ "id": "SQL003",
40
+ "name": "SQL Injection — % formatting in query",
41
+ "severity": "high",
42
+ "description": (
43
+ "% string formatting is used to build a SQL query. "
44
+ "This is a classic injection vector."
45
+ ),
46
+ "pattern": re.compile(
47
+ r'(execute|query)\s*\(\s*["\'].*%\s*[(\w]',
48
+ re.IGNORECASE
49
+ ),
50
+ },
51
+
52
+ # ── CROSS-SITE SCRIPTING (XSS) ────────────────────────────────────────────
53
+
54
+ {
55
+ "id": "XSS001",
56
+ "name": "XSS — innerHTML assignment",
57
+ "severity": "high",
58
+ "description": (
59
+ "innerHTML is set dynamically. If any part of the value comes "
60
+ "from user input, this is a direct XSS vector."
61
+ ),
62
+ "pattern": re.compile(
63
+ r'\.innerHTML\s*=',
64
+ re.IGNORECASE
65
+ ),
66
+ },
67
+ {
68
+ "id": "XSS002",
69
+ "name": "XSS — document.write with variable",
70
+ "severity": "high",
71
+ "description": (
72
+ "document.write() is called with a variable. "
73
+ "Writing user-controlled content to the page enables XSS."
74
+ ),
75
+ "pattern": re.compile(
76
+ r'document\.write\s*\(\s*\w',
77
+ re.IGNORECASE
78
+ ),
79
+ },
80
+ {
81
+ "id": "XSS003",
82
+ "name": "XSS — Flask render without escape (Jinja2 | safe filter)",
83
+ "severity": "medium",
84
+ "description": (
85
+ "The Jinja2 |safe filter disables auto-escaping. "
86
+ "If the variable contains user input, this enables XSS."
87
+ ),
88
+ "pattern": re.compile(
89
+ r'\|\s*safe',
90
+ re.IGNORECASE
91
+ ),
92
+ },
93
+
94
+ # ── HARDCODED SECRETS ─────────────────────────────────────────────────────
95
+
96
+ {
97
+ "id": "SEC001",
98
+ "name": "Hardcoded secret — generic password or key assignment",
99
+ "severity": "high",
100
+ "description": (
101
+ "A variable named password, secret, api_key, or token is "
102
+ "assigned a string literal. Hardcoded credentials are a "
103
+ "critical exposure risk if the code is shared or pushed."
104
+ ),
105
+ "pattern": re.compile(
106
+ r'(password|passwd|secret|api_key|apikey|token|auth_key)'
107
+ r'\s*=\s*["\'][^"\']{4,}["\']',
108
+ re.IGNORECASE
109
+ ),
110
+ },
111
+ {
112
+ "id": "SEC002",
113
+ "name": "Hardcoded secret — AWS key pattern",
114
+ "severity": "high",
115
+ "description": (
116
+ "A string matching the format of an AWS Access Key ID was found. "
117
+ "Exposed AWS keys can lead to full account compromise."
118
+ ),
119
+ "pattern": re.compile(
120
+ r'AKIA[0-9A-Z]{16}',
121
+ ),
122
+ },
123
+ {
124
+ "id": "SEC003",
125
+ "name": "Hardcoded secret — private key header",
126
+ "severity": "high",
127
+ "description": (
128
+ "A PEM private key header was found in the source code. "
129
+ "Private keys must never be committed to a repository."
130
+ ),
131
+ "pattern": re.compile(
132
+ r'-----BEGIN (RSA |EC |OPENSSH )?PRIVATE KEY-----',
133
+ ),
134
+ },
135
+ {
136
+ "id": "SEC004",
137
+ "name": "Hardcoded secret — Paystack or Flutterwave secret key",
138
+ "severity": "high",
139
+ "description": (
140
+ "A Paystack or Flutterwave secret key pattern was found. "
141
+ "Exposed payment gateway keys allow fraudulent transactions."
142
+ ),
143
+ "pattern": re.compile(
144
+ r'(sk_live_|sk_test_)[a-zA-Z0-9]{20,}',
145
+ ),
146
+ },
147
+
148
+ # ── USSD / NIGERIAN-SPECIFIC ──────────────────────────────────────────────
149
+
150
+ {
151
+ "id": "USSD001",
152
+ "name": "USSD — missing input validation on sessionId or phoneNumber",
153
+ "severity": "medium",
154
+ "description": (
155
+ "A USSD handler accesses sessionId or phoneNumber from the "
156
+ "request without any visible validation. Unvalidated USSD "
157
+ "inputs can be manipulated to hijack sessions or spoof callers."
158
+ ),
159
+ "pattern": re.compile(
160
+ r'request\.(get|form|json|args)\s*[\.\[]\s*'
161
+ r'["\']?(sessionId|phoneNumber|serviceCode)["\']?',
162
+ re.IGNORECASE
163
+ ),
164
+ },
165
+ {
166
+ "id": "USSD002",
167
+ "name": "USSD — wildcard or open-ended serviceCode handling",
168
+ "severity": "medium",
169
+ "description": (
170
+ "A USSD serviceCode is compared to a wildcard or catch-all "
171
+ "value. This may allow unintended service codes to trigger "
172
+ "application logic."
173
+ ),
174
+ "pattern": re.compile(
175
+ r'serviceCode\s*[=!]=\s*["\'][*\?]["\']',
176
+ re.IGNORECASE
177
+ ),
178
+ },
179
+
180
+ # ── INSECURE PRACTICES ────────────────────────────────────────────────────
181
+
182
+ {
183
+ "id": "INS001",
184
+ "name": "Insecure — debug mode enabled in production",
185
+ "severity": "medium",
186
+ "description": (
187
+ "debug=True is set, likely in a Flask or Django app. "
188
+ "Debug mode exposes stack traces and an interactive console "
189
+ "to anyone who triggers an error."
190
+ ),
191
+ "pattern": re.compile(
192
+ r'debug\s*=\s*True',
193
+ re.IGNORECASE
194
+ ),
195
+ },
196
+ {
197
+ "id": "INS002",
198
+ "name": "Insecure — SSL/TLS verification disabled",
199
+ "severity": "high",
200
+ "description": (
201
+ "verify=False is passed to a requests call. "
202
+ "This disables certificate validation and exposes the app "
203
+ "to man-in-the-middle attacks."
204
+ ),
205
+ "pattern": re.compile(
206
+ r'requests\.\w+\(.*verify\s*=\s*False',
207
+ re.IGNORECASE
208
+ ),
209
+ },
210
+ {
211
+ "id": "INS003",
212
+ "name": "Insecure — use of eval() on external input",
213
+ "severity": "high",
214
+ "description": (
215
+ "eval() is called with a variable argument. If the variable "
216
+ "contains user-supplied data, this allows arbitrary code execution."
217
+ ),
218
+ "pattern": re.compile(
219
+ r'eval\s*\(\s*\w',
220
+ re.IGNORECASE
221
+ ),
222
+ },
223
+ {
224
+ "id": "INS004",
225
+ "name": "Insecure — use of exec() on external input",
226
+ "severity": "high",
227
+ "description": (
228
+ "exec() is called with a variable argument — same risk as eval()."
229
+ ),
230
+ "pattern": re.compile(
231
+ r'exec\s*\(\s*\w',
232
+ re.IGNORECASE
233
+ ),
234
+ },
235
+ {
236
+ "id": "INS005",
237
+ "name": "Insecure — pickle.loads() on untrusted data",
238
+ "severity": "high",
239
+ "description": (
240
+ "pickle.loads() deserializes data. If the data comes from "
241
+ "an untrusted source (network, user upload), this allows "
242
+ "arbitrary code execution."
243
+ ),
244
+ "pattern": re.compile(
245
+ r'pickle\.loads\s*\(',
246
+ re.IGNORECASE
247
+ ),
248
+ },
249
+ ]
250
+
251
# ── FILE EXTENSIONS TO SCAN ───────────────────────────────────────────────────
# Permi only reads these file types. Binary files, images, and lockfiles
# are skipped automatically.
# NOTE(review): pathlib gives ".env" an *empty* suffix (it is a dotfile), so
# the ".env" entry here only takes effect if the engine also checks the full
# file name — confirm scan_file in scanner/engine.py handles that case.

SCANNABLE_EXTENSIONS = {
    ".py", ".js", ".ts", ".jsx", ".tsx",
    ".html", ".htm", ".php", ".java",
    ".env", ".yml", ".yaml", ".json",
}

# ── DIRECTORIES TO SKIP ───────────────────────────────────────────────────────
# These folders are never scanned — they contain dependencies or build
# artifacts, not your actual code.

SKIP_DIRS = {
    "node_modules", "venv", ".venv", "__pycache__",
    ".git", "dist", "build", ".next", "target",
}
@@ -0,0 +1,123 @@
1
+ # scanner/scan.py
2
+ # Full scan pipeline — engine + AI filter combined.
3
+ # Supports local directories and GitHub URLs.
4
+
5
import subprocess
import tempfile
from pathlib import Path
from typing import Optional

from ai_filter.filter import run_filter
from db.database import init_db, get_connection
from db.queries import (
    create_project,
    start_scan,
    save_finding,
    finish_scan,
    update_last_scan,
)
from scanner.engine import scan_directory as run_engine
19
+
20
+
21
def _is_github_url(path: str) -> bool:
    """Return True if *path* looks like a GitHub repo URL (HTTPS or SSH form)."""
    # Shells sometimes leave surrounding quotes on pasted paths — strip them
    # before testing, exactly as scan() does.
    candidate = path.strip().strip('"').strip("'")
    return candidate.startswith(("https://github.com/", "git@github.com:"))
25
+
26
+
27
def _clone_repo(url: str, target_dir: Path) -> None:
    """
    Clone a GitHub repo into target_dir (shallow, --depth 1, to keep it fast).

    Raises RuntimeError if git is not installed or the clone fails.
    """
    print(f"[Permi] Cloning : {url}")
    try:
        result = subprocess.run(
            ["git", "clone", "--depth", "1", url, str(target_dir)],
            capture_output=True,
            text=True,
        )
    except FileNotFoundError as err:
        # subprocess.run raises FileNotFoundError when the git binary is not
        # on PATH; convert it so callers see the single documented exception.
        raise RuntimeError("git clone failed: git is not installed") from err
    if result.returncode != 0:
        raise RuntimeError(f"git clone failed:\n{result.stderr.strip()}")
    print(f"[Permi] Cloned to: {target_dir}\n")
41
+
42
+
43
def scan(
    path: str,
    project_name: Optional[str] = None,
    offline: bool = False,
) -> tuple[list[dict], int]:
    """
    Full scan pipeline: resolve the target (local dir or GitHub clone), run
    the regex engine, persist raw findings, then AI-filter them.

    Args:
        path: Local directory, or a GitHub URL (HTTPS or SSH) to clone.
        project_name: Optional display name; defaults to the directory or
            repository name.
        offline: Forwarded to the AI filter (presumably skips remote calls —
            confirm in ai_filter.filter).

    Returns:
        (real_findings, raw_count)
        real_findings — findings the AI marked as REAL
        raw_count     — total before AI filtering (for accurate summary)

    Raises:
        FileNotFoundError: local path does not exist.
        NotADirectoryError: local path is not a directory.
        RuntimeError: the GitHub clone failed.
    """
    # ── Normalise path ────────────────────────────────────────────────────────
    # Shells sometimes leave surrounding quotes on pasted paths.
    path = path.strip().strip('"').strip("'")

    tmp = None
    try:
        # ── GitHub URL — clone to a temp directory ────────────────────────────
        if _is_github_url(path):
            tmp = tempfile.TemporaryDirectory()
            target = Path(tmp.name) / "repo"
            _clone_repo(path, target)
            # removesuffix, not replace: replace(".git", "") would also mangle
            # a repo named e.g. "my.github-tools"; only a trailing ".git"
            # should be dropped.
            default_name = path.rstrip("/").split("/")[-1].removesuffix(".git")
        else:
            target = Path(path).resolve()
            if not target.exists():
                raise FileNotFoundError(f"Path does not exist: {target}")
            if not target.is_dir():
                raise NotADirectoryError(f"Path is not a directory: {target}")
            default_name = target.name

        name = project_name or default_name

        # ── Init DB, create/find project, start scan record ───────────────────
        init_db()
        conn = get_connection()
        try:
            project_id = create_project(conn, name=name, path=str(target))
            scan_id = start_scan(conn, project_id)

            print(f"[Permi] Scanning : {target}")
            print(f"[Permi] Project : {name} (id={project_id})")
            print(f"[Permi] Scan : id={scan_id}\n")

            # ── Run the engine ────────────────────────────────────────────────
            raw_findings = run_engine(target)
            raw_count = len(raw_findings)
            scanned_files = len({f["file"] for f in raw_findings}) if raw_findings else 0

            print(f"[Permi] Engine found {raw_count} raw finding(s) "
                  f"across {scanned_files} file(s)\n")

            # ── Save every raw finding to DB ──────────────────────────────────
            for finding in raw_findings:
                finding["id"] = save_finding(conn, scan_id, finding)
        finally:
            # Close even if the engine or a DB write blows up, so the
            # connection never leaks.
            conn.close()

        # ── Run AI filter (deliberately with no DB handle open) ───────────────
        real_findings = run_filter(raw_findings, offline=offline)

        # ── Finish scan record ────────────────────────────────────────────────
        conn = get_connection()
        try:
            finish_scan(
                conn,
                scan_id=scan_id,
                total_files=scanned_files,
                total_findings=len(real_findings),
            )
            update_last_scan(conn, project_id)
        finally:
            conn.close()

        return real_findings, raw_count
    finally:
        # ── Clean up temp clone — also on error paths, unlike relying on
        # TemporaryDirectory's finaliser.
        if tmp:
            tmp.cleanup()
            print("[Permi] Temp clone deleted.\n")
permi-0.1.1/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+