secret-scanner-cl 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- secret_scanner_cl-1.0.0/PKG-INFO +143 -0
- secret_scanner_cl-1.0.0/README.md +128 -0
- secret_scanner_cl-1.0.0/pyproject.toml +47 -0
- secret_scanner_cl-1.0.0/setup.cfg +4 -0
- secret_scanner_cl-1.0.0/src/secret_scanner/__init__.py +0 -0
- secret_scanner_cl-1.0.0/src/secret_scanner/main.py +186 -0
- secret_scanner_cl-1.0.0/src/secret_scanner/scanner/__init__.py +0 -0
- secret_scanner_cl-1.0.0/src/secret_scanner/scanner/file_scanner.py +127 -0
- secret_scanner_cl-1.0.0/src/secret_scanner/scanner/patterns.py +71 -0
- secret_scanner_cl-1.0.0/src/secret_scanner/scanner/reporter.py +58 -0
- secret_scanner_cl-1.0.0/src/secret_scanner_cl.egg-info/PKG-INFO +143 -0
- secret_scanner_cl-1.0.0/src/secret_scanner_cl.egg-info/SOURCES.txt +17 -0
- secret_scanner_cl-1.0.0/src/secret_scanner_cl.egg-info/dependency_links.txt +1 -0
- secret_scanner_cl-1.0.0/src/secret_scanner_cl.egg-info/entry_points.txt +2 -0
- secret_scanner_cl-1.0.0/src/secret_scanner_cl.egg-info/requires.txt +1 -0
- secret_scanner_cl-1.0.0/src/secret_scanner_cl.egg-info/top_level.txt +1 -0
- secret_scanner_cl-1.0.0/tests/test_file_scanner.py +156 -0
- secret_scanner_cl-1.0.0/tests/test_patterns.py +193 -0
- secret_scanner_cl-1.0.0/tests/test_reporter.py +123 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: secret-scanner-cl
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: CLI tool that detects hardcoded secrets and credentials in source code.
|
|
5
|
+
Author-email: Kiara <kiara@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Kiara1616/secret-scanner
|
|
8
|
+
Project-URL: Repository, https://github.com/Kiara1616/secret-scanner
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Requires-Python: >=3.10
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Requires-Dist: colorama>=0.4.6
|
|
15
|
+
|
|
16
|
+
[Review Assignment Due Date](https://classroom.github.com/a/MQUb8mG3)
|
|
17
|
+
[Open in Codespaces](https://classroom.github.com/open-in-codespaces?assignment_repo_id=23328920)
|
|
18
|
+
|
|
19
|
+
# SecretScanner — Analizador de Secretos
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
Herramienta de código abierto desarrollada en **Python 3.10** que analiza proyectos de software y detecta **secretos y credenciales hardcodeadas** (API keys, tokens, contraseñas, claves privadas) mediante expresiones regulares.
|
|
23
|
+
|
|
24
|
+
## Características
|
|
25
|
+
|
|
26
|
+
- Soporte para cualquier directorio o archivo de texto.
|
|
27
|
+
- Detección automática del tipo de secreto encontrado.
|
|
28
|
+
- Recorrido recursivo de directorios con `os.walk`.
|
|
29
|
+
- Análisis basado en **8 patrones regex** documentados: GitHub Token, AWS Access Key, API Key genérica, contraseña hardcodeada, JWT Token, Slack Token, clave privada RSA y URL con credenciales.
|
|
30
|
+
- Salida en consola con colores diferenciados por severidad.
|
|
31
|
+
- Exportación de reportes a **JSON** y **CSV** en la carpeta `output/`.
|
|
32
|
+
- Interfaz de línea de comandos (CLI) con `--path`, `--output` y `--verbose`.
|
|
33
|
+
|
|
34
|
+
## Requisitos
|
|
35
|
+
|
|
36
|
+
- Python 3.10 o superior
|
|
37
|
+
- pip
|
|
38
|
+
|
|
39
|
+
## Instalación
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
git clone https://github.com/UPT-FAING-EPIS/proyecto-si784-2026-i-u1-analizador-de-secretos.git
|
|
43
|
+
cd proyecto-si784-2026-i-u1-analizador-de-secretos
|
|
44
|
+
pip install -r requirements.txt
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Uso de la CLI
|
|
48
|
+
|
|
49
|
+
Una vez instaladas las dependencias, ejecuta la herramienta con:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
python main.py --path <ruta-del-proyecto>
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Parámetros y opciones
|
|
56
|
+
|
|
57
|
+
| Opción | Descripción |
|
|
58
|
+
|--------|-------------|
|
|
59
|
+
| `--path <ruta>` | **(Requerido)** Ruta al directorio o archivo a analizar |
|
|
60
|
+
| `--output json` | Exporta el reporte a `output/report.json` |
|
|
61
|
+
| `--output csv` | Exporta el reporte a `output/report.csv` |
|
|
62
|
+
| `--verbose` | Muestra cada archivo procesado durante el escaneo |
|
|
63
|
+
|
|
64
|
+
## Ejemplos
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Analizar el directorio actual
|
|
68
|
+
python main.py --path .
|
|
69
|
+
|
|
70
|
+
# Analizar una ruta específica y exportar JSON
|
|
71
|
+
python main.py --path ./mi_proyecto --output json
|
|
72
|
+
|
|
73
|
+
# Analizar y exportar CSV
|
|
74
|
+
python main.py --path ./mi_proyecto --output csv
|
|
75
|
+
|
|
76
|
+
# Modo verbose — muestra cada archivo procesado
|
|
77
|
+
python main.py --path ./mi_proyecto --verbose
|
|
78
|
+
|
|
79
|
+
# Verbose + exportar JSON
|
|
80
|
+
python main.py --path ./mi_proyecto --verbose --output json
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Ejemplo de salida
|
|
84
|
+
🔍 Analizando: ./mi_proyecto
|
|
85
|
+
|
|
86
|
+
[ALERTA] GitHub Token encontrado
|
|
87
|
+
Archivo : mi_proyecto/config.py
|
|
88
|
+
Línea : 12
|
|
89
|
+
Contenido: token = "ghp_1234...****"
|
|
90
|
+
|
|
91
|
+
[ALERTA] Contraseña hardcodeada encontrada
|
|
92
|
+
Archivo : mi_proyecto/db.py
|
|
93
|
+
Línea : 8
|
|
94
|
+
Contenido: password = "****"
|
|
95
|
+
|
|
96
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
97
|
+
✅ Análisis completado
|
|
98
|
+
Archivos analizados : 24
|
|
99
|
+
Secretos encontrados : 2
|
|
100
|
+
Reporte exportado : output/report.json
|
|
101
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
102
|
+
|
|
103
|
+
text
|
|
104
|
+
|
|
105
|
+
## Tipos de secretos detectados
|
|
106
|
+
|
|
107
|
+
| Tipo | Patrón detectado |
|
|
108
|
+
|------|-----------------|
|
|
109
|
+
| GitHub Token | `ghp_`, `gho_`, `ghu_`, `ghs_` |
|
|
110
|
+
| AWS Access Key | `AKIA[0-9A-Z]{16}` |
|
|
111
|
+
| API Key genérica | `api_key = "..."` |
|
|
112
|
+
| Contraseña hardcodeada | `password = "..."` |
|
|
113
|
+
| JWT Token | `eyJ...` (header base64) |
|
|
114
|
+
| Slack Token | `xox[baprs]-...` |
|
|
115
|
+
| Clave privada RSA | `-----BEGIN RSA PRIVATE KEY-----` |
|
|
116
|
+
| URL con credenciales | `http://user:pass@host` |
|
|
117
|
+
|
|
118
|
+
## Archivos ignorados
|
|
119
|
+
|
|
120
|
+
El escáner omite automáticamente extensiones binarias (`.png`, `.jpg`, `.gif`, `.exe`, `.zip`, `.pdf`) y directorios no relevantes (`.git`, `__pycache__`, `node_modules`, `output`).
|
|
121
|
+
|
|
122
|
+
## Desarrollo y tests
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
# Instalar dependencias de desarrollo
|
|
126
|
+
pip install -r requirements-dev.txt
|
|
127
|
+
|
|
128
|
+
# Ejecutar todos los tests
|
|
129
|
+
pytest
|
|
130
|
+
|
|
131
|
+
# Ver cobertura por módulo
|
|
132
|
+
pytest --cov=scanner --cov-report=term-missing
|
|
133
|
+
|
|
134
|
+
# Tests de un módulo específico
|
|
135
|
+
pytest tests/test_patterns.py -v
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
La cobertura mínima requerida es **80%** sobre el paquete `scanner/`.
|
|
139
|
+
|
|
140
|
+
## CI/CD
|
|
141
|
+
|
|
142
|
+
El proyecto cuenta con un pipeline de **GitHub Actions** (`.github/workflows/ci.yml`) que se activa en cada `push` y `pull_request` hacia `main`, instala dependencias, ejecuta los tests con cobertura y falla el build si algún test no pasa.
|
|
143
|
+
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
[Review Assignment Due Date](https://classroom.github.com/a/MQUb8mG3)
|
|
2
|
+
[Open in Codespaces](https://classroom.github.com/open-in-codespaces?assignment_repo_id=23328920)
|
|
3
|
+
|
|
4
|
+
# SecretScanner — Analizador de Secretos
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
Herramienta de código abierto desarrollada en **Python 3.10** que analiza proyectos de software y detecta **secretos y credenciales hardcodeadas** (API keys, tokens, contraseñas, claves privadas) mediante expresiones regulares.
|
|
8
|
+
|
|
9
|
+
## Características
|
|
10
|
+
|
|
11
|
+
- Soporte para cualquier directorio o archivo de texto.
|
|
12
|
+
- Detección automática del tipo de secreto encontrado.
|
|
13
|
+
- Recorrido recursivo de directorios con `os.walk`.
|
|
14
|
+
- Análisis basado en **8 patrones regex** documentados: GitHub Token, AWS Access Key, API Key genérica, contraseña hardcodeada, JWT Token, Slack Token, clave privada RSA y URL con credenciales.
|
|
15
|
+
- Salida en consola con colores diferenciados por severidad.
|
|
16
|
+
- Exportación de reportes a **JSON** y **CSV** en la carpeta `output/`.
|
|
17
|
+
- Interfaz de línea de comandos (CLI) con `--path`, `--output` y `--verbose`.
|
|
18
|
+
|
|
19
|
+
## Requisitos
|
|
20
|
+
|
|
21
|
+
- Python 3.10 o superior
|
|
22
|
+
- pip
|
|
23
|
+
|
|
24
|
+
## Instalación
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
git clone https://github.com/UPT-FAING-EPIS/proyecto-si784-2026-i-u1-analizador-de-secretos.git
|
|
28
|
+
cd proyecto-si784-2026-i-u1-analizador-de-secretos
|
|
29
|
+
pip install -r requirements.txt
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Uso de la CLI
|
|
33
|
+
|
|
34
|
+
Una vez instaladas las dependencias, ejecuta la herramienta con:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
python main.py --path <ruta-del-proyecto>
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Parámetros y opciones
|
|
41
|
+
|
|
42
|
+
| Opción | Descripción |
|
|
43
|
+
|--------|-------------|
|
|
44
|
+
| `--path <ruta>` | **(Requerido)** Ruta al directorio o archivo a analizar |
|
|
45
|
+
| `--output json` | Exporta el reporte a `output/report.json` |
|
|
46
|
+
| `--output csv` | Exporta el reporte a `output/report.csv` |
|
|
47
|
+
| `--verbose` | Muestra cada archivo procesado durante el escaneo |
|
|
48
|
+
|
|
49
|
+
## Ejemplos
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# Analizar el directorio actual
|
|
53
|
+
python main.py --path .
|
|
54
|
+
|
|
55
|
+
# Analizar una ruta específica y exportar JSON
|
|
56
|
+
python main.py --path ./mi_proyecto --output json
|
|
57
|
+
|
|
58
|
+
# Analizar y exportar CSV
|
|
59
|
+
python main.py --path ./mi_proyecto --output csv
|
|
60
|
+
|
|
61
|
+
# Modo verbose — muestra cada archivo procesado
|
|
62
|
+
python main.py --path ./mi_proyecto --verbose
|
|
63
|
+
|
|
64
|
+
# Verbose + exportar JSON
|
|
65
|
+
python main.py --path ./mi_proyecto --verbose --output json
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Ejemplo de salida
|
|
69
|
+
🔍 Analizando: ./mi_proyecto
|
|
70
|
+
|
|
71
|
+
[ALERTA] GitHub Token encontrado
|
|
72
|
+
Archivo : mi_proyecto/config.py
|
|
73
|
+
Línea : 12
|
|
74
|
+
Contenido: token = "ghp_1234...****"
|
|
75
|
+
|
|
76
|
+
[ALERTA] Contraseña hardcodeada encontrada
|
|
77
|
+
Archivo : mi_proyecto/db.py
|
|
78
|
+
Línea : 8
|
|
79
|
+
Contenido: password = "****"
|
|
80
|
+
|
|
81
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
82
|
+
✅ Análisis completado
|
|
83
|
+
Archivos analizados : 24
|
|
84
|
+
Secretos encontrados : 2
|
|
85
|
+
Reporte exportado : output/report.json
|
|
86
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
87
|
+
|
|
88
|
+
text
|
|
89
|
+
|
|
90
|
+
## Tipos de secretos detectados
|
|
91
|
+
|
|
92
|
+
| Tipo | Patrón detectado |
|
|
93
|
+
|------|-----------------|
|
|
94
|
+
| GitHub Token | `ghp_`, `gho_`, `ghu_`, `ghs_` |
|
|
95
|
+
| AWS Access Key | `AKIA[0-9A-Z]{16}` |
|
|
96
|
+
| API Key genérica | `api_key = "..."` |
|
|
97
|
+
| Contraseña hardcodeada | `password = "..."` |
|
|
98
|
+
| JWT Token | `eyJ...` (header base64) |
|
|
99
|
+
| Slack Token | `xox[baprs]-...` |
|
|
100
|
+
| Clave privada RSA | `-----BEGIN RSA PRIVATE KEY-----` |
|
|
101
|
+
| URL con credenciales | `http://user:pass@host` |
|
|
102
|
+
|
|
103
|
+
## Archivos ignorados
|
|
104
|
+
|
|
105
|
+
El escáner omite automáticamente extensiones binarias (`.png`, `.jpg`, `.gif`, `.exe`, `.zip`, `.pdf`) y directorios no relevantes (`.git`, `__pycache__`, `node_modules`, `output`).
|
|
106
|
+
|
|
107
|
+
## Desarrollo y tests
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Instalar dependencias de desarrollo
|
|
111
|
+
pip install -r requirements-dev.txt
|
|
112
|
+
|
|
113
|
+
# Ejecutar todos los tests
|
|
114
|
+
pytest
|
|
115
|
+
|
|
116
|
+
# Ver cobertura por módulo
|
|
117
|
+
pytest --cov=scanner --cov-report=term-missing
|
|
118
|
+
|
|
119
|
+
# Tests de un módulo específico
|
|
120
|
+
pytest tests/test_patterns.py -v
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
La cobertura mínima requerida es **80%** sobre el paquete `scanner/`.
|
|
124
|
+
|
|
125
|
+
## CI/CD
|
|
126
|
+
|
|
127
|
+
El proyecto cuenta con un pipeline de **GitHub Actions** (`.github/workflows/ci.yml`) que se activa en cada `push` y `pull_request` hacia `main`, instala dependencias, ejecuta los tests con cobertura y falla el build si algún test no pasa.
|
|
128
|
+
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "secret-scanner-cl"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "CLI tool that detects hardcoded secrets and credentials in source code."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Kiara", email = "kiara@example.com"}
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"colorama>=0.4.6",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.urls]
|
|
25
|
+
Homepage = "https://github.com/Kiara1616/secret-scanner"
|
|
26
|
+
Repository = "https://github.com/Kiara1616/secret-scanner"
|
|
27
|
+
|
|
28
|
+
[project.scripts]
|
|
29
|
+
secret-scanner = "secret_scanner.main:main"
|
|
30
|
+
|
|
31
|
+
# ── pytest + coverage ──────────────────────────────────────────────────────
|
|
32
|
+
[tool.pytest.ini_options]
|
|
33
|
+
testpaths = ["tests"]
|
|
34
|
+
addopts = [
|
|
35
|
+
"--cov=src.secret_scanner",
|
|
36
|
+
"--cov-report=term-missing",
|
|
37
|
+
"--cov-fail-under=80",
|
|
38
|
+
"-v",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[tool.coverage.run]
|
|
42
|
+
source = ["src/secret_scanner"]
|
|
43
|
+
omit = ["tests/*"]
|
|
44
|
+
|
|
45
|
+
[tool.coverage.report]
|
|
46
|
+
show_missing = true
|
|
47
|
+
fail_under = 80
|
|
File without changes
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""
|
|
2
|
+
main.py – SecretScanner CLI entry-point.
|
|
3
|
+
|
|
4
|
+
Usage examples
|
|
5
|
+
--------------
|
|
6
|
+
python main.py --path .
|
|
7
|
+
python main.py --path ./myproject --output json --verbose
|
|
8
|
+
python main.py --path ./myproject --output csv
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import os
|
|
13
|
+
import sys
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
# Some consoles (notably Windows with cp1252) cannot encode every character
# this tool prints, which raises UnicodeEncodeError. Switch stdout to UTF-8
# whenever it reports a different encoding; a missing/empty encoding is left
# untouched.
if (sys.stdout.encoding or "utf-8").lower() != "utf-8":
    try:
        sys.stdout.reconfigure(encoding="utf-8", errors="replace")  # type: ignore[attr-defined]
    except AttributeError:
        # stdout is an object without reconfigure(); keep it unchanged.
        pass
|
|
22
|
+
|
|
23
|
+
from colorama import Fore, Style, init as colorama_init
|
|
24
|
+
|
|
25
|
+
from secret_scanner.scanner.file_scanner import scan_path
|
|
26
|
+
from secret_scanner.scanner.reporter import export_json, export_csv
|
|
27
|
+
|
|
28
|
+
# ── Colour helpers ─────────────────────────────────────────────────────────
|
|
29
|
+
# Colorama prefix applied to the severity badge of a finding, keyed by the
# severity label carried in the finding dict.
SEVERITY_COLOR = dict(
    HIGH=Fore.RED + Style.BRIGHT,
    MEDIUM=Fore.YELLOW + Style.BRIGHT,
    LOW=Fore.CYAN,
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _colored(text: str, color: str) -> str:
    """Return *text* prefixed with *color* and followed by the reset code."""
    return "{}{}{}".format(color, text, Style.RESET_ALL)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _banner() -> None:
    """Print the ASCII-art logo in bright cyan, then the version tagline."""
    logo = r"""
____ ___ ____ ____ _____ _____ ____ ____ _ _ _ _ _ _____ ____
/ ___||__ \ / ___| __ )| ____|_ _/ ___| / ___| / \ | \ | | \ | | ____| _ \
\___ \ / // | | _ \| _| | | \___ \| | / _ \ | \| | \| | _| | |_) |
___) |/ /_| |___| |_) | |___ | | ___) | |___ / ___ \| |\ | |\ | |___| _ <
|____//_____\____|____/|_____| |_| |____/ \____/_/ \_\_| \_|_| \_|_____|_| \_\
"""
    print(_colored(logo, Fore.CYAN + Style.BRIGHT))

    tagline = " SecretScanner v1.0.0 - Hardcoded Secret Detector\n"
    print(_colored(tagline, Fore.WHITE))
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _build_parser() -> argparse.ArgumentParser:
|
|
57
|
+
parser = argparse.ArgumentParser(
|
|
58
|
+
prog="secret-scanner",
|
|
59
|
+
description="Detect hardcoded secrets and credentials in source code.",
|
|
60
|
+
)
|
|
61
|
+
parser.add_argument(
|
|
62
|
+
"--path",
|
|
63
|
+
required=True,
|
|
64
|
+
metavar="PATH",
|
|
65
|
+
help="Directory or file to scan.",
|
|
66
|
+
)
|
|
67
|
+
parser.add_argument(
|
|
68
|
+
"--output",
|
|
69
|
+
choices=["json", "csv"],
|
|
70
|
+
default=None,
|
|
71
|
+
metavar="FORMAT",
|
|
72
|
+
help="Export format: 'json' or 'csv'. Saves to output/ directory.",
|
|
73
|
+
)
|
|
74
|
+
parser.add_argument(
|
|
75
|
+
"--verbose",
|
|
76
|
+
action="store_true",
|
|
77
|
+
help="Print each file as it is processed.",
|
|
78
|
+
)
|
|
79
|
+
return parser
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _print_findings(findings: list) -> None:
    """Print a "FINDINGS" section with one entry per finding.

    Nothing is printed when *findings* is empty. Each finding is a dict;
    ``file`` and ``line`` are required keys, while ``severity`` (default
    ``"MEDIUM"``), ``type`` (default ``"Unknown"``) and ``content`` (default
    ``""``) are optional.
    """
    if not findings:
        return

    rule = "-" * 70
    print(_colored("\n" + rule, Fore.WHITE))
    print(_colored(" FINDINGS", Fore.RED + Style.BRIGHT))
    print(_colored(rule + "\n", Fore.WHITE))

    for finding in findings:
        level = finding.get("severity", "MEDIUM")
        # Unknown severity labels fall back to plain white.
        badge = _colored(f"[{level}]", SEVERITY_COLOR.get(level, Fore.WHITE))
        kind = _colored(finding.get("type", "Unknown"), Fore.WHITE + Style.BRIGHT)
        location = _colored(f"{finding['file']}:{finding['line']}", Fore.BLUE)
        snippet = finding.get("content", "")

        rows = (
            f" {badge} {kind}",
            f" {_colored('File:', Fore.WHITE)} {location}",
            f" {_colored('Content:', Fore.WHITE)} {snippet}",
        )
        for row in rows:
            print(row)
        print()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _print_summary(
    total_files: int,
    findings: list,
    report_path: str | None,
) -> None:
    """Print the closing summary box.

    Shows a headline (clean project vs. number of secrets found), the file
    and secret counts, and the report location when *report_path* is truthy.
    """
    found = len(findings)
    rule = _colored("-" * 70, Fore.GREEN)

    print(rule)

    if found:
        headline = _colored(
            f" [!] {found} secret(s) found - review them before committing!",
            Fore.RED + Style.BRIGHT,
        )
    else:
        headline = _colored(
            " [OK] No secrets found. Your project looks clean!",
            Fore.GREEN + Style.BRIGHT,
        )
    print(headline)

    print(_colored(f" Files analysed : {total_files}", Fore.GREEN))
    print(_colored(f" Secrets found : {found}", Fore.GREEN))

    if report_path:
        print(_colored(f" Report saved : {report_path}", Fore.GREEN))

    print(rule)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _count_files(path: str) -> int:
    """Count the files under *path* that the scanner actually inspects.

    The count is built from the scanner's own helpers so it cannot drift from
    what ``scan_path`` does: directories are walked with ``_walk_directory``
    (which prunes the ignored directory names) and only files accepted by
    ``_is_text_file`` are counted. Previously every file was counted,
    including binaries the scanner skips, which overstated "Files analysed".

    Parameters
    ----------
    path : str
        Directory or single file that was scanned.

    Returns
    -------
    int
        Number of text files the scanner would read; for a single-file
        *path* this is 1 if the file passes the text check, otherwise 0.
    """
    # Local import: main.py only imports scan_path at module level, and the
    # helpers below are private to the scanner module.
    from secret_scanner.scanner.file_scanner import _is_text_file, _walk_directory

    root = Path(path).resolve()
    candidates = [root] if root.is_file() else _walk_directory(root)
    return sum(1 for candidate in candidates if _is_text_file(candidate))
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def main() -> int:
    """Run the SecretScanner command-line interface.

    Parses the arguments, scans the requested path, prints the findings and
    a summary, and optionally exports a report to ``output/``. A report is
    written only when ``--output`` is given and at least one finding exists.

    Returns
    -------
    int
        1 when the path does not exist or at least one secret was found
        (so CI pipelines can fail the build), otherwise 0.
    """
    colorama_init(autoreset=True)
    _banner()

    args = _build_parser().parse_args()

    target = Path(args.path)
    if not target.exists():
        print(_colored(f" ERROR: Path not found: {args.path}", Fore.RED + Style.BRIGHT))
        return 1

    print(_colored(f" Scanning: {target.resolve()}", Fore.CYAN))
    if args.verbose:
        # Blank line separating the header from the per-file progress lines.
        print()

    target_str = str(target)
    findings = scan_path(target_str, verbose=args.verbose)
    total_files = _count_files(target_str)

    _print_findings(findings)

    report_path: str | None = None
    if args.output and findings:
        os.makedirs("output", exist_ok=True)
        exporter = {"json": export_json, "csv": export_csv}.get(args.output)
        if exporter is not None:
            report_path = os.path.join("output", f"report.{args.output}")
            exporter(findings, report_path)

    _print_summary(total_files, findings, report_path)

    # Non-zero exit status when secrets were found (useful for CI pipelines).
    return int(bool(findings))
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
if __name__ == "__main__":
|
|
186
|
+
sys.exit(main())
|
|
File without changes
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""
|
|
2
|
+
file_scanner.py – Recursive file scanner that applies PATTERNS to every
|
|
3
|
+
readable text file found under a given path.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import List, Dict, Any
|
|
10
|
+
|
|
11
|
+
# Relative import: this module is shipped as secret_scanner.scanner.file_scanner,
# so a top-level "scanner" package is not importable once the package is installed.
from .patterns import PATTERNS
|
|
12
|
+
|
|
13
|
+
# ── Constants ──────────────────────────────────────────────────────────────
|
|
14
|
+
# File suffixes (lower-case, with leading dot) that are never opened for
# scanning.
BINARY_EXTENSIONS: set[str] = {
    # images
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".bmp",
    ".ico",
    ".tiff",
    # executables and shared libraries
    ".exe",
    ".dll",
    ".so",
    ".dylib",
    # archives
    ".zip",
    ".tar",
    ".gz",
    ".bz2",
    ".rar",
    ".7z",
    # documents
    ".pdf",
    ".docx",
    ".xlsx",
    ".pptx",
    # compiled Python
    ".pyc",
    ".pyo",
}

# Directory names that are pruned from the walk, at any depth.
IGNORED_DIRS: set[str] = {
    ".git",
    "__pycache__",
    "node_modules",
    "output",
    ".venv",
    "venv",
    ".tox",
    "dist",
    "build",
    ".mypy_cache",
}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _mask_secret(text: str) -> str:
|
|
29
|
+
"""Return the line with the middle portion of each token replaced by ***."""
|
|
30
|
+
# Mask every word longer than 6 chars that looks like a secret value
|
|
31
|
+
def _replace(m: re.Match) -> str:
|
|
32
|
+
s = m.group(0)
|
|
33
|
+
if len(s) <= 6:
|
|
34
|
+
return s
|
|
35
|
+
keep = max(3, len(s) // 5)
|
|
36
|
+
return s[:keep] + "***" + s[-keep:]
|
|
37
|
+
|
|
38
|
+
return re.sub(r"[A-Za-z0-9\+/=_\-]{7,}", _replace, text)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _is_text_file(filepath: Path) -> bool:
    """Return True when *filepath* looks like a text file.

    A file is rejected when its lower-cased suffix is listed in
    ``BINARY_EXTENSIONS``, when it cannot be opened or read (``OSError``),
    or when its first 1024 bytes contain a null byte.
    """
    if filepath.suffix.lower() not in BINARY_EXTENSIONS:
        try:
            with open(filepath, "rb") as stream:
                head = stream.read(1024)
        except OSError:
            return False
        # A null byte in the leading chunk almost certainly means binary data.
        return head.find(b"\x00") == -1
    return False
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def scan_path(path: str, verbose: bool = False) -> List[Dict[str, Any]]:
    """
    Scan *path* for secrets, descending into sub-directories.

    Parameters
    ----------
    path : str
        Directory or single file to scan.
    verbose : bool
        When True, print the path of each file just before it is scanned.

    Returns
    -------
    list of dict
        One dict per match, with the keys:
            type     – pattern name (str)
            severity – severity label of the pattern
            file     – path of the file as a string
            line     – 1-based line number (int)
            content  – masked line content (str)
    """
    root = Path(path).resolve()

    # A single file is scanned directly; anything else is walked.
    candidates = [root] if root.is_file() else _walk_directory(root)

    findings: List[Dict[str, Any]] = []
    for candidate in candidates:
        if _is_text_file(candidate):
            if verbose:
                print(f" [scanning] {candidate}")
            _scan_file(candidate, findings)

    return findings
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _walk_directory(root: Path) -> List[Path]:
    """Return every file path under *root*, skipping ignored directories."""
    collected: List[Path] = []
    for current_dir, subdirs, names in os.walk(root):
        # Assigning through the slice mutates the list os.walk holds, which
        # stops it from descending into the pruned directories.
        subdirs[:] = [name for name in subdirs if name not in IGNORED_DIRS]
        base = Path(current_dir)
        collected.extend(base / name for name in names)
    return collected
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _scan_file(filepath: Path, findings: List[Dict[str, Any]]) -> None:
    """Scan *filepath* line by line and append every match to *findings*.

    Each line is tested against every entry of ``PATTERNS``; a line matching
    several patterns produces one finding per pattern. The file is decoded as
    UTF-8 with undecodable bytes replaced. Files that cannot be opened or
    read are skipped silently.
    """
    location = str(filepath)
    try:
        with open(filepath, "r", encoding="utf-8", errors="replace") as source:
            for number, text in enumerate(source, start=1):
                findings.extend(
                    {
                        "type": spec["name"],
                        "severity": spec["severity"],
                        "file": location,
                        "line": number,
                        "content": _mask_secret(text.rstrip()),
                    }
                    for spec in PATTERNS
                    if spec["pattern"].search(text)
                )
    except OSError:
        # Best effort: unreadable files (permission errors, etc.) are skipped.
        pass
|