printerxpl-forge 6.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nse/README.md +204 -0
- nse/__init__.py +6 -0
- nse/install_nse.py +412 -0
- nse/lib/printerxpl.lua +238 -0
- nse/scripts/cups-info.nse +74 -0
- nse/scripts/cups-queue-info.nse +43 -0
- nse/scripts/hp-printers-cve-2022-1026.nse +121 -0
- nse/scripts/http-device-mac.nse +107 -0
- nse/scripts/http-hp-ilo-info.nse +121 -0
- nse/scripts/http-info-xerox-enum.nse +101 -0
- nse/scripts/http-vuln-cve2022-1026.nse +158 -0
- nse/scripts/lexmark-config.nse +89 -0
- nse/scripts/pjl-ready-message.nse +106 -0
- nse/scripts/printer-banner.nse +217 -0
- nse/scripts/printer-cups-rce.nse +189 -0
- nse/scripts/printer-cve-detect.nse +279 -0
- nse/scripts/printer-discover.nse +205 -0
- nse/scripts/printer-firmware-exposed.nse +219 -0
- nse/scripts/printer-hp-pjl.nse +192 -0
- nse/scripts/printer-http-ews.nse +293 -0
- nse/scripts/printer-ipp-info.nse +235 -0
- nse/scripts/printer-lexmark-ipp.nse +203 -0
- nse/scripts/printer-passback.nse +204 -0
- nse/scripts/printer-pjl-info.nse +146 -0
- nse/scripts/printer-printnightmare.nse +211 -0
- nse/scripts/printer-snmp-info.nse +176 -0
- nse/scripts/printer-vuln-check.nse +256 -0
- nse/scripts/snmp-device-mac.nse +93 -0
- nse/scripts/snmp-info.nse +146 -0
- nse/scripts/snmp-sysdescr.nse +70 -0
- printerxpl_forge-6.2.0.dist-info/METADATA +919 -0
- printerxpl_forge-6.2.0.dist-info/RECORD +97 -0
- printerxpl_forge-6.2.0.dist-info/WHEEL +5 -0
- printerxpl_forge-6.2.0.dist-info/entry_points.txt +4 -0
- printerxpl_forge-6.2.0.dist-info/licenses/LICENSE +21 -0
- printerxpl_forge-6.2.0.dist-info/top_level.txt +4 -0
- src/assets/fonts/gunplay.pfa +1671 -0
- src/assets/fonts/kshandwrt.pfa +315 -0
- src/assets/fonts/laksoner.pfa +2402 -0
- src/assets/fonts/paintcans.pfa +9699 -0
- src/assets/fonts/stencilod.pfa +4076 -0
- src/assets/fonts/takecover.pfa +26138 -0
- src/assets/fonts/topsecret.pfa +6652 -0
- src/assets/fonts/whoa.pfa +773 -0
- src/assets/mibs/HOST-RESOURCES-MIB +1540 -0
- src/assets/mibs/Printer-MIB +4389 -0
- src/assets/mibs/README.md +9 -0
- src/assets/mibs/SNMPv2-MIB +854 -0
- src/assets/overlays/hacker.eps +596 -0
- src/assets/overlays/smiley.eps +214 -0
- src/assets/overlays/smiley2.eps +240 -0
- src/core/attack_orchestrator.py +1025 -0
- src/core/capabilities.py +323 -0
- src/core/destructive_audit.py +430 -0
- src/core/discovery.py +488 -0
- src/core/osdetect.py +74 -0
- src/core/poly_runner.py +579 -0
- src/core/printer.py +1426 -0
- src/main.py +2134 -0
- src/modules/install_printer.py +318 -0
- src/modules/login_bruteforce.py +852 -0
- src/modules/pcl.py +506 -0
- src/modules/pjl.py +3575 -0
- src/modules/print_job.py +1290 -0
- src/modules/ps.py +1102 -0
- src/payloads/__init__.py +98 -0
- src/payloads/assets/overlays/notice.eps +9 -0
- src/protocols/__init__.py +19 -0
- src/protocols/firmware.py +738 -0
- src/protocols/ipp.py +216 -0
- src/protocols/ipp_attacks.py +609 -0
- src/protocols/lpd.py +141 -0
- src/protocols/network_map.py +1004 -0
- src/protocols/raw.py +173 -0
- src/protocols/smb.py +359 -0
- src/protocols/ssrf_pivot.py +427 -0
- src/protocols/storage.py +587 -0
- src/ui/__init__.py +6 -0
- src/ui/interactive.py +742 -0
- src/ui/spinner.py +112 -0
- src/ui/tables.py +132 -0
- src/utils/banner_grabber.py +852 -0
- src/utils/codebook.py +456 -0
- src/utils/config.py +522 -0
- src/utils/cve_loader.py +158 -0
- src/utils/default_creds.py +134 -0
- src/utils/discovery_online.py +1327 -0
- src/utils/exploit_manager.py +805 -0
- src/utils/fuzzer.py +220 -0
- src/utils/helper.py +732 -0
- src/utils/local_printers.py +307 -0
- src/utils/ml_engine.py +491 -0
- src/utils/operators.py +474 -0
- src/utils/ports.py +234 -0
- src/utils/vuln_scanner.py +823 -0
- src/utils/wordlist_loader.py +412 -0
- src/version.py +36 -0
src/utils/ml_engine.py
ADDED
|
@@ -0,0 +1,491 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
PrinterXPL-Forge — Lightweight ML Engine
|
|
5
|
+
========================================
|
|
6
|
+
Provides ML-assisted printer fingerprinting and attack prioritization
|
|
7
|
+
using scikit-learn (no GPU required, < 20 MB RAM, < 5 MB model files).
|
|
8
|
+
|
|
9
|
+
Design philosophy — WHY NOT deep learning:
|
|
10
|
+
- A BERT/GPT-class model requires 400 MB+ storage and 1–4 GB RAM
|
|
11
|
+
- For structured banner data, TF-IDF + Random Forest is equally accurate
|
|
12
|
+
(often 90–95% on this kind of classification task)
|
|
13
|
+
- scikit-learn models load in < 200 ms and classify in < 1 ms per sample
|
|
14
|
+
- This keeps PrinterXPL-Forge fast and portable (Raspberry Pi / old VMs)
|
|
15
|
+
|
|
16
|
+
What the ML engine does:
|
|
17
|
+
1. Banner fingerprinting → predict make/model from raw banner text
|
|
18
|
+
2. Protocol language classification → predict PJL/PS/PCL support
|
|
19
|
+
3. Attack surface scoring → rank attack vectors by success probability
|
|
20
|
+
4. Anomaly detection → flag unusual printer responses (planned; not implemented in this module yet)
|
|
21
|
+
|
|
22
|
+
Training data is currently generated from hard-coded banner templates for
|
|
23
|
+
known printer models (see _build_training_data); no external database files are read.
|
|
24
|
+
Models are trained once and cached in .ml_models/ (~2–5 MB total).
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
# Author : Andre Henrique (@mrhenrike)
|
|
28
|
+
# GitHub : https://github.com/mrhenrike
|
|
29
|
+
# LinkedIn : https://linkedin.com/in/mrhenrike
|
|
30
|
+
# X/Twitter : https://x.com/mrhenrike
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
import hashlib
|
|
35
|
+
import json
|
|
36
|
+
import logging
|
|
37
|
+
import os
|
|
38
|
+
import re
|
|
39
|
+
from pathlib import Path
|
|
40
|
+
from typing import Dict, List, Optional, Tuple
|
|
41
|
+
|
|
42
|
+
_log = logging.getLogger(__name__)
|
|
43
|
+
|
|
44
|
+
# ── Lazy imports ──────────────────────────────────────────────────────────────
|
|
45
|
+
# scikit-learn is imported lazily so the tool still runs when not installed.
|
|
46
|
+
|
|
47
|
+
def _require_sklearn():
    """
    Import scikit-learn on demand and hand the module back to the caller.

    Raises ImportError carrying an install hint when the package is missing.
    """
    try:
        import sklearn
    except ImportError:
        # Re-raise with a message that tells the user how to fix it.
        raise ImportError(
            "scikit-learn is required for the ML engine. "
            "Install with: pip install scikit-learn"
        )
    else:
        return sklearn
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ── Configuration ─────────────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
# This file lives at <root>/src/utils/ml_engine.py, so three parents up is
# the project root.
_HERE = Path(__file__).resolve().parent.parent.parent  # project root

# Vendor names probed (case-insensitively) in banner text.  'OKI' and 'Oki'
# lower-case to the same feature key, so they yield a single feature.
KNOWN_MAKES = [
    'HP',
    'Epson',
    'Brother',
    'Xerox',
    'Ricoh',
    'Kyocera',
    'Canon',
    'Lexmark',
    'Samsung',
    'Sharp',
    'Dell',
    'Konica',
    'Toshiba',
    'OKI',
    'Oki',
    'Panasonic',
    'Fuji',
    'Lanier',
    'Gestetner',
    'NRG',
    'Savin',
]

# Printer language -> lower-case substrings whose presence in a banner
# indicates that language.  Matching is plain substring search.
LANG_KEYWORDS = {
    'PJL': [
        'pjl', '@pjl', 'pjl ready', 'jetdirect', 'hp laserjet',
    ],
    'PostScript': [
        'postscript', 'ps', 'br-script', 'kpdl', 'ps level',
        'application/postscript',
    ],
    'PCL': [
        'pcl', 'pcl 5', 'pcl 6', 'pcl xl', 'pcl5', 'pcl6',
    ],
    'ESC/P': [
        'escpr', 'escpl', 'esc/p', 'epson esc', 'escpr1',
        'application/vnd.epson',
    ],
    'PWGRaster': [
        'pwg-raster', 'pwgraster', 'image/pwg-raster',
    ],
    'PDF': [
        'application/pdf', 'pdf',
    ],
    'ZPL': [
        'zpl', 'zebra',
    ],
    'IPL': [
        'ipl', 'intermec',
    ],
}

# Attack vector -> lower-case substrings that hint the vector is reachable.
ATTACK_FEATURES = {
    'pjl_filesystem': [
        'pjl', '@pjl', 'fsdownload', 'fsupload', 'port 9100',
    ],
    'ps_execution': [
        'postscript', 'ps level', 'br-script',
    ],
    'ipp_anonymous': [
        'ipp', 'port 631', 'ipps', 'airprint',
    ],
    'lpd_open': [
        'lpd', 'port 515', 'line printer',
    ],
    'snmp_public': [
        'snmp', 'public', 'community',
    ],
    'web_default_creds': [
        'admin', 'http', 'https', 'web management',
    ],
}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ── Feature extraction ────────────────────────────────────────────────────────
|
|
92
|
+
|
|
93
|
+
def extract_features(banner_text: str) -> Dict[str, float]:
    """
    Turn raw banner text into a fixed set of named numeric features.

    Every feature is 0.0 or 1.0 except 'len_bucket', which is the banner
    length in thousands of characters capped at 10.0.  All keyword checks
    are case-insensitive substring tests against the lower-cased banner;
    only the IPP binary marker is checked against the original text.
    """
    lowered = banner_text.lower()

    def flag(hit) -> float:
        # Collapse any truthy/falsy value to the 1.0 / 0.0 encoding.
        return 1.0 if hit else 0.0

    vector: Dict[str, float] = {}

    # Vendor name present?
    for vendor in KNOWN_MAKES:
        needle = vendor.lower()
        vector[f'make_{needle}'] = flag(needle in lowered)

    # Printer-language indicators ('/' is replaced so keys stay path-safe).
    for language, needles in LANG_KEYWORDS.items():
        key = f'lang_{language.replace("/","_")}'
        vector[key] = flag(any(n in lowered for n in needles))

    # Attack-surface indicators.
    for vector_name, needles in ATTACK_FEATURES.items():
        vector[f'attack_{vector_name}'] = flag(any(n in lowered for n in needles))

    # Port numbers mentioned anywhere in the banner (substring match).
    for port in (9100, 631, 515, 445, 161, 80):
        vector[f'port_{port}'] = flag(str(port) in lowered)

    # Structural markers.
    vector['has_pjl_uel'] = flag('\x1b%-12345x' in lowered)
    vector['has_ipp_binary'] = flag('\x01\x01' in banner_text)
    vector['has_http_header'] = flag('http/' in lowered)
    vector['has_snmp_data'] = flag('sysname' in lowered or 'sysdescr' in lowered)
    vector['has_uuid'] = flag(re.search(r'[0-9a-f]{8}-', lowered))
    vector['len_bucket'] = min(len(banner_text) / 1000.0, 10.0)

    return vector
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def features_to_array(features: Dict[str, float]) -> 'np.ndarray':
    """Return the feature values as a 1 x N numpy row, ordered by sorted key name."""
    import numpy as np  # type: ignore

    # Sorting the keys gives every banner the same column order.
    ordered_values = [features[name] for name in sorted(features)]
    row = np.array(ordered_values)
    return row.reshape(1, -1)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ── Synthetic training data ───────────────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
def _build_training_data() -> Tuple[List[str], List[str], List[str]]:
    """
    Assemble the synthetic banner corpus used to train the classifiers.

    Returns (banners, make_labels, lang_labels); the three lists are
    index-aligned.  lang_labels entries are comma-separated language names
    ('' when no language is attributed).

    The corpus is generated from the fixed templates below rather than from
    real printer scans.
    """
    seed_rows = [
        # HP / PJL
        ("HP LaserJet P3015 PJL ready @PJL INFO ID port 9100", "HP", "PJL"),
        ("HP Color LaserJet CP4525 PJL PostScript PCL", "HP", "PJL,PostScript,PCL"),
        ("HP LaserJet 4250 @PJL INFO STATUS CODE=10001 DISPLAY=Ready ONLINE=TRUE",
         "HP", "PJL"),
        ("HP DesignJet T120 port 9100 PJL INFO ID HP DesignJet", "HP", "PJL"),
        # EPSON / ESC
        ("EPSON L3250 Series ESC/P-R ESCPL2 PWGRaster application/vnd.epson.escpr",
         "EPSON", "ESC/P,PWGRaster"),
        ("EPSON WorkForce WF-3820 IPP HTTPS port 631 PWGRaster", "EPSON", "PWGRaster"),
        ("EPSON ET-2760 EcoTank ESCPR1 airprint ipp port 631", "EPSON", "ESC/P"),
        # Brother / PJL + PS
        ("Brother MFC-L8900CDW PostScript BR-Script PJL port 9100",
         "Brother", "PJL,PostScript"),
        ("Brother HL-L8360CDW PCL 5 PCL 6 PostScript LPD port 515",
         "Brother", "PCL,PostScript"),
        # Xerox
        ("Xerox Phaser 6500DN PostScript PCL PJL port 9100 SNMP public",
         "Xerox", "PJL,PostScript,PCL"),
        ("Xerox WorkCentre 7845 PCL XL PostScript IPP LPD", "Xerox", "PCL,PostScript"),
        # Ricoh
        ("Ricoh Aficio MP C5503 PJL PostScript PCL IPP LPD SNMP",
         "Ricoh", "PJL,PostScript,PCL"),
        ("Ricoh SP C430DN PCL 5c PostScript LPD port 515", "Ricoh", "PCL,PostScript"),
        # Kyocera
        ("Kyocera FS-C5150DN PJL PCL 5c PCL 6 PostScript port 9100",
         "Kyocera", "PJL,PCL,PostScript"),
        # Generic
        ("Printer ready PJL INFO ID Model Unknown", "Unknown", "PJL"),
        ("IPP printer airprint port 631 HTTPS", "Unknown", ""),
        ("LPD line printer daemon port 515 default queue", "Unknown", ""),
    ]

    banners: List[str] = []
    make_labels: List[str] = []
    lang_labels: List[str] = []

    for text, vendor, languages in seed_rows:
        # The suffix is derived from the template itself, so the corpus is
        # identical on every run.
        noisy_text = text + f" uptime={_pseudo_rand(text)} firmware=v1.0"

        # NOTE(review): the source this was reconstructed from had lost its
        # indentation; the noisy variant is assumed to be appended once per
        # template, after the three plain copies — confirm against upstream.
        for sample in (text, text, text, noisy_text):
            banners.append(sample)
            make_labels.append(vendor)
            lang_labels.append(languages)

    return banners, make_labels, lang_labels
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _pseudo_rand(s: str) -> int:
    """Map a string to a repeatable integer in 0..65535 (not for security use)."""
    digest = hashlib.md5(s.encode()).digest()
    # The first two digest bytes, big-endian, equal the first four hex digits.
    return int.from_bytes(digest[:2], 'big')
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# ── Model persistence ─────────────────────────────────────────────────────────
|
|
210
|
+
|
|
211
|
+
def _model_path(name: str, model_dir: str) -> Path:
    """Return the path of the '<name>.joblib' file inside *model_dir*."""
    filename = f"{name}.joblib"
    return Path(model_dir).joinpath(filename)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _save_model(model, name: str, model_dir: str) -> None:
    """Write *model* to '<model_dir>/<name>.joblib', creating the directory if needed."""
    import joblib

    destination = _model_path(name, model_dir)
    Path(model_dir).mkdir(parents=True, exist_ok=True)
    joblib.dump(model, destination)
    _log.debug("Saved model %s to %s", name, model_dir)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _load_model(name: str, model_dir: str):
    """Load '<model_dir>/<name>.joblib' and return it, or None if the file is absent."""
    import joblib

    candidate = _model_path(name, model_dir)
    if not candidate.exists():
        return None
    # NOTE(review): joblib files are pickle-based, so loading one can run
    # arbitrary code.  model_dir defaults to a directory relative to the
    # current working directory — only point it at locations you trust.
    return joblib.load(candidate)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
# ── Model training ────────────────────────────────────────────────────────────
|
|
231
|
+
|
|
232
|
+
def train(model_dir: str = '.ml_models', force: bool = False) -> dict:
    """
    Train the make classifier and the per-language classifiers on synthetic data.

    Models are saved to *model_dir* and re-used on subsequent calls: when the
    make classifier, its label encoder and the language list are all present
    and *force* is False, nothing is retrained.

    Args:
        model_dir: Directory the model files are written to (created if needed).
        force:     Retrain even when cached model files exist.

    Returns:
        {'status': 'cached', 'model_dir': ...} when the cache was used, else
        {'status': 'trained', 'model_dir': ...,
         'make_clf': {'classes': [...]}, 'lang_clf': {'languages': [...]}}.

    Raises:
        ImportError: scikit-learn is not installed.
    """
    _require_sklearn()
    from sklearn.ensemble import RandomForestClassifier  # type: ignore
    from sklearn.preprocessing import LabelEncoder  # type: ignore
    import numpy as np  # type: ignore

    results = {}
    lang_list_path = Path(model_dir) / 'lang_list.json'

    # Check if already trained.  Language models are stored one file per
    # language (lang_<name>.joblib), so the language list written at the end
    # of a training run is the marker that the run completed.
    if not force:
        cached_artifacts = (
            _model_path('make_clf', model_dir),
            _model_path('make_le', model_dir),
            lang_list_path,
        )
        if all(artifact.exists() for artifact in cached_artifacts):
            _log.info("ML models already trained — use force=True to retrain")
            return {'status': 'cached', 'model_dir': model_dir}

    banners, make_labels, lang_labels = _build_training_data()

    # Feature extraction: one 1 x N row per banner, stacked into a matrix.
    X = np.vstack([features_to_array(extract_features(b)) for b in banners])

    # ── Make classifier ───────────────────────────────────────────────────────
    le_make = LabelEncoder()
    y_make = le_make.fit_transform(make_labels)
    clf_make = RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=-1)
    clf_make.fit(X, y_make)

    _save_model(clf_make, 'make_clf', model_dir)
    _save_model(le_make, 'make_le', model_dir)
    results['make_clf'] = {'classes': list(le_make.classes_)}

    # ── Language classifier (multi-label via binary relevance) ───────────────
    label_sets = [
        {lang for lang in labs.split(',') if lang} for labs in lang_labels
    ]
    all_langs = sorted(set().union(*label_sets))
    trained_langs = []
    for lang in all_langs:
        y_lang = [1 if lang in labels else 0 for labels in label_sets]
        # A classifier needs at least two positive examples to be worth fitting.
        if sum(y_lang) < 2:
            continue
        clf_lang = RandomForestClassifier(n_estimators=20, random_state=42)
        clf_lang.fit(X, y_lang)
        _save_model(clf_lang, f'lang_{lang.replace("/","_")}', model_dir)
        trained_langs.append(lang)

    # Save the list of languages that actually have a model on disk.
    Path(model_dir).mkdir(parents=True, exist_ok=True)
    with open(lang_list_path, 'w', encoding='utf-8') as fh:
        json.dump(trained_langs, fh)

    results['lang_clf'] = {'languages': trained_langs}
    results['status'] = 'trained'
    results['model_dir'] = model_dir
    _log.info("ML models trained and saved to %s", model_dir)
    return results
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
# ── Inference ─────────────────────────────────────────────────────────────────
|
|
297
|
+
|
|
298
|
+
class MLEngine:
    """
    ML-assisted printer analysis engine.

    Wraps the trained make/language classifiers for inference and adds a
    rule-based attack-vector scorer.  Call train() at least once before
    creating MLEngine instances, or set auto_train=True so that missing
    models are trained on construction.
    """

    def __init__(self, model_dir: str = '.ml_models', auto_train: bool = True):
        """
        Args:
            model_dir:  Directory holding the cached model files.
            auto_train: Train the models when they are missing.  When False,
                        models that already exist are still loaded, but
                        nothing is ever trained.
        """
        self.model_dir = model_dir
        self._make_clf = None
        self._make_le = None
        self._lang_clfs: Dict[str, object] = {}
        self._lang_list: List[str] = []
        self._ready = False

        if auto_train:
            self._load_or_train()
        else:
            # Best effort: pick up models a previous train() call left behind.
            # Constructing with auto_train=False must never raise, so any
            # failure just leaves the engine in its "not ready" state.
            try:
                self._load_models()
            except Exception:
                _log.warning("Could not load ML models from %s",
                             self.model_dir, exc_info=True)
                self._make_clf = None
                self._make_le = None
                self._lang_clfs = {}
                self._ready = False

    def _load_or_train(self) -> None:
        """Load cached models, training first if a required file is missing."""
        required = (
            _model_path('make_clf', self.model_dir),
            _model_path('make_le', self.model_dir),
        )
        if not all(path.exists() for path in required):
            _log.info("ML models not found — training now (one-time, ~2s) ...")
            # force=True: we already know the cache is incomplete.
            train(self.model_dir, force=True)

        self._load_models()

    def _load_models(self) -> None:
        """Read whatever model files exist in model_dir into this instance."""
        self._make_clf = _load_model('make_clf', self.model_dir)
        self._make_le = _load_model('make_le', self.model_dir)

        lang_list_path = Path(self.model_dir) / 'lang_list.json'
        if lang_list_path.exists():
            with open(lang_list_path, encoding='utf-8') as fh:
                self._lang_list = json.load(fh)

        self._lang_clfs = {}
        for lang in self._lang_list:
            clf = _load_model(f'lang_{lang.replace("/","_")}', self.model_dir)
            if clf is not None:
                self._lang_clfs[lang] = clf

        # predict_make needs both the classifier and its label encoder.
        self._ready = (self._make_clf is not None
                       and self._make_le is not None)

    def predict_make(self, banner_text: str, min_confidence: float = 0.40) -> Tuple[str, float]:
        """
        Predict the printer manufacturer from banner text.

        Returns (make, confidence) where confidence is 0.0–1.0.
        Returns ('Unknown', confidence) if the best confidence is below
        *min_confidence*, and ('Unknown', 0.0) if no model is loaded.
        """
        if not self._ready:
            return 'Unknown', 0.0

        feats = features_to_array(extract_features(banner_text))
        probs = self._make_clf.predict_proba(feats)[0]  # type: ignore
        idx = int(probs.argmax())
        conf = float(probs[idx])
        if conf < min_confidence:
            return 'Unknown', conf
        # predict_proba columns follow clf.classes_, so map the column index
        # back to the encoded label before decoding it.
        encoded = self._make_clf.classes_[idx]  # type: ignore
        make = self._make_le.inverse_transform([encoded])[0]  # type: ignore
        return str(make), conf

    def predict_langs(
        self,
        banner_text: str,
        min_confidence: float = 0.50,
    ) -> Dict[str, float]:
        """
        Predict supported printer languages from banner text.

        Returns {lang: confidence} for each language whose positive-class
        probability is at least *min_confidence*; {} if no model is loaded.
        """
        if not self._ready or not self._lang_clfs:
            return {}

        feats = features_to_array(extract_features(banner_text))
        result: Dict[str, float] = {}
        for lang, clf in self._lang_clfs.items():
            probs = clf.predict_proba(feats)[0]  # type: ignore
            classes = list(clf.classes_)  # type: ignore
            if 1 not in classes:
                # Model never saw a positive example; it cannot vouch for
                # this language.
                continue
            conf = float(probs[classes.index(1)])
            if conf >= min_confidence:
                result[lang] = conf
        return result

    def score_attack_vectors(
        self,
        banner_text: str,
        open_ports: Optional[List[int]] = None,
    ) -> Dict[str, float]:
        """
        Score attack vectors by predicted success probability.

        Returns {attack_vector: score 0.0–1.0}, highest score first.
        The scores are rule-based heuristics over the banner features and the
        supplied open ports; no trained model is needed.
        """
        feats = extract_features(banner_text)
        scores: Dict[str, float] = {}
        ports = set(open_ports or [])

        # PJL filesystem attacks.  An open port 9100 counts the same whether
        # it was reported by the caller or mentioned in the banner.
        port_9100 = 1.0 if 9100 in ports else feats.get('port_9100', 0)
        pjl_score = (feats.get('lang_PJL', 0) * 0.5 +
                     port_9100 * 0.3 +
                     feats.get('attack_pjl_filesystem', 0) * 0.2)
        if pjl_score > 0:
            scores['pjl_filesystem_access'] = round(pjl_score, 2)

        # PostScript execution
        ps_score = feats.get('lang_PostScript', 0) * 0.7
        if ps_score > 0:
            scores['ps_code_execution'] = round(ps_score, 2)

        # IPP anonymous job
        ipp_score = feats.get('attack_ipp_anonymous', 0) * 0.6
        if 631 in ports:
            ipp_score += 0.3
        if ipp_score > 0:
            scores['ipp_anonymous_print'] = round(min(ipp_score, 1.0), 2)

        # LPD open
        lpd_hint = feats.get('attack_lpd_open', 0)
        if lpd_hint > 0 or 515 in ports:
            scores['lpd_print_job'] = round(0.6 + lpd_hint * 0.3, 2)

        # SNMP enumeration
        if feats.get('attack_snmp_public', 0) > 0 or 161 in ports:
            scores['snmp_enumeration'] = 0.8

        # Web credential brute force
        web_score = feats.get('attack_web_default_creds', 0) * 0.5
        if 80 in ports or 443 in ports:
            web_score += 0.3
        if web_score > 0:
            scores['web_default_credentials'] = round(min(web_score, 1.0), 2)

        # Sort by score descending
        return dict(sorted(scores.items(), key=lambda item: item[1], reverse=True))

    def analyze(
        self,
        banner_text: str,
        open_ports: Optional[List[int]] = None,
        min_confidence: float = 0.40,
    ) -> dict:
        """
        Full ML analysis: make prediction + language prediction + attack scoring.

        Returns a summary dict with keys 'predicted_make', 'make_confidence',
        'predicted_langs', 'attack_scores' and 'top_attack' (None when no
        attack vector scored above zero).
        """
        make, make_conf = self.predict_make(banner_text, min_confidence)
        langs = self.predict_langs(banner_text, min_confidence)
        attacks = self.score_attack_vectors(banner_text, open_ports)

        return {
            'predicted_make': make,
            'make_confidence': round(make_conf, 2),
            'predicted_langs': langs,
            'attack_scores': attacks,
            # attacks is ordered best-first, so its first key is the top pick.
            'top_attack': next(iter(attacks), None),
        }
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
# ── Convenience function ──────────────────────────────────────────────────────
|
|
456
|
+
|
|
457
|
+
def quick_analyze(
    banner_text: str,
    open_ports: Optional[List[int]] = None,
    model_dir: str = '.ml_models',
    verbose: bool = False,
) -> dict:
    """
    Run a complete analysis in one call, building the MLEngine internally.

    Args:
        banner_text: Concatenated raw banner strings from all protocols.
        open_ports:  List of open TCP port numbers.
        model_dir:   Directory for cached ML model files.
        verbose:     Print results to stdout.

    Returns:
        dict with predicted make, languages, and ranked attack vectors.
    """
    result = MLEngine(model_dir=model_dir, auto_train=True).analyze(
        banner_text, open_ports
    )
    if not verbose:
        return result

    make_line = (f"\n [ML] Predicted make : {result['predicted_make']} "
                 f"(confidence={result['make_confidence']:.0%})")
    print(make_line)

    langs = result['predicted_langs']
    if langs:
        rendered = ', '.join(f"{l}({c:.0%})" for l, c in langs.items())
        print(" [ML] Predicted langs : " + rendered)

    ranked = result['attack_scores']
    if ranked:
        print(" [ML] Attack priorities:")
        # Show at most the five highest-scoring vectors.
        for vec, score in list(ranked.items())[:5]:
            bar = '█' * int(score * 10)
            print(f" {vec:<35} {bar} {score:.0%}")

    return result
|