sentrix 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sentrix/__init__.py +131 -0
- sentrix/cli.py +396 -0
- sentrix/compliance/__init__.py +4 -0
- sentrix/compliance/reporter.py +344 -0
- sentrix/db.py +251 -0
- sentrix/eval/__init__.py +15 -0
- sentrix/eval/compare.py +230 -0
- sentrix/eval/dataset.py +143 -0
- sentrix/eval/experiment.py +288 -0
- sentrix/eval/scorers.py +199 -0
- sentrix/git_tracker.py +212 -0
- sentrix/guard/__init__.py +14 -0
- sentrix/guard/agent.py +239 -0
- sentrix/guard/attacks.py +188 -0
- sentrix/guard/auto_dataset.py +113 -0
- sentrix/guard/fingerprint.py +213 -0
- sentrix/guard/rag_scanner.py +217 -0
- sentrix/guard/red_team.py +278 -0
- sentrix/interceptor.py +119 -0
- sentrix/monitor/__init__.py +5 -0
- sentrix/monitor/daemon.py +132 -0
- sentrix/monitor/drift.py +249 -0
- sentrix/monitor/tracer.py +178 -0
- sentrix/plugins/__init__.py +4 -0
- sentrix/plugins/registry.py +138 -0
- sentrix/pricing.py +103 -0
- sentrix/providers.py +146 -0
- sentrix/review/__init__.py +4 -0
- sentrix/review/annotations.py +193 -0
- sentrix/server/__init__.py +1 -0
- sentrix/server/app.py +306 -0
- sentrix-0.1.0.dist-info/METADATA +336 -0
- sentrix-0.1.0.dist-info/RECORD +35 -0
- sentrix-0.1.0.dist-info/WHEEL +4 -0
- sentrix-0.1.0.dist-info/entry_points.txt +2 -0
sentrix/__init__.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""
|
|
2
|
+
sentrix — Red-team, eval, and monitor your LLMs. Security-first, Python-native.
|
|
3
|
+
|
|
4
|
+
Quick start:
|
|
5
|
+
import sentrix
|
|
6
|
+
sentrix.init()
|
|
7
|
+
|
|
8
|
+
# Red team your chatbot
|
|
9
|
+
report = sentrix.red_team(my_chatbot, plugins=["jailbreak", "pii"])
|
|
10
|
+
report.summary()
|
|
11
|
+
|
|
12
|
+
# Attack heatmap across models
|
|
13
|
+
fp = sentrix.guard.fingerprint({"gpt-4o-mini": fn1, "claude-haiku": fn2})
|
|
14
|
+
fp.heatmap()
|
|
15
|
+
|
|
16
|
+
# Auto-generate test cases
|
|
17
|
+
ds = sentrix.auto_dataset(my_chatbot, n=50, focus="adversarial")
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
__version__ = "0.1.0"
|
|
22
|
+
__author__ = "sentrix"
|
|
23
|
+
|
|
24
|
+
# Guard (primary — security)
|
|
25
|
+
from sentrix.guard.red_team import red_team, RedTeamReport
|
|
26
|
+
from sentrix.guard.fingerprint import fingerprint, ModelFingerprint
|
|
27
|
+
from sentrix.guard.auto_dataset import auto_dataset
|
|
28
|
+
|
|
29
|
+
# Eval
|
|
30
|
+
from sentrix.eval.dataset import Dataset, DatasetItem
|
|
31
|
+
from sentrix.eval.experiment import Experiment, ExperimentResults
|
|
32
|
+
from sentrix.eval import scorers
|
|
33
|
+
from sentrix.eval.compare import compare_models, prompt_ab_test
|
|
34
|
+
|
|
35
|
+
# Monitor
|
|
36
|
+
from sentrix.monitor.tracer import trace, span
|
|
37
|
+
from sentrix.monitor.drift import DriftDetector, DriftReport
|
|
38
|
+
|
|
39
|
+
# Sub-packages
|
|
40
|
+
from sentrix import guard, eval, monitor
|
|
41
|
+
|
|
42
|
+
__all__ = [
|
|
43
|
+
# Core
|
|
44
|
+
"init",
|
|
45
|
+
"dataset",
|
|
46
|
+
"experiment",
|
|
47
|
+
# Guard
|
|
48
|
+
"red_team",
|
|
49
|
+
"RedTeamReport",
|
|
50
|
+
"fingerprint",
|
|
51
|
+
"ModelFingerprint",
|
|
52
|
+
"auto_dataset",
|
|
53
|
+
"guard",
|
|
54
|
+
# Eval
|
|
55
|
+
"Dataset",
|
|
56
|
+
"DatasetItem",
|
|
57
|
+
"Experiment",
|
|
58
|
+
"ExperimentResults",
|
|
59
|
+
"scorers",
|
|
60
|
+
"compare_models",
|
|
61
|
+
"prompt_ab_test",
|
|
62
|
+
"eval",
|
|
63
|
+
# Monitor
|
|
64
|
+
"trace",
|
|
65
|
+
"span",
|
|
66
|
+
"DriftDetector",
|
|
67
|
+
"DriftReport",
|
|
68
|
+
"monitor",
|
|
69
|
+
]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def init(
    persist: bool = True,
    db_path: str | None = None,
    offline: bool = False,
    local_judge_model: str = "llama3",
    judge_model: str = "gpt-4o-mini",
) -> None:
    """
    Initialize sentrix: configure providers, set up persistence, and
    activate the SDK interceptors.

    Args:
        persist: when True, the database schema is initialized via
            ``db.init_db`` (default True)
        db_path: custom database path; when falsy the db module keeps its
            own default location
        offline: forwarded to ``providers.configure``; selects the local
            judge model instead of an external API
        local_judge_model: judge model name used when ``offline=True``
        judge_model: judge model name used when ``offline=False``

    Interceptor activation is best-effort: a failure never aborts
    initialization, but it is reported as a ``RuntimeWarning`` so that a
    broken interceptor does not go unnoticed.

    Example:
        sentrix.init()                           # Standard
        sentrix.init(offline=True)               # Fully offline
        sentrix.init(db_path="/tmp/sentrix.db")  # Custom DB path
    """
    import warnings

    from sentrix import providers, db

    # Configure providers
    providers.configure(
        offline=offline,
        local_judge_model=local_judge_model,
        judge_model=judge_model,
    )

    # Configure DB path (only when the caller supplied one)
    if db_path:
        db.set_db_path(db_path)

    # Initialize DB schema
    if persist:
        db.init_db(db_path)

    # Activate SDK interceptors. This stays best-effort, but the failure is
    # surfaced instead of being swallowed silently.
    try:
        from sentrix import interceptor

        interceptor.activate()
    except Exception as exc:
        warnings.warn(
            f"sentrix: SDK interceptors could not be activated ({exc!r}); "
            "continuing without them.",
            RuntimeWarning,
            stacklevel=2,
        )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def dataset(name: str, description: str = "") -> Dataset:
    """Return a ``Dataset`` constructed from *name* and *description*."""
    named_dataset = Dataset(name, description)
    return named_dataset
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def experiment(
    name: str,
    dataset: Dataset | str,
    fn,
    scorers: list | None = None,
) -> Experiment:
    """Build an ``Experiment`` for *fn* over *dataset*.

    A missing or empty *scorers* argument is passed on as an empty list.
    """
    active_scorers = scorers if scorers else []
    return Experiment(
        name=name,
        dataset=dataset,
        fn=fn,
        scorers=active_scorers,
    )
|
sentrix/cli.py
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
"""sentrix CLI — command-line interface for red teaming, eval, monitoring, and more."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import importlib
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
from typing import Callable
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _load_fn(module_path: str) -> Callable:
|
|
12
|
+
"""Load a function from 'module:function' syntax."""
|
|
13
|
+
if ":" not in module_path:
|
|
14
|
+
print(f"[sentrix] Error: expected 'module:function' format, got '{module_path}'")
|
|
15
|
+
sys.exit(1)
|
|
16
|
+
module_name, fn_name = module_path.rsplit(":", 1)
|
|
17
|
+
try:
|
|
18
|
+
mod = importlib.import_module(module_name)
|
|
19
|
+
return getattr(mod, fn_name)
|
|
20
|
+
except (ImportError, AttributeError) as e:
|
|
21
|
+
print(f"[sentrix] Error loading {module_path}: {e}")
|
|
22
|
+
sys.exit(1)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def cmd_scan(args) -> None:
    """sentrix scan module:fn [--plugins ...] [--n 10] [--git-compare main] [--fail-on-regression]

    Runs ``red_team`` against the loaded target, prints the report summary
    and, when ``--output`` is given, dumps ``report.to_json()`` to that file.
    """
    from sentrix import init, red_team

    init(persist=True)
    target_fn = _load_fn(args.target)

    # Comma-separated plugin names; fall back to the built-in trio.
    if args.plugins:
        plugin_names = [name.strip() for name in args.plugins.split(",")]
    else:
        plugin_names = ["jailbreak", "pii", "harmful"]

    scan_options = {
        "plugins": plugin_names,
        "n_attacks": args.n,
        "git_compare": args.git_compare,
        "fail_on_regression": args.fail_on_regression,
    }
    report = red_team(target_fn, **scan_options)
    report.summary()

    if not args.output:
        return
    with open(args.output, "w") as out_file:
        json.dump(report.to_json(), out_file, indent=2)
    print(f"[sentrix] Report saved to {args.output}")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def cmd_fingerprint(args) -> None:
    """sentrix fingerprint module:fn1 module:fn2 [--plugins all] [--n 10]

    Loads every target spec, resolves the plugin list (``all`` expands to
    every key of ``PLUGIN_REGISTRY``) and prints the resulting heatmap.
    """
    from sentrix import init
    from sentrix.guard.fingerprint import fingerprint

    init(persist=True)

    # Keyed by the raw spec string, so a repeated spec collapses to one entry.
    loaded_targets = {spec: _load_fn(spec) for spec in args.targets}

    if args.plugins == "all":
        from sentrix.guard.attacks import PLUGIN_REGISTRY

        plugin_names = [*PLUGIN_REGISTRY.keys()]
    elif args.plugins:
        plugin_names = [name.strip() for name in args.plugins.split(",")]
    else:
        plugin_names = ["jailbreak", "pii", "harmful", "hallucination", "injection"]

    result = fingerprint(loaded_targets, plugins=plugin_names, n_attacks=args.n)
    result.heatmap()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def cmd_auto_dataset(args) -> None:
    """sentrix auto-dataset module:fn [--n 20] [--focus adversarial] [--name myds]

    Generates a dataset from the loaded target and reports its name and size.
    """
    from sentrix import init, auto_dataset

    init(persist=True)

    target_fn = _load_fn(args.target)
    generated = auto_dataset(target_fn, n=args.n, focus=args.focus, name=args.name)

    dataset_name = generated.name
    item_count = len(generated)
    print(f"[sentrix] Generated dataset '{dataset_name}' with {item_count} items")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def cmd_scan_agent(args) -> None:
    """Run ``scan_agent`` on the loaded target and print the report summary.

    ``args.mcp`` is forwarded unchanged as the ``mcp_endpoint`` argument.
    """
    from sentrix import init
    from sentrix.guard.agent import scan_agent

    init(persist=True)

    agent_fn = _load_fn(args.target)
    agent_report = scan_agent(agent_fn, mcp_endpoint=args.mcp)
    agent_report.summary()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def cmd_scan_rag(args) -> None:
    """Load documents from ``--docs`` and run ``scan_rag`` on them.

    ``args.docs`` may be a directory (every regular file directly inside it
    becomes one document, keyed by file name) or a single file (keyed by the
    path as given). Directory entries are read in sorted order so the
    document list is the same on every run and platform.
    NOTE(review): presumably this also keeps ``--baseline-hash`` comparisons
    stable if the hash depends on document order; confirm in ``scan_rag``.

    Files are decoded as UTF-8. Undecodable bytes in documents are replaced
    rather than aborting the scan. A missing ``--docs`` path exits with
    status 1 and a message on stderr.
    """
    from sentrix import init
    from sentrix.guard.rag_scanner import scan_rag
    import os

    init(persist=True)

    if not os.path.exists(args.docs):
        print(f"[sentrix] Error: docs path not found: {args.docs}", file=sys.stderr)
        sys.exit(1)

    # Load documents
    docs = []
    if os.path.isdir(args.docs):
        for entry in sorted(os.listdir(args.docs)):
            doc_path = os.path.join(args.docs, entry)
            if os.path.isfile(doc_path):
                with open(doc_path, encoding="utf-8", errors="replace") as fh:
                    docs.append({"id": entry, "content": fh.read()})
    else:
        with open(args.docs, encoding="utf-8", errors="replace") as fh:
            docs = [{"id": args.docs, "content": fh.read()}]

    system_prompt = None
    if args.system_prompt:
        with open(args.system_prompt, encoding="utf-8") as fh:
            system_prompt = fh.read()

    report = scan_rag(docs, system_prompt=system_prompt, baseline_hash=args.baseline_hash)
    report.summary()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def cmd_eval_run(args) -> None:
    """Execute an experiment file and optionally enforce a minimum pass rate.

    The file is run with ``runpy.run_path``; it must leave an ``experiment``
    (or ``exp``) variable in its namespace. When ``--fail-below`` is set and
    the pass rate is under it, the process exits with status 1.
    """
    from sentrix import init

    init(persist=True)

    # Load experiment file
    import runpy

    namespace = runpy.run_path(args.file)
    exp = namespace.get("experiment")
    if not exp:
        exp = namespace.get("exp")
    if exp is None:
        print("[sentrix] Error: file must define 'experiment' or 'exp' variable")
        sys.exit(1)

    results = exp.run()
    results.summary()

    threshold = args.fail_below
    if not threshold:
        return
    if results.pass_rate < threshold:
        print(f"[sentrix] FAIL: pass rate {results.pass_rate:.1%} < {args.fail_below:.1%}")
        sys.exit(1)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def cmd_monitor_watch(args) -> None:
    """Hand the loaded target to ``watch`` with the CLI-supplied settings.

    Interval, plugin list, attack count and webhook are forwarded as given.
    """
    from sentrix import init
    from sentrix.monitor.daemon import watch

    init(persist=True)

    watched_fn = _load_fn(args.target)
    if args.plugins:
        plugin_names = [name.strip() for name in args.plugins.split(",")]
    else:
        plugin_names = ["jailbreak", "pii"]

    watch(
        watched_fn,
        interval_seconds=args.interval,
        plugins=plugin_names,
        n_attacks=args.n,
        alert_webhook=args.webhook,
    )
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def cmd_monitor_drift(args) -> None:
    """Check for drift against a named baseline and print the report.

    ``--fail`` switches the detector's ``on_drift`` mode from ``"warn"`` to
    ``"raise"``.
    """
    from sentrix import init
    from sentrix.monitor.drift import DriftDetector

    init(persist=True)

    drift_mode = "raise" if args.fail else "warn"
    detector = DriftDetector(on_drift=drift_mode)
    detector.baseline(args.baseline)

    drift_report = detector.check(window_hours=args.window)
    drift_report.summary()
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def cmd_monitor_traces(args) -> None:
    """Print the most recent traces, newest first, limited to ``args.limit``.

    ``init_db()`` is called before querying, as the ``history`` and ``costs``
    commands already do.
    NOTE(review): presumably ``init_db()`` creates the ``traces`` table on a
    fresh install, so an empty database reports "No traces found." rather
    than failing; confirm against ``sentrix.db``.
    """
    from sentrix.db import _q, init_db
    import datetime

    init_db()
    rows = _q("SELECT id, name, start_time, end_time, user_id, error FROM traces ORDER BY start_time DESC LIMIT ?", (args.limit,))
    if not rows:
        print("[sentrix] No traces found.")
        return

    print(f"\n{'ID':^38} {'Name':<25} {'Start':^20} {'Error'}")
    print("-" * 90)
    for r in rows:
        # A missing or zero start time is shown as a dash.
        ts = datetime.datetime.fromtimestamp(r["start_time"]).strftime("%Y-%m-%d %H:%M:%S") if r["start_time"] else "-"
        print(f"{r['id']:<38} {(r['name'] or '-'):<25} {ts:<20} {r['error'] or '-'}")
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def cmd_review_list(args) -> None:
    """List review items from a ``ReviewQueue``.

    With ``--pending``, prints the pending count and up to the first 20
    pending items. Otherwise prints up to ``--limit`` stored annotations.
    """
    from sentrix.review.annotations import ReviewQueue

    queue = ReviewQueue()

    if not args.pending:
        annotations = queue.list_annotations(limit=args.limit)
        print(f"\n[sentrix] {len(annotations)} annotations:")
        for ann in annotations:
            print(f" [{ann.label}] {ann.result_id[:40]} — {ann.comment or ''}")
        return

    pending_items = queue.pending()
    print(f"\n[sentrix] {len(pending_items)} pending review items:")
    for entry in pending_items[:20]:
        print(f" [{entry['plugin']}] {entry['attack_input'][:80]}...")
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def cmd_review_annotate(args) -> None:
    """Record an annotation for ``args.result_id`` and confirm it on stdout."""
    from sentrix.review.annotations import annotate

    saved = annotate(
        args.result_id,
        args.label,
        reviewer=args.reviewer,
        comment=args.comment,
    )
    print(f"[sentrix] Annotated {saved.result_id} as '{saved.label}'")
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def cmd_compliance(args) -> None:
    """Generate and summarize one compliance report per requested framework.

    Every framework in ``args.framework`` is passed to ``generate_report``
    with the same ``args.output`` value.
    """
    from sentrix import init
    from sentrix.compliance import generate_report

    init(persist=True)

    for framework_name in args.framework:
        compliance_report = generate_report(framework_name, output=args.output)
        compliance_report.summary()
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def cmd_plugin_list(args) -> None:
    """Print the available plugins, then the installed ones.

    Available entries show name and description. Installed entries show
    name and version, with ``?`` when no version key is present.
    """
    from sentrix.plugins import list_available, list_installed

    print("\n[sentrix] Available plugins:")
    for available in list_available():
        name, description = available["name"], available["description"]
        print(f" {name:<30} {description}")

    print("\n[sentrix] Installed plugins:")
    for installed in list_installed():
        name = installed["name"]
        version = installed.get("version", "?")
        print(f" {name:<30} v{version}")
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def cmd_plugin_install(args) -> None:
    """Delegate installation of the plugin named ``args.name``."""
    from sentrix.plugins import install as install_plugin

    plugin_name = args.name
    install_plugin(plugin_name)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def cmd_serve(args) -> None:
    """Start the dashboard server with the CLI-supplied port, DB path and no-open flag."""
    from sentrix.server.app import run as run_server

    server_options = {
        "port": args.port,
        "db_path": args.db,
        "no_open": args.no_open,
    }
    run_server(**server_options)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def cmd_history(args) -> None:
    """Print a table of the most recent red-team reports, newest first.

    A NULL vulnerability rate, a falsy cost or creation time, and a falsy
    target or model name are each rendered as ``-``.
    """
    from sentrix.db import _q, init_db

    init_db()
    rows = _q("SELECT target_fn, model, vulnerability_rate, total_cost_usd, created_at FROM red_team_reports ORDER BY created_at DESC LIMIT ?", (args.limit,))
    if not rows:
        print("[sentrix] No scan history found. Run sentrix scan first.")
        return

    import datetime

    print(f"\n{'Target':<30} {'Model':<15} {'Vuln%':>8} {'Cost':>10} {'Date'}")
    print("-" * 75)
    for record in rows:
        created = record["created_at"]
        if created:
            when = datetime.datetime.fromtimestamp(created).strftime("%Y-%m-%d")
        else:
            when = "-"

        vuln = record["vulnerability_rate"]
        rate = "-" if vuln is None else f"{vuln:.1%}"

        spent = record["total_cost_usd"]
        cost = f"${spent:.4f}" if spent else "-"

        target = record["target_fn"] or "-"
        model = record["model"] or "-"
        print(f"{target:<30} {model:<15} {rate:>8} {cost:>10} {when}")
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def cmd_costs(args) -> None:
    """Print per-model LLM call counts and summed cost for the last N days.

    The window starts ``args.days`` days before now. Rows are grouped by
    model and ordered by total cost, highest first, and a grand total is
    printed last. A NULL model name is shown as ``-`` and a NULL cost sum
    counts as 0.0, so such rows no longer break the numeric formatting.
    """
    from sentrix.db import _q, init_db
    import time

    init_db()
    cutoff = time.time() - args.days * 86400
    rows = _q("SELECT model, SUM(cost_usd) as total, COUNT(*) as calls FROM llm_calls WHERE timestamp > ? GROUP BY model ORDER BY total DESC", (cutoff,))
    if not rows:
        print(f"[sentrix] No LLM calls in the last {args.days} days.")
        return

    print(f"\nLLM Costs — last {args.days} days:")
    print(f" {'Model':<30} {'Calls':>8} {'Total Cost':>12}")
    print(" " + "-" * 54)
    total = 0.0
    for r in rows:
        model = r["model"] or "-"
        # SUM() yields NULL when every cost_usd in the group is NULL.
        model_cost = r["total"] or 0.0
        print(f" {model:<30} {r['calls']:>8} ${model_cost:>11.4f}")
        total += model_cost
    print(f"\n {'TOTAL':<30} {'':>8} ${total:>11.4f}")
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def cmd_version(args) -> None:
    """Print the installed sentrix version string."""
    from sentrix import __version__ as installed_version

    print(f"sentrix v{installed_version}")
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def main() -> None:
    """Parse the command line and dispatch to the matching ``cmd_*`` handler.

    Every sub-command stores its handler in ``func`` via ``set_defaults``.
    When no handler was selected (no command, or a command group without a
    sub-command) the top-level help is printed.

    ``compliance --framework`` is repeatable. Its default is applied only
    after parsing, because ``action="append"`` appends to a pre-set default
    list instead of replacing it.
    """
    parser = argparse.ArgumentParser(
        prog="sentrix",
        description="Red-team, eval, and monitor your LLMs.",
    )
    sub = parser.add_subparsers(dest="command")

    # scan
    p_scan = sub.add_parser("scan", help="Red team a function")
    p_scan.add_argument("target", help="module:function")
    p_scan.add_argument("--plugins", default="jailbreak,pii,harmful")
    p_scan.add_argument("--n", type=int, default=10, help="Attacks per plugin")
    p_scan.add_argument("--git-compare", dest="git_compare", metavar="REF")
    p_scan.add_argument("--fail-on-regression", action="store_true", dest="fail_on_regression")
    p_scan.add_argument("--output", metavar="FILE")
    p_scan.set_defaults(func=cmd_scan)

    # fingerprint
    p_fp = sub.add_parser("fingerprint", help="Attack heatmap across models")
    p_fp.add_argument("targets", nargs="+", help="module:function pairs")
    p_fp.add_argument("--plugins", default="jailbreak,pii,harmful")
    p_fp.add_argument("--n", type=int, default=10)
    p_fp.set_defaults(func=cmd_fingerprint)

    # auto-dataset
    p_ds = sub.add_parser("auto-dataset", help="Generate test dataset from function")
    p_ds.add_argument("target", help="module:function")
    p_ds.add_argument("--n", type=int, default=20)
    p_ds.add_argument("--focus", default="mixed", choices=["adversarial", "normal", "edge_case", "mixed"])
    p_ds.add_argument("--name")
    p_ds.set_defaults(func=cmd_auto_dataset)

    # scan-agent
    p_agent = sub.add_parser("scan-agent", help="Test agentic workflow security")
    p_agent.add_argument("target", help="module:function")
    p_agent.add_argument("--mcp", metavar="URL")
    p_agent.set_defaults(func=cmd_scan_agent)

    # scan-rag
    p_rag = sub.add_parser("scan-rag", help="Scan RAG corpus for poisoning and PII")
    p_rag.add_argument("--docs", required=True, metavar="PATH")
    p_rag.add_argument("--system-prompt", dest="system_prompt", metavar="FILE")
    p_rag.add_argument("--baseline-hash", dest="baseline_hash", metavar="HASH")
    p_rag.set_defaults(func=cmd_scan_rag)

    # eval run
    p_eval = sub.add_parser("eval", help="Evaluation commands")
    eval_sub = p_eval.add_subparsers(dest="eval_command")
    p_eval_run = eval_sub.add_parser("run")
    p_eval_run.add_argument("file", help="Python file defining 'experiment' variable")
    p_eval_run.add_argument("--fail-below", type=float, dest="fail_below", metavar="RATE")
    p_eval_run.set_defaults(func=cmd_eval_run)

    # monitor
    p_mon = sub.add_parser("monitor", help="Monitoring commands")
    mon_sub = p_mon.add_subparsers(dest="mon_command")

    p_watch = mon_sub.add_parser("watch")
    p_watch.add_argument("target", help="module:function")
    p_watch.add_argument("--interval", type=int, default=60)
    p_watch.add_argument("--plugins", default="jailbreak,pii")
    p_watch.add_argument("--n", type=int, default=5)
    p_watch.add_argument("--webhook", metavar="URL")
    p_watch.set_defaults(func=cmd_monitor_watch)

    p_drift = mon_sub.add_parser("drift")
    p_drift.add_argument("--baseline", required=True, metavar="EXP_NAME")
    p_drift.add_argument("--window", type=float, default=24.0, metavar="HOURS")
    p_drift.add_argument("--fail", action="store_true")
    p_drift.set_defaults(func=cmd_monitor_drift)

    p_traces = mon_sub.add_parser("traces")
    p_traces.add_argument("--limit", type=int, default=20)
    p_traces.set_defaults(func=cmd_monitor_traces)

    # review
    p_rev = sub.add_parser("review", help="Human review workflow")
    rev_sub = p_rev.add_subparsers(dest="rev_command")

    p_rev_list = rev_sub.add_parser("list")
    p_rev_list.add_argument("--pending", action="store_true")
    p_rev_list.add_argument("--limit", type=int, default=20)
    p_rev_list.set_defaults(func=cmd_review_list)

    p_rev_ann = rev_sub.add_parser("annotate")
    p_rev_ann.add_argument("result_id")
    p_rev_ann.add_argument("--label", required=True, choices=["true_positive", "false_positive", "needs_review"])
    p_rev_ann.add_argument("--reviewer")
    p_rev_ann.add_argument("--comment")
    p_rev_ann.set_defaults(func=cmd_review_annotate)

    # compliance
    p_comp = sub.add_parser("compliance", help="Generate compliance reports")
    # default=None: with action="append", a non-empty default list would be
    # kept and the user's choices appended to it, so "--framework soc2"
    # would also report on the default framework.
    p_comp.add_argument("--framework", action="append", default=None, dest="framework",
                        choices=["owasp_llm_top10", "nist_ai_rmf", "eu_ai_act", "soc2"],
                        help="Framework to report on (can specify multiple)")
    p_comp.add_argument("--output", metavar="FILE")
    p_comp.set_defaults(func=cmd_compliance)

    # plugin
    p_plug = sub.add_parser("plugin", help="Plugin ecosystem")
    plug_sub = p_plug.add_subparsers(dest="plug_command")
    plug_sub.add_parser("list").set_defaults(func=cmd_plugin_list)
    p_install = plug_sub.add_parser("install")
    p_install.add_argument("name")
    p_install.set_defaults(func=cmd_plugin_install)

    # serve
    p_serve = sub.add_parser("serve", help="Start dashboard")
    p_serve.add_argument("--port", type=int, default=7234)
    p_serve.add_argument("--db", metavar="PATH")
    p_serve.add_argument("--no-open", action="store_true", dest="no_open")
    p_serve.set_defaults(func=cmd_serve)

    # history / costs / version
    p_hist = sub.add_parser("history", help="Show scan history")
    p_hist.add_argument("--limit", type=int, default=20)
    p_hist.set_defaults(func=cmd_history)

    p_costs = sub.add_parser("costs", help="Show LLM costs")
    p_costs.add_argument("--days", type=int, default=7)
    p_costs.set_defaults(func=cmd_costs)

    sub.add_parser("version").set_defaults(func=cmd_version)

    args = parser.parse_args()
    if not hasattr(args, "func"):
        parser.print_help()
        return

    # Apply the compliance default only when no --framework was given.
    if args.func is cmd_compliance and not args.framework:
        args.framework = ["owasp_llm_top10"]

    args.func(args)


if __name__ == "__main__":
    main()
|