sentrix 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sentrix/__init__.py ADDED
@@ -0,0 +1,131 @@
1
+ """
2
+ sentrix — Red-team, eval, and monitor your LLMs. Security-first, Python-native.
3
+
4
+ Quick start:
5
+ import sentrix
6
+ sentrix.init()
7
+
8
+ # Red team your chatbot
9
+ report = sentrix.red_team(my_chatbot, plugins=["jailbreak", "pii"])
10
+ report.summary()
11
+
12
+ # Attack heatmap across models
13
+ fp = sentrix.guard.fingerprint({"gpt-4o-mini": fn1, "claude-haiku": fn2})
14
+ fp.heatmap()
15
+
16
+ # Auto-generate test cases
17
+ ds = sentrix.auto_dataset(my_chatbot, n=50, focus="adversarial")
18
+ """
19
+ from __future__ import annotations
20
+
21
+ __version__ = "0.1.0"
22
+ __author__ = "sentrix"
23
+
24
+ # Guard (primary — security)
25
+ from sentrix.guard.red_team import red_team, RedTeamReport
26
+ from sentrix.guard.fingerprint import fingerprint, ModelFingerprint
27
+ from sentrix.guard.auto_dataset import auto_dataset
28
+
29
+ # Eval
30
+ from sentrix.eval.dataset import Dataset, DatasetItem
31
+ from sentrix.eval.experiment import Experiment, ExperimentResults
32
+ from sentrix.eval import scorers
33
+ from sentrix.eval.compare import compare_models, prompt_ab_test
34
+
35
+ # Monitor
36
+ from sentrix.monitor.tracer import trace, span
37
+ from sentrix.monitor.drift import DriftDetector, DriftReport
38
+
39
+ # Sub-packages
40
+ from sentrix import guard, eval, monitor
41
+
42
+ __all__ = [
43
+ # Core
44
+ "init",
45
+ "dataset",
46
+ "experiment",
47
+ # Guard
48
+ "red_team",
49
+ "RedTeamReport",
50
+ "fingerprint",
51
+ "ModelFingerprint",
52
+ "auto_dataset",
53
+ "guard",
54
+ # Eval
55
+ "Dataset",
56
+ "DatasetItem",
57
+ "Experiment",
58
+ "ExperimentResults",
59
+ "scorers",
60
+ "compare_models",
61
+ "prompt_ab_test",
62
+ "eval",
63
+ # Monitor
64
+ "trace",
65
+ "span",
66
+ "DriftDetector",
67
+ "DriftReport",
68
+ "monitor",
69
+ ]
70
+
71
+
72
def init(
    persist: bool = True,
    db_path: str | None = None,
    offline: bool = False,
    local_judge_model: str = "llama3",
    judge_model: str = "gpt-4o-mini",
) -> None:
    """
    Initialize sentrix: configure judge providers, storage, and interceptors.

    Args:
        persist: write results to SQLite (default True)
        db_path: custom path for sentrix.db (default: ~/.sentrix/data.db)
        offline: use a local Ollama model for judging (no external API calls)
        local_judge_model: Ollama model used when offline=True
        judge_model: default judge model when offline=False

    Example:
        sentrix.init()                           # Standard
        sentrix.init(offline=True)               # Fully offline with Ollama
        sentrix.init(db_path="/tmp/sentrix.db")  # Custom DB path
    """
    from sentrix import providers, db

    # Provider configuration decides which judge backend later calls use.
    providers.configure(
        offline=offline,
        local_judge_model=local_judge_model,
        judge_model=judge_model,
    )

    # Point the storage layer at a custom location before any schema work.
    if db_path:
        db.set_db_path(db_path)

    # Create tables only when persistence is requested.
    if persist:
        db.init_db(db_path)

    # Interceptor activation is best-effort: a missing or incompatible SDK
    # must never prevent initialization from completing.
    try:
        from sentrix import interceptor
        interceptor.activate()
    except Exception:
        pass
117
+
118
+
119
def dataset(name: str, description: str = "") -> Dataset:
    """Return a named :class:`Dataset` (created or loaded by name)."""
    ds = Dataset(name, description)
    return ds
122
+
123
+
124
def experiment(
    name: str,
    dataset: Dataset | str,
    fn,
    scorers: list | None = None,
) -> Experiment:
    """Build an :class:`Experiment` binding a dataset, a target fn, and scorers."""
    # An omitted scorer list becomes an empty one rather than None.
    scorer_list = scorers if scorers else []
    return Experiment(name=name, dataset=dataset, fn=fn, scorers=scorer_list)
sentrix/cli.py ADDED
@@ -0,0 +1,396 @@
1
+ """sentrix CLI — command-line interface for red teaming, eval, monitoring, and more."""
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import importlib
6
+ import json
7
+ import sys
8
+ from typing import Callable
9
+
10
+
11
+ def _load_fn(module_path: str) -> Callable:
12
+ """Load a function from 'module:function' syntax."""
13
+ if ":" not in module_path:
14
+ print(f"[sentrix] Error: expected 'module:function' format, got '{module_path}'")
15
+ sys.exit(1)
16
+ module_name, fn_name = module_path.rsplit(":", 1)
17
+ try:
18
+ mod = importlib.import_module(module_name)
19
+ return getattr(mod, fn_name)
20
+ except (ImportError, AttributeError) as e:
21
+ print(f"[sentrix] Error loading {module_path}: {e}")
22
+ sys.exit(1)
23
+
24
+
25
def cmd_scan(args) -> None:
    """sentrix scan module:fn [--plugins ...] [--n 10] [--git-compare main] [--fail-on-regression]"""
    from sentrix import init, red_team

    init(persist=True)

    target_fn = _load_fn(args.target)
    if args.plugins:
        plugins = [name.strip() for name in args.plugins.split(",")]
    else:
        plugins = ["jailbreak", "pii", "harmful"]

    report = red_team(
        target_fn,
        plugins=plugins,
        n_attacks=args.n,
        git_compare=args.git_compare,
        fail_on_regression=args.fail_on_regression,
    )
    report.summary()

    # Optionally persist a machine-readable copy for CI consumers.
    if args.output:
        with open(args.output, "w") as f:
            json.dump(report.to_json(), f, indent=2)
        print(f"[sentrix] Report saved to {args.output}")
46
+
47
+
48
def cmd_fingerprint(args) -> None:
    """sentrix fingerprint module:fn1 module:fn2 [--plugins all] [--n 10]"""
    from sentrix import init
    from sentrix.guard.fingerprint import fingerprint

    init(persist=True)

    # Map each raw target spec to its loaded callable, preserving CLI order.
    targets = {spec: _load_fn(spec) for spec in args.targets}

    if args.plugins:
        plugins = [name.strip() for name in args.plugins.split(",")]
    else:
        plugins = ["jailbreak", "pii", "harmful", "hallucination", "injection"]
    if args.plugins == "all":
        # "all" expands to every registered attack plugin.
        from sentrix.guard.attacks import PLUGIN_REGISTRY
        plugins = list(PLUGIN_REGISTRY.keys())

    fp = fingerprint(targets, plugins=plugins, n_attacks=args.n)
    fp.heatmap()
66
+
67
+
68
def cmd_auto_dataset(args) -> None:
    """sentrix auto-dataset module:fn [--n 20] [--focus adversarial] [--name myds]"""
    from sentrix import init, auto_dataset

    init(persist=True)

    target_fn = _load_fn(args.target)
    generated = auto_dataset(target_fn, n=args.n, focus=args.focus, name=args.name)
    print(f"[sentrix] Generated dataset '{generated.name}' with {len(generated)} items")
76
+
77
+
78
def cmd_scan_agent(args) -> None:
    """sentrix scan-agent module:fn [--mcp URL] — probe an agentic workflow."""
    from sentrix import init
    from sentrix.guard.agent import scan_agent

    init(persist=True)

    agent_fn = _load_fn(args.target)
    report = scan_agent(agent_fn, mcp_endpoint=args.mcp)
    report.summary()
86
+
87
+
88
def cmd_scan_rag(args) -> None:
    """sentrix scan-rag --docs PATH [--system-prompt FILE] [--baseline-hash HASH]

    Scans a RAG corpus — a single file or every regular file directly inside
    a directory — for poisoning and PII, then prints the report.
    """
    from sentrix import init
    from sentrix.guard.rag_scanner import scan_rag
    import os
    init(persist=True)

    # Load documents. Decode as UTF-8 explicitly (the previous implicit
    # locale encoding varied by platform) with undecodable bytes replaced so
    # one binary file cannot abort the scan; sort directory entries for a
    # deterministic document order.
    docs = []
    if os.path.isdir(args.docs):
        for f in sorted(os.listdir(args.docs)):
            fp = os.path.join(args.docs, f)
            if os.path.isfile(fp):
                with open(fp, encoding="utf-8", errors="replace") as fh:
                    docs.append({"id": f, "content": fh.read()})
    else:
        with open(args.docs, encoding="utf-8", errors="replace") as fh:
            docs = [{"id": args.docs, "content": fh.read()}]

    # Optional system prompt to check alongside the corpus.
    system_prompt = None
    if args.system_prompt:
        with open(args.system_prompt, encoding="utf-8") as fh:
            system_prompt = fh.read()

    report = scan_rag(docs, system_prompt=system_prompt, baseline_hash=args.baseline_hash)
    report.summary()
113
+
114
+
115
def cmd_eval_run(args) -> None:
    """sentrix eval run FILE [--fail-below RATE]

    Executes the experiment defined in a Python file (module-level
    'experiment' or 'exp' variable) and optionally exits non-zero when the
    pass rate falls below a threshold — intended for CI gates.
    """
    from sentrix import init
    init(persist=True)

    # Execute the experiment file in an isolated namespace.
    import runpy
    ns = runpy.run_path(args.file)
    exp = ns.get("experiment") or ns.get("exp")
    if exp is None:
        print("[sentrix] Error: file must define 'experiment' or 'exp' variable")
        sys.exit(1)

    results = exp.run()
    results.summary()

    # Compare against None explicitly: 0.0 is falsy, and the previous
    # truthiness test silently ignored an explicit --fail-below 0.0.
    if args.fail_below is not None and results.pass_rate < args.fail_below:
        print(f"[sentrix] FAIL: pass rate {results.pass_rate:.1%} < {args.fail_below:.1%}")
        sys.exit(1)
133
+
134
+
135
def cmd_monitor_watch(args) -> None:
    """sentrix monitor watch module:fn — periodically re-attack a live target."""
    from sentrix import init
    from sentrix.monitor.daemon import watch

    init(persist=True)

    target_fn = _load_fn(args.target)
    if args.plugins:
        plugin_names = [name.strip() for name in args.plugins.split(",")]
    else:
        plugin_names = ["jailbreak", "pii"]
    watch(
        target_fn,
        interval_seconds=args.interval,
        plugins=plugin_names,
        n_attacks=args.n,
        alert_webhook=args.webhook,
    )
143
+
144
+
145
def cmd_monitor_drift(args) -> None:
    """sentrix monitor drift --baseline EXP_NAME [--window HOURS] [--fail]"""
    from sentrix import init
    from sentrix.monitor.drift import DriftDetector

    init(persist=True)

    # --fail escalates detected drift from a warning to a hard error.
    mode = "raise" if args.fail else "warn"
    detector = DriftDetector(on_drift=mode)
    detector.baseline(args.baseline)
    report = detector.check(window_hours=args.window)
    report.summary()
154
+
155
+
156
def cmd_monitor_traces(args) -> None:
    """sentrix monitor traces [--limit N] — list recent traces from the DB."""
    from sentrix.db import _q
    import datetime

    rows = _q("SELECT id, name, start_time, end_time, user_id, error FROM traces ORDER BY start_time DESC LIMIT ?", (args.limit,))
    if not rows:
        print("[sentrix] No traces found.")
        return

    print(f"\n{'ID':^38} {'Name':<25} {'Start':^20} {'Error'}")
    print("-" * 90)
    for row in rows:
        if row["start_time"]:
            started = datetime.datetime.fromtimestamp(row["start_time"]).strftime("%Y-%m-%d %H:%M:%S")
        else:
            started = "-"
        print(f"{row['id']:<38} {(row['name'] or '-'):<25} {started:<20} {row['error'] or '-'}")
168
+
169
+
170
def cmd_review_list(args) -> None:
    """sentrix review list [--pending] [--limit N] — browse the review queue."""
    from sentrix.review.annotations import ReviewQueue

    queue = ReviewQueue()
    if args.pending:
        # Show at most the first 20 unreviewed findings.
        items = queue.pending()
        print(f"\n[sentrix] {len(items)} pending review items:")
        for item in items[:20]:
            print(f" [{item['plugin']}] {item['attack_input'][:80]}...")
    else:
        anns = queue.list_annotations(limit=args.limit)
        print(f"\n[sentrix] {len(anns)} annotations:")
        for ann in anns:
            print(f" [{ann.label}] {ann.result_id[:40]} — {ann.comment or ''}")
183
+
184
+
185
def cmd_review_annotate(args) -> None:
    """sentrix review annotate RESULT_ID --label LABEL [--reviewer R] [--comment C]"""
    from sentrix.review.annotations import annotate

    ann = annotate(args.result_id, args.label, reviewer=args.reviewer, comment=args.comment)
    print(f"[sentrix] Annotated {ann.result_id} as '{ann.label}'")
189
+
190
+
191
def cmd_compliance(args) -> None:
    """sentrix compliance [--framework F ...] [--output FILE]

    Generates one compliance report per requested framework. Falls back to
    OWASP LLM Top 10 when the framework list is missing or empty, so the
    command always produces at least one report.
    """
    from sentrix import init
    from sentrix.compliance import generate_report
    init(persist=True)

    # Robustness: never loop zero times on an empty/None framework list.
    frameworks = args.framework or ["owasp_llm_top10"]
    for framework in frameworks:
        report = generate_report(framework, output=args.output)
        report.summary()
199
+
200
+
201
def cmd_plugin_list(args) -> None:
    """sentrix plugin list — show available and installed plugins."""
    from sentrix.plugins import list_available, list_installed

    print("\n[sentrix] Available plugins:")
    for plugin in list_available():
        print(f" {plugin['name']:<30} {plugin['description']}")
    print("\n[sentrix] Installed plugins:")
    for plugin in list_installed():
        print(f" {plugin['name']:<30} v{plugin.get('version', '?')}")
209
+
210
+
211
def cmd_plugin_install(args) -> None:
    """sentrix plugin install NAME — install a plugin by name."""
    from sentrix.plugins import install

    install(args.name)
214
+
215
+
216
def cmd_serve(args) -> None:
    """sentrix serve [--port P] [--db PATH] [--no-open] — start the dashboard."""
    from sentrix.server.app import run

    run(port=args.port, db_path=args.db, no_open=args.no_open)
219
+
220
+
221
def cmd_history(args) -> None:
    """sentrix history [--limit N] — show recent red-team scan reports."""
    from sentrix.db import _q, init_db
    import datetime

    init_db()
    rows = _q("SELECT target_fn, model, vulnerability_rate, total_cost_usd, created_at FROM red_team_reports ORDER BY created_at DESC LIMIT ?", (args.limit,))
    if not rows:
        print("[sentrix] No scan history found. Run sentrix scan first.")
        return

    print(f"\n{'Target':<30} {'Model':<15} {'Vuln%':>8} {'Cost':>10} {'Date'}")
    print("-" * 75)
    for row in rows:
        when = datetime.datetime.fromtimestamp(row["created_at"]).strftime("%Y-%m-%d") if row["created_at"] else "-"
        vuln = "-" if row["vulnerability_rate"] is None else f"{row['vulnerability_rate']:.1%}"
        # Zero/NULL cost renders as "-" rather than "$0.0000".
        cost = f"${row['total_cost_usd']:.4f}" if row["total_cost_usd"] else "-"
        print(f"{(row['target_fn'] or '-'):<30} {(row['model'] or '-'):<15} {vuln:>8} {cost:>10} {when}")
236
+
237
+
238
def cmd_costs(args) -> None:
    """sentrix costs [--days N] — aggregate LLM spend per model."""
    from sentrix.db import _q, init_db
    import time

    init_db()
    # Window start: N days back from now, in epoch seconds.
    cutoff = time.time() - args.days * 86400
    rows = _q("SELECT model, SUM(cost_usd) as total, COUNT(*) as calls FROM llm_calls WHERE timestamp > ? GROUP BY model ORDER BY total DESC", (cutoff,))
    if not rows:
        print(f"[sentrix] No LLM calls in the last {args.days} days.")
        return

    print(f"\nLLM Costs — last {args.days} days:")
    print(f" {'Model':<30} {'Calls':>8} {'Total Cost':>12}")
    print(" " + "-" * 54)
    grand_total = 0.0
    for row in rows:
        print(f" {row['model']:<30} {row['calls']:>8} ${row['total']:>11.4f}")
        grand_total += row["total"]
    print(f"\n {'TOTAL':<30} {'':>8} ${grand_total:>11.4f}")
255
+
256
+
257
def cmd_version(args) -> None:
    """sentrix version — print the installed package version."""
    from sentrix import __version__

    print(f"sentrix v{__version__}")
260
+
261
+
262
def main() -> None:
    """CLI entry point: build the argument parser and dispatch to a cmd_* handler."""
    parser = argparse.ArgumentParser(
        prog="sentrix",
        description="Red-team, eval, and monitor your LLMs.",
    )
    sub = parser.add_subparsers(dest="command")

    # scan
    p_scan = sub.add_parser("scan", help="Red team a function")
    p_scan.add_argument("target", help="module:function")
    p_scan.add_argument("--plugins", default="jailbreak,pii,harmful")
    p_scan.add_argument("--n", type=int, default=10, help="Attacks per plugin")
    p_scan.add_argument("--git-compare", dest="git_compare", metavar="REF")
    p_scan.add_argument("--fail-on-regression", action="store_true", dest="fail_on_regression")
    p_scan.add_argument("--output", metavar="FILE")
    p_scan.set_defaults(func=cmd_scan)

    # fingerprint
    p_fp = sub.add_parser("fingerprint", help="Attack heatmap across models")
    p_fp.add_argument("targets", nargs="+", help="module:function pairs")
    p_fp.add_argument("--plugins", default="jailbreak,pii,harmful")
    p_fp.add_argument("--n", type=int, default=10)
    p_fp.set_defaults(func=cmd_fingerprint)

    # auto-dataset
    p_ds = sub.add_parser("auto-dataset", help="Generate test dataset from function")
    p_ds.add_argument("target", help="module:function")
    p_ds.add_argument("--n", type=int, default=20)
    p_ds.add_argument("--focus", default="mixed", choices=["adversarial", "normal", "edge_case", "mixed"])
    p_ds.add_argument("--name")
    p_ds.set_defaults(func=cmd_auto_dataset)

    # scan-agent
    p_agent = sub.add_parser("scan-agent", help="Test agentic workflow security")
    p_agent.add_argument("target", help="module:function")
    p_agent.add_argument("--mcp", metavar="URL")
    p_agent.set_defaults(func=cmd_scan_agent)

    # scan-rag
    p_rag = sub.add_parser("scan-rag", help="Scan RAG corpus for poisoning and PII")
    p_rag.add_argument("--docs", required=True, metavar="PATH")
    p_rag.add_argument("--system-prompt", dest="system_prompt", metavar="FILE")
    p_rag.add_argument("--baseline-hash", dest="baseline_hash", metavar="HASH")
    p_rag.set_defaults(func=cmd_scan_rag)

    # eval run
    p_eval = sub.add_parser("eval", help="Evaluation commands")
    eval_sub = p_eval.add_subparsers(dest="eval_command")
    p_eval_run = eval_sub.add_parser("run")
    p_eval_run.add_argument("file", help="Python file defining 'experiment' variable")
    p_eval_run.add_argument("--fail-below", type=float, dest="fail_below", metavar="RATE")
    p_eval_run.set_defaults(func=cmd_eval_run)

    # monitor
    p_mon = sub.add_parser("monitor", help="Monitoring commands")
    mon_sub = p_mon.add_subparsers(dest="mon_command")

    p_watch = mon_sub.add_parser("watch")
    p_watch.add_argument("target", help="module:function")
    p_watch.add_argument("--interval", type=int, default=60)
    p_watch.add_argument("--plugins", default="jailbreak,pii")
    p_watch.add_argument("--n", type=int, default=5)
    p_watch.add_argument("--webhook", metavar="URL")
    p_watch.set_defaults(func=cmd_monitor_watch)

    p_drift = mon_sub.add_parser("drift")
    p_drift.add_argument("--baseline", required=True, metavar="EXP_NAME")
    p_drift.add_argument("--window", type=float, default=24.0, metavar="HOURS")
    p_drift.add_argument("--fail", action="store_true")
    p_drift.set_defaults(func=cmd_monitor_drift)

    p_traces = mon_sub.add_parser("traces")
    p_traces.add_argument("--limit", type=int, default=20)
    p_traces.set_defaults(func=cmd_monitor_traces)

    # review
    p_rev = sub.add_parser("review", help="Human review workflow")
    rev_sub = p_rev.add_subparsers(dest="rev_command")

    p_rev_list = rev_sub.add_parser("list")
    p_rev_list.add_argument("--pending", action="store_true")
    p_rev_list.add_argument("--limit", type=int, default=20)
    p_rev_list.set_defaults(func=cmd_review_list)

    p_rev_ann = rev_sub.add_parser("annotate")
    p_rev_ann.add_argument("result_id")
    p_rev_ann.add_argument("--label", required=True, choices=["true_positive", "false_positive", "needs_review"])
    p_rev_ann.add_argument("--reviewer")
    p_rev_ann.add_argument("--comment")
    p_rev_ann.set_defaults(func=cmd_review_annotate)

    # compliance
    p_comp = sub.add_parser("compliance", help="Generate compliance reports")
    # NOTE: argparse appends user values to a non-None default when
    # action="append", so defaulting to ["owasp_llm_top10"] here made
    # `--framework nist_ai_rmf` report on BOTH frameworks. Default to None
    # and substitute after parsing instead.
    p_comp.add_argument("--framework", action="append", default=None, dest="framework",
                        choices=["owasp_llm_top10", "nist_ai_rmf", "eu_ai_act", "soc2"],
                        help="Framework to report on (can specify multiple)")
    p_comp.add_argument("--output", metavar="FILE")
    p_comp.set_defaults(func=cmd_compliance)

    # plugin
    p_plug = sub.add_parser("plugin", help="Plugin ecosystem")
    plug_sub = p_plug.add_subparsers(dest="plug_command")
    plug_sub.add_parser("list").set_defaults(func=cmd_plugin_list)
    p_install = plug_sub.add_parser("install")
    p_install.add_argument("name")
    p_install.set_defaults(func=cmd_plugin_install)

    # serve
    p_serve = sub.add_parser("serve", help="Start dashboard")
    p_serve.add_argument("--port", type=int, default=7234)
    p_serve.add_argument("--db", metavar="PATH")
    p_serve.add_argument("--no-open", action="store_true", dest="no_open")
    p_serve.set_defaults(func=cmd_serve)

    # history / costs / version
    p_hist = sub.add_parser("history", help="Show scan history")
    p_hist.add_argument("--limit", type=int, default=20)
    p_hist.set_defaults(func=cmd_history)

    p_costs = sub.add_parser("costs", help="Show LLM costs")
    p_costs.add_argument("--days", type=int, default=7)
    p_costs.set_defaults(func=cmd_costs)

    sub.add_parser("version").set_defaults(func=cmd_version)

    args = parser.parse_args()
    if not hasattr(args, "func"):
        parser.print_help()
        return

    # Fill the compliance framework default here (see NOTE above); the
    # getattr sentinel leaves every other subcommand untouched.
    if getattr(args, "framework", True) is None:
        args.framework = ["owasp_llm_top10"]

    args.func(args)


if __name__ == "__main__":
    main()
@@ -0,0 +1,4 @@
1
+ """sentrix.compliance — Generate compliance reports: OWASP LLM Top 10, NIST AI RMF, EU AI Act."""
2
+ from sentrix.compliance.reporter import ComplianceReport, generate_report, FRAMEWORKS
3
+
4
+ __all__ = ["ComplianceReport", "generate_report", "FRAMEWORKS"]