intentprobe 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ """intentprobe public package."""
2
+
3
# Version string of the intentprobe package.
__version__ = "0.1.0"
@@ -0,0 +1,9 @@
1
+ """Run the scanner CLI with `python -m intentprobe`."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .cli import main
6
+
7
+
8
# When executed as a script/module, run the CLI and use main()'s return
# value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
intentprobe/cli.py ADDED
@@ -0,0 +1,9 @@
1
+ """Product CLI entrypoint for the activation scanner."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from intentprobe.scanner.cli import main
6
+
7
+
8
# Direct execution delegates to the scanner CLI's main(); its return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
intentprobe/hook.py ADDED
@@ -0,0 +1,9 @@
1
+ """Hook CLI entrypoint for MCP/tool/skill scanner gates."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from intentprobe.scanner.hook import main
6
+
7
+
8
# Direct execution delegates to the scanner hook's main(); its return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,12 @@
1
+ """intentprobe scanner runtime package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .core import CORE_VERSION, DECISION_POLICY_NAME, scan_text, scan_texts
6
+
7
# Public API of the scanner package: the names re-exported from .core.
__all__ = [
    "CORE_VERSION",
    "DECISION_POLICY_NAME",
    "scan_text",
    "scan_texts",
]
@@ -0,0 +1,176 @@
1
+ {
2
+ "artifact_id": "qwen-pooled-curated-core-l13-15-v2",
3
+ "artifact_version": "activation-probe-artifact-v1",
4
+ "created_at": "2026-06-03T09:27:39Z",
5
+ "dataset": {
6
+ "max_train_samples": null,
7
+ "seed": 42,
8
+ "summary": {
9
+ "families": {
10
+ "credential_forwarding": {
11
+ "clean": 6,
12
+ "other": 0,
13
+ "poisoned": 6
14
+ },
15
+ "hidden_persistence_logging": {
16
+ "clean": 4,
17
+ "other": 0,
18
+ "poisoned": 4
19
+ },
20
+ "instruction_chaining": {
21
+ "clean": 4,
22
+ "other": 0,
23
+ "poisoned": 4
24
+ },
25
+ "live_system_access": {
26
+ "clean": 6,
27
+ "other": 0,
28
+ "poisoned": 6
29
+ },
30
+ "network_exfiltration": {
31
+ "clean": 6,
32
+ "other": 0,
33
+ "poisoned": 6
34
+ },
35
+ "secret_file_access": {
36
+ "clean": 3,
37
+ "other": 0,
38
+ "poisoned": 3
39
+ },
40
+ "system_inventory": {
41
+ "clean": 6,
42
+ "other": 0,
43
+ "poisoned": 6
44
+ },
45
+ "tool_shadowing": {
46
+ "clean": 3,
47
+ "other": 0,
48
+ "poisoned": 3
49
+ }
50
+ },
51
+ "labels": {
52
+ "clean": 423,
53
+ "other": 0,
54
+ "poisoned": 566
55
+ },
56
+ "n": 989,
57
+ "pair_count": 38,
58
+ "source_types": {
59
+ "public_dataset": 16,
60
+ "real_carrier_adapted": 14,
61
+ "regression_fixture": 2,
62
+ "synthetic_curated": 44
63
+ },
64
+ "sources": {
65
+ "family_curated_v0.json": 76,
66
+ "hard_v2_clean.json": 20,
67
+ "hard_v2_poisoned.json": 20,
68
+ "hard_v3_matched_clean.json": 8,
69
+ "hard_v3_matched_poisoned.json": 8,
70
+ "mcptox_clean_descriptions_labeled.json": 342,
71
+ "mcptox_poisoned_descriptions_labeled.json": 485,
72
+ "neutral_clean.json": 15,
73
+ "neutral_poisoned.json": 15
74
+ },
75
+ "split_group_count": 38,
76
+ "styles": {
77
+ "family_curated_v0": {
78
+ "clean": 30,
79
+ "other": 0,
80
+ "poisoned": 30
81
+ },
82
+ "hard_v2": {
83
+ "clean": 20,
84
+ "other": 0,
85
+ "poisoned": 20
86
+ },
87
+ "matched": {
88
+ "clean": 8,
89
+ "other": 0,
90
+ "poisoned": 8
91
+ },
92
+ "mcptox": {
93
+ "clean": 342,
94
+ "other": 0,
95
+ "poisoned": 485
96
+ },
97
+ "neutral": {
98
+ "clean": 15,
99
+ "other": 0,
100
+ "poisoned": 15
101
+ },
102
+ "skillinject_curated": {
103
+ "clean": 8,
104
+ "other": 0,
105
+ "poisoned": 8
106
+ }
107
+ }
108
+ },
109
+ "train_source": "pooled-curated-core"
110
+ },
111
+ "extractor": "raw",
112
+ "feature_details": {
113
+ "batch_size": 16,
114
+ "device": "cpu",
115
+ "dtype": "float32",
116
+ "extract_elapsed_seconds": 36.922180209017824,
117
+ "gated": false,
118
+ "hf_model_id": "Qwen/Qwen2.5-0.5B",
119
+ "max_length": 256,
120
+ "n_hidden_layers": 24
121
+ },
122
+ "feature_dim": 2688,
123
+ "feature_kind": "raw_activation",
124
+ "layer_policy": {
125
+ "candidate_layers": [
126
+ 13,
127
+ 14,
128
+ 15
129
+ ],
130
+ "layer": "concat:13,14,15",
131
+ "mode": "concat",
132
+ "selected_k": 3,
133
+ "selected_layers": [
134
+ 13,
135
+ 14,
136
+ 15
137
+ ],
138
+ "selection_score": 0.9615648874532121,
139
+ "selector": "cv"
140
+ },
141
+ "model_id": "Qwen/Qwen2.5-0.5B",
142
+ "probe": {
143
+ "algorithm": "logistic_regression",
144
+ "classes": [
145
+ 0,
146
+ 1
147
+ ],
148
+ "max_iter": 3000,
149
+ "train_metrics": {
150
+ "accuracy": 0.9949443882709808,
151
+ "f1": 0.9955634427684117,
152
+ "precision": 1.0,
153
+ "recall": 0.991166077738516
154
+ }
155
+ },
156
+ "sae": null,
157
+ "scanner_version": "activation-scanner-core-2026-06-03-static-v2",
158
+ "sensor": {
159
+ "default_depths": [
160
+ 0.25,
161
+ 0.4,
162
+ 0.55
163
+ ],
164
+ "default_layers": null,
165
+ "gated": false,
166
+ "hf_model_id": "Qwen/Qwen2.5-0.5B",
167
+ "kind": "transformer",
168
+ "name": "qwen2.5-0.5b",
169
+ "notes": "Small local fallback seen in earlier experiments."
170
+ },
171
+ "sensor_model": "qwen2.5-0.5b",
172
+ "thresholds": {
173
+ "block": 0.85,
174
+ "warn": 0.3
175
+ }
176
+ }
@@ -0,0 +1,338 @@
1
+ #!/usr/bin/env python3
2
+ """Product-preview CLI for the cached activation scanner."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import argparse
7
+ import json
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ try:
13
+ from .core import (
14
+ CORE_VERSION,
15
+ DECISION_POLICY_NAME,
16
+ load_artifact,
17
+ read_batch_inputs,
18
+ scan_texts,
19
+ )
20
+ except ImportError:
21
+ sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
22
+ from intentprobe.scanner.core import ( # type: ignore
23
+ CORE_VERSION,
24
+ DECISION_POLICY_NAME,
25
+ load_artifact,
26
+ read_batch_inputs,
27
+ scan_texts,
28
+ )
29
+
30
+
31
# Directory that contains this module; the bundled artifact is located
# relative to it.
PACKAGE_DIR = Path(__file__).resolve().parent

# Artifact directory used when the caller does not pass --artifact.
DEFAULT_ARTIFACT = PACKAGE_DIR.joinpath(
    "artifacts",
    "qwen-pooled-curated-core-l13-15-v2",
)

# Severity ordering of scanner decisions: a larger rank is a more severe
# decision.  Used to pick the worst decision and to evaluate --fail-on.
DECISION_RANK = {
    decision: rank
    for rank, decision in enumerate(("allow", "warn", "block", "quarantine"))
}
43
+
44
+
45
def artifact_complete(path: Path) -> bool:
    """Report whether *path* refers to a complete scanner artifact.

    *path* may be the artifact directory itself or an existing file inside
    it; for a file, the containing directory is inspected.  The artifact is
    complete only when that directory holds both ``metadata.json`` and
    ``probe_weights.npz``.
    """
    root = path
    if path.is_file():
        root = path.parent
    required_files = ("metadata.json", "probe_weights.npz")
    return all((root / name).exists() for name in required_files)
48
+
49
+
50
def add_runtime_args(parser: argparse.ArgumentParser) -> None:
    """Attach the options shared by the scanning subcommands to *parser*.

    Adds the artifact location, the optional runtime overrides (left as
    ``None`` when not supplied), the ``--fail-on`` exit policy and the
    output-format switches.  Options are registered in a fixed order so the
    generated ``--help`` text stays stable.
    """
    register = parser.add_argument

    register("--artifact", type=Path, default=DEFAULT_ARTIFACT, help="Cached scanner artifact directory.")

    # Runtime overrides: no value on the command line means ``None``.
    for flag in ("--batch-size", "--max-length"):
        register(flag, type=int, default=None)
    register("--device", default=None)
    register("--dtype", choices=("auto", "float32", "bfloat16"), default=None)
    register("--local-files-only", action="store_true")
    for flag in ("--warn-threshold", "--block-threshold"):
        register(flag, type=float, default=None)
    register("--top-sae-features", type=int, default=8)

    # Exit policy and output formatting.
    register(
        "--fail-on",
        choices=("never", "warn", "block", "quarantine"),
        default="never",
        help="Exit with status 2 when any result reaches this decision or higher.",
    )
    register(
        "--format",
        choices=("json", "summary"),
        default="json",
        help="Output JSON for hooks or a short human-readable summary.",
    )
    register("--pretty", action="store_true", help="Pretty-print JSON output.")
74
+
75
def read_scan_text(args: argparse.Namespace) -> str:
    """Return the text to scan from exactly one of --text, --file or stdin.

    Args:
        args: Parsed ``scan`` arguments exposing ``text`` (str or None),
            ``file`` (Path or None) and ``stdin`` (bool).

    Returns:
        The ``--text`` value, the decoded contents of ``--file``, or
        everything read from standard input.

    Raises:
        SystemExit: If more than one source is selected, or if no source is
            selected and stdin is an interactive terminal.
    """
    # Test against None rather than truthiness: an explicit ``--text ""`` is
    # a deliberate (empty) input and must neither fall through to stdin nor
    # slip past the mutual-exclusion check.
    has_text = args.text is not None
    has_file = args.file is not None
    if sum((has_text, has_file, bool(args.stdin))) > 1:
        raise SystemExit("Use only one of --text, --file, or --stdin.")
    if has_text:
        return args.text
    if has_file:
        # Decode as UTF-8 regardless of the platform locale so the scanner
        # sees the same text everywhere; undecodable bytes are replaced
        # instead of aborting the scan.
        return args.file.read_text(encoding="utf-8", errors="replace")
    # With no explicit source, piped (non-interactive) stdin is used.
    if args.stdin or not sys.stdin.isatty():
        return sys.stdin.read()
    raise SystemExit("Provide --text, --file, or pipe text on stdin.")
86
+
87
+
88
def max_decision(results: list[dict[str, Any]]) -> str:
    """Return the most severe decision found in *results*.

    Rows without a ``decision`` key count as ``"allow"``; decisions missing
    from ``DECISION_RANK`` rank below every known one.  An empty list yields
    ``"allow"``.  On ties the earliest row wins.
    """
    if not results:
        return "allow"
    decisions = [str(row.get("decision", "allow")) for row in results]
    worst = decisions[0]
    for candidate in decisions[1:]:
        # Strict comparison keeps the first of several equally ranked rows.
        if DECISION_RANK.get(candidate, -1) > DECISION_RANK.get(worst, -1):
            worst = candidate
    return worst
92
+
93
+
94
def exit_code_for(results: list[dict[str, Any]], fail_on: str) -> int:
    """Translate scan results into a process exit status.

    Returns 0 when *fail_on* is ``"never"``.  Otherwise returns 2 as soon as
    any row's decision ranks at or above *fail_on* in ``DECISION_RANK``, and
    0 when none does.  Decisions that are missing or not in the table rank
    below every known decision and therefore never trigger a failure.
    """
    if fail_on == "never":
        return 0
    minimum_rank = DECISION_RANK[fail_on]
    for row in results:
        if DECISION_RANK.get(str(row.get("decision")), -1) >= minimum_rank:
            return 2
    return 0
99
+
100
+
101
def scan_with_args(texts: list[str], args: argparse.Namespace) -> list[dict[str, Any]]:
    """Scan *texts* with the artifact and runtime options carried by *args*.

    Exits with build instructions when ``args.artifact`` is not a complete
    artifact; otherwise forwards the runtime options to ``scan_texts`` and
    returns its result.
    """
    if artifact_complete(args.artifact):
        runtime_options = {
            "batch_size": args.batch_size,
            "max_length": args.max_length,
            "device": args.device,
            "dtype": args.dtype,
            "local_files_only": args.local_files_only,
            "warn_threshold": args.warn_threshold,
            "block_threshold": args.block_threshold,
            "top_sae_features": args.top_sae_features,
        }
        return scan_texts(texts, args.artifact, **runtime_options)

    build_hint = (
        "Missing default scanner artifact. Build it with:\n"
        "research/.venv-audit/bin/python -m research.train_probe_artifact "
        "--model qwen2.5-0.5b --feature-kind raw --train-source pooled-curated-core "
        "--layers 13,14,15 --layer-mode concat "
        "--artifact-id qwen-pooled-curated-core-l13-15-v2 "
        "--output-dir intentprobe/scanner/artifacts "
        "--overwrite --warn-threshold 0.30 --block-threshold 0.85 --pretty"
    )
    raise SystemExit(build_hint)
124
+
125
+
126
def print_json(payload: Any, pretty: bool) -> None:
    """Write *payload* to stdout as JSON, keeping non-ASCII characters as-is.

    With *pretty* set the output is indented by two spaces; otherwise it is
    emitted on a single line.
    """
    indent = 2 if pretty else None
    rendered = json.dumps(payload, ensure_ascii=False, indent=indent)
    print(rendered)
128
+
129
+
130
def print_summary(results: list[dict[str, Any]]) -> None:
    """Print a short human-readable report for each result row.

    Each row produces one headline (label, decision and the three scores
    formatted to three decimals) followed by at most three risk reasons.
    Rows without an ``input_id`` are labelled ``input-<position>`` (1-based);
    missing scores are shown as 0.
    """
    for position, row in enumerate(results, start=1):
        label = row.get("input_id") or f"input-{position}"
        decision = row.get("decision")
        risk = float(row.get("risk_score", 0))
        activation = float(row.get("activation_score", 0))
        static = float(row.get("static_score", 0))
        print(
            f"{label}: decision={decision} "
            f"risk={risk:.3f} "
            f"activation={activation:.3f} "
            f"static={static:.3f}"
        )
        reasons = row.get("risk_reasons", [])
        for reason in reasons[:3]:
            print(f" - {reason}")
141
+
142
+
143
def command_scan(args: argparse.Namespace) -> int:
    """Handle ``scan``: scan one text and print its result.

    The text comes from ``read_scan_text``.  Output is a summary or JSON
    depending on ``args.format``; the return value is the exit status
    derived from ``args.fail_on``.
    """
    text = read_scan_text(args)
    result = scan_with_args([text], args)[0]
    if args.format != "summary":
        print_json(result, args.pretty)
    else:
        print_summary([result])
    return exit_code_for([result], args.fail_on)
150
+
151
+
152
def command_batch(args: argparse.Namespace) -> int:
    """Handle ``batch``: scan every entry of ``args.batch_file``.

    Entries are ``(input_id, text)`` pairs as returned by
    ``read_batch_inputs``.  Non-``None`` ids are copied onto the matching
    result row.  Output is either a per-row summary or one JSON envelope
    with batch-level metadata; the return value is the exit status derived
    from ``args.fail_on``.
    """
    rows = read_batch_inputs(args.batch_file)
    texts = [text for _, text in rows]
    results = scan_with_args(texts, args)

    # Results are matched to inputs by position.
    for position, (input_id, _) in enumerate(rows):
        if input_id is None:
            continue
        results[position]["input_id"] = input_id

    envelope = {
        "mode": "activation_scanner_cli_batch",
        "scanner_version": CORE_VERSION,
        "decision_policy": DECISION_POLICY_NAME,
        "artifact": str(args.artifact),
        "count": len(results),
        "max_decision": max_decision(results),
        "results": results,
    }
    if args.format != "summary":
        print_json(envelope, args.pretty)
    else:
        print_summary(results)
    return exit_code_for(results, args.fail_on)
172
+
173
+
174
def print_subject_summary(results: list[dict[str, Any]]) -> None:
    """Print a short human-readable report for each scanned subject.

    The label is the subject's ``path``, ``name`` or ``id`` (first non-empty
    one), falling back to ``subject-<position>`` (1-based).  Each row prints
    one headline followed by at most three risk reasons.
    """
    for position, row in enumerate(results, start=1):
        subject = row.get("subject") or {}
        risk = row.get("risk") or {}

        label = f"subject-{position}"
        for key in ("path", "name", "id"):
            candidate = subject.get(key)
            if candidate:
                label = candidate
                break

        decision = row.get("decision")
        # NOTE(review): risk_score is read from the top-level row while the
        # activation/static scores come from the nested "risk" mapping --
        # confirm this matches the shape produced by hook.scan_subjects.
        risk_score = float(row.get("risk_score", 0))
        activation = float(risk.get("activation_score", 0))
        static = float(risk.get("static_score", 0))
        print(
            f"{label}: decision={decision} "
            f"risk={risk_score:.3f} "
            f"activation={activation:.3f} "
            f"static={static:.3f}"
        )
        reasons = risk.get("risk_reasons", [])
        for reason in reasons[:3]:
            print(f" - {reason}")
187
+
188
+
189
def command_scan_path(args: argparse.Namespace) -> int:
    """Handle ``scan-path``: scan the subjects collected from ``args.path``.

    Subjects are gathered by ``collect_subjects_from_path`` and scanned by
    ``scan_subjects``.  The resulting payload is tagged with the CLI mode and
    target path, then printed as a summary or JSON.  The exit status is
    taken from the payload's ``gate.exit_code``.
    """
    from .hook import scan_subjects
    from .targets import collect_subjects_from_path

    collected = collect_subjects_from_path(
        args.path,
        max_files=args.max_files,
        max_file_bytes=args.max_file_bytes,
        include_readme=args.include_readme,
    )
    report = scan_subjects(collected, args)
    report["mode"] = "activation_scanner_cli_path"
    report["target_path"] = str(args.path)

    if args.format != "summary":
        print_json(report, args.pretty)
    else:
        print_subject_summary(report["results"])
    return int(report["gate"]["exit_code"])
207
+
208
+
209
def command_doctor(args: argparse.Namespace) -> int:
    """Handle ``doctor``: report on the artifact at ``args.artifact``.

    Always prints the scanner version, decision policy, artifact path and
    completeness flag as JSON.  When the artifact is complete it is loaded
    and selected metadata fields are added.  Returns 0 for a complete
    artifact and 1 otherwise.
    """
    complete = artifact_complete(args.artifact)
    report: dict[str, Any] = {
        "scanner_version": CORE_VERSION,
        "decision_policy": DECISION_POLICY_NAME,
        "artifact": str(args.artifact),
        "artifact_complete": complete,
    }

    if complete:
        artifact_dir, metadata, _ = load_artifact(args.artifact)
        report["artifact_dir"] = str(artifact_dir)
        # Fall back to the directory name when the metadata has no id.
        report["artifact_id"] = metadata.get("artifact_id", artifact_dir.name)
        copied_fields = (
            "model_id",
            "sensor_model",
            "feature_kind",
            "feature_dim",
            "thresholds",
            "layer_policy",
        )
        for field in copied_fields:
            report[field] = metadata.get(field)

    print_json(report, args.pretty)
    if complete:
        return 0
    return 1
233
+
234
+
235
def command_runtime_normalize(args: argparse.Namespace) -> int:
    """Handle ``runtime normalize`` by delegating to the hook module."""
    # Imported at call time rather than at module import time.
    from .hook import command_normalize as hook_normalize

    return hook_normalize(args)
239
+
240
+
241
def command_runtime_scan(args: argparse.Namespace) -> int:
    """Handle ``runtime scan`` by delegating to the hook module."""
    # Aliased so the hook's handler is not confused with this module's own
    # command_scan.
    from .hook import command_scan as hook_scan

    return hook_scan(args)
245
+
246
+
247
def command_runtime_serve_jsonl(args: argparse.Namespace) -> int:
    """Handle ``runtime serve-jsonl`` by delegating to the hook module."""
    # Imported at call time rather than at module import time.
    from .hook import command_serve_jsonl as hook_serve_jsonl

    return hook_serve_jsonl(args)
251
+
252
+
253
def build_parser() -> argparse.ArgumentParser:
    """Build the top-level argument parser with every subcommand attached.

    Subcommands: ``scan``, ``batch``, ``scan-path``, ``doctor`` and the
    nested ``runtime`` group (``normalize``, ``scan``, ``serve-jsonl``).
    Every leaf parser records its handler as the ``func`` default so the
    caller can dispatch on ``args.func``.
    """
    root = argparse.ArgumentParser(description=__doc__)
    root.add_argument("--version", action="store_true", help="Print scanner version and exit.")
    commands = root.add_subparsers(dest="command")

    # scan: one text from --text, --file or stdin.
    scan_cmd = commands.add_parser("scan", help="Scan one tool, MCP, skill, hook, or prompt text.")
    scan_cmd.add_argument("--text", help="Text to scan.")
    scan_cmd.add_argument("--file", type=Path, help="File containing text to scan.")
    scan_cmd.add_argument("--stdin", action="store_true", help="Read text from stdin.")
    add_runtime_args(scan_cmd)
    scan_cmd.set_defaults(func=command_scan)

    # batch: many texts from a JSON file.
    batch_cmd = commands.add_parser("batch", help="Scan a JSON array of strings or {id,text} objects.")
    batch_cmd.add_argument("--batch-file", type=Path, required=True)
    add_runtime_args(batch_cmd)
    batch_cmd.set_defaults(func=command_batch)

    # scan-path: a file or directory on disk.
    path_cmd = commands.add_parser(
        "scan-path",
        help="Scan a local MCP config, package folder, skill folder, README, or manifest.",
    )
    path_cmd.add_argument("path", type=Path, help="File or directory to scan.")
    path_cmd.add_argument("--max-files", type=int, default=40, help="Maximum candidate files to scan under a directory.")
    path_cmd.add_argument("--max-file-bytes", type=int, default=200_000, help="Maximum bytes read from each candidate file.")
    # README scanning is on by default; --no-readme switches it off.
    path_cmd.add_argument("--include-readme", dest="include_readme", action="store_true", default=True)
    path_cmd.add_argument("--no-readme", dest="include_readme", action="store_false")
    add_runtime_args(path_cmd)
    path_cmd.set_defaults(func=command_scan_path)

    # doctor: artifact health check.
    doctor_cmd = commands.add_parser("doctor", help="Check the cached scanner artifact.")
    doctor_cmd.add_argument("--artifact", type=Path, default=DEFAULT_ARTIFACT)
    doctor_cmd.add_argument("--pretty", action="store_true")
    doctor_cmd.set_defaults(func=command_doctor)

    # The hook module supplies the argument definitions for the runtime group.
    from .hook import add_payload_args as add_hook_payload_args
    from .hook import add_runtime_args as add_hook_runtime_args

    runtime_cmd = commands.add_parser(
        "runtime",
        help="Normalize or scan runtime tool-call events.",
    )
    runtime_commands = runtime_cmd.add_subparsers(dest="runtime_command")

    normalize_cmd = runtime_commands.add_parser(
        "normalize",
        help="Normalize/redact a runtime event without loading the model.",
    )
    add_hook_payload_args(normalize_cmd)
    normalize_cmd.add_argument("--pretty", action="store_true")
    normalize_cmd.set_defaults(func=command_runtime_normalize)

    runtime_scan_cmd = runtime_commands.add_parser(
        "scan",
        help="Scan one runtime event and emit a gate decision.",
    )
    add_hook_payload_args(runtime_scan_cmd)
    add_hook_runtime_args(runtime_scan_cmd)
    runtime_scan_cmd.set_defaults(func=command_runtime_scan)

    serve_cmd = runtime_commands.add_parser(
        "serve-jsonl",
        help="Keep a warm scanner process and scan one runtime JSON/text event per line.",
    )
    add_hook_runtime_args(serve_cmd)
    serve_cmd.add_argument("--input-format", choices=("auto", "json", "text"), default="auto")
    # Warm-up is on by default; --no-warmup switches it off.
    serve_cmd.add_argument("--warmup", dest="warmup", action="store_true", default=True)
    serve_cmd.add_argument("--no-warmup", dest="warmup", action="store_false")
    serve_cmd.set_defaults(func=command_runtime_serve_jsonl)

    return root
323
+
324
+
325
def main(argv: list[str] | None = None) -> int:
    """Parse *argv* (``sys.argv[1:]`` when ``None``) and run the chosen command.

    Returns 0 after printing the version for ``--version``, 2 after printing
    the help text when no command handler was selected, and otherwise the
    handler's result converted to ``int``.
    """
    parser = build_parser()
    namespace = parser.parse_args(argv)

    if namespace.version:
        print(CORE_VERSION)
        return 0

    # ``func`` is only present when a leaf subcommand was chosen.
    try:
        handler = namespace.func
    except AttributeError:
        parser.print_help()
        return 2
    return int(handler(namespace))
335
+
336
+
337
# When run as a script, execute the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())