agentseal 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -106
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,11 +1,35 @@
|
|
|
1
|
-
#
|
|
1
|
+
# AgentSeal
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Find out if your AI agent can be hacked. Before someone else does.
|
|
4
4
|
|
|
5
5
|
[npm package](https://www.npmjs.com/package/agentseal)
|
|
6
|
-
[](../LICENSE)
|
|
7
|
+
[Node.js](https://nodejs.org)
|
|
7
8
|
|
|
8
|
-
|
|
9
|
+
> **[agentseal.org](https://agentseal.org)**: Dashboard, scan history, PDF reports, and more.
|
|
10
|
+
|
|
11
|
+
## Why AgentSeal?
|
|
12
|
+
|
|
13
|
+
Your system prompt contains proprietary instructions, business logic, and behavioral rules. Attackers use prompt injection and extraction techniques to steal or override this data.
|
|
14
|
+
|
|
15
|
+
AgentSeal sends 150 automated attack probes at your agent and tells you exactly what broke, why it broke, and how to fix it. Every scan is deterministic. No AI judge. Same input, same result, every time.
|
|
16
|
+
|
|
17
|
+
## Open Source vs Hosted
|
|
18
|
+
|
|
19
|
+
| | Open Source | Hosted ([agentseal.org](https://agentseal.org)) |
|
|
20
|
+
|---|---|---|
|
|
21
|
+
| **Price** | Free | Free tier available |
|
|
22
|
+
| **Setup** | Bring your own API keys | Zero configuration |
|
|
23
|
+
| **Probes** | 150 (extraction + injection) | 196 (+ MCP + RAG) |
|
|
24
|
+
| **Mutations** | 8 adaptive transforms | 8 adaptive transforms |
|
|
25
|
+
| **Reports** | JSON output | Interactive dashboard + PDF |
|
|
26
|
+
| **History** | Manual tracking | Full scan history and trends |
|
|
27
|
+
| **CI/CD** | `--min-score` flag | Built-in |
|
|
28
|
+
| **Extras** | | Behavioral genome mapping |
|
|
29
|
+
|
|
30
|
+
[Try the hosted version](https://agentseal.org)
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
9
33
|
|
|
10
34
|
```bash
|
|
11
35
|
npm install agentseal
|
|
@@ -13,8 +37,6 @@ npm install agentseal
|
|
|
13
37
|
|
|
14
38
|
## Quick Start
|
|
15
39
|
|
|
16
|
-
### With OpenAI
|
|
17
|
-
|
|
18
40
|
```typescript
|
|
19
41
|
import { AgentValidator } from "agentseal";
|
|
20
42
|
import OpenAI from "openai";
|
|
@@ -27,85 +49,81 @@ const validator = AgentValidator.fromOpenAI(client, {
|
|
|
27
49
|
});
|
|
28
50
|
|
|
29
51
|
const report = await validator.run();
|
|
30
|
-
|
|
52
|
+
|
|
53
|
+
console.log(`Score: ${report.trust_score}/100`);
|
|
54
|
+
console.log(`Level: ${report.trust_level}`);
|
|
55
|
+
console.log(`Extraction resistance: ${report.score_breakdown.extraction_resistance}`);
|
|
56
|
+
console.log(`Injection resistance: ${report.score_breakdown.injection_resistance}`);
|
|
31
57
|
```
|
|
32
58
|
|
|
33
|
-
|
|
59
|
+
## Supported Providers
|
|
60
|
+
|
|
61
|
+
**Anthropic**
|
|
34
62
|
|
|
35
63
|
```typescript
|
|
36
|
-
import { AgentValidator } from "agentseal";
|
|
37
64
|
import Anthropic from "@anthropic-ai/sdk";
|
|
38
65
|
|
|
39
|
-
const
|
|
40
|
-
|
|
41
|
-
const validator = AgentValidator.fromAnthropic(client, {
|
|
66
|
+
const validator = AgentValidator.fromAnthropic(new Anthropic(), {
|
|
42
67
|
model: "claude-sonnet-4-5-20250929",
|
|
43
68
|
systemPrompt: "You are a helpful assistant.",
|
|
44
69
|
});
|
|
45
|
-
|
|
46
|
-
const report = await validator.run();
|
|
47
70
|
```
|
|
48
71
|
|
|
49
|
-
|
|
72
|
+
**Vercel AI SDK**
|
|
50
73
|
|
|
51
74
|
```typescript
|
|
52
|
-
import { AgentValidator } from "agentseal";
|
|
53
75
|
import { openai } from "@ai-sdk/openai";
|
|
54
76
|
|
|
55
77
|
const validator = AgentValidator.fromVercelAI({
|
|
56
78
|
model: openai("gpt-4o"),
|
|
57
79
|
systemPrompt: "You are a helpful assistant.",
|
|
58
80
|
});
|
|
59
|
-
|
|
60
|
-
const report = await validator.run();
|
|
61
81
|
```
|
|
62
82
|
|
|
63
|
-
|
|
83
|
+
**Ollama**
|
|
64
84
|
|
|
65
85
|
```typescript
|
|
66
|
-
|
|
86
|
+
const validator = AgentValidator.fromEndpoint({
|
|
87
|
+
url: "http://localhost:11434/v1/chat/completions",
|
|
88
|
+
});
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
**Any HTTP Endpoint**
|
|
67
92
|
|
|
93
|
+
```typescript
|
|
68
94
|
const validator = AgentValidator.fromEndpoint({
|
|
69
95
|
url: "http://localhost:8080/chat",
|
|
70
|
-
messageField: "message",
|
|
71
|
-
responseField: "response",
|
|
96
|
+
messageField: "message",
|
|
97
|
+
responseField: "response",
|
|
72
98
|
});
|
|
73
|
-
|
|
74
|
-
const report = await validator.run();
|
|
75
99
|
```
|
|
76
100
|
|
|
77
|
-
|
|
101
|
+
**Custom Function**
|
|
78
102
|
|
|
79
103
|
```typescript
|
|
80
|
-
import { AgentValidator } from "agentseal";
|
|
81
|
-
|
|
82
104
|
const validator = new AgentValidator({
|
|
83
105
|
agentFn: async (message) => {
|
|
84
|
-
|
|
85
|
-
return "response";
|
|
106
|
+
return await myAgent.chat(message);
|
|
86
107
|
},
|
|
87
108
|
groundTruthPrompt: "Your system prompt for comparison",
|
|
88
|
-
agentName: "My Agent",
|
|
89
109
|
concurrency: 5,
|
|
90
|
-
adaptive: true,
|
|
110
|
+
adaptive: true,
|
|
91
111
|
});
|
|
92
|
-
|
|
93
|
-
const report = await validator.run();
|
|
94
112
|
```
|
|
95
113
|
|
|
96
|
-
## CLI
|
|
114
|
+
## CLI Usage
|
|
97
115
|
|
|
98
116
|
```bash
|
|
99
|
-
# Scan
|
|
100
|
-
npx agentseal scan --prompt "You are a helpful assistant" --model gpt-4o
|
|
117
|
+
# Scan a system prompt
|
|
118
|
+
npx agentseal scan --prompt "You are a helpful assistant..." --model gpt-4o
|
|
101
119
|
|
|
102
|
-
# Scan
|
|
103
|
-
npx agentseal scan --
|
|
120
|
+
# Scan from file
|
|
121
|
+
npx agentseal scan --file ./my-prompt.txt --model ollama/qwen3
|
|
104
122
|
|
|
105
|
-
#
|
|
106
|
-
npx agentseal scan --prompt "
|
|
123
|
+
# JSON output
|
|
124
|
+
npx agentseal scan --prompt "..." --model gpt-4o --output json --save report.json
|
|
107
125
|
|
|
108
|
-
#
|
|
126
|
+
# CI mode (exit code 1 if below threshold)
|
|
109
127
|
npx agentseal scan --prompt "..." --model gpt-4o --min-score 75
|
|
110
128
|
|
|
111
129
|
# Compare two reports
|
|
@@ -115,67 +133,68 @@ npx agentseal compare baseline.json current.json
|
|
|
115
133
|
### CLI Options
|
|
116
134
|
|
|
117
135
|
| Flag | Description | Default |
|
|
118
|
-
|
|
119
|
-
| `-p, --prompt
|
|
120
|
-
| `-f, --file
|
|
121
|
-
| `--url
|
|
122
|
-
| `-m, --model
|
|
123
|
-
| `--api-key
|
|
124
|
-
| `-o, --output
|
|
125
|
-
| `--save
|
|
126
|
-
| `--concurrency
|
|
127
|
-
| `--timeout
|
|
136
|
+
|---|---|---|
|
|
137
|
+
| `-p, --prompt` | System prompt to test | |
|
|
138
|
+
| `-f, --file` | File containing system prompt | |
|
|
139
|
+
| `--url` | HTTP endpoint to test | |
|
|
140
|
+
| `-m, --model` | Model name (gpt-4o, claude-sonnet-4-5-20250929, ollama/qwen3) | |
|
|
141
|
+
| `--api-key` | API key (or use env var) | |
|
|
142
|
+
| `-o, --output` | `terminal` or `json` | terminal |
|
|
143
|
+
| `--save` | Save JSON report to file | |
|
|
144
|
+
| `--concurrency` | Parallel probes | 3 |
|
|
145
|
+
| `--timeout` | Per-probe timeout in seconds | 30 |
|
|
128
146
|
| `--adaptive` | Enable mutation phase | false |
|
|
129
|
-
| `--min-score
|
|
130
|
-
| `-v, --verbose` | Show
|
|
147
|
+
| `--min-score` | Minimum passing score for CI | |
|
|
148
|
+
| `-v, --verbose` | Show individual probe results | false |
|
|
149
|
+
|
|
150
|
+
## Attack Categories
|
|
131
151
|
|
|
132
|
-
|
|
152
|
+
AgentSeal runs 150 probes across two categories:
|
|
133
153
|
|
|
134
|
-
|
|
154
|
+
| Category | Probes | Techniques |
|
|
155
|
+
|---|:---:|---|
|
|
156
|
+
| **Extraction** | 70 | Direct requests, roleplay overrides, output format tricks, base64/ROT13/unicode encoding, multi-turn escalation, hypothetical framing, poems, songs, fill-in-the-blank |
|
|
157
|
+
| **Injection** | 80 | Instruction overrides, delimiter attacks, persona hijacking, DAN variants, privilege escalation, skeleton key, indirect injection, tool exploits, social engineering |
|
|
135
158
|
|
|
136
|
-
###
|
|
137
|
-
Attempts to extract the system prompt via:
|
|
138
|
-
- Direct requests, roleplay overrides, output format tricks
|
|
139
|
-
- Encoding attacks (base64, ROT13, unicode)
|
|
140
|
-
- Multi-turn escalation, hypothetical framing
|
|
141
|
-
- Creative format exploitation (poems, songs, fill-in-blank)
|
|
159
|
+
### Adaptive Mutations
|
|
142
160
|
|
|
143
|
-
|
|
144
|
-
Attempts to inject instructions via:
|
|
145
|
-
- Instruction overrides, delimiter attacks
|
|
146
|
-
- Persona hijacking, DAN variants
|
|
147
|
-
- Privilege escalation, skeleton key attacks
|
|
148
|
-
- Indirect injection, tool exploits
|
|
149
|
-
- Social engineering, emotional manipulation
|
|
161
|
+
When `adaptive: true`, AgentSeal takes the top 5 blocked probes and retries them with 8 obfuscation transforms:
|
|
150
162
|
|
|
151
|
-
|
|
163
|
+
| Transform | What it does |
|
|
164
|
+
|---|---|
|
|
165
|
+
| `base64` | Encodes the attack payload |
|
|
166
|
+
| `rot13` | Letter rotation cipher |
|
|
167
|
+
| `homoglyphs` | Replaces characters with unicode lookalikes |
|
|
168
|
+
| `zero-width` | Injects invisible unicode characters |
|
|
169
|
+
| `leetspeak` | Character substitution (a=4, e=3, etc.) |
|
|
170
|
+
| `case-scramble` | Randomizes capitalization |
|
|
171
|
+
| `reverse-embed` | Reverses and embeds the payload |
|
|
172
|
+
| `prefix-pad` | Pads with misleading context |
|
|
173
|
+
|
|
174
|
+
## Scan Results
|
|
152
175
|
|
|
153
176
|
```typescript
|
|
154
177
|
interface ScanReport {
|
|
155
|
-
trust_score: number;
|
|
156
|
-
trust_level: TrustLevel;
|
|
178
|
+
trust_score: number; // 0 to 100, higher is more secure
|
|
179
|
+
trust_level: TrustLevel; // "critical" | "low" | "medium" | "high" | "excellent"
|
|
157
180
|
score_breakdown: {
|
|
158
181
|
extraction_resistance: number;
|
|
159
182
|
injection_resistance: number;
|
|
160
183
|
boundary_integrity: number;
|
|
161
184
|
consistency: number;
|
|
162
185
|
};
|
|
163
|
-
defense_profile?: DefenseProfile;
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
186
|
+
defense_profile?: DefenseProfile; // Detected defense system (Prompt Shield, Llama Guard, etc.)
|
|
187
|
+
results: ProbeResult[]; // Individual probe results
|
|
188
|
+
mutation_results?: ProbeResult[]; // Results from adaptive phase
|
|
189
|
+
mutation_resistance?: number; // 0 to 100
|
|
167
190
|
}
|
|
168
191
|
```
|
|
169
192
|
|
|
170
|
-
## Semantic Detection
|
|
193
|
+
## Semantic Detection
|
|
171
194
|
|
|
172
|
-
Bring your own
|
|
195
|
+
Optional. Bring your own embedding function for paraphrase detection:
|
|
173
196
|
|
|
174
197
|
```typescript
|
|
175
|
-
import OpenAI from "openai";
|
|
176
|
-
|
|
177
|
-
const openai = new OpenAI();
|
|
178
|
-
|
|
179
198
|
const validator = new AgentValidator({
|
|
180
199
|
agentFn: myAgent,
|
|
181
200
|
groundTruthPrompt: "...",
|
|
@@ -191,40 +210,38 @@ const validator = new AgentValidator({
|
|
|
191
210
|
});
|
|
192
211
|
```
|
|
193
212
|
|
|
194
|
-
## Adaptive Mode
|
|
195
|
-
|
|
196
|
-
When `adaptive: true`, AgentSeal takes the top 5 blocked probes and mutates them using 8 transforms (base64, ROT13, unicode homoglyphs, zero-width injection, leetspeak, case scramble, reverse embedding, prefix padding) to test mutation resistance.
|
|
197
|
-
|
|
198
213
|
## Pro Features
|
|
199
214
|
|
|
200
|
-
The
|
|
215
|
+
The open-source scanner covers 150 probes. [AgentSeal Pro](https://agentseal.org) extends this with:
|
|
216
|
+
|
|
217
|
+
| Feature | What it does |
|
|
218
|
+
|---|---|
|
|
219
|
+
| **MCP tool poisoning** (+26 probes) | Tests for hidden instructions in tool descriptions, malicious return values, cross-tool privilege escalation |
|
|
220
|
+
| **RAG poisoning** (+20 probes) | Tests for poisoned documents in retrieval pipelines |
|
|
221
|
+
| **Behavioral genome mapping** | Maps your agent's decision boundaries with ~105 targeted probes |
|
|
222
|
+
| **PDF security reports** | Exportable reports for compliance and audits |
|
|
223
|
+
| **Dashboard** | Real-time scan progress, history, trends, and remediation guidance |
|
|
201
224
|
|
|
202
|
-
|
|
203
|
-
|---------|:----:|:---:|
|
|
204
|
-
| 150 base probes (extraction + injection) | Yes | Yes |
|
|
205
|
-
| Adaptive mutations (`--adaptive`) | Yes | Yes |
|
|
206
|
-
| JSON output, CI/CD integration | Yes | Yes |
|
|
207
|
-
| Defense fingerprinting | Yes | Yes |
|
|
208
|
-
| **MCP tool poisoning probes** (+26) | - | Yes |
|
|
209
|
-
| **RAG poisoning probes** (+20) | - | Yes |
|
|
210
|
-
| **Behavioral genome mapping** | - | Yes |
|
|
211
|
-
| **PDF security reports** | - | Yes |
|
|
212
|
-
| **Dashboard** (track security over time) | - | Yes |
|
|
225
|
+
[Start scanning at agentseal.org](https://agentseal.org)
|
|
213
226
|
|
|
214
|
-
|
|
227
|
+
## Also Available in Python
|
|
215
228
|
|
|
216
|
-
|
|
229
|
+
```bash
|
|
230
|
+
pip install agentseal
|
|
231
|
+
agentseal scan --prompt "You are a helpful assistant" --model gpt-4o
|
|
232
|
+
```
|
|
217
233
|
|
|
218
|
-
|
|
219
|
-
- Provider SDKs are optional peer dependencies — install only what you use
|
|
234
|
+
[View Python package on PyPI](https://pypi.org/project/agentseal/)
|
|
220
235
|
|
|
221
236
|
## Links
|
|
222
237
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
238
|
+
| | |
|
|
239
|
+
|---|---|
|
|
240
|
+
| Website | [agentseal.org](https://agentseal.org) |
|
|
241
|
+
| GitHub | [github.com/agentseal/agentseal](https://github.com/agentseal/agentseal) |
|
|
242
|
+
| PyPI | [pypi.org/project/agentseal](https://pypi.org/project/agentseal/) |
|
|
243
|
+
| Probe catalog | [PROBES.md](https://github.com/agentseal/agentseal/blob/main/PROBES.md) |
|
|
227
244
|
|
|
228
245
|
## License
|
|
229
246
|
|
|
230
|
-
FSL-1.1-Apache-2.0
|
|
247
|
+
FSL-1.1-Apache-2.0
|
package/package.json
CHANGED