agent-challenge 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +131 -0
- package/package.json +39 -0
- package/src/agentchallenge.js +766 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 KavKlaw
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# agent-challenge
|
|
2
|
+
|
|
3
|
+
LLM-solvable challenge-response authentication for AI agent APIs.
|
|
4
|
+
|
|
5
|
+
Traditional CAPTCHAs block agents. Proof-of-work wastes compute. **agent-challenge** uses reasoning puzzles that any LLM can solve through thinking alone — no scripts, no hashing, no external tools.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install agent-challenge
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Server Side
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from agentchallenge import AgentChallenge
|
|
17
|
+
|
|
18
|
+
# Create an instance with your secret
|
|
19
|
+
ac = AgentChallenge(secret="your-server-secret-key")
|
|
20
|
+
|
|
21
|
+
# Generate a challenge
|
|
22
|
+
challenge = ac.create()
|
|
23
|
+
# challenge.prompt = "Reverse the following string: NOHTYP"
|
|
24
|
+
# challenge.token = "eyJpZCI6ImNoXz..." (signed, stateless)
|
|
25
|
+
|
|
26
|
+
# Send challenge.prompt and challenge.token to the agent via your API
|
|
27
|
+
|
|
28
|
+
# When the agent responds with their answer:
|
|
29
|
+
result = ac.verify(token=challenge.token, answer="PYTHON")
|
|
30
|
+
if result.valid:
|
|
31
|
+
print("Agent verified! ✅")
|
|
32
|
+
else:
|
|
33
|
+
print(f"Failed: {result.error}")
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Agent Side
|
|
37
|
+
|
|
38
|
+
The agent just reads the prompt and answers it. That's it.
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
Server: "Reverse the following string: NOHTYP"
|
|
42
|
+
Agent: "PYTHON"
|
|
43
|
+
|
|
44
|
+
Server: "What is 234 + 567?"
|
|
45
|
+
Agent: "801"
|
|
46
|
+
|
|
47
|
+
Server: "Decode this ROT13-encoded string: URYYB"
|
|
48
|
+
Agent: "HELLO"
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
No SDK needed on the agent side. No computation. Just reasoning.
|
|
52
|
+
|
|
53
|
+
## Challenge Types
|
|
54
|
+
|
|
55
|
+
| Type | Example | Difficulty |
|
|
56
|
+
|------|---------|------------|
|
|
57
|
+
| `reverse_string` | Reverse "PYTHON" → "NOHTYP" | Easy |
|
|
58
|
+
| `simple_math` | 234 + 567 = ? | Easy |
|
|
59
|
+
| `pattern` | 2, 4, 8, 16, ? → 32 | Easy |
|
|
60
|
+
| `rot13` | Decode "URYYB" → "HELLO" | Medium |
|
|
61
|
+
| `letter_position` | Sum of A=1,B=2.. in "CAT" → 24 | Medium |
|
|
62
|
+
| `extract_letters` | Every 2nd char of "HWEOLRLLOD" → "HELLO" | Medium |
|
|
63
|
+
| `word_math` | 7 + 8 as a word → "fifteen" | Hard |
|
|
64
|
+
|
|
65
|
+
## Configuration
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
ac = AgentChallenge(
|
|
69
|
+
secret="your-secret", # Required (min 8 chars)
|
|
70
|
+
difficulty="easy", # "easy", "medium", or "hard"
|
|
71
|
+
ttl=300, # Challenge expiry in seconds
|
|
72
|
+
types=["reverse_string"], # Restrict to specific types
|
|
73
|
+
)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## API
|
|
77
|
+
|
|
78
|
+
### `AgentChallenge(secret, difficulty, ttl, types)`
|
|
79
|
+
|
|
80
|
+
Create an instance. The secret is used for HMAC-signing tokens.
|
|
81
|
+
|
|
82
|
+
### `ac.create(challenge_type=None) → Challenge`
|
|
83
|
+
|
|
84
|
+
Generate a challenge. Returns:
|
|
85
|
+
- `challenge.id` — unique challenge ID
|
|
86
|
+
- `challenge.prompt` — text to send to the agent
|
|
87
|
+
- `challenge.token` — signed token for stateless verification
|
|
88
|
+
- `challenge.expires_at` — expiry timestamp
|
|
89
|
+
- `challenge.to_dict()` — serialize for JSON API responses
|
|
90
|
+
|
|
91
|
+
### `ac.verify(token, answer) → VerifyResult`
|
|
92
|
+
|
|
93
|
+
Verify an agent's answer. Returns:
|
|
94
|
+
- `result.valid` — boolean
|
|
95
|
+
- `result.error` — error message if invalid
|
|
96
|
+
- `result.challenge_type` — which type was used
|
|
97
|
+
- `result.elapsed_ms` — verification time
|
|
98
|
+
|
|
99
|
+
## Stateless Design
|
|
100
|
+
|
|
101
|
+
No database required. Challenge data is HMAC-signed into tokens:
|
|
102
|
+
1. Server creates challenge → signs answer hash into token
|
|
103
|
+
2. Agent receives prompt + token
|
|
104
|
+
3. Agent sends back answer + token
|
|
105
|
+
4. Server verifies HMAC signature, checks expiry, compares answer hash
|
|
106
|
+
|
|
107
|
+
Tokens can't be forged or tampered with. Each contains its own expiry.
|
|
108
|
+
|
|
109
|
+
## JavaScript Port
|
|
110
|
+
|
|
111
|
+
A JavaScript/Node.js version is included at `src/agentchallenge.js`:
|
|
112
|
+
|
|
113
|
+
```javascript
|
|
114
|
+
import { AgentChallenge } from './agentchallenge.js';
|
|
115
|
+
|
|
116
|
+
const ac = new AgentChallenge({ secret: 'your-secret' });
|
|
117
|
+
const challenge = ac.create();
|
|
118
|
+
const result = ac.verify(challenge.token, 'agent answer');
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Testing
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
PYTHONPATH=src python3 run_tests.py
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
47 tests covering initialization, creation, verification, normalization, tokens, all 7 challenge types, integration flows, difficulty distribution, performance, and cross-secret isolation.
|
|
128
|
+
|
|
129
|
+
## License
|
|
130
|
+
|
|
131
|
+
MIT
|
package/package.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "agent-challenge",
|
|
3
|
+
"version": "0.5.0",
|
|
4
|
+
"description": "LLM-solvable challenge-response authentication for AI agent APIs. Protect endpoints with reasoning puzzles. Stateless via HMAC tokens.",
|
|
5
|
+
"main": "src/agentchallenge.js",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": "./src/agentchallenge.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"src/agentchallenge.js",
|
|
12
|
+
"README.md",
|
|
13
|
+
"LICENSE"
|
|
14
|
+
],
|
|
15
|
+
"keywords": [
|
|
16
|
+
"agent",
|
|
17
|
+
"challenge",
|
|
18
|
+
"authentication",
|
|
19
|
+
"llm",
|
|
20
|
+
"captcha",
|
|
21
|
+
"ai",
|
|
22
|
+
"reasoning",
|
|
23
|
+
"hmac",
|
|
24
|
+
"stateless"
|
|
25
|
+
],
|
|
26
|
+
"author": {
|
|
27
|
+
"name": "KavKlaw",
|
|
28
|
+
"email": "kavklawrevived@clawmail.cc"
|
|
29
|
+
},
|
|
30
|
+
"license": "MIT",
|
|
31
|
+
"repository": {
|
|
32
|
+
"type": "git",
|
|
33
|
+
"url": "https://github.com/Kav-K/agent-challenge"
|
|
34
|
+
},
|
|
35
|
+
"homepage": "https://challenge.llm.kaveenk.com",
|
|
36
|
+
"engines": {
|
|
37
|
+
"node": ">=18.0.0"
|
|
38
|
+
}
|
|
39
|
+
}
|
|
@@ -0,0 +1,766 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* agent-challenge v0.5.0 (JavaScript/Node.js port)
|
|
3
|
+
*
|
|
4
|
+
* LLM-solvable challenge-response authentication for AI agent APIs.
|
|
5
|
+
* 12 static challenge types with fully randomized inputs.
|
|
6
|
+
* Optional dynamic mode: LLM-generated challenges with self-verification.
|
|
7
|
+
*
|
|
8
|
+
* ⚠️ Dynamic mode adds 2 LLM API requests per challenge generation.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import crypto from 'crypto';
|
|
12
|
+
import { request as httpsRequest } from 'https';
|
|
13
|
+
|
|
14
|
+
// ── Helpers ──────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
/**
 * Return one pseudo-randomly selected element of `arr` (via Math.random).
 * An empty array yields `undefined`.
 */
function pick(arr) {
  const index = Math.floor(Math.random() * arr.length);
  return arr[index];
}
|
|
17
|
+
/**
 * Return a pseudo-random integer in the inclusive range [min, max],
 * drawn with Math.random.
 */
function randInt(min, max) {
  const span = max - min + 1;
  return min + Math.floor(Math.random() * span);
}
|
|
18
|
+
|
|
19
|
+
const CONSONANTS = 'BCDFGHJKLMNPQRSTVWXYZ';
|
|
20
|
+
const VOWELS = 'AEIOU';
|
|
21
|
+
const UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
|
|
22
|
+
const DIGITS = '0123456789';
|
|
23
|
+
const LETTERS_ALL = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
|
|
24
|
+
|
|
25
|
+
/**
 * Build a string of `n` characters, each drawn independently from `pool`
 * at a random index chosen by randInt.
 */
function randChars(pool, n) {
  let result = '';
  for (let count = 0; count < n; count += 1) {
    const index = randInt(0, pool.length - 1);
    result = result + pool[index];
  }
  return result;
}
|
|
26
|
+
/**
 * Build an `n`-character uppercase string that alternates consonant and
 * vowel, starting with a consonant (even indices draw from CONSONANTS,
 * odd indices from VOWELS).
 */
function pronounceable(n) {
  let word = '';
  for (let pos = 0; pos < n; pos += 1) {
    const pool = pos % 2 === 0 ? CONSONANTS : VOWELS;
    word += pick(pool);
  }
  return word;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Apply ROT13 to every ASCII letter in `text`, preserving case.
 * All other characters are left untouched.
 */
function rot13(text) {
  const rotate = (letter) => {
    const code = letter.charCodeAt(0);
    // Uppercase letters end at code 90 ('Z'); anything above is lowercase here.
    const base = code <= 90 ? 65 : 97;
    return String.fromCharCode(base + ((code - base + 13) % 26));
  };
  return text.replace(/[A-Za-z]/g, (letter) => rotate(letter));
}
|
|
34
|
+
|
|
35
|
+
/**
 * Caesar-shift every uppercase ASCII letter (A-Z) in `text` by `shift`
 * positions, wrapping around the alphabet. Characters outside A-Z
 * (including lowercase letters) are returned unchanged.
 *
 * @param {string} text - Text to encode.
 * @param {number} shift - Integer shift; may be negative (shifts backward)
 *   or larger than 26.
 * @returns {string} The shifted text.
 */
function caesarEncode(text, shift) {
  return text.replace(/[A-Z]/g, (c) => {
    // JavaScript's % keeps the sign of the dividend, so add 26 and reduce
    // again to stay within 0..25 for negative shifts.
    const offset = (((c.charCodeAt(0) - 65 + shift) % 26) + 26) % 26;
    return String.fromCharCode(offset + 65);
  });
}
|
|
38
|
+
|
|
39
|
+
const NUM_WORDS = {
|
|
40
|
+
0:'zero',1:'one',2:'two',3:'three',4:'four',5:'five',6:'six',7:'seven',8:'eight',9:'nine',10:'ten',
|
|
41
|
+
11:'eleven',12:'twelve',13:'thirteen',14:'fourteen',15:'fifteen',16:'sixteen',17:'seventeen',18:'eighteen',
|
|
42
|
+
19:'nineteen',20:'twenty'
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
// ── Reply instruction templates (randomized per challenge) ───
|
|
46
|
+
|
|
47
|
+
const REPLY_INSTRUCTIONS = [
|
|
48
|
+
"Reply with ONLY the answer, nothing else.",
|
|
49
|
+
"Respond with just the answer.",
|
|
50
|
+
"Give me only the answer.",
|
|
51
|
+
"Your response should contain nothing but the answer.",
|
|
52
|
+
"Write only the final answer.",
|
|
53
|
+
"Output just the answer, no explanation.",
|
|
54
|
+
"Answer with a single value only.",
|
|
55
|
+
"Just the answer please, nothing more.",
|
|
56
|
+
];
|
|
57
|
+
/** Return one randomly picked entry of REPLY_INSTRUCTIONS to append to a prompt. */
function ri() {
  const instruction = pick(REPLY_INSTRUCTIONS);
  return instruction;
}
|
|
58
|
+
|
|
59
|
+
// ── Challenge Types (with randomized prompt templates) ───
|
|
60
|
+
|
|
61
|
+
const CHALLENGE_TYPES = {
|
|
62
|
+
reverse_string() {
|
|
63
|
+
const variant = pick(['pronounceable', 'random', 'mixed']);
|
|
64
|
+
let word;
|
|
65
|
+
if (variant === 'pronounceable') word = pronounceable(randInt(5, 9));
|
|
66
|
+
else if (variant === 'random') word = randChars(UPPER, randInt(5, 8));
|
|
67
|
+
else word = randChars(UPPER + DIGITS, randInt(5, 8));
|
|
68
|
+
const templates = [
|
|
69
|
+
w => `Reverse the following string: ${w}.`,
|
|
70
|
+
w => `Write the characters of ${w} in reverse order.`,
|
|
71
|
+
w => `Spell ${w} backwards.`,
|
|
72
|
+
w => `If you flip the string ${w} end-to-end, what do you get?`,
|
|
73
|
+
w => `Read ${w} from right to left and write what you see.`,
|
|
74
|
+
w => `Take the word ${w} and reverse every character.`,
|
|
75
|
+
w => `Starting from the last character to the first, rewrite ${w}.`,
|
|
76
|
+
w => `What is the result of reversing all characters in ${w}?`,
|
|
77
|
+
];
|
|
78
|
+
return { prompt: pick(templates)(word) + ' ' + ri(), answer: word.split('').reverse().join('').toLowerCase() };
|
|
79
|
+
},
|
|
80
|
+
|
|
81
|
+
simple_math() {
|
|
82
|
+
const op = pick(['+', '+', '-', '×', '++', '--']);
|
|
83
|
+
let prompt, answer;
|
|
84
|
+
if (op === '+') {
|
|
85
|
+
const a = randInt(10, 999), b = randInt(10, 999); answer = a + b;
|
|
86
|
+
prompt = pick([`What is ${a} + ${b}?`, `Calculate the sum of ${a} and ${b}.`, `Add ${a} to ${b}. What do you get?`, `If you combine ${a} and ${b}, what is the total?`]);
|
|
87
|
+
} else if (op === '-') {
|
|
88
|
+
const a = randInt(100, 999), b = randInt(10, a - 1); answer = a - b;
|
|
89
|
+
prompt = pick([`What is ${a} - ${b}?`, `Subtract ${b} from ${a}.`, `If you take ${b} away from ${a}, what remains?`, `Calculate ${a} minus ${b}.`]);
|
|
90
|
+
} else if (op === '×') {
|
|
91
|
+
const a = randInt(2, 30), b = randInt(2, 30); answer = a * b;
|
|
92
|
+
prompt = pick([`What is ${a} × ${b}?`, `Multiply ${a} by ${b}.`, `Calculate the product of ${a} and ${b}.`, `What do you get when you multiply ${a} times ${b}?`]);
|
|
93
|
+
} else if (op === '++') {
|
|
94
|
+
const a = randInt(10, 300), b = randInt(10, 300), c = randInt(10, 300); answer = a + b + c;
|
|
95
|
+
prompt = pick([`What is ${a} + ${b} + ${c}?`, `Add together ${a}, ${b}, and ${c}.`, `Find the sum of these three numbers: ${a}, ${b}, ${c}.`]);
|
|
96
|
+
} else {
|
|
97
|
+
const a = randInt(500, 999), b = randInt(10, 200), c = randInt(10, Math.min(200, a - b - 1)); answer = a - b - c;
|
|
98
|
+
prompt = pick([`What is ${a} - ${b} - ${c}?`, `Start with ${a}, subtract ${b}, then subtract ${c}.`, `Take ${a}, remove ${b}, then remove another ${c}. What's left?`]);
|
|
99
|
+
}
|
|
100
|
+
return { prompt: prompt + ' ' + ri(), answer: String(answer) };
|
|
101
|
+
},
|
|
102
|
+
|
|
103
|
+
letter_position() {
|
|
104
|
+
const word = randChars(UPPER, randInt(3, 4));
|
|
105
|
+
const total = [...word].reduce((s, c) => s + (c.charCodeAt(0) - 64), 0);
|
|
106
|
+
const templates = [
|
|
107
|
+
w => `If A=1, B=2, C=3, ... Z=26, what is the sum of the letter values in "${w}"?`,
|
|
108
|
+
w => `Assign each letter a number (A=1, B=2, through Z=26). Add up the values of all letters in "${w}".`,
|
|
109
|
+
w => `Using the mapping A→1, B→2, C→3, ..., Z→26, calculate the total value of the letters in "${w}".`,
|
|
110
|
+
w => `Each letter has a position in the alphabet (A=1, Z=26). What is the sum of positions for the letters in "${w}"?`,
|
|
111
|
+
];
|
|
112
|
+
return { prompt: pick(templates)(word) + ' ' + ri(), answer: String(total) };
|
|
113
|
+
},
|
|
114
|
+
|
|
115
|
+
rot13() {
|
|
116
|
+
const word = pronounceable(randInt(4, 7));
|
|
117
|
+
const encoded = rot13(word);
|
|
118
|
+
const templates = [
|
|
119
|
+
e => `Decode this ROT13-encoded string (each letter shifts 13 places back in the alphabet): ${e}`,
|
|
120
|
+
e => `Apply ROT13 decoding to the text: ${e}`,
|
|
121
|
+
e => `The following text was encoded with ROT13. Decode it: ${e}`,
|
|
122
|
+
e => `Shift each letter in ${e} by 13 positions in the alphabet to decode it.`,
|
|
123
|
+
];
|
|
124
|
+
return { prompt: pick(templates)(encoded) + ' ' + ri(), answer: word.toLowerCase() };
|
|
125
|
+
},
|
|
126
|
+
|
|
127
|
+
pattern() {
|
|
128
|
+
const ptype = pick(['add', 'multiply', 'add_growing', 'squares', 'triangular']);
|
|
129
|
+
let display, answer;
|
|
130
|
+
if (ptype === 'add') {
|
|
131
|
+
const start = randInt(1, 50), step = randInt(2, 15);
|
|
132
|
+
display = Array.from({length: 5}, (_, i) => start + step * i); answer = display[4] + step;
|
|
133
|
+
} else if (ptype === 'multiply') {
|
|
134
|
+
const base = pick([2, 3, 4, 5]), se = randInt(0, 2);
|
|
135
|
+
display = Array.from({length: 5}, (_, i) => base ** (se + i)); answer = base ** (se + 5);
|
|
136
|
+
} else if (ptype === 'squares') {
|
|
137
|
+
const off = randInt(0, 10), sn = randInt(1, 5);
|
|
138
|
+
display = Array.from({length: 5}, (_, i) => (sn + i) ** 2 + off); answer = (sn + 5) ** 2 + off;
|
|
139
|
+
} else if (ptype === 'triangular') {
|
|
140
|
+
const start = randInt(1, 10), si = randInt(2, 4);
|
|
141
|
+
display = [start]; let cs = si;
|
|
142
|
+
for (let j = 0; j < 4; j++) { display.push(display[display.length-1] + cs); cs += si; }
|
|
143
|
+
answer = display[4] + cs;
|
|
144
|
+
} else {
|
|
145
|
+
const start = randInt(1, 20), is_ = randInt(1, 5);
|
|
146
|
+
display = [start]; for (let i = 0; i < 4; i++) display.push(display[display.length-1] + is_ + i);
|
|
147
|
+
answer = display[4] + is_ + 4;
|
|
148
|
+
}
|
|
149
|
+
const seq = display.join(', ');
|
|
150
|
+
const templates = [
|
|
151
|
+
s => `What comes next in this sequence: ${s}, ?`,
|
|
152
|
+
s => `Find the next number: ${s}, ?`,
|
|
153
|
+
s => `Continue this pattern: ${s}, ?`,
|
|
154
|
+
s => `What number follows this sequence: ${s}, ?`,
|
|
155
|
+
s => `Identify the next value in the series: ${s}, ?`,
|
|
156
|
+
];
|
|
157
|
+
return { prompt: pick(templates)(seq) + ' ' + ri(), answer: String(answer) };
|
|
158
|
+
},
|
|
159
|
+
|
|
160
|
+
extract_letters() {
|
|
161
|
+
const word = randChars(UPPER, randInt(4, 6));
|
|
162
|
+
const n = pick([2, 3]);
|
|
163
|
+
let mixed = '';
|
|
164
|
+
for (let i = 0; i < word.length; i++) {
|
|
165
|
+
mixed += word[i];
|
|
166
|
+
if (i < word.length - 1) for (let j = 0; j < n - 1; j++) mixed += CONSONANTS[randInt(0, CONSONANTS.length - 1)];
|
|
167
|
+
}
|
|
168
|
+
const templates2 = [
|
|
169
|
+
m => `Extract every 2nd letter from this string, starting from the 1st character: ${m}`,
|
|
170
|
+
m => `Take every other character from ${m}, beginning with the first.`,
|
|
171
|
+
m => `From the string ${m}, pick characters at positions 1, 3, 5, 7... What do you get?`,
|
|
172
|
+
];
|
|
173
|
+
const templates3 = [
|
|
174
|
+
m => `Extract every 3rd letter from this string, starting from the 1st character: ${m}`,
|
|
175
|
+
m => `From ${m}, take the 1st, 4th, 7th, 10th... characters.`,
|
|
176
|
+
m => `Pick every third character from ${m}, starting at position 1.`,
|
|
177
|
+
];
|
|
178
|
+
return { prompt: pick(n === 2 ? templates2 : templates3)(mixed) + ' ' + ri(), answer: word.toLowerCase() };
|
|
179
|
+
},
|
|
180
|
+
|
|
181
|
+
word_math() {
|
|
182
|
+
const v = pick(['digit_to_word', 'char_count', 'vowel_count', 'digit_sum']);
|
|
183
|
+
if (v === 'digit_to_word') {
|
|
184
|
+
const a = randInt(1, 10), b = randInt(1, 10);
|
|
185
|
+
const t = pick([
|
|
186
|
+
(x,y) => `What is ${x} + ${y}? Write the answer as a word (e.g., "twelve"), not a number.`,
|
|
187
|
+
(x,y) => `Add ${x} and ${y}. Spell out the answer as an English word.`,
|
|
188
|
+
(x,y) => `Calculate ${x} + ${y} and write the result as a word, not a digit.`,
|
|
189
|
+
]);
|
|
190
|
+
return { prompt: t(a, b) + ' ' + ri(), answer: NUM_WORDS[a + b] };
|
|
191
|
+
} else if (v === 'char_count') {
|
|
192
|
+
const w = randChars(UPPER, randInt(4, 8));
|
|
193
|
+
const t = pick([
|
|
194
|
+
x => `How many characters are in the string "${x}"?`,
|
|
195
|
+
x => `Count the total number of letters in "${x}".`,
|
|
196
|
+
x => `What is the length of the string "${x}"?`,
|
|
197
|
+
]);
|
|
198
|
+
return { prompt: t(w) + ' ' + ri(), answer: String(w.length) };
|
|
199
|
+
} else if (v === 'vowel_count') {
|
|
200
|
+
const w = randChars(UPPER, randInt(5, 9));
|
|
201
|
+
const t = pick([
|
|
202
|
+
x => `How many vowels (A, E, I, O, U) are in "${x}"?`,
|
|
203
|
+
x => `Count the vowels in the string "${x}".`,
|
|
204
|
+
x => `In the text "${x}", how many letters are vowels (A, E, I, O, U)?`,
|
|
205
|
+
]);
|
|
206
|
+
return { prompt: t(w) + ' ' + ri(), answer: String([...w].filter(c => 'AEIOU'.includes(c)).length) };
|
|
207
|
+
} else {
|
|
208
|
+
const num = randInt(100, 9999);
|
|
209
|
+
const t = pick([
|
|
210
|
+
n => `What is the sum of the digits of ${n}?`,
|
|
211
|
+
n => `Add up each individual digit in the number ${n}.`,
|
|
212
|
+
n => `Take the number ${n} and sum its digits together.`,
|
|
213
|
+
]);
|
|
214
|
+
return { prompt: t(num) + ' ' + ri(), answer: String([...String(num)].reduce((s, d) => s + Number(d), 0)) };
|
|
215
|
+
}
|
|
216
|
+
},
|
|
217
|
+
|
|
218
|
+
caesar() {
|
|
219
|
+
const word = pronounceable(randInt(4, 7));
|
|
220
|
+
const shift = pick([3, 5, 7, 11]);
|
|
221
|
+
const encoded = caesarEncode(word, shift);
|
|
222
|
+
const templates = [
|
|
223
|
+
(e,s) => `Decode this Caesar cipher (each letter is shifted ${s} positions forward in the alphabet): ${e}\nShift each letter ${s} positions BACKWARD to decode.`,
|
|
224
|
+
(e,s) => `The text ${e} was encrypted with a Caesar shift of ${s}. Decrypt it by shifting each letter back by ${s}.`,
|
|
225
|
+
(e,s) => `Apply a reverse Caesar shift of ${s} to decode: ${e}`,
|
|
226
|
+
(e,s) => `This message was encoded by shifting each letter forward by ${s} in the alphabet: ${e}. What is the original text?`,
|
|
227
|
+
];
|
|
228
|
+
return { prompt: pick(templates)(encoded, shift) + ' ' + ri(), answer: word.toLowerCase() };
|
|
229
|
+
},
|
|
230
|
+
|
|
231
|
+
sorting() {
|
|
232
|
+
const v = pick(['sort_letters', 'sort_numbers', 'sort_reverse']);
|
|
233
|
+
if (v === 'sort_letters') {
|
|
234
|
+
const w = randChars(UPPER, randInt(5, 8));
|
|
235
|
+
const t = pick([x => `Sort these letters in alphabetical order: ${x}`, x => `Arrange the letters ${x} from A to Z.`, x => `Put these letters in alphabetical sequence: ${x}`]);
|
|
236
|
+
return { prompt: t(w) + ' ' + ri(), answer: [...w].sort().join('').toLowerCase() };
|
|
237
|
+
} else if (v === 'sort_numbers') {
|
|
238
|
+
const nums = []; const seen = new Set();
|
|
239
|
+
while (nums.length < randInt(5, 7)) { const n = randInt(1, 99); if (!seen.has(n)) { nums.push(n); seen.add(n); } }
|
|
240
|
+
const t = pick([x => `Sort these numbers from smallest to largest: ${x}`, x => `Arrange in ascending order: ${x}`, x => `Put these numbers in order from lowest to highest: ${x}`]);
|
|
241
|
+
return { prompt: t(nums.join(', ')) + ' ' + ri(), answer: [...nums].sort((a, b) => a - b).join(', ') };
|
|
242
|
+
} else {
|
|
243
|
+
const w = randChars(UPPER, randInt(5, 7));
|
|
244
|
+
const t = pick([x => `Sort these letters in REVERSE alphabetical order (Z first, A last): ${x}`, x => `Arrange the letters ${x} from Z to A.`, x => `Put these letters in reverse alphabetical order: ${x}`]);
|
|
245
|
+
return { prompt: t(w) + ' ' + ri(), answer: [...w].sort().reverse().join('').toLowerCase() };
|
|
246
|
+
}
|
|
247
|
+
},
|
|
248
|
+
|
|
249
|
+
counting() {
|
|
250
|
+
const v = pick(['count_letter', 'count_consonants', 'count_digits', 'count_upper']);
|
|
251
|
+
if (v === 'count_letter') {
|
|
252
|
+
const target = UPPER[randInt(0, 25)], len = randInt(10, 18);
|
|
253
|
+
let chars = Array(randInt(2, 5)).fill(target);
|
|
254
|
+
while (chars.length < len) chars.push(UPPER[randInt(0, 25)]);
|
|
255
|
+
chars = chars.sort(() => Math.random() - 0.5);
|
|
256
|
+
const text = chars.join('');
|
|
257
|
+
const t = pick([
|
|
258
|
+
(tg,tx) => `How many times does the letter "${tg}" appear in "${tx}"?`,
|
|
259
|
+
(tg,tx) => `Count the occurrences of "${tg}" in the string "${tx}".`,
|
|
260
|
+
(tg,tx) => `In "${tx}", how many "${tg}" characters are there?`,
|
|
261
|
+
]);
|
|
262
|
+
return { prompt: t(target, text) + ' ' + ri(), answer: String(text.split(target).length - 1) };
|
|
263
|
+
} else if (v === 'count_consonants') {
|
|
264
|
+
const w = randChars(UPPER, randInt(6, 10));
|
|
265
|
+
const t = pick([x => `How many consonants (non-vowel letters) are in "${x}"?`, x => `Count all consonants in the string "${x}".`, x => `In "${x}", how many letters are NOT vowels?`]);
|
|
266
|
+
return { prompt: t(w) + ' ' + ri(), answer: String([...w].filter(c => !'AEIOU'.includes(c)).length) };
|
|
267
|
+
} else if (v === 'count_digits') {
|
|
268
|
+
const d = randChars(DIGITS, randInt(8, 14)), target = d[randInt(0, d.length - 1)];
|
|
269
|
+
const t = pick([(tg,dd) => `How many times does the digit "${tg}" appear in "${dd}"?`, (tg,dd) => `Count how often "${tg}" occurs in the number string "${dd}".`]);
|
|
270
|
+
return { prompt: t(target, d) + ' ' + ri(), answer: String(d.split(target).length - 1) };
|
|
271
|
+
} else {
|
|
272
|
+
const text = randChars(LETTERS_ALL, randInt(10, 16));
|
|
273
|
+
const t = pick([x => `How many UPPERCASE letters are in "${x}"?`, x => `Count the capital letters in "${x}".`, x => `In the mixed-case string "${x}", how many characters are uppercase?`]);
|
|
274
|
+
return { prompt: t(text) + ' ' + ri(), answer: String([...text].filter(c => c >= 'A' && c <= 'Z').length) };
|
|
275
|
+
}
|
|
276
|
+
},
|
|
277
|
+
|
|
278
|
+
transform() {
|
|
279
|
+
const v = pick(['remove_vowels', 'remove_consonants', 'first_letters', 'last_letters']);
|
|
280
|
+
if (v === 'remove_vowels') {
|
|
281
|
+
const w = pronounceable(randInt(5, 8));
|
|
282
|
+
const t = pick([x => `Remove all vowels (A, E, I, O, U) from "${x}".`, x => `Delete every vowel from the string "${x}". What remains?`, x => `Strip out A, E, I, O, and U from "${x}".`]);
|
|
283
|
+
return { prompt: t(w) + ' ' + ri(), answer: [...w].filter(c => !'AEIOU'.includes(c)).join('').toLowerCase() };
|
|
284
|
+
} else if (v === 'remove_consonants') {
|
|
285
|
+
let w = randChars(UPPER, randInt(6, 10));
|
|
286
|
+
const vowels = [...w].filter(c => 'AEIOU'.includes(c)).join('');
|
|
287
|
+
if (!vowels) return CHALLENGE_TYPES.transform();
|
|
288
|
+
const t = pick([x => `Remove all consonants from "${x}" and keep only the vowels (A, E, I, O, U).`, x => `Extract only the vowels from "${x}".`, x => `From the string "${x}", delete every consonant and keep only vowels.`]);
|
|
289
|
+
return { prompt: t(w) + ' ' + ri(), answer: vowels.toLowerCase() };
|
|
290
|
+
} else if (v === 'first_letters') {
|
|
291
|
+
const words = Array.from({length: randInt(4, 7)}, () => { let w = randChars('abcdefghijklmnopqrstuvwxyz', randInt(3, 7)); return w[0].toUpperCase() + w.slice(1); });
|
|
292
|
+
const s = words.join(' ');
|
|
293
|
+
const t = pick([x => `What do the first letters of each word spell: "${x}"?`, x => `Take the initial letter of every word in "${x}" and combine them.`, x => `Form an acronym from: "${x}".`]);
|
|
294
|
+
return { prompt: t(s) + ' ' + ri(), answer: words.map(w => w[0]).join('').toLowerCase() };
|
|
295
|
+
} else {
|
|
296
|
+
const words = Array.from({length: randInt(4, 6)}, () => { let w = randChars('abcdefghijklmnopqrstuvwxyz', randInt(3, 6)); return w[0].toUpperCase() + w.slice(1); });
|
|
297
|
+
const s = words.join(' ');
|
|
298
|
+
const t = pick([x => `What do the LAST letters of each word spell: "${x}"?`, x => `Take the final letter of each word in "${x}" and combine them.`, x => `Extract the ending letter from every word in "${x}" and join them.`]);
|
|
299
|
+
return { prompt: t(s) + ' ' + ri(), answer: words.map(w => w[w.length - 1]).join('').toLowerCase() };
|
|
300
|
+
}
|
|
301
|
+
},
|
|
302
|
+
|
|
303
|
+
binary() {
|
|
304
|
+
const v = pick(['binary_to_decimal', 'decimal_to_binary', 'digit_sum']);
|
|
305
|
+
if (v === 'binary_to_decimal') {
|
|
306
|
+
const num = randInt(1, 63), b = num.toString(2);
|
|
307
|
+
const t = pick([x => `Convert binary ${x} to decimal.`, x => `What is the decimal value of the binary number ${x}?`, x => `Express ${x} (binary) as a base-10 number.`]);
|
|
308
|
+
return { prompt: t(b) + ' ' + ri(), answer: String(num) };
|
|
309
|
+
} else if (v === 'decimal_to_binary') {
|
|
310
|
+
const num = randInt(1, 31);
|
|
311
|
+
const t = pick([n => `Convert the decimal number ${n} to binary.`, n => `What is ${n} in binary?`, n => `Write ${n} as a binary number (no 0b prefix).`]);
|
|
312
|
+
return { prompt: t(num) + ' ' + ri(), answer: num.toString(2) };
|
|
313
|
+
} else {
|
|
314
|
+
const num = randInt(1000, 99999);
|
|
315
|
+
const t = pick([n => `What is the sum of all digits in ${n}?`, n => `Add each digit of ${n} together.`, n => `Calculate the digit sum of ${n}.`]);
|
|
316
|
+
return { prompt: t(num) + ' ' + ri(), answer: String([...String(num)].reduce((s, d) => s + Number(d), 0)) };
|
|
317
|
+
}
|
|
318
|
+
},
|
|
319
|
+
};
|
|
320
|
+
|
|
321
|
+
const DIFFICULTY_MAP = {
|
|
322
|
+
easy: ['reverse_string', 'simple_math', 'pattern', 'counting'],
|
|
323
|
+
medium: ['reverse_string', 'simple_math', 'rot13', 'letter_position', 'extract_letters', 'pattern', 'counting', 'sorting', 'binary'],
|
|
324
|
+
hard: Object.keys(CHALLENGE_TYPES),
|
|
325
|
+
};
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
// ── Token Helpers ────────────────────────────────────
|
|
329
|
+
|
|
330
|
+
/** Return the hex-encoded HMAC-SHA256 of `data` keyed with `secret`. */
function hmacSign(data, secret) {
  const mac = crypto.createHmac('sha256', secret);
  mac.update(data);
  return mac.digest('hex');
}
|
|
331
|
+
/**
 * Serialize `payload` into a signed token of the form
 * "<base64url(JSON payload)>.<hex HMAC-SHA256 of that base64url text>".
 */
function encodeToken(payload, secret) {
  const json = JSON.stringify(payload);
  const body = Buffer.from(json).toString('base64url');
  const signature = hmacSign(body, secret);
  return [body, signature].join('.');
}
|
|
335
|
+
/**
 * Verify a "<base64url payload>.<hex signature>" token and return its
 * decoded JSON payload.
 *
 * @param {string} token - Token in the format produced by encodeToken.
 * @param {string} secret - HMAC key the token was signed with.
 * @returns {*} The parsed JSON payload.
 * @throws {Error} 'Invalid token format' when the token is not a string
 *   containing a '.'; 'Invalid token signature' when the signature is
 *   malformed or does not match.
 */
function decodeToken(token, secret) {
  if (typeof token !== 'string' || !token.includes('.')) throw new Error('Invalid token format');
  const idx = token.lastIndexOf('.');
  const data = token.slice(0, idx);
  const sig = token.slice(idx + 1);
  // timingSafeEqual throws a RangeError for buffers of different length, and
  // Buffer.from(..., 'hex') stops at the first non-hex character, so reject
  // anything that is not exactly a 64-char hex SHA-256 digest up front.
  if (!/^[0-9a-f]{64}$/i.test(sig)) throw new Error('Invalid token signature');
  const expected = hmacSign(data, secret);
  if (!crypto.timingSafeEqual(Buffer.from(sig, 'hex'), Buffer.from(expected, 'hex'))) throw new Error('Invalid token signature');
  return JSON.parse(Buffer.from(data, 'base64url').toString());
}
|
|
343
|
+
/** Return the hex-encoded SHA-256 digest of `answer`, read as UTF-8 text. */
function hashAnswer(answer) {
  const digest = crypto.createHash('sha256');
  digest.update(answer, 'utf8');
  return digest.digest('hex');
}
|
|
344
|
+
/**
 * Canonicalize an answer for comparison: trim, lowercase, strip one pair of
 * matching surrounding quotes (single or double), and collapse runs of
 * whitespace to a single space. Non-string input yields ''.
 */
function normalizeAnswer(answer) {
  if (typeof answer !== 'string') {
    return '';
  }
  const isQuote = (ch) => ch === '"' || ch === "'";
  let cleaned = answer.trim().toLowerCase();
  const lastIndex = cleaned.length - 1;
  if (cleaned.length >= 2 && isQuote(cleaned[0]) && cleaned[lastIndex] === cleaned[0]) {
    cleaned = cleaned.slice(1, -1).trim();
  }
  return cleaned.replace(/\s+/g, ' ');
}
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
// ── Dynamic Mode — LLM Providers ─────────────────────
|
|
353
|
+
|
|
354
|
+
const PROVIDERS = {
|
|
355
|
+
openai: {
|
|
356
|
+
host: 'api.openai.com', path: '/v1/chat/completions',
|
|
357
|
+
envKey: 'OPENAI_API_KEY', defaultModel: 'gpt-4o-mini',
|
|
358
|
+
headers: (key) => ({ Authorization: `Bearer ${key}`, 'Content-Type': 'application/json' }),
|
|
359
|
+
body: (model, messages, temperature = 1.0) => JSON.stringify({ model, messages, temperature, max_tokens: 400 }),
|
|
360
|
+
extract: (resp) => resp.choices[0].message.content.trim(),
|
|
361
|
+
},
|
|
362
|
+
anthropic: {
|
|
363
|
+
host: 'api.anthropic.com', path: '/v1/messages',
|
|
364
|
+
envKey: 'ANTHROPIC_API_KEY', defaultModel: 'claude-sonnet-4-20250514',
|
|
365
|
+
headers: (key) => ({ 'x-api-key': key, 'anthropic-version': '2023-06-01', 'Content-Type': 'application/json' }),
|
|
366
|
+
body: (model, messages, temperature = 1.0) => JSON.stringify({ model, max_tokens: 400, temperature, messages }),
|
|
367
|
+
extract: (resp) => resp.content[0].text.trim(),
|
|
368
|
+
},
|
|
369
|
+
google: {
|
|
370
|
+
envKey: 'GOOGLE_API_KEY', defaultModel: 'gemini-2.0-flash',
|
|
371
|
+
buildPath: (model, key) => `/v1beta/models/${model}:generateContent?key=${key}`,
|
|
372
|
+
host: 'generativelanguage.googleapis.com',
|
|
373
|
+
headers: () => ({ 'Content-Type': 'application/json' }),
|
|
374
|
+
body: (model, messages, temperature = 1.0) => JSON.stringify({
|
|
375
|
+
contents: messages.map(m => ({ role: m.role === 'assistant' ? 'model' : m.role, parts: [{ text: m.content }] })),
|
|
376
|
+
generationConfig: { temperature, maxOutputTokens: 400 },
|
|
377
|
+
}),
|
|
378
|
+
extract: (resp) => resp.candidates[0].content.parts[0].text.trim(),
|
|
379
|
+
},
|
|
380
|
+
};
|
|
381
|
+
|
|
382
|
+
// Prompt sent to the generator model. It asks for step-by-step working
// followed by a one-line JSON object {"prompt", "answer"} on the final line.
// The worked examples must themselves be correct, because the prompt tells the
// model to verify its own answer the same way (positions below are 1-based).
const GENERATE_PROMPT = `You are a challenge generator for AI agent authentication. Generate ONE unique reasoning challenge.

RULES (strict):
1. The answer MUST be a single number OR a single word (1-2 words max). Never a sentence.
2. The challenge MUST have exactly ONE objectively correct answer — no ambiguity.
3. The challenge MUST be solvable by pure reasoning — no trivia, pop culture, or world knowledge.
4. Include "Reply with ONLY the answer, nothing else." at the end of the prompt.
5. SHOW YOUR WORK: Before writing the JSON, solve the challenge yourself step by step to verify the answer is correct. Write your work FIRST, then the JSON on the last line.

ALLOWED categories (pick one randomly):
- Arithmetic: multi-step math, order of operations, percentages
- String manipulation: reverse strings, remove/replace characters, count letters
- Pattern completion: number sequences with clear rules
- Cipher/encoding: Caesar shift, letter-to-number mapping
- Counting: count specific items in a given string
- Sorting: alphabetize letters, sort numbers

FORBIDDEN (never generate):
- Riddles, lateral thinking, "trick questions"
- Challenges with multiple valid answers
- Trivia or world knowledge
- Full-sentence answers
- Time/date/clock puzzles
- Word association / "what am I"

EXAMPLES (DO NOT reuse — generate something novel):
Working: 8×7=56, 3×9=27, 56-27=29
{"prompt": "What is (8 × 7) - (3 × 9)? Reply with ONLY the answer, nothing else.", "answer": "29"}

Working: ALGORITHM reversed → M-H-T-I-R-O-G-L-A
{"prompt": "Reverse the string ALGORITHM. Reply with ONLY the answer, nothing else.", "answer": "MHTIROGLA"}

Working: M-I-S-S-I-S-S-I-P-P-I, S at positions 3,4,6,7 → 4 times
{"prompt": "In the string MISSISSIPPI, how many times does the letter S appear? Reply with ONLY the answer, nothing else.", "answer": "4"}

Working: H=8, E=5, L=12, P=16. 8+5=13, 13+12=25, 25+16=41
{"prompt": "If A=1, B=2, ..., Z=26, what is the value of H + E + L + P? Reply with ONLY the answer, nothing else.", "answer": "41"}

Working: Remove A,E,I,O,U from EDUCATION → D,C,T,N
{"prompt": "Remove all vowels from the word EDUCATION. Reply with ONLY the answer, nothing else.", "answer": "DCTN"}

Now generate a NOVEL challenge — different from the examples above. Be creative with the specific values and words you choose. Show your work first, then the JSON on the final line:`;
|
|
424
|
+
|
|
425
|
+
/**
 * Send one chat request to an LLM provider over HTTPS and resolve with the
 * text of its reply.
 *
 * @param {string} providerName - Key of PROVIDERS. Not validated here: an
 *   unknown name fails with a TypeError when the provider config is read.
 * @param {string} apiKey - API key handed to the provider's header/path builders.
 * @param {Array<{role: string, content: string}>} messages - Chat messages.
 * @param {string} [model] - Model name; falls back to the provider's defaultModel.
 * @param {number} [timeout=15000] - Socket timeout in milliseconds.
 * @param {number} [temperature=1.0] - Sampling temperature placed in the request body.
 * @returns {Promise<string>} The reply text as returned by the provider's `extract`.
 *   Rejects with an Error on a non-200 status, an unparseable or unexpected
 *   response body, a connection error, or a timeout.
 */
function _callLLM(providerName, apiKey, messages, model, timeout = 15000, temperature = 1.0) {
  const provider = PROVIDERS[providerName];
  const modelName = model || provider.defaultModel;

  return new Promise((resolve, reject) => {
    const path = provider.buildPath ? provider.buildPath(modelName, apiKey) : provider.path;
    const baseHeaders = provider.headers(apiKey);
    const body = provider.body(modelName, messages, temperature);
    // Content-Length must count bytes, not UTF-16 code units.
    const headers = { ...baseHeaders, 'Content-Length': Buffer.byteLength(body) };

    const req = httpsRequest({ hostname: provider.host, path, method: 'POST', headers, timeout }, (res) => {
      // Decode through a streaming UTF-8 decoder. Concatenating raw Buffer
      // chunks onto a string converts each chunk on its own, which corrupts
      // any multi-byte character that straddles a chunk boundary.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('end', () => {
        if (res.statusCode !== 200) {
          reject(new Error(`LLM API error (${res.statusCode}): ${data.substring(0, 200)}`));
          return;
        }
        try {
          resolve(provider.extract(JSON.parse(data)));
        } catch (e) {
          reject(new Error(`LLM response parse error: ${e.message}`));
        }
      });
    });

    req.on('error', (e) => reject(new Error(`LLM connection error: ${e.message}`)));
    // The 'timeout' event only signals inactivity; the request has to be
    // torn down explicitly.
    req.on('timeout', () => { req.destroy(); reject(new Error('LLM API timeout')); });
    req.write(body);
    req.end();
  });
}
|
|
448
|
+
|
|
449
|
+
/**
 * Pull the final answer out of a verifier model's free-form reply.
 *
 * Strategies, tried in order:
 *   1. The LAST "ANSWER: ..." marker (the verifier is asked to put its answer
 *      on the last line, and the word "answer:" may also occur in the working).
 *   2. A sentence of the form "(the) (final) answer is ...".
 *   3. The last non-empty line, minus a leading connective such as "so " or "= ".
 *
 * @param {string} text - Raw verifier reply.
 * @returns {string} The extracted answer (trimmed), or the trimmed input when
 *   it contains no non-empty line.
 */
function _extractVerifierAnswer(text) {
  text = text.trim();

  // Strategy 1: "ANSWER: ..." prefix — prefer the final occurrence.
  const tagged = [...text.matchAll(/ANSWER:\s*(.+?)(?:\n|$)/gi)];
  if (tagged.length) return tagged[tagged.length - 1][1].trim();

  // Strategy 2: "The answer is ...". A period only terminates the answer when
  // it ends a sentence (followed by whitespace or end of line), so decimals
  // such as "3.5" survive; the `m` flag lets the answer end at a line break.
  const stated = text.match(/(?:the\s+)?(?:final\s+)?answer\s+is[:\s]+(.+?)(?:\.(?=\s|$)|$)/im);
  if (stated) return stated[1].trim();

  // Strategy 3: Last non-empty line
  const lines = text.split('\n').map((l) => l.trim()).filter(Boolean);
  if (lines.length) {
    let last = lines[lines.length - 1];
    // Prefixes are applied cumulatively, in list order.
    for (const p of ['so ', 'therefore ', 'thus ', 'hence ', '= ', 'answer: ', 'result: ']) {
      if (last.toLowerCase().startsWith(p)) last = last.slice(p.length).trim();
    }
    return last;
  }
  return text;
}
|
|
468
|
+
|
|
469
|
+
/**
 * Canonicalize an answer string so two answers can be compared loosely.
 *
 * Steps, in order: trim and lowercase; drop one pair of matching surrounding
 * quotes; drop trailing `. ! , ; :`; strip lead-in phrases ("the answer is ",
 * "answer: ", "result: ", "it is ", "it's ") in that order; drop surrounding
 * quotes again; collapse whitespace runs to one space; and finally join
 * single letters separated by single spaces ("d c t n" becomes "dctn").
 *
 * @param {string} answer - Raw answer text.
 * @returns {string} The canonical form.
 */
function _normalizeForCompare(answer) {
  // Removes one layer of matching single or double quotes, if present.
  const unquote = (str) => {
    const first = str[0];
    const isWrapped = str.length >= 2 && first === str[str.length - 1] && (first === '"' || first === "'");
    return isWrapped ? str.slice(1, -1).trim() : str;
  };
  const leadIns = ['the answer is ', 'answer: ', 'result: ', 'it is ', "it's "];

  const base = unquote(answer.trim().toLowerCase()).replace(/[.!,;:]+$/, '');
  const withoutLeadIn = leadIns.reduce(
    (acc, lead) => (acc.startsWith(lead) ? acc.slice(lead.length).trim() : acc),
    base
  );
  const collapsed = unquote(withoutLeadIn).replace(/\s+/g, ' ');

  // Letter-by-letter spellings are treated as the joined word.
  return /^[a-z]( [a-z])+$/.test(collapsed) ? collapsed.replace(/ /g, '') : collapsed;
}
|
|
481
|
+
|
|
482
|
+
/**
 * Decide whether the generator's expected answer and the verifier's answer
 * agree, after normalizing both with _normalizeForCompare.
 *
 * Two answers match when they are identical, when both are complete numeric
 * literals within 0.001 of each other (a trailing "%" is tolerated), or when
 * they are identical once all spaces are removed.
 *
 * @param {string} expected - Answer proposed by the generator.
 * @param {string} actual - Answer produced by the verifier.
 * @returns {boolean} True when the answers are considered equivalent.
 */
function _answersMatch(expected, actual) {
  const a = _normalizeForCompare(expected);
  const b = _normalizeForCompare(actual);
  if (a === b) return true;

  // Numeric comparison applies only when BOTH strings are numbers in their
  // entirety. parseFloat alone accepts any numeric prefix, which made lists
  // such as "1 2 3" and "1 3 2" compare equal (both parse as 1).
  const numeric = /^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:e[-+]?\d+)?\s*%?$/;
  if (numeric.test(a) && numeric.test(b) && Math.abs(parseFloat(a) - parseFloat(b)) < 0.001) return true;

  // Spacing-insensitive comparison; this also covers "a, b" versus "a,b".
  return a.replace(/ /g, '') === b.replace(/ /g, '');
}
|
|
490
|
+
|
|
491
|
+
/**
 * Cheap sanity checks run on a generated challenge before it is verified.
 *
 * @param {string} prompt - Challenge text.
 * @param {string} answer - Expected answer.
 * @returns {string|null} A rejection reason ('Answer too long',
 *   'Missing reply instruction' or 'Detected riddle'), or null when the
 *   challenge passes every check.
 */
function _preValidate(prompt, answer) {
  // More than four whitespace-separated tokens is rejected.
  const tokenCount = answer.split(/\s+/).length;
  if (tokenCount > 4) return 'Answer too long';

  const lowered = prompt.toLowerCase();

  const hasReplyInstruction = ['reply with only', 'respond with only'].some((phrase) => lowered.includes(phrase));
  if (!hasReplyInstruction) return 'Missing reply instruction';

  const looksLikeRiddle = ['what am i', 'what has', 'i am a', 'riddle'].some((signal) => lowered.includes(signal));
  return looksLikeRiddle ? 'Detected riddle' : null;
}
|
|
499
|
+
|
|
500
|
+
/**
 * Ask an LLM to invent a challenge, then have an LLM solve it independently,
 * and accept the challenge only when both answers agree.
 *
 * Each attempt makes up to two LLM calls (generate, then verify). Any failure
 * inside an attempt (network error, malformed JSON, failed pre-validation,
 * disagreement between generator and verifier) moves on to the next attempt.
 *
 * @param {string} providerName - Provider key passed through to _callLLM.
 * @param {string} apiKey - API key for that provider.
 * @param {string} [model] - Model used for generation (provider default if falsy).
 * @param {string} [verifyModel] - Model used for verification; defaults to `model`.
 * @param {number} [maxRetries=3] - Maximum number of generate/verify attempts.
 * @returns {Promise<{prompt: string, answer: string}|null>} The challenge with
 *   its normalized answer, or null when every attempt failed.
 */
async function generateDynamicChallenge(providerName, apiKey, model, verifyModel, maxRetries = 3) {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const generated = await _callLLM(providerName, apiKey, [{ role: 'user', content: GENERATE_PROMPT }], model, 15000);
      // Remove markdown fences
      const raw = generated.replace(/```(?:json)?\s*/g, '').replace(/```/g, '');
      // Find last JSON object (after chain-of-thought work)
      const matches = [...raw.matchAll(/\{[^{}]*\}/g)];
      if (!matches.length) continue;
      const data = JSON.parse(matches[matches.length - 1][0]);

      const prompt = typeof data.prompt === 'string' ? data.prompt.trim() : '';
      // Models often emit numeric answers unquoted ({"answer": 29}). Coerce
      // them instead of throwing on `.trim()`, and keep a legitimate 0.
      const hasScalarAnswer = typeof data.answer === 'string' || typeof data.answer === 'number';
      const expectedAnswer = hasScalarAnswer ? String(data.answer).trim() : '';
      if (!prompt || !expectedAnswer) continue;

      // Pre-validate
      if (_preValidate(prompt, expectedAnswer)) continue;

      // Verify by solving. Temperature 0 keeps the verifier as deterministic
      // as the provider allows.
      const verifyResp = await _callLLM(
        providerName,
        apiKey,
        [{ role: 'user', content: `Solve this challenge step by step. Show your work, then write your final answer on the LAST line prefixed with "ANSWER: ".\n\nChallenge: ${prompt}\n\nWork through it carefully:` }],
        verifyModel || model,
        15000,
        0
      );
      const verifyAnswer = _extractVerifierAnswer(verifyResp);

      if (_answersMatch(expectedAnswer, verifyAnswer)) {
        return { prompt, answer: _normalizeForCompare(expectedAnswer) };
      }
    } catch (e) {
      // Deliberately best-effort: a failed attempt just leads to the next
      // retry, and ultimately to the `null` return below.
    }
  }
  return null; // fallback to static
}
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
// ── Main Class ───────────────────────────────────────
|
|
537
|
+
|
|
538
|
+
export class AgentChallenge {
  /**
   * LLM-solvable challenge-response system.
   *
   * API keys for dynamic mode are picked up from the environment variables
   * named by each PROVIDERS entry (`envKey`) at construction time.
   *
   * @param {Object} opts
   * @param {string} opts.secret - Server secret key for signing tokens (min 8 chars)
   * @param {string} [opts.difficulty='easy'] - "easy", "medium", or "hard"
   * @param {number} [opts.ttl=300] - Challenge TTL in seconds
   * @param {string[]} [opts.types] - Optional list of allowed challenge types
   * @throws {Error} When the secret is missing or shorter than 8 characters.
   */
  constructor({ secret, difficulty = 'easy', ttl = 300, types = null } = {}) {
    if (!secret || secret.length < 8) throw new Error('Secret must be at least 8 characters');
    this._secret = secret;
    this._difficulty = difficulty;
    this._ttl = ttl;
    this._types = types;

    // Dynamic mode state
    this._dynamicEnabled = false;
    this._dynamicProvider = null;
    this._dynamicModel = null;
    this._dynamicVerifyModel = null;
    this._apiKeys = {};

    // Auto-detect from env
    for (const [name, config] of Object.entries(PROVIDERS)) {
      const envVal = process.env[config.envKey];
      if (envVal) this._apiKeys[name] = envVal;
    }
  }

  // ── API Key Management ────────────────────────────

  /** Set OpenAI API key for dynamic challenge generation. */
  setOpenaiApiKey(key) { this._apiKeys.openai = key; return this; }

  /** Set Anthropic API key for dynamic challenge generation. */
  setAnthropicApiKey(key) { this._apiKeys.anthropic = key; return this; }

  /** Set Google Gemini API key for dynamic challenge generation. */
  setGoogleApiKey(key) { this._apiKeys.google = key; return this; }

  // ── Dynamic Mode ──────────────────────────────────

  /**
   * Enable dynamic LLM-generated challenges.
   *
   * ⚠️ This adds 2 LLM API requests per challenge generation
   * (one to generate, one to verify). This adds latency (~2-5s)
   * and cost to your challenge endpoint.
   *
   * @param {Object} [opts]
   * @param {string} [opts.provider] - "openai", "anthropic", or "google". Auto-detected if omitted.
   * @param {string} [opts.model] - LLM model for generation.
   * @param {string} [opts.verifyModel] - LLM model for verification.
   * @returns {AgentChallenge} self (for chaining)
   * @throws {Error} When the provider is unknown, has no API key, or (on
   *   auto-detection) no provider has an API key.
   */
  enableDynamicMode({ provider, model, verifyModel } = {}) {
    let chosen;
    if (provider) {
      // Own-property check: inherited keys such as "constructor" must not be
      // mistaken for a configured provider.
      if (!Object.prototype.hasOwnProperty.call(PROVIDERS, provider)) {
        throw new Error(`Unknown provider: ${provider}. Choose from: ${Object.keys(PROVIDERS).join(', ')}`);
      }
      if (!this._apiKeys[provider]) {
        throw new Error(`No API key set for ${provider}. Use set${provider[0].toUpperCase() + provider.slice(1)}ApiKey() or set ${PROVIDERS[provider].envKey} env var.`);
      }
      chosen = provider;
    } else {
      // Decide from the keys available right now; a provider remembered from
      // an earlier call must not mask the fact that no key exists any more.
      chosen = ['openai', 'anthropic', 'google'].find((p) => this._apiKeys[p]);
      if (!chosen) throw new Error('No API key available. Set OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY.');
    }
    this._dynamicProvider = chosen;
    this._dynamicEnabled = true;
    this._dynamicModel = model || null;
    this._dynamicVerifyModel = verifyModel || null;
    return this;
  }

  /** Disable dynamic mode and return to static challenges only. */
  disableDynamicMode() { this._dynamicEnabled = false; return this; }

  /** Whether dynamic challenge generation is currently enabled. */
  get dynamicMode() { return this._dynamicEnabled; }

  // ── Challenge Operations ──────────────────────────

  /**
   * Create a new challenge. If dynamic mode is enabled (and no explicit type
   * is requested), attempts LLM generation first and falls back to a static
   * challenge when generation yields nothing.
   *
   * @param {string|null} [challengeType=null] - Static challenge type to force.
   * @returns {Promise<Object>} Challenge object (see _buildChallenge).
   * @throws {Error} When the static challenge type is unknown.
   */
  async create(challengeType = null) {
    // Dynamic mode: try LLM-generated challenge
    if (this._dynamicEnabled && !challengeType) {
      const result = await generateDynamicChallenge(
        this._dynamicProvider, this._apiKeys[this._dynamicProvider],
        this._dynamicModel, this._dynamicVerifyModel
      );
      if (result) return this._buildChallenge('dynamic', result.prompt, result.answer);
    }

    // Static mode (or dynamic fallback)
    return this.createSync(challengeType);
  }

  /**
   * Create a challenge synchronously (static only, no dynamic mode).
   * Use this when you need a sync API and don't need dynamic challenges.
   *
   * @param {string|null} [challengeType=null] - Challenge type to force;
   *   otherwise one is picked from the configured types or difficulty pool.
   * @returns {Object} Challenge object (see _buildChallenge).
   * @throws {Error} When the challenge type is unknown.
   */
  createSync(challengeType = null) {
    const pool = this._types || DIFFICULTY_MAP[this._difficulty] || DIFFICULTY_MAP.easy;
    const typeName = challengeType || pick(pool);
    // Only own entries count: "toString" or "constructor" would otherwise
    // resolve to an inherited function and be invoked as a generator.
    const isKnown = Object.prototype.hasOwnProperty.call(CHALLENGE_TYPES, typeName) && CHALLENGE_TYPES[typeName];
    if (!isKnown) throw new Error(`Unknown type: ${typeName}`);
    const { prompt, answer } = CHALLENGE_TYPES[typeName]();
    return this._buildChallenge(typeName, prompt, answer);
  }

  /**
   * Wrap a prompt/answer pair into a signed challenge.
   *
   * The token payload carries the challenge id, type, a hash of the answer
   * and creation/expiry times (seconds since the epoch).
   *
   * @returns {{id: string, prompt: string, token: string, expires_at: number, challenge_type: string, toDict: Function}}
   */
  _buildChallenge(typeName, prompt, answer) {
    const id = 'ch_' + crypto.randomBytes(12).toString('hex');
    const now = Math.floor(Date.now() / 1000);
    const expiresAt = now + this._ttl;
    const payload = { id, type: typeName, answer_hash: hashAnswer(answer), created_at: now, expires_at: expiresAt };
    const token = encodeToken(payload, this._secret);
    return {
      id, prompt, token, expires_at: expiresAt, challenge_type: typeName,
      // expires_in is recomputed at call time, so it counts down.
      toDict() { return { id, prompt, token, expires_in: Math.max(0, payload.expires_at - Math.floor(Date.now() / 1000)), type: 'reasoning' }; }
    };
  }

  /**
   * Check an answer against a challenge token.
   *
   * @param {string} token - Challenge token issued with the challenge.
   * @param {string} answer - The agent's answer.
   * @returns {{valid: boolean, error?: string, challenge_type?: string, elapsed_ms?: number}}
   *   `valid: true` on success; otherwise `valid: false` with an `error`
   *   (token decoding failure, 'Challenge expired', 'Empty answer' or
   *   'Incorrect answer').
   */
  verify(token, answer) {
    const start = Date.now();
    let payload;
    try { payload = decodeToken(token, this._secret); } catch (e) { return { valid: false, error: e.message }; }
    if (Math.floor(Date.now() / 1000) > payload.expires_at) return { valid: false, error: 'Challenge expired' };
    const normalized = normalizeAnswer(answer);
    if (!normalized) return { valid: false, error: 'Empty answer' };
    if (hashAnswer(normalized) === payload.answer_hash) return { valid: true, challenge_type: payload.type, elapsed_ms: Date.now() - start };
    return { valid: false, error: 'Incorrect answer', challenge_type: payload.type };
  }

  // ── Gate (unified challenge/auth endpoint) ─────────

  /**
   * Shared first half of gate()/gateSync(): handle a presented persistent
   * token or a submitted answer.
   *
   * @returns {Object|null} The gate response, or null when neither a token
   *   nor a challengeToken+answer pair was supplied (a challenge must be issued).
   */
  _resolveCredentials(token, challengeToken, answer) {
    // Mode 1: Persistent token
    if (token) {
      if (this.verifyToken(token)) return { status: 'authenticated' };
      return { status: 'error', error: 'Invalid or expired token' };
    }

    // Mode 2: Submitting answer
    if (challengeToken && answer) {
      const result = this.verify(challengeToken, answer);
      if (result.valid) return { status: 'authenticated', token: this.createToken() };
      return { status: 'error', error: result.error || 'Incorrect answer' };
    }

    return null;
  }

  /** Shape a freshly created challenge as a "challenge_required" gate response. */
  _challengeRequired(challenge) {
    return {
      status: 'challenge_required',
      prompt: challenge.prompt,
      challenge_token: challenge.token,
      expires_in: Math.max(0, challenge.expires_at - Math.floor(Date.now() / 1000)),
    };
  }

  /**
   * Unified challenge gate (synchronous, static challenges only).
   * Call from a single endpoint.
   *
   * Three modes:
   * 1. token → verify persistent token → { status: "authenticated" }
   * 2. challengeToken + answer → verify → issue persistent token
   * 3. Nothing → issue a new challenge
   *
   * @param {Object} opts
   * @param {string} [opts.token] - Persistent agent token
   * @param {string} [opts.challengeToken] - Challenge token from previous gate()
   * @param {string} [opts.answer] - Agent's answer
   * @returns {{ status: string, prompt?: string, challenge_token?: string, expires_in?: number, token?: string, error?: string }}
   */
  gateSync({ token, challengeToken, answer } = {}) {
    // Mode 3: Issue challenge
    return this._resolveCredentials(token, challengeToken, answer) || this._challengeRequired(this.createSync());
  }

  /**
   * Async version of gate (supports dynamic mode). Same three modes and
   * response shape as gateSync(); the challenge comes from create().
   */
  async gate({ token, challengeToken, answer } = {}) {
    return this._resolveCredentials(token, challengeToken, answer) || this._challengeRequired(await this.create());
  }

  // ── Persistent Tokens ─────────────────────────────

  /**
   * Create a persistent agent token (long-lived, HMAC-signed).
   * Issued after an agent solves a challenge. Stateless.
   * The payload carries no expiry field.
   *
   * @param {string} [agentId] - Optional identifier stored in the payload as `agent_id`.
   * @returns {string} The encoded token.
   */
  createToken(agentId) {
    const id = 'at_' + crypto.randomBytes(16).toString('hex');
    const payload = { id, type: 'agent_token', created_at: Math.floor(Date.now() / 1000) };
    if (agentId) payload.agent_id = agentId;
    return encodeToken(payload, this._secret);
  }

  /**
   * Verify a persistent agent token.
   * @returns {boolean} True when the token decodes with this instance's
   *   secret and its type is 'agent_token'; false otherwise (never throws).
   */
  verifyToken(token) {
    try {
      const payload = decodeToken(token, this._secret);
      return payload.type === 'agent_token';
    } catch { return false; }
  }
}
|
|
765
|
+
|
|
766
|
+
export { CHALLENGE_TYPES, DIFFICULTY_MAP, _callLLM };
|