agent-challenge 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/src/agentchallenge.js +119 -12
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-challenge",
3
- "version": "1.2.0",
3
+ "version": "1.3.0",
4
4
  "description": "Drop-in LLM authentication for any API endpoint. Reasoning puzzles that agents solve once, then pass through forever. Stateless HMAC tokens, no database.",
5
5
  "main": "src/agentchallenge.js",
6
6
  "type": "module",
@@ -1361,17 +1361,44 @@ CHALLENGE_TYPES.string_interleave = () => {
1361
1361
  }
1362
1362
  };
1363
1363
 
1364
- // Chained arithmetic: (a+b)*c - d mod m — GPT-5.2 100%, GPT-4o 30%
1364
+ // Chained arithmetic: multi-step arithmetic chains reduced modulo m — GPT-5.2 100%, GPT-4o 30%
1365
1365
  CHALLENGE_TYPES.chained_arithmetic = () => {
1366
- const a = randInt(2, 9), b = randInt(2, 9), c = randInt(2, 5), d = randInt(1, 9), m = randInt(3, 7);
1367
- const result = ((a + b) * c - d) % m;
1368
- const templates = [
1369
- `Compute (${a} + ${b}), multiply by ${c}, subtract ${d}, then find the remainder when divided by ${m}.`,
1370
- `Add ${a} and ${b}. Multiply the result by ${c}. Subtract ${d}. What is the remainder when divided by ${m}?`,
1371
- `What is ((${a} + ${b}) × ${c} - ${d}) mod ${m}?`,
1372
- `Calculate ${a} plus ${b}, times ${c}, minus ${d}. Find the remainder after dividing by ${m}.`,
1373
- ];
1374
- return { prompt: buildPrompt(pick(templates)), answer: String(result) };
1366
+ const pattern = pick(["add_mul_sub_mod", "mul_add_mul_mod", "add_square_sub_mod", "mul_sub_add_mod"]);
1367
+
1368
+ if (pattern === "add_mul_sub_mod") {
1369
+ const a = randInt(2,9), b = randInt(2,9), c = randInt(2,5), d = randInt(1,9), m = randInt(3,7);
1370
+ return { prompt: buildPrompt(pick([
1371
+ `Add ${a} and ${b}. Multiply by ${c}. Subtract ${d}. Find the remainder when divided by ${m}.`,
1372
+ `What is ((${a} + ${b}) × ${c} - ${d}) mod ${m}?`,
1373
+ `Compute ${a} + ${b}, multiply the sum by ${c}, subtract ${d}, remainder mod ${m}.`,
1374
+ `Sum ${a} and ${b}, then multiply by ${c}, then subtract ${d}. What is the remainder after dividing by ${m}?`,
1375
+ `(${a} + ${b}) × ${c} − ${d}. Divide by ${m} and give the remainder.`,
1376
+ ])), answer: String(((a + b) * c - d) % m) };
1377
+ } else if (pattern === "mul_add_mul_mod") {
1378
+ const a = randInt(2,7), b = randInt(2,5), c = randInt(3,9), d = randInt(2,4), m = randInt(3,7);
1379
+ return { prompt: buildPrompt(pick([
1380
+ `Multiply ${a} by ${b}. Add ${c}. Multiply by ${d}. Find the remainder when divided by ${m}.`,
1381
+ `What is ((${a} × ${b} + ${c}) × ${d}) mod ${m}?`,
1382
+ `Compute ${a} × ${b}, add ${c}, multiply by ${d}, remainder mod ${m}.`,
1383
+ `Product of ${a} and ${b}, plus ${c}, times ${d}. What is the remainder after dividing by ${m}?`,
1384
+ ])), answer: String(((a * b + c) * d) % m) };
1385
+ } else if (pattern === "add_square_sub_mod") {
1386
+ const a = randInt(2,5), b = randInt(1,4), c = randInt(1,8), m = randInt(3,7);
1387
+ return { prompt: buildPrompt(pick([
1388
+ `Add ${a} and ${b}. Square the result. Subtract ${c}. Find the remainder when divided by ${m}.`,
1389
+ `What is ((${a} + ${b})² - ${c}) mod ${m}?`,
1390
+ `Compute (${a} + ${b}) squared, subtract ${c}, remainder mod ${m}.`,
1391
+ `Sum ${a} and ${b}, square it, subtract ${c}. Remainder after dividing by ${m}?`,
1392
+ ])), answer: String(((a + b) ** 2 - c) % m) };
1393
+ } else { // mul_sub_add_mod
1394
+ const a = randInt(3,9), b = randInt(2,5), c = randInt(1, Math.min(a*b-1,9)), d = randInt(2,9), m = randInt(3,7);
1395
+ return { prompt: buildPrompt(pick([
1396
+ `Multiply ${a} by ${b}. Subtract ${c}. Add ${d}. Find the remainder when divided by ${m}.`,
1397
+ `What is (${a} × ${b} - ${c} + ${d}) mod ${m}?`,
1398
+ `Compute ${a} × ${b}, subtract ${c}, add ${d}, remainder mod ${m}.`,
1399
+ `Product of ${a} and ${b}, minus ${c}, plus ${d}. Remainder after dividing by ${m}?`,
1400
+ ])), answer: String((a * b - c + d) % m) };
1401
+ }
1375
1402
  };
1376
1403
 
1377
1404
  // Power modulo: base^exp mod m — GPT-5.2 100%, GPT-4o 80%
@@ -1387,13 +1414,93 @@ CHALLENGE_TYPES.power_mod = () => {
1387
1414
  return { prompt: buildPrompt(pick(templates)), answer: String(answer) };
1388
1415
  };
1389
1416
 
1417
+ // Knowledge + math: world-knowledge facts (values stated) + arithmetic + mod
1418
+ // GPT-5.2: 100% | GPT-4o: ~85-93% | Humans: need Google for fact verification
1419
/**
 * Knowledge + math challenge generator.
 *
 * Draws two distinct fact entries, states both (with their values) in the
 * prompt, asks for one arithmetic operation on the two values, and then for
 * the remainder modulo a random divisor.
 *
 * @returns {{prompt: string, answer: string}} `prompt` is the output of
 *   `buildPrompt`; `answer` is the remainder rendered as a decimal string.
 */
CHALLENGE_TYPES.knowledge_math = () => {
  // Each entry is [sentence template, value]; "{v}" marks where the value goes.
  const FACTS = [
    ["The atomic number of oxygen is {v}", 8],
    ["The atomic number of carbon is {v}", 6],
    ["The atomic number of nitrogen is {v}", 7],
    ["The atomic number of neon is {v}", 10],
    ["The atomic number of sodium is {v}", 11],
    ["The atomic number of iron is {v}", 26],
    ["The atomic number of copper is {v}", 29],
    ["The atomic number of gold is {v}", 79],
    ["The atomic number of silver is {v}", 47],
    ["There are {v} planets in our solar system", 8],
    ["There are {v} continents on Earth", 7],
    ["A hexagon has {v} sides", 6],
    ["A pentagon has {v} sides", 5],
    ["A standard guitar has {v} strings", 6],
    ["A violin has {v} strings", 4],
    ["The English alphabet has {v} letters", 26],
    ["An adult human has {v} teeth", 32],
    ["The US flag has {v} stripes", 13],
    ["A spider has {v} legs", 8],
    ["An insect has {v} legs", 6],
    ["There are {v} Harry Potter books in the main series", 7],
    ["Brazil has won {v} FIFA World Cups", 5],
    ["The Olympic flag has {v} rings", 5],
    ["A standard die has {v} total dots across all faces", 21],
    ["There are {v} ounces in a pound", 16],
    ["There are {v} inches in a foot", 12],
    ["A byte has {v} bits", 8],
    ["Beethoven composed {v} symphonies", 9],
    ["A soccer team fields {v} players", 11],
    ["A basketball team has {v} players on court", 5],
    ["A golf course has {v} holes", 18],
    ["A standard deck has {v} cards", 52],
    ["A marathon is approximately {v} miles", 26],
    ["A chess board has {v} squares", 64],
    ["There are {v} hours in a day", 24],
    ["A human cell has {v} chromosomes", 46],
    ["A piano has {v} keys", 88],
  ];

  // Substitute the fact's value into its sentence template.
  const render = ([template, value]) => template.replace('{v}', value);

  const [firstFact, secondFact] = pickN(FACTS, 2);
  const first = firstFact[1];
  const second = secondFact[1];
  const firstSentence = render(firstFact);
  const secondSentence = render(secondFact);
  const divisor = randInt(3, 9);

  // Only offer operations whose guard holds for this pair of values:
  // bounded sum, bounded product, and a non-zero difference.
  const allowedOps = [
    ['add', first + second < 200],
    ['mul', first * second < 5000],
    ['sub', first !== second],
  ]
    .filter(([, permitted]) => permitted)
    .map(([opName]) => opName);
  const chosenOp = pick(allowedOps);

  let combined;
  let instruction;
  switch (chosenOp) {
    case 'add':
      combined = first + second;
      instruction = pick(["Add these two numbers", "Sum these two numbers"]);
      break;
    case 'mul':
      combined = first * second;
      instruction = pick(["Multiply these two numbers", "Find the product of these two numbers"]);
      break;
    default: {
      // Subtraction is always phrased larger-minus-smaller, so the
      // difference is never negative.
      const firstIsLarger = first >= second;
      combined = firstIsLarger ? first - second : second - first;
      instruction = firstIsLarger
        ? "Subtract the second from the first"
        : "Subtract the first from the second";
    }
  }

  // `combined` is non-negative in every branch, so a plain % is the remainder.
  const answer = String(combined % divisor);
  const prompt = buildPrompt(
    `${firstSentence} and ${secondSentence}. ${instruction}, then find the remainder when divided by ${divisor}.`
  );
  return { prompt, answer };
};
1485
+
1486
+ // Helper: pick N unique items from array
1487
/**
 * Pick up to `n` elements from `arr` at random, without picking the same
 * position twice. The input array is not modified.
 *
 * When `n` exceeds the number of available elements, every element is
 * returned (in random order) rather than padding the result with `undefined`.
 *
 * NOTE(review): selection uses Math.random(), which is not cryptographically
 * secure — confirm this matches the randomness source the other helpers use.
 *
 * @param {Array} arr - Source items.
 * @param {number} n - Number of items requested.
 * @returns {Array} A new array holding min(n, arr.length) items; empty when
 *   `n` is zero or negative.
 */
function pickN(arr, n) {
  // Work on a copy so the caller's array is left untouched.
  const pool = [...arr];
  // Never draw more items than exist; splicing an empty pool yields undefined.
  const count = Math.min(n, pool.length);
  const chosen = [];
  for (let i = 0; i < count; i++) {
    const idx = Math.floor(Math.random() * pool.length);
    // Removing the drawn element guarantees it cannot be drawn again.
    chosen.push(pool.splice(idx, 1)[0]);
  }
  return chosen;
}
1496
+
1390
1497
  const DIFFICULTY_MAP = {
1391
1498
  // Easy: gpt-4o-mini solves 100% single-shot (empirically validated)
1392
1499
  easy: ['simple_math', 'string_math', 'binary', 'pattern'],
1393
1500
  // Medium: gpt-4o ~90%, gpt-4o-mini ~60%
1394
1501
  medium: ['sorting', 'word_math'],
1395
- // Hard: gpt-5.2 100%, gpt-4o ~70-80%
1396
- hard: ['nested_operations', 'base_conversion_chain', 'power_mod'],
1502
+ // Hard: gpt-5.2 100%, gpt-4o ~70-85%
1503
+ hard: ['nested_operations', 'base_conversion_chain', 'power_mod', 'knowledge_math'],
1397
1504
  // Agentic: multi-step chains, blocks both gpt-4o and gpt-4o-mini
1398
1505
  agentic: ['chained_arithmetic'],
1399
1506
  };