@arcis/node 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1021,6 +1021,187 @@ arcisWithMethods.logger = createSafeLogger;
1021
1021
  arcisWithMethods.errorHandler = createErrorHandler;
1022
1022
  var main_default = arcisWithMethods;
1023
1023
 
1024
+ // src/utils/duration.ts
1025
/** Upper bound for any parsed duration, in milliseconds (2^32 - 1, about 49.7 days). */
var MAX_DURATION_MS = 4294967295;
/** Accepts "<non-negative decimal><optional whitespace><unit>"; the unit is matched case-insensitively. */
var DURATION_REGEX = /^(\d+(?:\.\d+)?)\s*(ms|s|m|h|d)$/i;
/** Number of milliseconds in one of each supported unit. */
var UNIT_TO_MS = {
  ms: 1,
  s: 1e3,
  m: 6e4,
  h: 36e5,
  d: 864e5
};
/**
 * Converts a duration into whole milliseconds.
 *
 * @param {number|string} value - Either a millisecond count, or a string made of a
 *   non-negative decimal number followed by one of `ms`, `s`, `m`, `h`, `d`
 *   (e.g. "5m", "1.5s", "250 ms"). Surrounding whitespace is ignored.
 * @returns {number} Whole milliseconds. Numeric input is floored and clamped to
 *   MAX_DURATION_MS; string input is floored.
 * @throws {Error} If a number is negative or not finite, if the value is neither a
 *   number nor a non-blank string, if the string does not match the expected format,
 *   or if a string duration is larger than MAX_DURATION_MS.
 */
function parseDuration(value) {
  if (typeof value === "number") {
    if (!Number.isFinite(value) || value < 0) {
      throw new Error(`Invalid duration: ${value}. Must be a non-negative finite number.`);
    }
    return Math.min(Math.floor(value), MAX_DURATION_MS);
  }
  if (typeof value !== "string" || value.trim() === "") {
    // String() rather than implicit interpolation so that a Symbol produces this
    // error instead of a TypeError from the template literal.
    throw new Error(`Invalid duration: "${String(value)}". Expected a duration string (e.g. "5m", "2h") or number.`);
  }
  const match = value.trim().match(DURATION_REGEX);
  if (!match) {
    throw new Error(
      `Invalid duration: "${value}". Expected format: <number><unit> where unit is ms, s, m, h, or d.`
    );
  }
  const amount = parseFloat(match[1]);
  const unit = match[2].toLowerCase();
  // Decimal fractions are not exact in binary floating point: 1.005 * 1000 evaluates
  // to 1004.9999999999999, which a plain floor would turn into 1004. Rounding to 15
  // significant digits removes that representation noise before flooring.
  const exactMs = Number((amount * UNIT_TO_MS[unit]).toPrecision(15));
  const ms = Math.floor(exactMs);
  // The regex only admits non-negative amounts, so only the upper bound needs checking.
  if (ms > MAX_DURATION_MS) {
    throw new Error(`Duration "${value}" exceeds maximum allowed (${MAX_DURATION_MS}ms / ~49.7 days).`);
  }
  return ms;
}
1058
+
1059
+ // src/middleware/rate-limit-sliding.ts
1060
/**
 * Builds a `(req, res, next)` middleware that rate-limits each key with a
 * sliding-window counter: the estimated request count is the current fixed window's
 * count plus the immediately preceding window's count, weighted by how much of the
 * current window is still left.
 *
 * @param {object} [options]
 * @param {number} [options.max] - Requests allowed per window.
 * @param {number|string} [options.window] - Window length, in any form parseDuration accepts.
 * @param {string} [options.message] - Error text sent in the rejection body.
 * @param {number} [options.statusCode] - HTTP status used for rejections.
 * @param {(req: object) => string} [options.keyGenerator] - Maps a request to its
 *   counter key; defaults to the request IP, then the socket address, then "unknown".
 * @param {(req: object) => boolean} [options.skip] - When it returns a truthy value the
 *   request bypasses the limiter.
 * @returns {Function} The middleware, with an added `close()` method that stops the
 *   background cleanup timer.
 * @throws {RangeError} If the window resolves to less than one millisecond.
 */
function createSlidingWindowLimiter(options = {}) {
  const {
    max = RATE_LIMIT.DEFAULT_MAX_REQUESTS,
    window: windowOpt = RATE_LIMIT.DEFAULT_WINDOW_MS,
    message = RATE_LIMIT.DEFAULT_MESSAGE,
    statusCode = RATE_LIMIT.DEFAULT_STATUS_CODE,
    keyGenerator = (req) => req.ip ?? req.socket?.remoteAddress ?? "unknown",
    skip
  } = options;
  const windowMs = parseDuration(windowOpt);
  // A zero-length window turns every division below into NaN, and every comparison
  // against NaN is false, so the limiter would silently let all traffic through.
  if (windowMs < 1) {
    throw new RangeError(`Sliding window must be at least 1ms, got ${windowOpt}`);
  }
  // Node timers cannot hold a delay above 2^31 - 1 ms; a larger value is replaced by
  // 1 ms, which would run the cleanup loop continuously.
  const MAX_TIMER_DELAY_MS = 2147483647;
  // Null-prototype dictionaries so that keys such as "__proto__" are plain entries.
  const currentWindows = /* @__PURE__ */ Object.create(null);
  const previousWindows = /* @__PURE__ */ Object.create(null);
  const cleanupInterval = setInterval(() => {
    const cutoff = Date.now() - windowMs * 2;
    for (const store of [previousWindows, currentWindows]) {
      for (const key of Object.keys(store)) {
        if (store[key].startTime < cutoff) {
          delete store[key];
        }
      }
    }
  }, Math.min(windowMs, MAX_TIMER_DELAY_MS));
  // Do not keep the process alive just for housekeeping.
  if (typeof cleanupInterval.unref === "function") {
    cleanupInterval.unref();
  }
  const handler = (req, res, next) => {
    try {
      if (skip?.(req)) return next();
      const key = keyGenerator(req);
      const now = Date.now();
      const windowStart = Math.floor(now / windowMs) * windowMs;
      const existing = currentWindows[key];
      if (!existing || existing.startTime < windowStart) {
        // Only the window directly before this one may contribute to the estimate.
        // A counter from further back (the key was idle for one or more whole
        // windows) is discarded instead of being counted as "previous".
        if (existing && existing.startTime === windowStart - windowMs) {
          previousWindows[key] = existing;
        } else {
          delete previousWindows[key];
        }
        currentWindows[key] = { count: 0, startTime: windowStart };
      }
      const elapsed = now - windowStart;
      const weight = Math.max(0, (windowMs - elapsed) / windowMs);
      const prevCount = previousWindows[key]?.count ?? 0;
      // "+ 1" accounts for the request being evaluated right now.
      const estimatedCount = prevCount * weight + currentWindows[key].count + 1;
      const remaining = Math.max(0, Math.floor(max - estimatedCount));
      const resetMs = windowStart + windowMs - now;
      const resetSeconds = Math.max(1, Math.ceil(resetMs / 1e3));
      res.setHeader("X-RateLimit-Limit", max.toString());
      res.setHeader("X-RateLimit-Remaining", remaining.toString());
      res.setHeader("X-RateLimit-Reset", resetSeconds.toString());
      res.setHeader("X-RateLimit-Policy", `${max};w=${Math.floor(windowMs / 1e3)}`);
      if (estimatedCount > max) {
        res.setHeader("Retry-After", resetSeconds.toString());
        res.status(statusCode).json({
          error: message,
          retryAfter: resetSeconds
        });
        return;
      }
      currentWindows[key].count++;
      next();
    } catch (error) {
      // Fail open: a limiter fault is logged and the request is let through.
      console.error("[arcis] Sliding window rate limiter error:", error);
      next();
    }
  };
  const middleware = handler;
  middleware.close = () => {
    clearInterval(cleanupInterval);
  };
  return middleware;
}
1133
+
1134
+ // src/middleware/rate-limit-token.ts
1135
/**
 * Builds a `(req, res, next)` middleware that rate-limits each key with a token
 * bucket: a bucket starts full, refills continuously at `refillRate` tokens per
 * second up to `capacity`, and each request spends `cost` tokens.
 *
 * @param {object} [options]
 * @param {number} [options.capacity=100] - Bucket size; also the burst limit.
 * @param {number} [options.refillRate=10] - Tokens added per second.
 * @param {number} [options.cost=1] - Tokens spent per request.
 * @param {string} [options.message] - Error text sent in the rejection body.
 * @param {number} [options.statusCode] - HTTP status used for rejections.
 * @param {(req: object) => string} [options.keyGenerator] - Maps a request to its
 *   bucket key; defaults to the request IP, then the socket address, then "unknown".
 * @param {(req: object) => boolean} [options.skip] - When it returns a truthy value the
 *   request bypasses the limiter.
 * @returns {Function} The middleware, with an added `close()` method that stops the
 *   background cleanup timer.
 * @throws {RangeError} If capacity < 1, refillRate <= 0, cost < 1, cost > capacity,
 *   or any of them is NaN.
 */
function createTokenBucketLimiter(options = {}) {
  const {
    capacity = 100,
    refillRate = 10,
    cost = 1,
    message = RATE_LIMIT.DEFAULT_MESSAGE,
    statusCode = RATE_LIMIT.DEFAULT_STATUS_CODE,
    keyGenerator = (req) => req.ip ?? req.socket?.remoteAddress ?? "unknown",
    skip
  } = options;
  // The checks are written as negated ">=" / ">" / "<=" so that NaN is rejected too:
  // every comparison with NaN is false, so "capacity < 1" would let NaN through and
  // produce a limiter that never denies anything.
  if (!(capacity >= 1)) throw new RangeError(`Token bucket capacity must be >= 1, got ${capacity}`);
  if (!(refillRate > 0)) throw new RangeError(`Token bucket refillRate must be > 0, got ${refillRate}`);
  if (!(cost >= 1)) throw new RangeError(`Token bucket cost must be >= 1, got ${cost}`);
  if (!(cost <= capacity)) throw new RangeError(`Token bucket cost (${cost}) must be <= capacity (${capacity}), otherwise all requests are permanently denied`);
  // Null-prototype dictionary so that keys such as "__proto__" are plain entries.
  const buckets = /* @__PURE__ */ Object.create(null);
  // A bucket untouched for twice the time an empty bucket needs to refill is
  // necessarily full again, so dropping it loses no state.
  const staleThreshold = capacity / refillRate * 1e3 * 2;
  const cleanupInterval = setInterval(() => {
    const now = Date.now();
    for (const key of Object.keys(buckets)) {
      if (now - buckets[key].lastRefill > staleThreshold) {
        delete buckets[key];
      }
    }
  }, 6e4);
  // Do not keep the process alive just for housekeeping.
  if (typeof cleanupInterval.unref === "function") {
    cleanupInterval.unref();
  }
  /** Credits the tokens earned since the bucket's last refill, capped at capacity. */
  function refillBucket(bucket, now) {
    // Date.now() is wall-clock time and can step backwards; clamp so that such a
    // step never yields a negative refill that drains the bucket.
    const elapsed = Math.max(0, now - bucket.lastRefill) / 1e3;
    const tokensToAdd = elapsed * refillRate;
    bucket.tokens = Math.min(capacity, bucket.tokens + tokensToAdd);
    bucket.lastRefill = now;
  }
  const handler = (req, res, next) => {
    try {
      if (skip?.(req)) return next();
      const key = keyGenerator(req);
      const now = Date.now();
      if (!buckets[key]) {
        buckets[key] = { tokens: capacity, lastRefill: now };
      }
      const bucket = buckets[key];
      refillBucket(bucket, now);
      // Whole seconds until enough tokens have accrued for one request.
      const retryAfterSec = bucket.tokens < cost ? Math.ceil((cost - bucket.tokens) / refillRate) : 0;
      res.setHeader("X-RateLimit-Limit", capacity.toString());
      res.setHeader("X-RateLimit-Remaining", Math.floor(Math.max(0, bucket.tokens - cost)).toString());
      res.setHeader("X-RateLimit-Policy", `${capacity};w=${Math.floor(capacity / refillRate)};burst=${capacity}`);
      if (bucket.tokens < cost) {
        res.setHeader("Retry-After", retryAfterSec.toString());
        res.setHeader("X-RateLimit-Reset", retryAfterSec.toString());
        res.status(statusCode).json({
          error: message,
          retryAfter: retryAfterSec
        });
        return;
      }
      bucket.tokens -= cost;
      next();
    } catch (error) {
      // Fail open: a limiter fault is logged and the request is let through.
      console.error("[arcis] Token bucket rate limiter error:", error);
      next();
    }
  };
  const middleware = handler;
  middleware.close = () => {
    clearInterval(cleanupInterval);
  };
  return middleware;
}
1204
+
1024
1205
  // src/middleware/cors.ts
1025
1206
  var DEFAULT_METHODS = ["GET", "HEAD", "PUT", "PATCH", "POST", "DELETE"];
1026
1207
  var DEFAULT_HEADERS = ["Content-Type", "Authorization"];
@@ -1151,6 +1332,199 @@ function secureCookieDefaults(options = {}) {
1151
1332
  }
1152
1333
  var createSecureCookies = secureCookieDefaults;
1153
1334
 
1154
- export { arcis, arcisWithMethods as arcisFunction, createCors, createErrorHandler, createHeaders, createRateLimiter, createSecureCookies, main_default as default, enforceSecureCookie, errorHandler, rateLimit, safeCors, secureCookieDefaults, securityHeaders };
1335
+ // src/middleware/bot-detection.ts
1336
/**
 * Known crawler and tool User-Agent signatures as a flat, ordered list of
 * `{ pattern, name, category }` records. Order matters to detectBot, which reports
 * the first matching entry, so specific variants are listed before the generic
 * signature they would otherwise be shadowed by (e.g. Googlebot-Image before Googlebot).
 *
 * The table below is grouped by category and expanded into the flat list; categories
 * and the rows inside each category keep their written order.
 */
var BOT_PATTERNS = Object.entries({
  // Search engines (specific variants before generic).
  SEARCH_ENGINE: [
    [/Googlebot-Image/i, "Googlebot-Image"],
    [/Googlebot-Video/i, "Googlebot-Video"],
    [/Googlebot-News/i, "Googlebot-News"],
    [/Googlebot/i, "Googlebot"],
    [/AdsBot-Google/i, "AdsBot-Google"],
    [/Mediapartners-Google/i, "Mediapartners-Google"],
    [/Bingbot/i, "Bingbot"],
    [/msnbot/i, "msnbot"],
    [/Slurp/i, "Yahoo Slurp"],
    [/DuckDuckBot/i, "DuckDuckBot"],
    [/Baiduspider/i, "Baiduspider"],
    [/YandexBot/i, "YandexBot"],
    [/YandexImages/i, "YandexImages"],
    [/Sogou/i, "Sogou"],
    [/Exabot/i, "Exabot"],
    [/ia_archiver/i, "Alexa"],
    [/Applebot/i, "Applebot"],
    [/Qwantify/i, "Qwantify"],
    [/PetalBot/i, "PetalBot"],
    [/SeznamBot/i, "SeznamBot"]
  ],
  // Social-network link previewers.
  SOCIAL: [
    [/Twitterbot/i, "Twitterbot"],
    [/facebookexternalhit/i, "Facebook"],
    [/Facebot/i, "Facebot"],
    [/LinkedInBot/i, "LinkedInBot"],
    [/Pinterest/i, "Pinterest"],
    [/Slackbot/i, "Slackbot"],
    [/TelegramBot/i, "TelegramBot"],
    [/WhatsApp/i, "WhatsApp"],
    [/Discordbot/i, "Discordbot"],
    [/Redditbot/i, "Redditbot"],
    [/Embedly/i, "Embedly"],
    [/Quora Link Preview/i, "Quora"],
    [/Mastodon/i, "Mastodon"]
  ],
  // Uptime and performance monitors.
  MONITORING: [
    [/UptimeRobot/i, "UptimeRobot"],
    [/Pingdom/i, "Pingdom"],
    [/Site24x7/i, "Site24x7"],
    [/StatusCake/i, "StatusCake"],
    [/Datadog/i, "Datadog"],
    [/NewRelicPinger/i, "New Relic"],
    [/Better Uptime Bot/i, "Better Uptime"],
    [/GTmetrix/i, "GTmetrix"],
    [/PageSpeed/i, "PageSpeed Insights"]
  ],
  // AI crawlers.
  AI_CRAWLER: [
    [/GPTBot/i, "GPTBot"],
    [/ChatGPT-User/i, "ChatGPT-User"],
    [/Claude-Web/i, "Claude-Web"],
    [/ClaudeBot/i, "ClaudeBot"],
    [/anthropic-ai/i, "Anthropic"],
    [/Bytespider/i, "Bytespider"],
    [/CCBot/i, "CCBot"],
    [/cohere-ai/i, "Cohere"],
    [/PerplexityBot/i, "PerplexityBot"],
    [/YouBot/i, "YouBot"],
    [/Google-Extended/i, "Google-Extended"],
    [/Diffbot/i, "Diffbot"],
    [/Amazonbot/i, "Amazonbot"],
    [/meta-externalagent/i, "Meta AI"]
  ],
  // Automated tools (headless browsers, testing frameworks).
  AUTOMATED: [
    [/HeadlessChrome/i, "Headless Chrome"],
    [/PhantomJS/i, "PhantomJS"],
    [/Selenium/i, "Selenium"],
    [/Puppeteer/i, "Puppeteer"],
    [/Playwright/i, "Playwright"],
    [/Cypress/i, "Cypress"],
    [/webdriver/i, "WebDriver"],
    [/MSIE 6\.0/i, "Fake IE6"]
  ],
  // Scrapers and CLI / HTTP-library clients (mostly anchored to the start of the UA).
  SCRAPER: [
    [/^curl\//i, "curl"],
    [/^wget\//i, "wget"],
    [/^python-requests\//i, "python-requests"],
    [/^python-httpx\//i, "python-httpx"],
    [/^Python-urllib/i, "Python-urllib"],
    [/^aiohttp\//i, "aiohttp"],
    [/^Go-http-client/i, "Go-http-client"],
    [/^Java\//i, "Java HttpClient"],
    [/^Apache-HttpClient/i, "Apache HttpClient"],
    [/^okhttp\//i, "OkHttp"],
    [/^node-fetch\//i, "node-fetch"],
    [/^axios\//i, "axios"],
    [/^got\//i, "got"],
    [/^libwww-perl/i, "libwww-perl"],
    [/^Ruby/i, "Ruby"],
    [/^PHP\//i, "PHP"],
    [/Scrapy/i, "Scrapy"],
    [/^Postman/i, "Postman"],
    [/^Insomnia/i, "Insomnia"],
    [/^HTTPie\//i, "HTTPie"]
  ]
}).flatMap(
  ([category, signatures]) => signatures.map(([pattern, name]) => ({ pattern, name, category }))
);
1428
/**
 * Collects header-level hints that a request may not come from an ordinary browser.
 *
 * @param {{ headers: Object<string, string|undefined> }} req - Request whose `headers`
 *   object is keyed by lower-case header name.
 * @returns {string[]} Zero or more of "missing_user_agent", "missing_accept",
 *   "missing_accept_language", "missing_accept_encoding" and "connection_close",
 *   in that order.
 */
function detectBehavioralSignals(req) {
  const headers = req.headers;
  const expectedHeaders = [
    ["user-agent", "missing_user_agent"],
    ["accept", "missing_accept"],
    ["accept-language", "missing_accept_language"],
    ["accept-encoding", "missing_accept_encoding"]
  ];
  const signals = [];
  for (const [headerName, signal] of expectedHeaders) {
    if (!headers[headerName]) {
      signals.push(signal);
    }
  }
  // Connection carries a comma-separated list of case-insensitive options, so
  // "Close" and "close, TE" must count just like a bare "close".
  const connectionOptions = String(headers["connection"] ?? "")
    .toLowerCase()
    .split(",")
    .map((option) => option.trim());
  if (connectionOptions.includes("close")) {
    signals.push("connection_close");
  }
  return signals;
}
1448
/**
 * Classifies a request as bot or human from its User-Agent and header signals.
 *
 * Decision order: an empty/missing User-Agent is reported as an unknown bot; otherwise
 * the first BOT_PATTERNS entry matching the User-Agent (only its first 2048 characters
 * are examined) decides; otherwise three or more behavioural signals mark an unknown
 * bot; anything else is reported as human.
 *
 * @param {{ headers: Object<string, string|undefined> }} req - Incoming request.
 * @returns {{ isBot: boolean, category: string, name: (string|null), confidence: number, signals: string[] }}
 *   The verdict; `signals` is whatever detectBehavioralSignals reported for the request.
 */
function detectBot(req) {
  const UA_SCAN_LIMIT = 2048;
  let ua = req.headers["user-agent"] ?? "";
  if (ua.length > UA_SCAN_LIMIT) {
    // Cap the text handed to the pattern scan.
    ua = ua.slice(0, UA_SCAN_LIMIT);
  }
  const signals = detectBehavioralSignals(req);
  const verdict = (isBot, category, name, confidence) => ({
    isBot,
    category,
    name,
    confidence,
    signals
  });

  if (!ua) {
    return verdict(true, "UNKNOWN", null, 0.8);
  }

  const known = BOT_PATTERNS.find((candidate) => candidate.pattern.test(ua));
  if (known) {
    return verdict(true, known.category, known.name, 0.95);
  }

  const behaviorScore = signals.length;
  return behaviorScore >= 3
    ? verdict(true, "UNKNOWN", null, Math.min(1, 0.6 + behaviorScore * 0.1))
    : verdict(false, "HUMAN", null, Math.max(0, 1 - behaviorScore * 0.15));
}
1490
/**
 * Builds a `(req, res, next)` middleware that classifies each request with detectBot,
 * stores the verdict on `req.botDetection`, and then lets the request through or
 * rejects it.
 *
 * Non-bots and bots whose category is in `allow` always pass (allow wins over deny).
 * Any other bot is rejected when its category is in `deny` or when `defaultAction`
 * is "deny"; otherwise it passes.
 *
 * @param {object} [options]
 * @param {string[]} [options.allow=["SEARCH_ENGINE","SOCIAL","MONITORING"]] - Categories let through.
 * @param {string[]} [options.deny=["AUTOMATED"]] - Categories rejected.
 * @param {string} [options.defaultAction="allow"] - What to do with bots in neither list.
 * @param {number} [options.statusCode=403] - HTTP status used for the built-in rejection.
 * @param {string} [options.message="Access denied."] - Error text of the built-in rejection.
 * @param {(req: object, res: object, result: object) => *} [options.onDetected] - When
 *   given, it is called instead of the built-in rejection and its return value is returned.
 * @returns {Function} The middleware.
 */
function botProtection(options = {}) {
  const {
    allow = ["SEARCH_ENGINE", "SOCIAL", "MONITORING"],
    deny = ["AUTOMATED"],
    defaultAction = "allow",
    statusCode = 403,
    message = "Access denied.",
    onDetected
  } = options;
  const allowSet = new Set(allow);
  const denySet = new Set(deny);
  return (req, res, next) => {
    const result = detectBot(req);
    req.botDetection = result;

    // Humans and explicitly allowed bot categories go straight through.
    if (!result.isBot || allowSet.has(result.category)) {
      return next();
    }

    const mustReject = denySet.has(result.category) || defaultAction === "deny";
    if (!mustReject) {
      next();
      return;
    }

    // A custom handler, when supplied, fully replaces the built-in rejection.
    if (onDetected) {
      return onDetected(req, res, result);
    }
    res.status(statusCode).json({ error: message });
  };
}
1527
+
1528
+ export { arcis, arcisWithMethods as arcisFunction, botProtection, createCors, createErrorHandler, createHeaders, createRateLimiter, createSecureCookies, createSlidingWindowLimiter, createTokenBucketLimiter, main_default as default, detectBot, enforceSecureCookie, errorHandler, rateLimit, safeCors, secureCookieDefaults, securityHeaders };
1155
1529
  //# sourceMappingURL=index.mjs.map
1156
1530
  //# sourceMappingURL=index.mjs.map