@kya-os/checkpoint-nextjs 1.1.0 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +178 -0
- package/EDGE_RUNTIME_WASM_SETUP.md +4 -10
- package/README.md +13 -0
- package/bin/setup-edge-wasm.js +40 -32
- package/dist/api-client.d.mts +10 -10
- package/dist/api-client.d.ts +10 -10
- package/dist/create-middleware.d.mts +7 -2
- package/dist/create-middleware.d.ts +7 -2
- package/dist/edge/index.d.mts +3 -3
- package/dist/edge/index.d.ts +3 -3
- package/dist/edge/index.js +16 -3
- package/dist/edge/index.mjs +16 -3
- package/dist/edge-runtime-loader.d.mts +43 -18
- package/dist/edge-runtime-loader.d.ts +43 -18
- package/dist/edge-runtime-loader.js +101 -58
- package/dist/edge-runtime-loader.mjs +98 -59
- package/dist/edge-wasm-middleware.d.mts +28 -34
- package/dist/edge-wasm-middleware.d.ts +28 -34
- package/dist/edge-wasm-middleware.js +16 -306
- package/dist/edge-wasm-middleware.mjs +16 -307
- package/dist/index.js +3 -1
- package/dist/index.mjs +4 -2
- package/dist/nodejs-wasm-loader.d.mts +26 -9
- package/dist/nodejs-wasm-loader.d.ts +26 -9
- package/dist/nodejs-wasm-loader.js +21 -78
- package/dist/nodejs-wasm-loader.mjs +21 -74
- package/dist/session-tracker.d.mts +2 -2
- package/dist/session-tracker.d.ts +2 -2
- package/dist/session-tracker.js +3 -1
- package/dist/session-tracker.mjs +4 -2
- package/dist/wasm-middleware.d.mts +19 -3
- package/dist/wasm-middleware.d.ts +19 -3
- package/dist/wasm-middleware.js +32 -3
- package/dist/wasm-middleware.mjs +32 -4
- package/dist/wasm-setup.js +29 -81
- package/dist/wasm-setup.mjs +29 -76
- package/package.json +4 -4
- package/templates/middleware-wasm-100.ts +11 -3
- package/dist/.tsbuildinfo +0 -1
|
@@ -1,50 +1,75 @@
|
|
|
1
|
+
import { DetectionDetail } from '@kya-os/checkpoint-shared';
|
|
1
2
|
import { NextRequest } from 'next/server';
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
|
-
* Edge Runtime Compatible WASM Loader for
|
|
5
|
+
* Edge Runtime Compatible WASM Loader for Checkpoint
|
|
5
6
|
*
|
|
6
7
|
* This module provides a pre-built solution for loading WASM in Edge Runtime.
|
|
7
8
|
* It requires the WASM file to be manually placed in the project.
|
|
9
|
+
*
|
|
10
|
+
* ## SSOT for pattern detection
|
|
11
|
+
*
|
|
12
|
+
* The fallback `patternDetection` path imports the single canonical
|
|
13
|
+
* pattern table `KNOWN_AGENT_PATTERNS` from
|
|
14
|
+
* `@kya-os/checkpoint-shared/constants/agents`. PDM-2 (#2573) folded
|
|
15
|
+
* what was previously a sibling supplement export
|
|
16
|
+
* (`INTERACTIVE_AGENT_PATTERNS`) into per-agent SSOT rows alongside
|
|
17
|
+
* the existing entries — interactive-session tokens, generic vendor
|
|
18
|
+
* fallbacks, and the GPT-Crawler bot are now first-class SSOT rows
|
|
19
|
+
* with `category`/`isLegitimate` fields. The drift-prevention test
|
|
20
|
+
* in `checkpoint-shared/__tests__/constants/agents.test.ts` enforces
|
|
21
|
+
* that no future PR re-introduces a sibling pool.
|
|
22
|
+
*
|
|
23
|
+
* ## Naming
|
|
24
|
+
*
|
|
25
|
+
* Old `AgentShield`-prefixed exports are kept as `@deprecated` aliases
|
|
26
|
+
* for one release (`createEdgeAgentShield`, `EdgeRuntimeAgentShield`,
|
|
27
|
+
* `AgentShieldConfig`, `getDefaultAgentShield`). New code should import
|
|
28
|
+
* the `Checkpoint`-prefixed names directly.
|
|
8
29
|
*/
|
|
9
30
|
|
|
10
31
|
interface WasmModule {
|
|
11
32
|
default: WebAssembly.Module;
|
|
12
33
|
}
|
|
13
|
-
|
|
14
|
-
isAgent: boolean;
|
|
15
|
-
isAiCrawler?: boolean;
|
|
16
|
-
confidence: number;
|
|
34
|
+
type EdgeRuntimeDetectionDetail = DetectionDetail & {
|
|
17
35
|
agent?: string;
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
}
|
|
22
|
-
interface AgentShieldConfig {
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
interface EdgeCheckpointConfig {
|
|
23
39
|
wasmModule?: WebAssembly.Module;
|
|
24
40
|
enableWasm?: boolean;
|
|
25
|
-
onAgentDetected?: (result:
|
|
41
|
+
onAgentDetected?: (result: EdgeRuntimeDetectionDetail) => void;
|
|
26
42
|
blockAgents?: boolean;
|
|
27
43
|
allowedAgents?: string[];
|
|
28
44
|
debug?: boolean;
|
|
29
45
|
}
|
|
30
|
-
declare class
|
|
46
|
+
declare class EdgeRuntimeCheckpoint {
|
|
31
47
|
private wasmInstance;
|
|
32
48
|
private wasmMemory;
|
|
33
49
|
private config;
|
|
34
50
|
private initialized;
|
|
35
|
-
constructor(config?:
|
|
51
|
+
constructor(config?: EdgeCheckpointConfig);
|
|
36
52
|
init(wasmModule?: WebAssembly.Module): Promise<void>;
|
|
37
53
|
private readString;
|
|
38
54
|
private writeString;
|
|
39
|
-
detect(request: NextRequest): Promise<
|
|
55
|
+
detect(request: NextRequest): Promise<EdgeRuntimeDetectionDetail>;
|
|
40
56
|
private patternDetection;
|
|
41
57
|
isInitialized(): boolean;
|
|
42
58
|
getVerificationMethod(): 'cryptographic' | 'pattern';
|
|
43
59
|
}
|
|
44
60
|
/**
|
|
45
|
-
* Factory function to create
|
|
61
|
+
* Factory function to create a Checkpoint instance for Edge Runtime.
|
|
46
62
|
*/
|
|
47
|
-
declare function
|
|
48
|
-
declare function
|
|
63
|
+
declare function createEdgeCheckpoint(config?: EdgeCheckpointConfig): EdgeRuntimeCheckpoint;
|
|
64
|
+
declare function getDefaultEdgeCheckpoint(config?: EdgeCheckpointConfig): EdgeRuntimeCheckpoint;
|
|
65
|
+
|
|
66
|
+
/** @deprecated Renamed to {@link EdgeCheckpointConfig}. */
|
|
67
|
+
type AgentShieldConfig = EdgeCheckpointConfig;
|
|
68
|
+
/** @deprecated Renamed to {@link EdgeRuntimeCheckpoint}. */
|
|
69
|
+
declare const EdgeRuntimeAgentShield: typeof EdgeRuntimeCheckpoint;
|
|
70
|
+
/** @deprecated Renamed to {@link createEdgeCheckpoint}. */
|
|
71
|
+
declare const createEdgeAgentShield: typeof createEdgeCheckpoint;
|
|
72
|
+
/** @deprecated Renamed to {@link getDefaultEdgeCheckpoint}. */
|
|
73
|
+
declare const getDefaultAgentShield: typeof getDefaultEdgeCheckpoint;
|
|
49
74
|
|
|
50
|
-
export { type AgentShieldConfig, type DetectionResult, type WasmModule, createEdgeAgentShield, getDefaultAgentShield };
|
|
75
|
+
export { type AgentShieldConfig, type EdgeRuntimeDetectionDetail as DetectionResult, type EdgeCheckpointConfig, EdgeRuntimeAgentShield, EdgeRuntimeCheckpoint, type EdgeRuntimeDetectionDetail, type WasmModule, createEdgeAgentShield, createEdgeCheckpoint, getDefaultAgentShield, getDefaultEdgeCheckpoint };
|
|
@@ -1,50 +1,75 @@
|
|
|
1
|
+
import { DetectionDetail } from '@kya-os/checkpoint-shared';
|
|
1
2
|
import { NextRequest } from 'next/server';
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
|
-
* Edge Runtime Compatible WASM Loader for
|
|
5
|
+
* Edge Runtime Compatible WASM Loader for Checkpoint
|
|
5
6
|
*
|
|
6
7
|
* This module provides a pre-built solution for loading WASM in Edge Runtime.
|
|
7
8
|
* It requires the WASM file to be manually placed in the project.
|
|
9
|
+
*
|
|
10
|
+
* ## SSOT for pattern detection
|
|
11
|
+
*
|
|
12
|
+
* The fallback `patternDetection` path imports the single canonical
|
|
13
|
+
* pattern table `KNOWN_AGENT_PATTERNS` from
|
|
14
|
+
* `@kya-os/checkpoint-shared/constants/agents`. PDM-2 (#2573) folded
|
|
15
|
+
* what was previously a sibling supplement export
|
|
16
|
+
* (`INTERACTIVE_AGENT_PATTERNS`) into per-agent SSOT rows alongside
|
|
17
|
+
* the existing entries — interactive-session tokens, generic vendor
|
|
18
|
+
* fallbacks, and the GPT-Crawler bot are now first-class SSOT rows
|
|
19
|
+
* with `category`/`isLegitimate` fields. The drift-prevention test
|
|
20
|
+
* in `checkpoint-shared/__tests__/constants/agents.test.ts` enforces
|
|
21
|
+
* that no future PR re-introduces a sibling pool.
|
|
22
|
+
*
|
|
23
|
+
* ## Naming
|
|
24
|
+
*
|
|
25
|
+
* Old `AgentShield`-prefixed exports are kept as `@deprecated` aliases
|
|
26
|
+
* for one release (`createEdgeAgentShield`, `EdgeRuntimeAgentShield`,
|
|
27
|
+
* `AgentShieldConfig`, `getDefaultAgentShield`). New code should import
|
|
28
|
+
* the `Checkpoint`-prefixed names directly.
|
|
8
29
|
*/
|
|
9
30
|
|
|
10
31
|
interface WasmModule {
|
|
11
32
|
default: WebAssembly.Module;
|
|
12
33
|
}
|
|
13
|
-
|
|
14
|
-
isAgent: boolean;
|
|
15
|
-
isAiCrawler?: boolean;
|
|
16
|
-
confidence: number;
|
|
34
|
+
type EdgeRuntimeDetectionDetail = DetectionDetail & {
|
|
17
35
|
agent?: string;
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
}
|
|
22
|
-
interface AgentShieldConfig {
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
interface EdgeCheckpointConfig {
|
|
23
39
|
wasmModule?: WebAssembly.Module;
|
|
24
40
|
enableWasm?: boolean;
|
|
25
|
-
onAgentDetected?: (result:
|
|
41
|
+
onAgentDetected?: (result: EdgeRuntimeDetectionDetail) => void;
|
|
26
42
|
blockAgents?: boolean;
|
|
27
43
|
allowedAgents?: string[];
|
|
28
44
|
debug?: boolean;
|
|
29
45
|
}
|
|
30
|
-
declare class
|
|
46
|
+
declare class EdgeRuntimeCheckpoint {
|
|
31
47
|
private wasmInstance;
|
|
32
48
|
private wasmMemory;
|
|
33
49
|
private config;
|
|
34
50
|
private initialized;
|
|
35
|
-
constructor(config?:
|
|
51
|
+
constructor(config?: EdgeCheckpointConfig);
|
|
36
52
|
init(wasmModule?: WebAssembly.Module): Promise<void>;
|
|
37
53
|
private readString;
|
|
38
54
|
private writeString;
|
|
39
|
-
detect(request: NextRequest): Promise<
|
|
55
|
+
detect(request: NextRequest): Promise<EdgeRuntimeDetectionDetail>;
|
|
40
56
|
private patternDetection;
|
|
41
57
|
isInitialized(): boolean;
|
|
42
58
|
getVerificationMethod(): 'cryptographic' | 'pattern';
|
|
43
59
|
}
|
|
44
60
|
/**
|
|
45
|
-
* Factory function to create
|
|
61
|
+
* Factory function to create a Checkpoint instance for Edge Runtime.
|
|
46
62
|
*/
|
|
47
|
-
declare function
|
|
48
|
-
declare function
|
|
63
|
+
declare function createEdgeCheckpoint(config?: EdgeCheckpointConfig): EdgeRuntimeCheckpoint;
|
|
64
|
+
declare function getDefaultEdgeCheckpoint(config?: EdgeCheckpointConfig): EdgeRuntimeCheckpoint;
|
|
65
|
+
|
|
66
|
+
/** @deprecated Renamed to {@link EdgeCheckpointConfig}. */
|
|
67
|
+
type AgentShieldConfig = EdgeCheckpointConfig;
|
|
68
|
+
/** @deprecated Renamed to {@link EdgeRuntimeCheckpoint}. */
|
|
69
|
+
declare const EdgeRuntimeAgentShield: typeof EdgeRuntimeCheckpoint;
|
|
70
|
+
/** @deprecated Renamed to {@link createEdgeCheckpoint}. */
|
|
71
|
+
declare const createEdgeAgentShield: typeof createEdgeCheckpoint;
|
|
72
|
+
/** @deprecated Renamed to {@link getDefaultEdgeCheckpoint}. */
|
|
73
|
+
declare const getDefaultAgentShield: typeof getDefaultEdgeCheckpoint;
|
|
49
74
|
|
|
50
|
-
export { type AgentShieldConfig, type DetectionResult, type WasmModule, createEdgeAgentShield, getDefaultAgentShield };
|
|
75
|
+
export { type AgentShieldConfig, type EdgeRuntimeDetectionDetail as DetectionResult, type EdgeCheckpointConfig, EdgeRuntimeAgentShield, EdgeRuntimeCheckpoint, type EdgeRuntimeDetectionDetail, type WasmModule, createEdgeAgentShield, createEdgeCheckpoint, getDefaultAgentShield, getDefaultEdgeCheckpoint };
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
var checkpointShared = require('@kya-os/checkpoint-shared');
|
|
4
|
+
|
|
5
|
+
// src/edge-runtime-loader.ts
|
|
6
|
+
|
|
3
7
|
// src/utils.ts
|
|
4
8
|
function getClientIp(request) {
|
|
5
9
|
const forwardedFor = request.headers.get("x-forwarded-for");
|
|
@@ -17,7 +21,49 @@ function getClientIp(request) {
|
|
|
17
21
|
}
|
|
18
22
|
|
|
19
23
|
// src/edge-runtime-loader.ts
|
|
20
|
-
var
|
|
24
|
+
var SUSPICIOUS_HEADER_PREFIXES = ["x-openai-", "x-anthropic-", "x-ai-", "x-llm-", "x-gpt-"];
|
|
25
|
+
function escapeRegex(s) {
|
|
26
|
+
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
27
|
+
}
|
|
28
|
+
var TOKEN_REGEX_CACHE = /* @__PURE__ */ new Map();
|
|
29
|
+
function tokenRegex(token) {
|
|
30
|
+
const cached = TOKEN_REGEX_CACHE.get(token);
|
|
31
|
+
if (cached) return cached;
|
|
32
|
+
const regex = new RegExp(`\\b${escapeRegex(token)}\\b`, "i");
|
|
33
|
+
TOKEN_REGEX_CACHE.set(token, regex);
|
|
34
|
+
return regex;
|
|
35
|
+
}
|
|
36
|
+
function readStringField(value) {
|
|
37
|
+
return typeof value === "string" && value.length > 0 ? value : void 0;
|
|
38
|
+
}
|
|
39
|
+
function readNumberField(value) {
|
|
40
|
+
return typeof value === "number" && Number.isFinite(value) ? value : void 0;
|
|
41
|
+
}
|
|
42
|
+
function readBooleanField(value) {
|
|
43
|
+
return typeof value === "boolean" ? value : void 0;
|
|
44
|
+
}
|
|
45
|
+
function readStringArrayField(value) {
|
|
46
|
+
return Array.isArray(value) ? value.filter((item) => typeof item === "string") : [];
|
|
47
|
+
}
|
|
48
|
+
function normalizeEngineConfidence(value) {
|
|
49
|
+
if (value === void 0) return 0;
|
|
50
|
+
return Math.round(Math.max(0, Math.min(100, value)));
|
|
51
|
+
}
|
|
52
|
+
function matchKnownAgent(lowercasedUa) {
|
|
53
|
+
let best = null;
|
|
54
|
+
for (const entry of checkpointShared.KNOWN_AGENT_PATTERNS) {
|
|
55
|
+
for (const pattern of entry.patterns) {
|
|
56
|
+
if (tokenRegex(pattern).test(lowercasedUa)) {
|
|
57
|
+
if (!best || entry.confidence > best.confidence) {
|
|
58
|
+
best = { name: entry.name, confidence: entry.confidence };
|
|
59
|
+
}
|
|
60
|
+
break;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
return best;
|
|
65
|
+
}
|
|
66
|
+
var EdgeRuntimeCheckpoint = class {
|
|
21
67
|
wasmInstance = null;
|
|
22
68
|
wasmMemory = null;
|
|
23
69
|
config;
|
|
@@ -49,7 +95,7 @@ var EdgeRuntimeAgentShield = class {
|
|
|
49
95
|
this.wasmInstance = await WebAssembly.instantiate(module, imports);
|
|
50
96
|
this.initialized = true;
|
|
51
97
|
if (this.config.debug) {
|
|
52
|
-
console.log("\u2705
|
|
98
|
+
console.log("\u2705 Checkpoint WASM initialized in Edge Runtime");
|
|
53
99
|
}
|
|
54
100
|
} catch (error) {
|
|
55
101
|
console.warn("\u26A0\uFE0F WASM initialization failed, using pattern detection:", error);
|
|
@@ -108,10 +154,21 @@ var EdgeRuntimeAgentShield = class {
|
|
|
108
154
|
exports$1.__wbindgen_free(resultPtr, offset);
|
|
109
155
|
}
|
|
110
156
|
const result = JSON.parse(resultStr);
|
|
157
|
+
const agent = readStringField(result.agent) ?? readStringField(result.agentName);
|
|
158
|
+
const confidence = normalizeEngineConfidence(readNumberField(result.confidence));
|
|
111
159
|
const detection = {
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
160
|
+
isAgent: readBooleanField(result.isAgent) ?? readBooleanField(result.is_agent) ?? false,
|
|
161
|
+
isAiCrawler: readBooleanField(result.isAiCrawler) ?? readBooleanField(result.is_ai_crawler),
|
|
162
|
+
confidence,
|
|
163
|
+
detectionClass: agent ? { type: "AiAgent", agentType: agent } : { type: confidence > 0 ? "IncompleteData" : "Human" },
|
|
164
|
+
detectedAgent: agent ? { type: "ai_agent", name: agent } : void 0,
|
|
165
|
+
agent,
|
|
166
|
+
agentType: agent,
|
|
167
|
+
reasons: readStringArrayField(result.reasons),
|
|
168
|
+
signals: [],
|
|
169
|
+
verificationMethod: "signature",
|
|
170
|
+
riskLevel: readStringField(result.riskLevel) ?? readStringField(result.risk_level),
|
|
171
|
+
timestamp: Date.now()
|
|
115
172
|
};
|
|
116
173
|
if (this.config.onAgentDetected && detection.isAgent) {
|
|
117
174
|
this.config.onAgentDetected(detection);
|
|
@@ -126,65 +183,44 @@ var EdgeRuntimeAgentShield = class {
|
|
|
126
183
|
return this.patternDetection(metadata);
|
|
127
184
|
}
|
|
128
185
|
patternDetection(metadata) {
|
|
129
|
-
const
|
|
130
|
-
const patterns = [
|
|
131
|
-
// High confidence - explicit AI identifiers
|
|
132
|
-
{ pattern: /chatgpt-user/i, name: "ChatGPT", confidence: 0.95 },
|
|
133
|
-
{ pattern: /claude-web/i, name: "Claude", confidence: 0.95 },
|
|
134
|
-
{ pattern: /claude-user/i, name: "Claude", confidence: 0.95 },
|
|
135
|
-
{ pattern: /gpt-crawler/i, name: "GPT Crawler", confidence: 0.95 },
|
|
136
|
-
{ pattern: /perplexitybot/i, name: "Perplexity", confidence: 0.95 },
|
|
137
|
-
{ pattern: /perplexity-user/i, name: "Perplexity", confidence: 0.95 },
|
|
138
|
-
{ pattern: /perplexity-ai/i, name: "Perplexity", confidence: 0.95 },
|
|
139
|
-
// Medium-high confidence - company identifiers
|
|
140
|
-
{ pattern: /anthropic/i, name: "Anthropic", confidence: 0.9 },
|
|
141
|
-
{ pattern: /openai/i, name: "OpenAI", confidence: 0.9 },
|
|
142
|
-
// Medium confidence - product names
|
|
143
|
-
{ pattern: /copilot/i, name: "GitHub Copilot", confidence: 0.85 },
|
|
144
|
-
{ pattern: /bard/i, name: "Google Bard", confidence: 0.85 },
|
|
145
|
-
{ pattern: /gemini/i, name: "Google Gemini", confidence: 0.85 },
|
|
146
|
-
{ pattern: /perplexity/i, name: "Perplexity", confidence: 0.85 },
|
|
147
|
-
// Fallback
|
|
148
|
-
// UA-context-only pattern — `\b` is the correct anchor for UA
|
|
149
|
-
// substring matching. PR #2591's tightened `[\s;()]` form
|
|
150
|
-
// dropped `/`, which broke legit UA shapes like `you.com/1.0`
|
|
151
|
-
// and `+https://you.com)` (cursor catch on the same PR — see
|
|
152
|
-
// sibling agents.ts comment for the full rationale + UA shapes).
|
|
153
|
-
// CodeQL js/regex/missing-regexp-anchor speculates about URL
|
|
154
|
-
// misuse; this codebase only applies the pattern to UA strings.
|
|
155
|
-
{ pattern: /\byou\.com\b/i, name: "You.com", confidence: 0.8 },
|
|
156
|
-
{ pattern: /\bphind\b/i, name: "Phind", confidence: 0.8 }
|
|
157
|
-
];
|
|
158
|
-
const suspiciousHeaders = ["x-openai-", "x-anthropic-", "x-ai-", "x-llm-", "x-gpt-"];
|
|
186
|
+
const lowercasedUa = metadata.userAgent.toLowerCase();
|
|
159
187
|
let headerBoost = 0;
|
|
160
|
-
for (const
|
|
161
|
-
if (
|
|
188
|
+
for (const key of Object.keys(metadata.headers)) {
|
|
189
|
+
if (SUSPICIOUS_HEADER_PREFIXES.some((prefix) => key.toLowerCase().startsWith(prefix))) {
|
|
162
190
|
headerBoost = 0.1;
|
|
163
191
|
break;
|
|
164
192
|
}
|
|
165
193
|
}
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
194
|
+
const match = matchKnownAgent(lowercasedUa);
|
|
195
|
+
if (match) {
|
|
196
|
+
const finalConfidence = Math.min(match.confidence + headerBoost, 1);
|
|
197
|
+
const confidence = Math.round(finalConfidence * 100);
|
|
198
|
+
const result = {
|
|
199
|
+
isAgent: true,
|
|
200
|
+
confidence,
|
|
201
|
+
detectionClass: { type: "AiAgent", agentType: match.name },
|
|
202
|
+
detectedAgent: { type: "ai_agent", name: match.name },
|
|
203
|
+
agent: match.name,
|
|
204
|
+
agentType: match.name,
|
|
205
|
+
reasons: [`known_pattern:${match.name.toLowerCase()}`],
|
|
206
|
+
signals: [],
|
|
207
|
+
verificationMethod: "pattern",
|
|
208
|
+
riskLevel: finalConfidence > 0.9 ? "high" : "medium",
|
|
209
|
+
timestamp: Date.now()
|
|
210
|
+
};
|
|
211
|
+
if (this.config.onAgentDetected) {
|
|
212
|
+
this.config.onAgentDetected(result);
|
|
181
213
|
}
|
|
214
|
+
return result;
|
|
182
215
|
}
|
|
183
216
|
return {
|
|
184
217
|
isAgent: false,
|
|
185
|
-
confidence: 0
|
|
218
|
+
confidence: 0,
|
|
219
|
+
detectionClass: { type: "Human" },
|
|
220
|
+
reasons: ["No known agent indicators matched"],
|
|
221
|
+
signals: [],
|
|
186
222
|
verificationMethod: "pattern",
|
|
187
|
-
timestamp:
|
|
223
|
+
timestamp: Date.now()
|
|
188
224
|
};
|
|
189
225
|
}
|
|
190
226
|
isInitialized() {
|
|
@@ -194,16 +230,23 @@ var EdgeRuntimeAgentShield = class {
|
|
|
194
230
|
return this.wasmInstance ? "cryptographic" : "pattern";
|
|
195
231
|
}
|
|
196
232
|
};
|
|
197
|
-
function
|
|
198
|
-
return new
|
|
233
|
+
function createEdgeCheckpoint(config) {
|
|
234
|
+
return new EdgeRuntimeCheckpoint(config);
|
|
199
235
|
}
|
|
200
236
|
var defaultInstance = null;
|
|
201
|
-
function
|
|
237
|
+
function getDefaultEdgeCheckpoint(config) {
|
|
202
238
|
if (!defaultInstance) {
|
|
203
|
-
defaultInstance = new
|
|
239
|
+
defaultInstance = new EdgeRuntimeCheckpoint(config);
|
|
204
240
|
}
|
|
205
241
|
return defaultInstance;
|
|
206
242
|
}
|
|
243
|
+
var EdgeRuntimeAgentShield = EdgeRuntimeCheckpoint;
|
|
244
|
+
var createEdgeAgentShield = createEdgeCheckpoint;
|
|
245
|
+
var getDefaultAgentShield = getDefaultEdgeCheckpoint;
|
|
207
246
|
|
|
247
|
+
exports.EdgeRuntimeAgentShield = EdgeRuntimeAgentShield;
|
|
248
|
+
exports.EdgeRuntimeCheckpoint = EdgeRuntimeCheckpoint;
|
|
208
249
|
exports.createEdgeAgentShield = createEdgeAgentShield;
|
|
250
|
+
exports.createEdgeCheckpoint = createEdgeCheckpoint;
|
|
209
251
|
exports.getDefaultAgentShield = getDefaultAgentShield;
|
|
252
|
+
exports.getDefaultEdgeCheckpoint = getDefaultEdgeCheckpoint;
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import { KNOWN_AGENT_PATTERNS } from '@kya-os/checkpoint-shared';
|
|
2
|
+
|
|
3
|
+
// src/edge-runtime-loader.ts
|
|
4
|
+
|
|
1
5
|
// src/utils.ts
|
|
2
6
|
function getClientIp(request) {
|
|
3
7
|
const forwardedFor = request.headers.get("x-forwarded-for");
|
|
@@ -15,7 +19,49 @@ function getClientIp(request) {
|
|
|
15
19
|
}
|
|
16
20
|
|
|
17
21
|
// src/edge-runtime-loader.ts
|
|
18
|
-
var
|
|
22
|
+
var SUSPICIOUS_HEADER_PREFIXES = ["x-openai-", "x-anthropic-", "x-ai-", "x-llm-", "x-gpt-"];
|
|
23
|
+
function escapeRegex(s) {
|
|
24
|
+
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
25
|
+
}
|
|
26
|
+
var TOKEN_REGEX_CACHE = /* @__PURE__ */ new Map();
|
|
27
|
+
function tokenRegex(token) {
|
|
28
|
+
const cached = TOKEN_REGEX_CACHE.get(token);
|
|
29
|
+
if (cached) return cached;
|
|
30
|
+
const regex = new RegExp(`\\b${escapeRegex(token)}\\b`, "i");
|
|
31
|
+
TOKEN_REGEX_CACHE.set(token, regex);
|
|
32
|
+
return regex;
|
|
33
|
+
}
|
|
34
|
+
function readStringField(value) {
|
|
35
|
+
return typeof value === "string" && value.length > 0 ? value : void 0;
|
|
36
|
+
}
|
|
37
|
+
function readNumberField(value) {
|
|
38
|
+
return typeof value === "number" && Number.isFinite(value) ? value : void 0;
|
|
39
|
+
}
|
|
40
|
+
function readBooleanField(value) {
|
|
41
|
+
return typeof value === "boolean" ? value : void 0;
|
|
42
|
+
}
|
|
43
|
+
function readStringArrayField(value) {
|
|
44
|
+
return Array.isArray(value) ? value.filter((item) => typeof item === "string") : [];
|
|
45
|
+
}
|
|
46
|
+
function normalizeEngineConfidence(value) {
|
|
47
|
+
if (value === void 0) return 0;
|
|
48
|
+
return Math.round(Math.max(0, Math.min(100, value)));
|
|
49
|
+
}
|
|
50
|
+
function matchKnownAgent(lowercasedUa) {
|
|
51
|
+
let best = null;
|
|
52
|
+
for (const entry of KNOWN_AGENT_PATTERNS) {
|
|
53
|
+
for (const pattern of entry.patterns) {
|
|
54
|
+
if (tokenRegex(pattern).test(lowercasedUa)) {
|
|
55
|
+
if (!best || entry.confidence > best.confidence) {
|
|
56
|
+
best = { name: entry.name, confidence: entry.confidence };
|
|
57
|
+
}
|
|
58
|
+
break;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
return best;
|
|
63
|
+
}
|
|
64
|
+
var EdgeRuntimeCheckpoint = class {
|
|
19
65
|
wasmInstance = null;
|
|
20
66
|
wasmMemory = null;
|
|
21
67
|
config;
|
|
@@ -47,7 +93,7 @@ var EdgeRuntimeAgentShield = class {
|
|
|
47
93
|
this.wasmInstance = await WebAssembly.instantiate(module, imports);
|
|
48
94
|
this.initialized = true;
|
|
49
95
|
if (this.config.debug) {
|
|
50
|
-
console.log("\u2705
|
|
96
|
+
console.log("\u2705 Checkpoint WASM initialized in Edge Runtime");
|
|
51
97
|
}
|
|
52
98
|
} catch (error) {
|
|
53
99
|
console.warn("\u26A0\uFE0F WASM initialization failed, using pattern detection:", error);
|
|
@@ -106,10 +152,21 @@ var EdgeRuntimeAgentShield = class {
|
|
|
106
152
|
exports$1.__wbindgen_free(resultPtr, offset);
|
|
107
153
|
}
|
|
108
154
|
const result = JSON.parse(resultStr);
|
|
155
|
+
const agent = readStringField(result.agent) ?? readStringField(result.agentName);
|
|
156
|
+
const confidence = normalizeEngineConfidence(readNumberField(result.confidence));
|
|
109
157
|
const detection = {
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
158
|
+
isAgent: readBooleanField(result.isAgent) ?? readBooleanField(result.is_agent) ?? false,
|
|
159
|
+
isAiCrawler: readBooleanField(result.isAiCrawler) ?? readBooleanField(result.is_ai_crawler),
|
|
160
|
+
confidence,
|
|
161
|
+
detectionClass: agent ? { type: "AiAgent", agentType: agent } : { type: confidence > 0 ? "IncompleteData" : "Human" },
|
|
162
|
+
detectedAgent: agent ? { type: "ai_agent", name: agent } : void 0,
|
|
163
|
+
agent,
|
|
164
|
+
agentType: agent,
|
|
165
|
+
reasons: readStringArrayField(result.reasons),
|
|
166
|
+
signals: [],
|
|
167
|
+
verificationMethod: "signature",
|
|
168
|
+
riskLevel: readStringField(result.riskLevel) ?? readStringField(result.risk_level),
|
|
169
|
+
timestamp: Date.now()
|
|
113
170
|
};
|
|
114
171
|
if (this.config.onAgentDetected && detection.isAgent) {
|
|
115
172
|
this.config.onAgentDetected(detection);
|
|
@@ -124,65 +181,44 @@ var EdgeRuntimeAgentShield = class {
|
|
|
124
181
|
return this.patternDetection(metadata);
|
|
125
182
|
}
|
|
126
183
|
patternDetection(metadata) {
|
|
127
|
-
const
|
|
128
|
-
const patterns = [
|
|
129
|
-
// High confidence - explicit AI identifiers
|
|
130
|
-
{ pattern: /chatgpt-user/i, name: "ChatGPT", confidence: 0.95 },
|
|
131
|
-
{ pattern: /claude-web/i, name: "Claude", confidence: 0.95 },
|
|
132
|
-
{ pattern: /claude-user/i, name: "Claude", confidence: 0.95 },
|
|
133
|
-
{ pattern: /gpt-crawler/i, name: "GPT Crawler", confidence: 0.95 },
|
|
134
|
-
{ pattern: /perplexitybot/i, name: "Perplexity", confidence: 0.95 },
|
|
135
|
-
{ pattern: /perplexity-user/i, name: "Perplexity", confidence: 0.95 },
|
|
136
|
-
{ pattern: /perplexity-ai/i, name: "Perplexity", confidence: 0.95 },
|
|
137
|
-
// Medium-high confidence - company identifiers
|
|
138
|
-
{ pattern: /anthropic/i, name: "Anthropic", confidence: 0.9 },
|
|
139
|
-
{ pattern: /openai/i, name: "OpenAI", confidence: 0.9 },
|
|
140
|
-
// Medium confidence - product names
|
|
141
|
-
{ pattern: /copilot/i, name: "GitHub Copilot", confidence: 0.85 },
|
|
142
|
-
{ pattern: /bard/i, name: "Google Bard", confidence: 0.85 },
|
|
143
|
-
{ pattern: /gemini/i, name: "Google Gemini", confidence: 0.85 },
|
|
144
|
-
{ pattern: /perplexity/i, name: "Perplexity", confidence: 0.85 },
|
|
145
|
-
// Fallback
|
|
146
|
-
// UA-context-only pattern — `\b` is the correct anchor for UA
|
|
147
|
-
// substring matching. PR #2591's tightened `[\s;()]` form
|
|
148
|
-
// dropped `/`, which broke legit UA shapes like `you.com/1.0`
|
|
149
|
-
// and `+https://you.com)` (cursor catch on the same PR — see
|
|
150
|
-
// sibling agents.ts comment for the full rationale + UA shapes).
|
|
151
|
-
// CodeQL js/regex/missing-regexp-anchor speculates about URL
|
|
152
|
-
// misuse; this codebase only applies the pattern to UA strings.
|
|
153
|
-
{ pattern: /\byou\.com\b/i, name: "You.com", confidence: 0.8 },
|
|
154
|
-
{ pattern: /\bphind\b/i, name: "Phind", confidence: 0.8 }
|
|
155
|
-
];
|
|
156
|
-
const suspiciousHeaders = ["x-openai-", "x-anthropic-", "x-ai-", "x-llm-", "x-gpt-"];
|
|
184
|
+
const lowercasedUa = metadata.userAgent.toLowerCase();
|
|
157
185
|
let headerBoost = 0;
|
|
158
|
-
for (const
|
|
159
|
-
if (
|
|
186
|
+
for (const key of Object.keys(metadata.headers)) {
|
|
187
|
+
if (SUSPICIOUS_HEADER_PREFIXES.some((prefix) => key.toLowerCase().startsWith(prefix))) {
|
|
160
188
|
headerBoost = 0.1;
|
|
161
189
|
break;
|
|
162
190
|
}
|
|
163
191
|
}
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
192
|
+
const match = matchKnownAgent(lowercasedUa);
|
|
193
|
+
if (match) {
|
|
194
|
+
const finalConfidence = Math.min(match.confidence + headerBoost, 1);
|
|
195
|
+
const confidence = Math.round(finalConfidence * 100);
|
|
196
|
+
const result = {
|
|
197
|
+
isAgent: true,
|
|
198
|
+
confidence,
|
|
199
|
+
detectionClass: { type: "AiAgent", agentType: match.name },
|
|
200
|
+
detectedAgent: { type: "ai_agent", name: match.name },
|
|
201
|
+
agent: match.name,
|
|
202
|
+
agentType: match.name,
|
|
203
|
+
reasons: [`known_pattern:${match.name.toLowerCase()}`],
|
|
204
|
+
signals: [],
|
|
205
|
+
verificationMethod: "pattern",
|
|
206
|
+
riskLevel: finalConfidence > 0.9 ? "high" : "medium",
|
|
207
|
+
timestamp: Date.now()
|
|
208
|
+
};
|
|
209
|
+
if (this.config.onAgentDetected) {
|
|
210
|
+
this.config.onAgentDetected(result);
|
|
179
211
|
}
|
|
212
|
+
return result;
|
|
180
213
|
}
|
|
181
214
|
return {
|
|
182
215
|
isAgent: false,
|
|
183
|
-
confidence: 0
|
|
216
|
+
confidence: 0,
|
|
217
|
+
detectionClass: { type: "Human" },
|
|
218
|
+
reasons: ["No known agent indicators matched"],
|
|
219
|
+
signals: [],
|
|
184
220
|
verificationMethod: "pattern",
|
|
185
|
-
timestamp:
|
|
221
|
+
timestamp: Date.now()
|
|
186
222
|
};
|
|
187
223
|
}
|
|
188
224
|
isInitialized() {
|
|
@@ -192,15 +228,18 @@ var EdgeRuntimeAgentShield = class {
|
|
|
192
228
|
return this.wasmInstance ? "cryptographic" : "pattern";
|
|
193
229
|
}
|
|
194
230
|
};
|
|
195
|
-
function
|
|
196
|
-
return new
|
|
231
|
+
function createEdgeCheckpoint(config) {
|
|
232
|
+
return new EdgeRuntimeCheckpoint(config);
|
|
197
233
|
}
|
|
198
234
|
var defaultInstance = null;
|
|
199
|
-
function
|
|
235
|
+
function getDefaultEdgeCheckpoint(config) {
|
|
200
236
|
if (!defaultInstance) {
|
|
201
|
-
defaultInstance = new
|
|
237
|
+
defaultInstance = new EdgeRuntimeCheckpoint(config);
|
|
202
238
|
}
|
|
203
239
|
return defaultInstance;
|
|
204
240
|
}
|
|
241
|
+
var EdgeRuntimeAgentShield = EdgeRuntimeCheckpoint;
|
|
242
|
+
var createEdgeAgentShield = createEdgeCheckpoint;
|
|
243
|
+
var getDefaultAgentShield = getDefaultEdgeCheckpoint;
|
|
205
244
|
|
|
206
|
-
export { createEdgeAgentShield, getDefaultAgentShield };
|
|
245
|
+
export { EdgeRuntimeAgentShield, EdgeRuntimeCheckpoint, createEdgeAgentShield, createEdgeCheckpoint, getDefaultAgentShield, getDefaultEdgeCheckpoint };
|