@nekzus/liop 1.3.0-alpha.1 → 2.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -17
- package/dist/gateway/router.d.ts +7 -0
- package/dist/gateway/router.js +21 -3
- package/dist/sandbox/wasi.js +24 -0
- package/dist/server/index.d.ts +23 -1
- package/dist/server/index.js +140 -30
- package/dist/server/ner-scanner.d.ts +29 -0
- package/dist/server/ner-scanner.js +141 -0
- package/dist/server/pii.d.ts +27 -1
- package/dist/server/pii.js +167 -5
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -188,9 +188,24 @@ new LiopServer(
|
|
|
188
188
|
serverInfo: { name: string; version: string },
|
|
189
189
|
config?: {
|
|
190
190
|
capabilities?: Record<string, unknown>;
|
|
191
|
+
workerPool?: {
|
|
192
|
+
enabled?: boolean; // Enable OS-thread sandboxing (default: true)
|
|
193
|
+
maxThreads?: number; // Max worker threads (default: CPU count)
|
|
194
|
+
maxHeapMb?: number; // V8 heap limit per worker (default: 64, env: LIOP_WORKER_MAX_HEAP_MB)
|
|
195
|
+
};
|
|
191
196
|
security?: {
|
|
192
197
|
piiPatterns?: PiiRule[]; // Regex/validator rules for PII detection
|
|
193
198
|
forbiddenKeys?: string[]; // Keys stripped from outgoing responses
|
|
199
|
+
enableNerScanning?: boolean; // NLP entity detection via compromise (default: false)
|
|
200
|
+
rateLimit?: { // Sliding window rate limiter per tool
|
|
201
|
+
maxPerWindow?: number; // Max calls per window (default: 30)
|
|
202
|
+
windowMs?: number; // Window duration in ms (default: 60000)
|
|
203
|
+
};
|
|
204
|
+
};
|
|
205
|
+
taxonomy?: { // Data domain classification
|
|
206
|
+
domain?: string; // e.g., "finance", "healthcare"
|
|
207
|
+
clearanceTier?: string; // e.g., "tier-0", "tier-1"
|
|
208
|
+
executionTypes?: string[]; // e.g., ["aggregation", "analytics"]
|
|
194
209
|
};
|
|
195
210
|
}
|
|
196
211
|
)
|
|
@@ -243,22 +258,31 @@ await bridge.connect();
|
|
|
243
258
|
### The Shield — Multi-Layer Defense
|
|
244
259
|
|
|
245
260
|
```
|
|
246
|
-
|
|
247
|
-
│ Layer 1: Guardian AST (Zero-Time Static Analysis)
|
|
248
|
-
│ Blocks: require, import(), fs, eval, fetch,
|
|
249
|
-
│
|
|
250
|
-
|
|
251
|
-
│ Layer 2: WASI Sandbox (V8 Isolate)
|
|
252
|
-
│
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
│
|
|
256
|
-
│
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
│
|
|
260
|
-
│
|
|
261
|
-
|
|
261
|
+
┌───────────────────────────────────────────────────────────┐
|
|
262
|
+
│ Layer 1: Guardian AST (Zero-Time Static Analysis) │
|
|
263
|
+
│ Blocks: require, import(), fs, eval, fetch, process, │
|
|
264
|
+
│ global, __proto__, XMLHttpRequest • 128 import cap │
|
|
265
|
+
├───────────────────────────────────────────────────────────┤
|
|
266
|
+
│ Layer 2: WASI Sandbox (V8 Isolate) │
|
|
267
|
+
│ 25 poisoned globals (incl. Date, TypedArrays) • │
|
|
268
|
+
│ CPU Fuel limits • 5s timeout • maxHeapMb (64MB default) │
|
|
269
|
+
├───────────────────────────────────────────────────────────┤
|
|
270
|
+
│ Layer 3: Prototype Pollution Defense │
|
|
271
|
+
│ Object.freeze() on 6 core prototypes (Object, Array, │
|
|
272
|
+
│ String, Number, Boolean, Function) inside sandbox IIFE │
|
|
273
|
+
├───────────────────────────────────────────────────────────┤
|
|
274
|
+
│ Layer 4: PII Shield (Egress Filter) │
|
|
275
|
+
│ Scans output for Email, SSN, Credit Card, IP, IBAN, │
|
|
276
|
+
│ Passport MRZ • Strips forbidden keys • NER opt-in │
|
|
277
|
+
├───────────────────────────────────────────────────────────┤
|
|
278
|
+
│ Layer 5: Aggregation-First Policy │
|
|
279
|
+
│ Blocks raw row export • maxOutputRows (default: 10) • │
|
|
280
|
+
│ Conditional error: detailed (dev) vs opaque (production) │
|
|
281
|
+
├───────────────────────────────────────────────────────────┤
|
|
282
|
+
│ Layer 6: ZK-Receipt (Integrity Verification) │
|
|
283
|
+
│ SHA-256 ImageID + HMAC-SHA256 Seal (Kyber768-derived) │
|
|
284
|
+
│ LiopMcpBridge verifies before forwarding to LLM │
|
|
285
|
+
└───────────────────────────────────────────────────────────┘
|
|
262
286
|
```
|
|
263
287
|
|
|
264
288
|
### PII Patterns
|
|
@@ -390,7 +414,7 @@ await server.connectToMesh();
|
|
|
390
414
|
|
|
391
415
|
This package is continuously tested across multiple platforms and Node.js versions via CI/CD:
|
|
392
416
|
|
|
393
|
-
- **
|
|
417
|
+
- **227+ tests** spanning unit, integration, conformance, adversarial, and crossnet suites
|
|
394
418
|
- **Multi-OS matrix:** Ubuntu, Windows, macOS
|
|
395
419
|
- **Node.js versions:** 22.x, 24.x
|
|
396
420
|
- **Code quality:** Enforced by [Biome.js](https://biomejs.dev/) (linting + formatting)
|
package/dist/gateway/router.d.ts
CHANGED
|
@@ -62,6 +62,13 @@ export declare class LiopMcpRouter {
|
|
|
62
62
|
* by searching the manifest cache. Supports exact names and suffixed names.
|
|
63
63
|
*/
|
|
64
64
|
private resolveManifestTarget;
|
|
65
|
+
/**
|
|
66
|
+
* Redacts a PeerID for external-facing diagnostics.
|
|
67
|
+
* LIOP_DIAGNOSTIC_LEVEL controls verbosity:
|
|
68
|
+
* - "redacted" (default): masked as "***" followed by the last 8 chars
|
|
69
|
+
* - "full": complete PeerID (development only)
|
|
70
|
+
*/
|
|
71
|
+
private redactPeerId;
|
|
65
72
|
private transcodeMcpToLiop;
|
|
66
73
|
private routeToRemoteProvider;
|
|
67
74
|
private performTranscoding;
|
package/dist/gateway/router.js
CHANGED
|
@@ -72,6 +72,11 @@ export class LiopMcpRouter {
|
|
|
72
72
|
log.info(`[LIOP-Router] Failed to announce manifest: ${err instanceof Error ? err.message : String(err)}`);
|
|
73
73
|
});
|
|
74
74
|
}
|
|
75
|
+
// [OWASP-A01] Startup warning when diagnostic level exposes full topology
|
|
76
|
+
if (process.env.LIOP_DIAGNOSTIC_LEVEL === "full") {
|
|
77
|
+
process.stderr.write("⚠️ [LIOP-Security] Diagnostic level set to FULL — " +
|
|
78
|
+
"PeerIDs and network topology are exposed. Do NOT use in production.\n");
|
|
79
|
+
}
|
|
75
80
|
}
|
|
76
81
|
shouldSkipManifestQuery(peerId) {
|
|
77
82
|
const state = this.manifestFailureState.get(peerId);
|
|
@@ -691,6 +696,18 @@ export class LiopMcpRouter {
|
|
|
691
696
|
}
|
|
692
697
|
return null;
|
|
693
698
|
}
|
|
699
|
+
/**
|
|
700
|
+
* Redacts a PeerID for external-facing diagnostics.
|
|
701
|
+
* LIOP_DIAGNOSTIC_LEVEL controls verbosity:
|
|
702
|
+
* - "redacted" (default): masked as "***" followed by the last 8 chars
|
|
703
|
+
* - "full": complete PeerID (development only)
|
|
704
|
+
*/
|
|
705
|
+
redactPeerId(peerId) {
|
|
706
|
+
const level = process.env.LIOP_DIAGNOSTIC_LEVEL || "redacted";
|
|
707
|
+
if (level === "full")
|
|
708
|
+
return peerId;
|
|
709
|
+
return `***${peerId.slice(-8)}`;
|
|
710
|
+
}
|
|
694
711
|
// biome-ignore lint/suspicious/noExplicitAny: MCP JSON-RPC params/id are polymorphic
|
|
695
712
|
async transcodeMcpToLiop(id, params) {
|
|
696
713
|
const toolName = params.name;
|
|
@@ -724,16 +741,17 @@ export class LiopMcpRouter {
|
|
|
724
741
|
.map((addr) => {
|
|
725
742
|
const parts = addr.split("/");
|
|
726
743
|
const id = parts[parts.length - 1];
|
|
727
|
-
return ` • ${id ? id.slice(-8) : "Unknown"} (
|
|
744
|
+
return ` • ${id ? id.slice(-8) : "Unknown"} (bootstrap)`;
|
|
728
745
|
})
|
|
729
746
|
.join("\n");
|
|
730
747
|
const routingTableSize = this.meshNode
|
|
731
748
|
? // biome-ignore lint/suspicious/noExplicitAny: access internal nodes
|
|
732
749
|
this.meshNode.getRoutingTableSize()
|
|
733
750
|
: 0;
|
|
734
|
-
const
|
|
751
|
+
const rawPeerId = this.meshNode?.getPeerId() || "Offline";
|
|
752
|
+
const localPeerId = rawPeerId === "Offline" ? rawPeerId : this.redactPeerId(rawPeerId);
|
|
735
753
|
const cachedToolList = Array.from(this.manifestCache.entries())
|
|
736
|
-
.flatMap(([peerId, { manifest }]) => manifest.tools.map((t) => ` • ${t.name} (from origin: ${peerId})`))
|
|
754
|
+
.flatMap(([peerId, { manifest }]) => manifest.tools.map((t) => ` • ${t.name} (from origin: ${this.redactPeerId(peerId)})`))
|
|
737
755
|
.join("\n");
|
|
738
756
|
const statusText = [
|
|
739
757
|
`LIOP Mesh Status: ${meshState === "Active" ? "Active" : "Offline"}`,
|
package/dist/sandbox/wasi.js
CHANGED
|
@@ -117,6 +117,23 @@ export class WasiSandbox {
|
|
|
117
117
|
sandboxEnv.eval = undefined;
|
|
118
118
|
sandboxEnv.Function = undefined;
|
|
119
119
|
sandboxEnv.SharedArrayBuffer = undefined;
|
|
120
|
+
sandboxEnv.Date = undefined;
|
|
121
|
+
// [DoS Defense] Block off-heap memory allocation vectors.
|
|
122
|
+
// Logic-on-Origin operates on JSON data (env.records) — binary buffers
|
|
123
|
+
// serve no legitimate purpose and enable memory exhaustion DoS.
|
|
124
|
+
// (Uint8Array(2GB) bypassed Piscina's maxOldGenerationSizeMb limit)
|
|
125
|
+
sandboxEnv.ArrayBuffer = undefined;
|
|
126
|
+
sandboxEnv.Uint8Array = undefined;
|
|
127
|
+
sandboxEnv.Int8Array = undefined;
|
|
128
|
+
sandboxEnv.Uint16Array = undefined;
|
|
129
|
+
sandboxEnv.Int16Array = undefined;
|
|
130
|
+
sandboxEnv.Uint32Array = undefined;
|
|
131
|
+
sandboxEnv.Int32Array = undefined;
|
|
132
|
+
sandboxEnv.Float32Array = undefined;
|
|
133
|
+
sandboxEnv.Float64Array = undefined;
|
|
134
|
+
sandboxEnv.BigInt64Array = undefined;
|
|
135
|
+
sandboxEnv.BigUint64Array = undefined;
|
|
136
|
+
sandboxEnv.DataView = undefined;
|
|
120
137
|
// Inject strictly monitored globals
|
|
121
138
|
sandboxEnv.records = JSON.parse(JSON.stringify(records)); // Deep copy safety
|
|
122
139
|
sandboxEnv.env = JSON.parse(JSON.stringify(env));
|
|
@@ -155,6 +172,13 @@ export class WasiSandbox {
|
|
|
155
172
|
const scriptCode = `
|
|
156
173
|
(function() {
|
|
157
174
|
try {
|
|
175
|
+
Object.freeze(Object.prototype);
|
|
176
|
+
Object.freeze(Array.prototype);
|
|
177
|
+
Object.freeze(String.prototype);
|
|
178
|
+
Object.freeze(Number.prototype);
|
|
179
|
+
Object.freeze(Boolean.prototype);
|
|
180
|
+
Object.freeze(Object.getPrototypeOf(function(){}));
|
|
181
|
+
|
|
158
182
|
${processedLogic}
|
|
159
183
|
if (typeof liop_main === 'function') {
|
|
160
184
|
return liop_main(env);
|
package/dist/server/index.d.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
import { MeshNode } from "../mesh/node.js";
|
|
3
3
|
import type { CallToolRequest, CallToolResult, GetPromptRequest, GetPromptResult, Prompt, Resource, ServerInfo, Tool } from "../types.js";
|
|
4
|
+
import { NerScanner } from "./ner-scanner.js";
|
|
4
5
|
import { PII_PATTERNS, PII_PRESETS, type PiiRule, PiiScanner } from "./pii.js";
|
|
5
|
-
export { PII_PATTERNS, PII_PRESETS, type PiiRule, PiiScanner };
|
|
6
|
+
export { NerScanner, PII_PATTERNS, PII_PRESETS, type PiiRule, PiiScanner };
|
|
6
7
|
export type ToolHandler<T extends z.ZodRawShape = z.ZodRawShape> = (args: z.infer<z.ZodObject<T>>, extra: {
|
|
7
8
|
signal?: AbortSignal;
|
|
8
9
|
}) => Promise<CallToolResult>;
|
|
@@ -13,10 +14,21 @@ export interface LiopServerOptions {
|
|
|
13
14
|
minThreads?: number;
|
|
14
15
|
maxThreads?: number;
|
|
15
16
|
idleTimeout?: number;
|
|
17
|
+
/** Max heap memory per worker in MB (default: 64). Prevents heap bomb DoS. */
|
|
18
|
+
maxHeapMb?: number;
|
|
16
19
|
};
|
|
17
20
|
security?: {
|
|
18
21
|
piiPatterns?: PiiRule[];
|
|
19
22
|
forbiddenKeys?: string[];
|
|
23
|
+
/** Enable NLP-based Named Entity Recognition scanning on output values. */
|
|
24
|
+
enableNerScanning?: boolean;
|
|
25
|
+
/** Rate limiting configuration for tool calls (OWASP A01). */
|
|
26
|
+
rateLimit?: {
|
|
27
|
+
/** Maximum calls per window per tool (default: 30). */
|
|
28
|
+
maxPerWindow?: number;
|
|
29
|
+
/** Sliding window duration in milliseconds (default: 60000 = 1 min). */
|
|
30
|
+
windowMs?: number;
|
|
31
|
+
};
|
|
20
32
|
};
|
|
21
33
|
taxonomy?: {
|
|
22
34
|
domain?: string;
|
|
@@ -53,6 +65,9 @@ export declare class LiopServer {
|
|
|
53
65
|
private readonly CACHE_TTL_MS;
|
|
54
66
|
private readonly THROTTLE_THRESHOLD;
|
|
55
67
|
private readonly THROTTLE_COOLDOWN_MS;
|
|
68
|
+
private toolCallWindows;
|
|
69
|
+
private readonly toolCallMaxPerWindow;
|
|
70
|
+
private readonly toolCallWindowMs;
|
|
56
71
|
private tools;
|
|
57
72
|
private resources;
|
|
58
73
|
private prompts;
|
|
@@ -128,6 +143,13 @@ export declare class LiopServer {
|
|
|
128
143
|
* Manually invalidates the AST Logic Cache (e.g. for Zero-Day patches).
|
|
129
144
|
*/
|
|
130
145
|
clearAstCache(): void;
|
|
146
|
+
/**
|
|
147
|
+
* Sliding window rate limiter for tool call frequency.
|
|
148
|
+
* Prevents micro-query exfiltration attacks where an attacker
|
|
149
|
+
* makes hundreds of individually-legitimate calls to reconstruct
|
|
150
|
+
* the full dataset field by field. (OWASP A01)
|
|
151
|
+
*/
|
|
152
|
+
private checkToolCallRateLimit;
|
|
131
153
|
/**
|
|
132
154
|
* Emulates calling a tool (used locally or via LIOPMcpBridge)
|
|
133
155
|
*/
|
package/dist/server/index.js
CHANGED
|
@@ -10,8 +10,9 @@ import { zodToJsonSchema } from "zod-to-json-schema";
|
|
|
10
10
|
import { MeshNode } from "../mesh/node.js";
|
|
11
11
|
import { LiopRpcServer } from "../rpc/server.js";
|
|
12
12
|
import { log } from "../utils/logger.js";
|
|
13
|
+
import { NerScanner } from "./ner-scanner.js";
|
|
13
14
|
import { PII_PATTERNS, PII_PRESETS, PiiScanner } from "./pii.js";
|
|
14
|
-
export { PII_PATTERNS, PII_PRESETS, PiiScanner };
|
|
15
|
+
export { NerScanner, PII_PATTERNS, PII_PRESETS, PiiScanner };
|
|
15
16
|
/**
|
|
16
17
|
* When enabled, `payload` tools that are not LIOP v1 envelopes are passed through to the
|
|
17
18
|
* registered handler unchanged (no worker extraction). Default off for strict protocol tests.
|
|
@@ -29,6 +30,10 @@ export class LiopServer {
|
|
|
29
30
|
CACHE_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
|
|
30
31
|
THROTTLE_THRESHOLD = 5;
|
|
31
32
|
THROTTLE_COOLDOWN_MS = 60 * 1000; // 60 seconds
|
|
33
|
+
// [OWASP-A01] Sliding window rate limiter — prevents micro-query exfiltration
|
|
34
|
+
toolCallWindows = new Map();
|
|
35
|
+
toolCallMaxPerWindow;
|
|
36
|
+
toolCallWindowMs;
|
|
32
37
|
tools = new Map();
|
|
33
38
|
resources = new Map();
|
|
34
39
|
prompts = new Map();
|
|
@@ -67,8 +72,10 @@ export class LiopServer {
|
|
|
67
72
|
const compact = logic.replace(/\s+/g, " ");
|
|
68
73
|
if (policy.enforceAggregationFirst) {
|
|
69
74
|
const rowExtractionPatterns = [
|
|
70
|
-
|
|
71
|
-
|
|
75
|
+
// Block raw record dumps but allow safe aggregation chains
|
|
76
|
+
// (direct return: .reduce, .length, .filter, .every, .some, .find; keyed return: .reduce, .length, .filter)
|
|
77
|
+
/return\s+env\.records(?!\s*\.\s*(?:reduce|length|filter|every|some|find)\b)/i,
|
|
78
|
+
/return\s*\{[\s\S]*\b(accounts|patients|rows|records)\s*:\s*env\.records(?!\s*\.\s*(?:reduce|length|filter)\b)/i,
|
|
72
79
|
];
|
|
73
80
|
if (rowExtractionPatterns.some((p) => p.test(compact))) {
|
|
74
81
|
return "Preflight policy rejected: potential row-level export pattern detected.";
|
|
@@ -84,15 +91,29 @@ export class LiopServer {
|
|
|
84
91
|
return null;
|
|
85
92
|
const parsed = this.parseUnknownJson(output);
|
|
86
93
|
if (policy.outputSchema) {
|
|
87
|
-
|
|
94
|
+
// SEC-HARDENING: Force strict mode on ZodObject schemas to prevent
|
|
95
|
+
// key aliasing bypasses via .passthrough(). However, respect schemas
|
|
96
|
+
// that explicitly use .catchall() — calling .strict() would override
|
|
97
|
+
// the catchall with ZodNever, destroying the developer's intent.
|
|
98
|
+
const effectiveSchema = (() => {
|
|
99
|
+
if (!(policy.outputSchema instanceof z.ZodObject)) {
|
|
100
|
+
return policy.outputSchema;
|
|
101
|
+
}
|
|
102
|
+
const obj = policy.outputSchema;
|
|
103
|
+
// If schema has an explicit catchall (not ZodNever), respect it
|
|
104
|
+
if (!(obj._def.catchall instanceof z.ZodNever)) {
|
|
105
|
+
return obj;
|
|
106
|
+
}
|
|
107
|
+
// Otherwise force strict to block unrecognized keys by default
|
|
108
|
+
return obj.strict();
|
|
109
|
+
})();
|
|
110
|
+
const schemaResult = effectiveSchema.safeParse(parsed);
|
|
88
111
|
if (!schemaResult.success) {
|
|
89
|
-
//
|
|
90
|
-
|
|
91
|
-
? parsed.slice(0, 200)
|
|
92
|
-
: JSON.stringify(parsed).slice(0, 200);
|
|
112
|
+
// SEC-CRITICAL: Never expose rejected data in error messages.
|
|
113
|
+
// Only report the structural violation (unrecognized keys, type mismatches).
|
|
93
114
|
return `[LIOP] Output schema violation for ${toolName}: ${schemaResult.error.issues
|
|
94
115
|
.map((i) => `${i.path.join(".") || "<root>"} ${i.message}`)
|
|
95
|
-
.join("; ")}.
|
|
116
|
+
.join("; ")}. HINT: Your output must conform to the declared schema. Use 'env.records' to access the dataset and return only allowed fields.`;
|
|
96
117
|
}
|
|
97
118
|
}
|
|
98
119
|
if (policy.enforceAggregationFirst &&
|
|
@@ -143,6 +164,14 @@ export class LiopServer {
|
|
|
143
164
|
return this.unwrapForAggregationPolicyScan(joined);
|
|
144
165
|
}
|
|
145
166
|
violatesAggregationFirstPolicy(input, policyObj) {
|
|
167
|
+
const maxRows = typeof policyObj === "object" &&
|
|
168
|
+
typeof policyObj.maxOutputRows === "number"
|
|
169
|
+
? policyObj.maxOutputRows
|
|
170
|
+
: 10;
|
|
171
|
+
const allowPrimitives = typeof policyObj === "object" &&
|
|
172
|
+
typeof policyObj.allowPrimitiveArrays === "boolean"
|
|
173
|
+
? policyObj.allowPrimitiveArrays
|
|
174
|
+
: true;
|
|
146
175
|
if (typeof input === "string") {
|
|
147
176
|
const trimmed = input.trim();
|
|
148
177
|
if ((trimmed.startsWith("{") && trimmed.endsWith("}")) ||
|
|
@@ -157,14 +186,6 @@ export class LiopServer {
|
|
|
157
186
|
return false;
|
|
158
187
|
}
|
|
159
188
|
if (Array.isArray(input)) {
|
|
160
|
-
const maxRows = typeof policyObj === "object" &&
|
|
161
|
-
typeof policyObj.maxOutputRows === "number"
|
|
162
|
-
? policyObj.maxOutputRows
|
|
163
|
-
: 10;
|
|
164
|
-
const allowPrimitives = typeof policyObj === "object" &&
|
|
165
|
-
typeof policyObj.allowPrimitiveArrays === "boolean"
|
|
166
|
-
? policyObj.allowPrimitiveArrays
|
|
167
|
-
: true;
|
|
168
189
|
if (input.length > 0 &&
|
|
169
190
|
input.every((item) => typeof item === "object" && item !== null)) {
|
|
170
191
|
// Treat tabular row export as non-aggregated leakage risk if above threshold.
|
|
@@ -182,6 +203,11 @@ export class LiopServer {
|
|
|
182
203
|
return input.some((item) => this.violatesAggregationFirstPolicy(item, policyObj));
|
|
183
204
|
}
|
|
184
205
|
if (input && typeof input === "object") {
|
|
206
|
+
const keys = Object.keys(input);
|
|
207
|
+
// Treat flat dictionary with too many keys as non-aggregated leakage risk (Dynamic Key Bypass).
|
|
208
|
+
if (keys.length > maxRows) {
|
|
209
|
+
return true;
|
|
210
|
+
}
|
|
185
211
|
return Object.values(input).some((value) => this.violatesAggregationFirstPolicy(value, policyObj));
|
|
186
212
|
}
|
|
187
213
|
return false;
|
|
@@ -189,6 +215,9 @@ export class LiopServer {
|
|
|
189
215
|
constructor(serverInfo, config) {
|
|
190
216
|
this.serverInfo = serverInfo;
|
|
191
217
|
this.config = config;
|
|
218
|
+
const nerScanner = this.config?.security?.enableNerScanning
|
|
219
|
+
? new NerScanner()
|
|
220
|
+
: null;
|
|
192
221
|
this.piiScanner = new PiiScanner(this.config?.security?.piiPatterns ?? PII_PRESETS.GLOBAL_STRICT, this.config?.security?.forbiddenKeys ?? [
|
|
193
222
|
"id",
|
|
194
223
|
"name",
|
|
@@ -210,7 +239,15 @@ export class LiopServer {
|
|
|
210
239
|
"token",
|
|
211
240
|
"secret",
|
|
212
241
|
"privateKey",
|
|
213
|
-
]);
|
|
242
|
+
], nerScanner);
|
|
243
|
+
// [OWASP-A01] Rate limit: config > env > default (30 calls/min)
|
|
244
|
+
const rlConfig = this.config?.security?.rateLimit;
|
|
245
|
+
this.toolCallWindowMs =
|
|
246
|
+
rlConfig?.windowMs ??
|
|
247
|
+
Number.parseInt(process.env.LIOP_RATE_LIMIT_WINDOW_MS ?? "60000", 10);
|
|
248
|
+
this.toolCallMaxPerWindow =
|
|
249
|
+
rlConfig?.maxPerWindow ??
|
|
250
|
+
Number.parseInt(process.env.LIOP_RATE_LIMIT_MAX ?? "30", 10);
|
|
214
251
|
// Initialize Zero-Blocking Worker Pool for Heavy Cryptography & Sandboxing
|
|
215
252
|
const isTS = import.meta.url.endsWith(".ts");
|
|
216
253
|
const workerExt = isTS ? ".ts" : ".js";
|
|
@@ -239,6 +276,12 @@ export class LiopServer {
|
|
|
239
276
|
maxQueue: "auto",
|
|
240
277
|
taskQueue: new FixedQueue(),
|
|
241
278
|
execArgv,
|
|
279
|
+
// [DoS Defense] Enforce hard memory ceiling per worker thread.
|
|
280
|
+
// Workers exceeding this limit are terminated by Node.js runtime.
|
|
281
|
+
resourceLimits: {
|
|
282
|
+
maxOldGenerationSizeMb: this.config?.workerPool?.maxHeapMb ??
|
|
283
|
+
Number.parseInt(process.env.LIOP_WORKER_MAX_HEAP_MB ?? "64", 10),
|
|
284
|
+
},
|
|
242
285
|
});
|
|
243
286
|
// [Token Economy] Auto-register LIOP protocol spec as a single Resource.
|
|
244
287
|
// This centralizes the envelope documentation that was previously
|
|
@@ -568,6 +611,37 @@ Protocol Adherence is mandatory for successful execution.`,
|
|
|
568
611
|
this.logicCache.clear();
|
|
569
612
|
log.info("[LIOP-SDK] AST Security Cache cleared by Admin.");
|
|
570
613
|
}
|
|
614
|
+
/**
|
|
615
|
+
* Sliding window rate limiter for tool call frequency.
|
|
616
|
+
* Prevents micro-query exfiltration attacks where an attacker
|
|
617
|
+
* makes hundreds of individually-legitimate calls to reconstruct
|
|
618
|
+
* the full dataset field by field. (OWASP A01)
|
|
619
|
+
*/
|
|
620
|
+
checkToolCallRateLimit(toolName) {
|
|
621
|
+
const now = Date.now();
|
|
622
|
+
const windowMs = this.toolCallWindowMs;
|
|
623
|
+
const maxPerWindow = this.toolCallMaxPerWindow;
|
|
624
|
+
const window = this.toolCallWindows.get(toolName) || [];
|
|
625
|
+
// Evict expired timestamps outside the sliding window
|
|
626
|
+
const active = window.filter((t) => now - t < windowMs);
|
|
627
|
+
if (active.length >= maxPerWindow) {
|
|
628
|
+
const retryAfterSec = Math.ceil((active[0] + windowMs - now) / 1000);
|
|
629
|
+
return {
|
|
630
|
+
content: [
|
|
631
|
+
{
|
|
632
|
+
type: "text",
|
|
633
|
+
text: `LIOP_RATE_LIMITED: Too many calls to ${toolName}. ` +
|
|
634
|
+
`Max ${maxPerWindow} per ${windowMs / 1000}s window. ` +
|
|
635
|
+
`Retry after ${retryAfterSec}s.`,
|
|
636
|
+
},
|
|
637
|
+
],
|
|
638
|
+
isError: true,
|
|
639
|
+
};
|
|
640
|
+
}
|
|
641
|
+
active.push(now);
|
|
642
|
+
this.toolCallWindows.set(toolName, active);
|
|
643
|
+
return null;
|
|
644
|
+
}
|
|
571
645
|
/**
|
|
572
646
|
* Emulates calling a tool (used locally or via LIOPMcpBridge)
|
|
573
647
|
*/
|
|
@@ -576,6 +650,10 @@ Protocol Adherence is mandatory for successful execution.`,
|
|
|
576
650
|
if (!entry) {
|
|
577
651
|
throw new Error(`Tool not found: ${request.name}`);
|
|
578
652
|
}
|
|
653
|
+
// [OWASP-A01] Rate limiting: prevent micro-query exfiltration
|
|
654
|
+
const rateLimitResult = this.checkToolCallRateLimit(request.name);
|
|
655
|
+
if (rateLimitResult)
|
|
656
|
+
return rateLimitResult;
|
|
579
657
|
try {
|
|
580
658
|
// Validate inputs natively with Zod before execution
|
|
581
659
|
const parsedArgs = entry.schema.parse(request.arguments || {});
|
|
@@ -813,10 +891,11 @@ Protocol Adherence is mandatory for successful execution.`,
|
|
|
813
891
|
]);
|
|
814
892
|
const aggregationViolation = this.violatesAggregationFirstPolicy(this.unwrapForAggregationPolicyScan(finalOutput));
|
|
815
893
|
if (violation || aggregationViolation) {
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
log.info(`[LIOP-RPC] Secure egress blocked in gRPC stream: ${
|
|
819
|
-
response.semantic_evidence =
|
|
894
|
+
// SEC-CRITICAL: Log details server-side, never expose to caller
|
|
895
|
+
const internalReason = violation || "Aggregation-First Policy Violation";
|
|
896
|
+
log.info(`[LIOP-RPC] Secure egress blocked in gRPC stream: ${internalReason}`);
|
|
897
|
+
response.semantic_evidence =
|
|
898
|
+
"[LIOP] Egress Security Violation. Output blocked due to policy enforcement.";
|
|
820
899
|
response.is_error = true;
|
|
821
900
|
}
|
|
822
901
|
call.write(response, () => {
|
|
@@ -825,10 +904,16 @@ Protocol Adherence is mandatory for successful execution.`,
|
|
|
825
904
|
}
|
|
826
905
|
catch (error) {
|
|
827
906
|
const e = error;
|
|
828
|
-
|
|
907
|
+
const isDev = process.env.NODE_ENV === "development" ||
|
|
908
|
+
process.env.NODE_ENV === "test";
|
|
909
|
+
const detail = e.message || String(error);
|
|
910
|
+
log.error(`[LIOP-RPC] Execution Error: ${detail}`);
|
|
911
|
+
const errorMessage = isDev
|
|
912
|
+
? `Execution Error: ${detail}`
|
|
913
|
+
: "[LIOP] Execution Failed. The injected logic violated runtime constraints or encountered a fatal error.";
|
|
829
914
|
// Send error response before closing, avoiding "stream closed without results"
|
|
830
915
|
const errorResponse = {
|
|
831
|
-
semantic_evidence:
|
|
916
|
+
semantic_evidence: errorMessage,
|
|
832
917
|
cryptographic_proof: Buffer.from(""),
|
|
833
918
|
zk_receipt: Buffer.from(""),
|
|
834
919
|
is_error: true,
|
|
@@ -881,9 +966,20 @@ Protocol Adherence is mandatory for successful execution.`,
|
|
|
881
966
|
: undefined;
|
|
882
967
|
const policyViolation = this.validateOutputPolicy(toolName || "unknown_tool", workerResponse.output, toolPolicy);
|
|
883
968
|
if (policyViolation) {
|
|
969
|
+
// SEC-CRITICAL: Log details server-side, never expose to caller in Production
|
|
884
970
|
log.info(`[LIOP-SDK] Output policy blocked for ${toolName || "unknown_tool"}: ${policyViolation}`);
|
|
971
|
+
const isDev = process.env.NODE_ENV === "development" ||
|
|
972
|
+
process.env.NODE_ENV === "test";
|
|
973
|
+
const errorMessage = isDev
|
|
974
|
+
? policyViolation
|
|
975
|
+
: "[LIOP] Egress Security Violation. Output blocked due to policy enforcement. HINT: Return only aggregated, non-PII results using .reduce() to produce a flat {key:value} object with allowed schema fields.";
|
|
885
976
|
return {
|
|
886
|
-
content: [
|
|
977
|
+
content: [
|
|
978
|
+
{
|
|
979
|
+
type: "text",
|
|
980
|
+
text: errorMessage,
|
|
981
|
+
},
|
|
982
|
+
],
|
|
887
983
|
isError: true,
|
|
888
984
|
};
|
|
889
985
|
}
|
|
@@ -891,14 +987,21 @@ Protocol Adherence is mandatory for successful execution.`,
|
|
|
891
987
|
const violation = this.piiScanner.scan(content);
|
|
892
988
|
const aggregationViolation = this.violatesAggregationFirstPolicy(workerResponse.output);
|
|
893
989
|
if (violation || aggregationViolation) {
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
990
|
+
// SEC-CRITICAL: Log the specific violation reason server-side only.
|
|
991
|
+
// Never expose detection details (entity names, matched values) to the caller in Production.
|
|
992
|
+
const internalReason = violation ||
|
|
993
|
+
"Aggregation-First Policy Violation: Output blocked due to dynamic flat-key policy enforcement.";
|
|
994
|
+
log.info(`[LIOP-SDK] Secure egress blocked in local execution: ${internalReason}`);
|
|
995
|
+
const isDev = process.env.NODE_ENV === "development" ||
|
|
996
|
+
process.env.NODE_ENV === "test";
|
|
997
|
+
const errorMessage = isDev
|
|
998
|
+
? `[LIOP] Egress Security Violation: ${internalReason}`
|
|
999
|
+
: "[LIOP] Egress Security Violation. Output blocked due to policy enforcement. HINT: Return only aggregated, non-PII results using .reduce() to produce a flat {key:value} object with allowed schema fields.";
|
|
897
1000
|
return {
|
|
898
1001
|
content: [
|
|
899
1002
|
{
|
|
900
1003
|
type: "text",
|
|
901
|
-
text:
|
|
1004
|
+
text: errorMessage,
|
|
902
1005
|
},
|
|
903
1006
|
],
|
|
904
1007
|
isError: true,
|
|
@@ -908,11 +1011,18 @@ Protocol Adherence is mandatory for successful execution.`,
|
|
|
908
1011
|
}
|
|
909
1012
|
catch (error) {
|
|
910
1013
|
const e = error;
|
|
1014
|
+
const isDev = process.env.NODE_ENV === "development" ||
|
|
1015
|
+
process.env.NODE_ENV === "test";
|
|
1016
|
+
const detail = e.message || String(error);
|
|
1017
|
+
log.error(`[LIOP-SDK] WorkerPool Execution Fault: ${detail}`);
|
|
1018
|
+
const errorMessage = isDev
|
|
1019
|
+
? `WorkerPoolError: ${detail}`
|
|
1020
|
+
: "[LIOP] Execution Failed. The injected logic violated runtime constraints or encountered a fatal error.";
|
|
911
1021
|
return {
|
|
912
1022
|
content: [
|
|
913
1023
|
{
|
|
914
1024
|
type: "text",
|
|
915
|
-
text:
|
|
1025
|
+
text: errorMessage,
|
|
916
1026
|
},
|
|
917
1027
|
],
|
|
918
1028
|
isError: true,
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/** Single named entity detected by the NER scanner. */
|
|
2
|
+
export interface NerEntity {
|
|
3
|
+
type: "person" | "place" | "organization";
|
|
4
|
+
text: string;
|
|
5
|
+
}
|
|
6
|
+
/** Result of an NER scan operation. */
|
|
7
|
+
export interface NerScanResult {
|
|
8
|
+
detected: boolean;
|
|
9
|
+
entities: NerEntity[];
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Scans text content for named entities that may represent PII.
|
|
13
|
+
* Uses `compromise/three` for person, place, and organization detection.
|
|
14
|
+
*
|
|
15
|
+
* Designed for egress filtering — optimized for recall over precision
|
|
16
|
+
* to ensure sensitive data does not leak through aliased output keys.
|
|
17
|
+
*/
|
|
18
|
+
export declare class NerScanner {
|
|
19
|
+
/**
|
|
20
|
+
* Scans a single string value for named entities.
|
|
21
|
+
* Returns detected entities if the text contains recognizable PII.
|
|
22
|
+
*/
|
|
23
|
+
scan(text: string): NerScanResult;
|
|
24
|
+
/**
|
|
25
|
+
* Recursively scans all string values within an object/array.
|
|
26
|
+
* Stops at the first detection for performance (fail-fast).
|
|
27
|
+
*/
|
|
28
|
+
scanDeep(input: unknown, seen?: WeakSet<object>): NerScanResult;
|
|
29
|
+
}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LIOP NER Content Scanner (The Shield V3 — Named Entity Recognition Layer)
|
|
3
|
+
*
|
|
4
|
+
* Lightweight NER scanner using `compromise` NLP for detecting
|
|
5
|
+
* person names, places, and organizations in free-text output values.
|
|
6
|
+
*
|
|
7
|
+
* This layer operates AFTER the regex-based PII scanner and
|
|
8
|
+
* catches entities that lack a deterministic format pattern
|
|
9
|
+
* (e.g., "Evelyn Reed" cannot be detected by regex).
|
|
10
|
+
*
|
|
11
|
+
* Architecture: opt-in per-server via `enableNerScanning: true`.
|
|
12
|
+
* Performance: ~10ms for typical SDK output sizes (< 10KB).
|
|
13
|
+
*
|
|
14
|
+
* @see https://github.com/spencermountain/compromise
|
|
15
|
+
*/
|
|
16
|
+
import nlp from "compromise/three";
|
|
17
|
+
/**
|
|
18
|
+
* Medical/pharmaceutical vocabulary safelist.
|
|
19
|
+
* These terms are tagged as #Medication or #Condition to prevent the NER
|
|
20
|
+
* from misclassifying them as person/organization names.
|
|
21
|
+
* Extend this list progressively — add terms as false positives arise.
|
|
22
|
+
*/
|
|
23
|
+
const MEDICAL_VOCABULARY = {
|
|
24
|
+
aspirin: "Medication",
|
|
25
|
+
lisinopril: "Medication",
|
|
26
|
+
metformin: "Medication",
|
|
27
|
+
amlodipine: "Medication",
|
|
28
|
+
atorvastatin: "Medication",
|
|
29
|
+
omeprazole: "Medication",
|
|
30
|
+
losartan: "Medication",
|
|
31
|
+
simvastatin: "Medication",
|
|
32
|
+
levothyroxine: "Medication",
|
|
33
|
+
ibuprofen: "Medication",
|
|
34
|
+
acetaminophen: "Medication",
|
|
35
|
+
amoxicillin: "Medication",
|
|
36
|
+
ciprofloxacin: "Medication",
|
|
37
|
+
prednisone: "Medication",
|
|
38
|
+
warfarin: "Medication",
|
|
39
|
+
insulin: "Medication",
|
|
40
|
+
hydrochlorothiazide: "Medication",
|
|
41
|
+
gabapentin: "Medication",
|
|
42
|
+
albuterol: "Medication",
|
|
43
|
+
pantoprazole: "Medication",
|
|
44
|
+
// Generic clinical terms
|
|
45
|
+
hypertension: "Condition",
|
|
46
|
+
diabetes: "Condition",
|
|
47
|
+
bronchitis: "Condition",
|
|
48
|
+
pneumonia: "Condition",
|
|
49
|
+
asthma: "Condition",
|
|
50
|
+
};
|
|
51
|
+
// Register the medical vocabulary at module load, i.e. BEFORE any scan runs.
|
|
52
|
+
// Per the authors' intent, compromise's addWords() gives these terms an
|
|
53
|
+
// explicit tag so they are not reported as #Person or #Organization.
|
|
54
|
+
// NOTE(review): this call runs as an import side effect on the shared `nlp`
// export, so it presumably affects every consumer of compromise in the
// process — confirm that is acceptable.
nlp.addWords(MEDICAL_VOCABULARY);
|
|
55
|
+
// Strings shorter than this are never handed to the NER engine, and detected
// entities shorter than this are discarded as well.
const MIN_TEXT_LENGTH = 4;
// Matches strings made up solely of digits, whitespace and punctuation/symbols;
// such strings skip NER entirely.
const NON_TEXT_PATTERN = /^[\d\s.,:;!?()[\]{}<>@#$%^&*+=|\\/"'`~_-]+$/;
/**
 * Named-entity scanner used as an egress filter for free-text values.
 *
 * Delegates to `compromise/three` to find people, places and organizations.
 * Tuned for recall rather than precision, so sensitive data hidden behind
 * innocuous output keys is still caught.
 */
export class NerScanner {
    /**
     * Scans one string for named entities.
     *
     * @param text - The string to analyse.
     * @returns `{ detected, entities }`. `entities` lists people first, then
     *   places, then organizations, each as `{ type, text }` with the text
     *   trimmed. `detected` is true when at least one entity was kept.
     */
    scan(text) {
        // Too short or purely numeric/symbolic: skip the NLP pass altogether.
        if (text.length < MIN_TEXT_LENGTH || NON_TEXT_PATTERN.test(text)) {
            return { detected: false, entities: [] };
        }
        const doc = nlp(text);
        const found = [];
        // Keeps every match whose trimmed text is long enough to be meaningful.
        const collect = (type, matches) => {
            for (const raw of matches) {
                const value = raw.trim();
                if (value.length >= MIN_TEXT_LENGTH) {
                    found.push({ type, text: value });
                }
            }
        };
        collect("person", doc.people().out("array"));
        collect("place", doc.places().out("array"));
        collect("organization", doc.organizations().out("array"));
        return { detected: found.length > 0, entities: found };
    }
    /**
     * Recursively scans every string value reachable through arrays and
     * object values (keys themselves are not scanned).
     *
     * The walk returns early only when a *person* entity is found; place and
     * organization hits are accumulated and the walk continues.
     *
     * @param input - Any value; non-string primitives and functions are ignored.
     * @param seen - Objects already visited, used to survive cyclic references.
     * @returns `{ detected, entities }` aggregated over the visited values.
     */
    scanDeep(input, seen = new WeakSet()) {
        if (typeof input === "string") {
            return this.scan(input);
        }
        // null, undefined, numbers, booleans, functions, ...: nothing to scan.
        if (input === null || typeof input !== "object") {
            return { detected: false, entities: [] };
        }
        // Already visited (cycle or shared reference): do not descend again.
        if (seen.has(input)) {
            return { detected: false, entities: [] };
        }
        seen.add(input);
        const collected = [];
        const children = Array.isArray(input) ? input : Object.values(input);
        for (const child of children) {
            const { detected, entities } = this.scanDeep(child, seen);
            if (!detected) {
                continue;
            }
            collected.push(...entities);
            // Fail-fast: a person name is enough to stop the walk.
            if (entities.some((entity) => entity.type === "person")) {
                return { detected: true, entities: collected };
            }
        }
        return { detected: collected.length > 0, entities: collected };
    }
}
|
package/dist/server/pii.d.ts
CHANGED
|
@@ -30,11 +30,37 @@ export declare const PII_PRESETS: {
|
|
|
30
30
|
/**
 * Scanner that inspects arbitrary values for PII: forbidden key names,
 * rule-based matches on string values and, optionally, named entities.
 */
export declare class PiiScanner {
    private patterns;
    private forbiddenKeysSet;
    private nerScanner;
    /**
     * Key names that contain a forbidden substring yet are NOT PII.
     * Keeps fuzzy matching from flagging them (e.g., "grid" contains "id").
     */
    private static readonly KEY_SAFELIST;
    /**
     * Forbidden tokens shorter than 4 characters, matched with boundary-aware
     * regular expressions to limit false positives.
     */
    private shortTokenBoundaryPatterns;
    /**
     * Forbidden tokens of 4 or more characters, matched by substring containment.
     */
    private longForbiddenTokens;
    constructor(patterns?: PiiRule[], forbiddenKeys?: string[], nerScanner?: import("./ner-scanner.js").NerScanner | null);
    /**
     * Scans any input (string, object, array) for PII violations.
     * Returns the name of the pattern/rule that triggered, or null when safe.
     *
     * Detection pipeline (fail-fast):
     * 1. Exact key match (O(1) Set lookup)
     * 2. Fuzzy key match (boundary detection for short tokens, substring for long)
     * 3. Regex/algorithmic pattern match on string values
     * 4. NER content scan on string values (if enabled)
     */
    scan(input: unknown, seen?: WeakSet<object>): string | null;
    /**
     * Applies the fuzzy key rules to a single key: boundary-aware regex for
     * short tokens, substring containment for long ones.
     */
    private checkKeyFuzzy;
    private checkString;
}
|
package/dist/server/pii.js
CHANGED
|
@@ -159,14 +159,136 @@ export const PII_PRESETS = {
|
|
|
159
159
|
export class PiiScanner {
|
|
160
160
|
patterns;
|
|
161
161
|
forbiddenKeysSet;
|
|
162
|
-
|
|
162
|
+
nerScanner;
|
|
163
|
+
/**
|
|
164
|
+
* Safelist of keys that contain forbidden substrings but are NOT PII.
|
|
165
|
+
* Prevents false positives from fuzzy matching (e.g., "grid" contains "id").
|
|
166
|
+
*/
|
|
167
|
+
static KEY_SAFELIST = new Set([
|
|
168
|
+
// Common words containing "id" substring
|
|
169
|
+
"grid",
|
|
170
|
+
"video",
|
|
171
|
+
"android",
|
|
172
|
+
"identity",
|
|
173
|
+
"provide",
|
|
174
|
+
"override",
|
|
175
|
+
"validate",
|
|
176
|
+
"hidden",
|
|
177
|
+
"widget",
|
|
178
|
+
"guidelines",
|
|
179
|
+
"beside",
|
|
180
|
+
"guideline",
|
|
181
|
+
"outside",
|
|
182
|
+
"inside",
|
|
183
|
+
"collide",
|
|
184
|
+
"decide",
|
|
185
|
+
"divide",
|
|
186
|
+
"aside",
|
|
187
|
+
"ride",
|
|
188
|
+
"side",
|
|
189
|
+
"wide",
|
|
190
|
+
"hide",
|
|
191
|
+
"tide",
|
|
192
|
+
"pride",
|
|
193
|
+
"bride",
|
|
194
|
+
"slide",
|
|
195
|
+
"guide",
|
|
196
|
+
"stride",
|
|
197
|
+
"oxide",
|
|
198
|
+
"dioxide",
|
|
199
|
+
"suicide",
|
|
200
|
+
"homicide",
|
|
201
|
+
"pesticide",
|
|
202
|
+
"valid",
|
|
203
|
+
"invalid",
|
|
204
|
+
"void",
|
|
205
|
+
"avoid",
|
|
206
|
+
// Common words containing "name" substring
|
|
207
|
+
"diagnosis",
|
|
208
|
+
"medication",
|
|
209
|
+
"namespace",
|
|
210
|
+
"namesake",
|
|
211
|
+
"rename",
|
|
212
|
+
"filename",
|
|
213
|
+
"hostname",
|
|
214
|
+
"typename",
|
|
215
|
+
"unnamed",
|
|
216
|
+
"renamed",
|
|
217
|
+
// Common words containing "phone" substring
|
|
218
|
+
"phonetic",
|
|
219
|
+
"phoneme",
|
|
220
|
+
"microphone",
|
|
221
|
+
"headphone",
|
|
222
|
+
"telephone",
|
|
223
|
+
"saxophone",
|
|
224
|
+
"smartphone",
|
|
225
|
+
// Common words containing "address" substring
|
|
226
|
+
"streetview",
|
|
227
|
+
"addressable",
|
|
228
|
+
"addressing",
|
|
229
|
+
// Common words containing "city" substring
|
|
230
|
+
"cityscape",
|
|
231
|
+
"electricity",
|
|
232
|
+
"capacity",
|
|
233
|
+
"velocity",
|
|
234
|
+
"opacity",
|
|
235
|
+
// Common technical terms
|
|
236
|
+
"timestamp",
|
|
237
|
+
"timezone",
|
|
238
|
+
// LIOP Protocol Internal Keys (must never be blocked)
|
|
239
|
+
"image_id",
|
|
240
|
+
"computation_result",
|
|
241
|
+
"zk_receipt",
|
|
242
|
+
"testid",
|
|
243
|
+
"toolid",
|
|
244
|
+
"sessionid",
|
|
245
|
+
"peerid",
|
|
246
|
+
"nodeid",
|
|
247
|
+
"requestid",
|
|
248
|
+
"correlationid",
|
|
249
|
+
"traceid",
|
|
250
|
+
"spanid",
|
|
251
|
+
]);
|
|
252
|
+
/**
|
|
253
|
+
* Short forbidden tokens (< 4 chars) that require boundary-aware matching.
|
|
254
|
+
* Uses regex boundary detection to avoid false positives.
|
|
255
|
+
*/
|
|
256
|
+
shortTokenBoundaryPatterns;
|
|
257
|
+
/**
|
|
258
|
+
* Long forbidden tokens (>= 4 chars) that use substring containment.
|
|
259
|
+
*/
|
|
260
|
+
longForbiddenTokens;
|
|
261
|
+
constructor(patterns = [], forbiddenKeys = [], nerScanner) {
|
|
163
262
|
this.patterns = patterns;
|
|
164
|
-
// Optimizes large recursive evaluations using O(1) continuous key lookup
|
|
165
263
|
this.forbiddenKeysSet = new Set(forbiddenKeys.map((k) => k.toLowerCase()));
|
|
264
|
+
this.nerScanner = nerScanner ?? null;
|
|
265
|
+
// Pre-compute fuzzy matching structures for performance
|
|
266
|
+
this.shortTokenBoundaryPatterns = new Map();
|
|
267
|
+
this.longForbiddenTokens = [];
|
|
268
|
+
for (const token of this.forbiddenKeysSet) {
|
|
269
|
+
if (token.length < 4) {
|
|
270
|
+
// Short tokens: require word boundary (camelCase, snake_case, kebab-case, or exact)
|
|
271
|
+
// "id" matches: "patientId", "record_id", "user-id", "id"
|
|
272
|
+
// "id" does NOT match: "grid", "video", "android"
|
|
273
|
+
this.shortTokenBoundaryPatterns.set(token, new RegExp(`(?:^|[_-])${token}(?:$|[_-])|` + // snake/kebab boundary
|
|
274
|
+
`(?:^|[a-z])${token.charAt(0).toUpperCase()}${token.slice(1)}|` + // camelCase boundary (e.g., patientId)
|
|
275
|
+
`^${token}$`, // exact match
|
|
276
|
+
"i"));
|
|
277
|
+
}
|
|
278
|
+
else {
|
|
279
|
+
this.longForbiddenTokens.push(token);
|
|
280
|
+
}
|
|
281
|
+
}
|
|
166
282
|
}
|
|
167
283
|
/**
|
|
168
284
|
* Scans any input (string, object, array) for PII violations.
|
|
169
285
|
* Returns the pattern/rule name that triggered the violation, or null if safe.
|
|
286
|
+
*
|
|
287
|
+
* Detection pipeline (fail-fast):
|
|
288
|
+
* 1. Exact key match (O(1) Set lookup)
|
|
289
|
+
* 2. Fuzzy key match (boundary detection for short tokens, substring for long)
|
|
290
|
+
* 3. Regex/algorithmic pattern match on string values
|
|
291
|
+
* 4. NER content scan on string values (if enabled)
|
|
170
292
|
*/
|
|
171
293
|
scan(input, seen = new WeakSet()) {
|
|
172
294
|
if (input === null || input === undefined)
|
|
@@ -189,8 +311,21 @@ export class PiiScanner {
|
|
|
189
311
|
// Silent fallback: It looked like JSON but wasn't valid. Proceed with raw string check.
|
|
190
312
|
}
|
|
191
313
|
}
|
|
192
|
-
//
|
|
193
|
-
|
|
314
|
+
// Check string value against regex patterns
|
|
315
|
+
const patternViolation = this.checkString(input);
|
|
316
|
+
if (patternViolation)
|
|
317
|
+
return patternViolation;
|
|
318
|
+
// Layer 3: NER Content Scan — detect person names in free-text values
|
|
319
|
+
if (this.nerScanner) {
|
|
320
|
+
const nerResult = this.nerScanner.scan(input);
|
|
321
|
+
if (nerResult.detected) {
|
|
322
|
+
const personEntity = nerResult.entities.find((e) => e.type === "person");
|
|
323
|
+
if (personEntity) {
|
|
324
|
+
return `PII Entity Detected: person name "${personEntity.text}"`;
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
return null;
|
|
194
329
|
}
|
|
195
330
|
// 2. Recursive Objects/Arrays Scan
|
|
196
331
|
if (typeof input === "object") {
|
|
@@ -207,10 +342,14 @@ export class PiiScanner {
|
|
|
207
342
|
}
|
|
208
343
|
else {
|
|
209
344
|
for (const [key, value] of Object.entries(input)) {
|
|
210
|
-
//
|
|
345
|
+
// Layer 1: Exact key match — O(1) constant time
|
|
211
346
|
if (this.forbiddenKeysSet.has(key.toLowerCase())) {
|
|
212
347
|
return `Forbidden Key: ${key}`;
|
|
213
348
|
}
|
|
349
|
+
// Layer 2: Fuzzy key match — catches aliases and variations
|
|
350
|
+
const fuzzyViolation = this.checkKeyFuzzy(key);
|
|
351
|
+
if (fuzzyViolation)
|
|
352
|
+
return fuzzyViolation;
|
|
214
353
|
// Recurse into values
|
|
215
354
|
const violation = this.scan(value, seen);
|
|
216
355
|
if (violation)
|
|
@@ -220,6 +359,29 @@ export class PiiScanner {
|
|
|
220
359
|
}
|
|
221
360
|
return null;
|
|
222
361
|
}
|
|
362
|
+
/**
|
|
363
|
+
* Checks a key against fuzzy matching rules.
|
|
364
|
+
* Short tokens use boundary-aware regex; long tokens use substring containment.
|
|
365
|
+
*/
|
|
366
|
+
checkKeyFuzzy(key) {
|
|
367
|
+
const normalized = key.toLowerCase();
|
|
368
|
+
// Skip safelisted keys entirely
|
|
369
|
+
if (PiiScanner.KEY_SAFELIST.has(normalized))
|
|
370
|
+
return null;
|
|
371
|
+
// Short token boundary matching (e.g., "id" in "patientId" but not "grid")
|
|
372
|
+
for (const [token, pattern] of this.shortTokenBoundaryPatterns) {
|
|
373
|
+
if (pattern.test(key)) {
|
|
374
|
+
return `Forbidden Key (fuzzy): ${key} matches boundary pattern "${token}"`;
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
// Long token substring matching (e.g., "name" in "firstName", "names")
|
|
378
|
+
for (const token of this.longForbiddenTokens) {
|
|
379
|
+
if (normalized.includes(token)) {
|
|
380
|
+
return `Forbidden Key (fuzzy): ${key} contains restricted token "${token}"`;
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
return null;
|
|
384
|
+
}
|
|
223
385
|
checkString(text) {
|
|
224
386
|
for (const rule of this.patterns) {
|
|
225
387
|
if (typeof rule === "string") {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nekzus/liop",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "2.0.0-alpha.1",
|
|
4
4
|
"description": "Official SDK for Logic-Injection-on-Origin Protocol (LIOP). Deploy Logic-on-Origin with WebAssembly at gRPC speed and bidirectional MCP compatibility.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -131,6 +131,7 @@
|
|
|
131
131
|
"@modelcontextprotocol/sdk": "^1.28.0",
|
|
132
132
|
"@multiformats/multiaddr": "^13.0.1",
|
|
133
133
|
"@opentelemetry/api": "^1.9.1",
|
|
134
|
+
"compromise": "14.15.0",
|
|
134
135
|
"gpt-tokenizer": "^3.4.0",
|
|
135
136
|
"hono": "^4.12.5",
|
|
136
137
|
"it-pipe": "^3.0.1",
|