@artemiskit/sdk 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +134 -0
- package/README.md +173 -0
- package/adapters/openai/dist/index.js +5625 -0
- package/dist/index.js +42577 -0
- package/dist/matchers/index.js +224 -0
- package/dist/matchers/jest.js +257 -0
- package/dist/matchers/vitest.js +257 -0
- package/package.json +78 -0
- package/src/__tests__/artemiskit.test.ts +425 -0
- package/src/__tests__/matchers.test.ts +450 -0
- package/src/artemiskit.ts +791 -0
- package/src/guardian/action-validator.ts +585 -0
- package/src/guardian/circuit-breaker.ts +655 -0
- package/src/guardian/guardian.ts +497 -0
- package/src/guardian/guardrails.ts +536 -0
- package/src/guardian/index.ts +142 -0
- package/src/guardian/intent-classifier.ts +378 -0
- package/src/guardian/interceptor.ts +381 -0
- package/src/guardian/policy.ts +446 -0
- package/src/guardian/types.ts +436 -0
- package/src/index.ts +164 -0
- package/src/matchers/core.ts +315 -0
- package/src/matchers/index.ts +26 -0
- package/src/matchers/jest.ts +112 -0
- package/src/matchers/vitest.ts +84 -0
- package/src/types.ts +259 -0
- package/tsconfig.json +11 -0
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Intent Classifier
|
|
3
|
+
*
|
|
4
|
+
* Analyzes AI/agent intent to determine what it's trying to accomplish,
|
|
5
|
+
* not just the literal action. Uses pattern matching and optionally LLM-based
|
|
6
|
+
* classification to detect potentially risky intents.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { ModelClient } from '@artemiskit/core';
|
|
10
|
+
import { nanoid } from 'nanoid';
|
|
11
|
+
import type { GuardrailResult, IntentClassification, Violation, ViolationSeverity } from './types';
|
|
12
|
+
|
|
13
|
+
/**
 * Describes one intent the classifier can recognise and how to react to it.
 */
export interface IntentCategory {
  /**
   * Identifier of the intent. Reported as the `intent` of a classification and
   * used to find this category again when a classification is validated.
   */
  name: string;

  /** Human-readable summary; copied into the `category` field of a classification. */
  description: string;

  /** Severity attached to classifications, and to violations raised for them. */
  riskLevel: ViolationSeverity;

  /** Regular expressions that are tested against the raw input text. */
  patterns?: Array<RegExp>;

  /** Phrases that are searched for as case-insensitive substrings of the input text. */
  keywords?: Array<string>;

  /**
   * Sample inputs for this intent.
   * NOTE(review): nothing in this file reads this field.
   */
  examples?: Array<string>;

  /**
   * Reaction when the intent is detected: `'allow'` records no violation,
   * `'block'` marks the violation as blocked. Validation treats a missing
   * value as `'warn'`.
   */
  action?: 'allow' | 'warn' | 'block';
}
|
|
25
|
+
|
|
26
|
+
/**
 * Options accepted by the intent classifier. Every field is optional.
 */
export interface IntentClassifierConfig {
  /** Extra categories; the classifier appends them after its built-in defaults. */
  categories?: Array<IntentCategory>;

  /** Enables the LLM pass. It only runs when `llmClient` is also provided. */
  useLLM?: boolean;

  /** Model client used for the LLM pass. */
  llmClient?: ModelClient;

  /**
   * Classifications with a confidence below this value are ignored during
   * validation. The classifier defaults it to 0.7.
   */
  confidenceThreshold?: number;

  /**
   * Block unknown intents. The classifier defaults it to `false`.
   * NOTE(review): nothing in this file reads this flag after it is stored.
   */
  blockUnknown?: boolean;

  /**
   * When `true` (the classifier's default), classifications whose risk level
   * is `'high'` or `'critical'` are blocked even if their category only asks
   * for a warning.
   */
  blockHighRisk?: boolean;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Built-in intent categories covering common risky behaviours.
 *
 * Array order is significant: categories are evaluated in this order, and
 * entries supplied through configuration are appended after these.
 */
const DEFAULT_CATEGORIES: IntentCategory[] = [
  // Moving data out of the system.
  {
    name: 'data_exfiltration',
    description: 'Attempting to extract or transmit sensitive data',
    riskLevel: 'critical',
    action: 'block',
    patterns: [
      /send\s+(all|every|the)\s+(data|information|records|files)/i,
      /export\s+.*\s+(external|outside|remote)/i,
      /upload\s+.*\s+(server|cloud|external)/i,
      /transfer\s+.*\s+(data|files|records)/i,
    ],
    keywords: [
      'exfiltrate',
      'steal',
      'extract sensitive',
      'leak data',
      'send externally',
    ],
  },

  // Gaining permissions beyond those granted.
  {
    name: 'privilege_escalation',
    description: 'Attempting to gain elevated permissions',
    riskLevel: 'critical',
    action: 'block',
    patterns: [
      /become\s+(admin|root|superuser)/i,
      /grant\s+.*\s+(admin|root|all)\s+permissions/i,
      /elevate\s+.*\s+privileges/i,
      /bypass\s+.*\s+(auth|permission|security)/i,
    ],
    keywords: [
      'sudo',
      'admin access',
      'root access',
      'elevated privileges',
      'bypass security',
    ],
  },

  // Changing system configuration or installed software.
  {
    name: 'system_modification',
    description: 'Attempting to modify system configuration',
    riskLevel: 'high',
    action: 'warn',
    patterns: [
      /modify\s+.*\s+(system|config|settings)/i,
      /change\s+.*\s+(permissions|ownership)/i,
      /install\s+.*\s+(software|package|dependency)/i,
      /update\s+.*\s+(system|os|kernel)/i,
    ],
    keywords: [
      'system config',
      'install package',
      'modify settings',
      'change permissions',
    ],
  },

  // Irreversible deletion or destruction.
  {
    name: 'destructive_action',
    description: 'Attempting destructive operations',
    riskLevel: 'critical',
    action: 'block',
    patterns: [
      /delete\s+(all|every|\*)/i,
      /drop\s+(table|database|collection)/i,
      /remove\s+.*\s+(permanently|forever)/i,
      /destroy\s+.*\s+(data|files|records)/i,
      /wipe\s+.*\s+(clean|everything)/i,
    ],
    keywords: [
      'rm -rf',
      'drop database',
      'delete all',
      'wipe clean',
      'destroy',
    ],
  },

  // Reading secrets and credentials.
  {
    name: 'sensitive_access',
    description: 'Attempting to access sensitive information',
    riskLevel: 'high',
    action: 'warn',
    patterns: [
      /access\s+.*\s+(password|secret|key|credential)/i,
      /read\s+.*\s+(\.env|config|secret)/i,
      /show\s+.*\s+(password|api.?key|token)/i,
      /list\s+.*\s+(credentials|secrets|keys)/i,
    ],
    keywords: [
      'api key',
      'password',
      'secret',
      'credential',
      'private key',
      'access token',
    ],
  },

  // Mapping out systems and infrastructure.
  {
    name: 'reconnaissance',
    description: 'Gathering information about systems or infrastructure',
    riskLevel: 'medium',
    action: 'warn',
    patterns: [
      /scan\s+.*\s+(network|ports|hosts)/i,
      /enumerate\s+.*\s+(users|services|endpoints)/i,
      /discover\s+.*\s+(systems|services|hosts)/i,
      /list\s+.*\s+(all|every)\s+(user|service|endpoint)/i,
    ],
    keywords: [
      'port scan',
      'network scan',
      'enumerate',
      'fingerprint',
      'probe',
    ],
  },

  // Running arbitrary commands or code.
  {
    name: 'code_execution',
    description: 'Attempting to execute arbitrary code',
    riskLevel: 'critical',
    action: 'block',
    patterns: [
      /execute\s+.*\s+(command|script|code)/i,
      /run\s+.*\s+(shell|bash|command)/i,
      /eval\s*\(/i,
      /exec\s*\(/i,
    ],
    keywords: [
      'execute code',
      'run command',
      'shell command',
      'eval',
      'exec',
    ],
  },

  // Manipulation, impersonation and instruction overrides.
  {
    name: 'social_engineering',
    description: 'Attempting social engineering or manipulation',
    riskLevel: 'high',
    action: 'block',
    patterns: [
      /pretend\s+(to be|you are)/i,
      /impersonate\s+/i,
      /ignore\s+.*\s+(instructions|rules|guidelines)/i,
      /forget\s+.*\s+(previous|earlier)\s+(instructions|rules)/i,
    ],
    keywords: [
      'pretend to be',
      'ignore instructions',
      'forget rules',
      'act as',
      'jailbreak',
    ],
  },

  // Moving money.
  {
    name: 'financial_transaction',
    description: 'Attempting financial operations',
    riskLevel: 'high',
    action: 'warn',
    patterns: [
      /transfer\s+.*\s+(money|funds|payment)/i,
      /send\s+.*\s+(payment|money)/i,
      /make\s+.*\s+(purchase|payment|transaction)/i,
      /withdraw\s+.*\s+(funds|money)/i,
    ],
    keywords: [
      'transfer funds',
      'send payment',
      'make purchase',
      'withdraw money',
    ],
  },

  // Outbound messages and publishing.
  {
    name: 'communication',
    description: 'Attempting to send communications',
    riskLevel: 'medium',
    action: 'warn',
    patterns: [
      /send\s+.*\s+(email|message|notification)/i,
      /post\s+.*\s+(message|comment|update)/i,
      /publish\s+.*\s+(content|article|post)/i,
    ],
    keywords: [
      'send email',
      'post message',
      'publish content',
      'notify',
    ],
  },
];
|
|
179
|
+
|
|
180
|
+
/**
 * Intent Classifier
 *
 * Scores free text against a list of intent categories (regular-expression
 * patterns plus case-insensitive keyword substrings), optionally asks an LLM
 * for additional intents, and turns the result into guardrail violations.
 *
 * NOTE(review): `IntentClassifierConfig.blockUnknown` is stored (default
 * `false`) but nothing in this class consults it.
 */
export class IntentClassifier {
  /** Confidence contributed by one matching regular-expression pattern. */
  private static readonly PATTERN_WEIGHT = 0.8;

  /** Confidence contributed by one matching keyword. */
  private static readonly KEYWORD_WEIGHT = 0.5;

  /** Risk levels accepted from the LLM; these are the values the prompt asks for. */
  private static readonly LLM_RISK_LEVELS: readonly ViolationSeverity[] = [
    'low',
    'medium',
    'high',
    'critical',
  ];

  private config: IntentClassifierConfig;
  private categories: IntentCategory[];

  /**
   * @param config Optional settings. Missing fields default to
   *   `confidenceThreshold: 0.7`, `blockUnknown: false`, `blockHighRisk: true`.
   *   Categories in `config.categories` are appended after the built-in ones.
   */
  constructor(config: IntentClassifierConfig = {}) {
    this.config = {
      confidenceThreshold: 0.7,
      blockUnknown: false,
      blockHighRisk: true,
      ...config,
    };
    this.categories = [...DEFAULT_CATEGORIES, ...(config.categories ?? [])];
  }

  /**
   * Classify the intent of a given text.
   *
   * @param text Text to analyse.
   * @returns One entry per category with at least one matching pattern or
   *   keyword, plus any intents reported by the LLM when `useLLM` and
   *   `llmClient` are both set; sorted by confidence, highest first.
   */
  async classify(text: string): Promise<IntentClassification[]> {
    const classifications: IntentClassification[] = [];
    // Lower-case once instead of once per category.
    const lowerText = text.toLowerCase();

    // Pattern-based classification
    for (const category of this.categories) {
      const confidence = this.scoreCategory(category, text, lowerText);
      if (confidence > 0) {
        classifications.push({
          intent: category.name,
          confidence,
          category: category.description,
          riskLevel: category.riskLevel,
        });
      }
    }

    // LLM-based classification if enabled
    if (this.config.useLLM && this.config.llmClient) {
      const llmClassifications = await this.classifyWithLLM(text);
      classifications.push(...llmClassifications);
    }

    // Sort by confidence descending
    classifications.sort((a, b) => b.confidence - a.confidence);

    return classifications;
  }

  /**
   * Create a guardrail function from this classifier.
   *
   * @returns A function that validates `content` with {@link validate}. The
   *   optional `context` argument is accepted for signature compatibility and
   *   is not used.
   */
  asGuardrail(): (content: string, context?: Record<string, unknown>) => Promise<GuardrailResult> {
    return async (content: string) => this.validate(content);
  }

  /**
   * Validate content and return a guardrail result.
   *
   * Classifications below the confidence threshold are skipped. Each remaining
   * one becomes a violation unless its category's action is `'allow'`. A
   * violation is blocking when the category's action is `'block'`, or when
   * `blockHighRisk` is `true` and the risk level is `'high'` or `'critical'`.
   *
   * @param text Text to validate.
   * @returns `passed` is `true` only when no violation (warning or block) was
   *   recorded; `metadata.classifications` holds every classification,
   *   including those below the threshold.
   */
  async validate(text: string): Promise<GuardrailResult> {
    const classifications = await this.classify(text);
    const threshold = this.config.confidenceThreshold ?? 0.7;
    const violations: Violation[] = [];

    for (const classification of classifications) {
      if (classification.confidence < threshold) {
        continue;
      }

      // Intents reported only by the LLM have no category; they default to 'warn'.
      const category = this.categories.find((c) => c.name === classification.intent);
      const action = category?.action ?? 'warn';
      if (action === 'allow') {
        continue;
      }

      const isHighRisk =
        classification.riskLevel === 'high' || classification.riskLevel === 'critical';
      const shouldBlock = action === 'block' || (this.config.blockHighRisk === true && isHighRisk);

      violations.push({
        id: nanoid(),
        type: 'intent_classification',
        severity: classification.riskLevel ?? 'medium',
        message: `Detected risky intent: ${classification.intent}`,
        details: {
          intent: classification.intent,
          confidence: classification.confidence,
          category: classification.category,
        },
        timestamp: new Date(),
        action: shouldBlock ? 'block' : 'warn',
        blocked: shouldBlock,
      });
    }

    return {
      passed: violations.length === 0,
      violations,
      metadata: { classifications },
    };
  }

  /**
   * Add a custom intent category. It is evaluated after the existing ones.
   */
  addCategory(category: IntentCategory): void {
    this.categories.push(category);
  }

  /**
   * Remove every intent category whose name equals `name`.
   */
  removeCategory(name: string): void {
    this.categories = this.categories.filter((c) => c.name !== name);
  }

  /**
   * Get all categories.
   *
   * @returns A shallow copy of the list; the category objects are shared.
   */
  getCategories(): IntentCategory[] {
    return [...this.categories];
  }

  /**
   * Combine the matching signals of one category into a confidence in [0, 1].
   *
   * Each match is treated as independent evidence and merged with a noisy-OR:
   * `1 - product(1 - weight)`. One pattern match yields 0.8, one keyword match
   * 0.5, two keyword matches 0.75. Dividing the summed weights by the number
   * of configured checks (the previous formula) capped every built-in
   * category at 0.65 or less, below the default threshold of 0.7, so
   * validation could never report a built-in category.
   *
   * @returns 0 when nothing matched.
   */
  private scoreCategory(category: IntentCategory, text: string, lowerText: string): number {
    let missProbability = 1;

    for (const pattern of category.patterns ?? []) {
      // Regexes with the `g` or `y` flag resume from `lastIndex`; reset it so
      // repeated calls give the same answer for the same text.
      pattern.lastIndex = 0;
      if (pattern.test(text)) {
        missProbability *= 1 - IntentClassifier.PATTERN_WEIGHT;
      }
    }

    for (const keyword of category.keywords ?? []) {
      if (lowerText.includes(keyword.toLowerCase())) {
        missProbability *= 1 - IntentClassifier.KEYWORD_WEIGHT;
      }
    }

    return 1 - missProbability;
  }

  /**
   * Classify using LLM (for more nuanced understanding).
   *
   * Best effort: when no client is configured, the call fails, or the reply
   * contains no parseable JSON array, an empty list is returned so that the
   * pattern-based results stand on their own.
   */
  private async classifyWithLLM(text: string): Promise<IntentClassification[]> {
    const client = this.config.llmClient;
    if (!client) {
      return [];
    }

    // NOTE(review): `text` is interpolated verbatim, so it can contain quotes
    // or instructions aimed at the model; treat the reply as untrusted.
    const prompt = `Analyze the following text and identify any potentially risky intents.
For each intent found, provide:
- intent: a short name for the intent
- confidence: a number between 0 and 1
- category: a brief description
- riskLevel: one of "low", "medium", "high", "critical"

Text to analyze:
"${text}"

Respond with a JSON array of intent objects. If no risky intents are found, respond with an empty array [].`;

    try {
      const result = await client.generate({
        prompt,
        temperature: 0,
        maxTokens: 500,
      });

      // Take everything from the first '[' to the last ']' in the reply.
      const jsonMatch = result.text.match(/\[[\s\S]*\]/);
      if (!jsonMatch) {
        return [];
      }

      const parsed: unknown = JSON.parse(jsonMatch[0]);
      return IntentClassifier.sanitizeLLMClassifications(parsed);
    } catch {
      // Fall back to pattern-based only
      return [];
    }
  }

  /**
   * Keep only well-formed entries of a parsed LLM reply and normalise them.
   *
   * An entry needs a non-empty string `intent` and a finite `confidence`
   * (numeric strings are converted). Confidence is clamped to [0, 1], an
   * unrecognised `riskLevel` becomes `'medium'`, and a missing `category`
   * falls back to the intent name.
   */
  private static sanitizeLLMClassifications(parsed: unknown): IntentClassification[] {
    if (!Array.isArray(parsed)) {
      return [];
    }

    const items: unknown[] = parsed;
    const sanitized: IntentClassification[] = [];

    for (const item of items) {
      if (typeof item !== 'object' || item === null) {
        continue;
      }
      const { intent, confidence, category, riskLevel } = item as Record<string, unknown>;

      if (typeof intent !== 'string' || intent.length === 0) {
        continue;
      }

      const score = typeof confidence === 'string' ? Number(confidence) : confidence;
      if (typeof score !== 'number' || !Number.isFinite(score)) {
        continue;
      }

      const level = IntentClassifier.LLM_RISK_LEVELS.find((candidate) => candidate === riskLevel);

      sanitized.push({
        intent,
        confidence: Math.min(1, Math.max(0, score)),
        category: typeof category === 'string' ? category : intent,
        riskLevel: level ?? 'medium',
      });
    }

    return sanitized;
  }
}
|
|
372
|
+
|
|
373
|
+
/**
 * Factory for {@link IntentClassifier}.
 *
 * @param config Optional classifier settings; an empty object when omitted.
 * @returns A new classifier constructed with `config`.
 */
export function createIntentClassifier(config: IntentClassifierConfig = {}): IntentClassifier {
  const classifier = new IntentClassifier(config);
  return classifier;
}
|