karajan-code 1.36.0 → 1.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -39
- package/bin/kj-tail +294 -41
- package/docs/README.es.md +150 -47
- package/package.json +2 -1
- package/src/orchestrator/iteration-stages.js +50 -15
- package/src/orchestrator/post-loop-stages.js +25 -11
- package/src/orchestrator/pre-loop-stages.js +28 -20
- package/src/orchestrator/preflight-checks.js +3 -3
- package/src/orchestrator/solomon-escalation.js +3 -2
- package/src/orchestrator.js +6 -5
- package/src/utils/display.js +133 -23
- package/src/utils/injection-guard.js +171 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
/**
 * Prompt-injection guard for AI-reviewed diffs and PR content.
 *
 * Scans text for patterns commonly used to hijack LLM instructions:
 * directive overrides, role reassignment, invisible Unicode, and
 * suspiciously large comment blocks that could hide payloads.
 *
 * @module utils/injection-guard
 */
|
|
10
|
+
|
|
11
|
+
/**
 * Regular expressions for phrases that try to override, replace, or
 * pre-empt the system/reviewer instructions.
 *
 * Every entry is case-insensitive and non-global, so calling `exec`/`test`
 * on it keeps no state between calls and reports the first occurrence only.
 */
const DIRECTIVE_PATTERNS = [
  // --- "Drop what you were told before" ---
  /ignore\s+(all\s+)?(previous|prior|above|earlier)\s+(instructions?|rules?|prompts?|guidelines?)/i,
  /disregard\s+(all\s+)?(previous|prior|above|earlier)\s+(instructions?|rules?|prompts?)/i,
  /forget\s+(all\s+)?(previous|prior|your)\s+(instructions?|rules?|context)/i,
  /override\s+(the\s+)?(system|reviewer|review)\s+(prompt|instructions?|rules?)/i,
  /do\s+not\s+follow\s+(the\s+)?(above|previous|system)\s+(instructions?|rules?)/i,

  // --- "Here is your new role / new rule set" ---
  /new\s+instructions?\s*:/i,
  /system\s*:\s*you\s+are/i,
  /\bact\s+as\s+(a\s+)?(different|new|my|an?\s+)/i,
  /you\s+are\s+now\s+(a\s+)?/i,
  /from\s+now\s+on\s*(,|\s)?\s*you\s+(will|should|must|are)/i,
  /pretend\s+(you\s+are|to\s+be)/i,

  // --- "Emit an approval / this exact payload" ---
  /respond\s+(only\s+)?with\s+["']?approved["']?/i,
  /always\s+(return|respond|output)\s+.*approved/i,
  /set\s+approved\s*[=:]\s*true/i,
  /\bapproved["']?\s*:\s*true\b.*ignore/i,
  /output\s+the\s+following\s+json/i,
  /return\s+this\s+exact\s+(json|response|output)/i,
];
|
|
31
|
+
|
|
32
|
+
/**
 * Invisible or direction-altering Unicode that can hide or disguise content.
 *
 * All entries are non-global, so `exec`/`test` keep no state between calls
 * and report the first occurrence only.
 */
const UNICODE_PATTERNS = [
  /[\u200B-\u200F]/, // zero-width spaces/joiners, LTR/RTL marks
  /[\u202A-\u202E]/, // bidi embedding/override
  /[\u2066-\u2069]/, // bidi isolate
  // BOM mid-text only: a single U+FEFF at index 0 is a regular byte-order
  // mark, so the start of the text is excluded (`^` has no `m` flag here and
  // therefore only matches at index 0).
  /(?!^)\uFEFF/,
  /[\u00AD]{3,}/, // three or more consecutive soft hyphens
  /[\u2060-\u2064]/, // word joiner and invisible operators
  /[\u{E0000}-\u{E007F}]/u, // tag characters
];
|
|
42
|
+
|
|
43
|
+
/**
 * Default size limit, in characters, for a single comment block.
 * Blocks longer than this are reported as findings.
 */
const MAX_COMMENT_BLOCK = 2000;
|
|
45
|
+
|
|
46
|
+
/**
 * Block-comment matchers for several common languages.
 *
 * Each regex matches one whole block, delimiters included, and carries the
 * `g` flag so repeated `exec` calls walk through every block in a text
 * (`lastIndex` is stateful: run the loop to completion or reset it).
 *
 * NOTE: the body quantifier is lazy and capped at 20000 characters, so a
 * block whose body is longer than that does not match at all.
 */
const COMMENT_BLOCK_RE = [
  // C-style: /* ... */
  /\/\*[\s\S]{0,20000}?\*\//g,
  // HTML / XML: <!-- ... -->
  /<!--[\s\S]{0,20000}?-->/g,
  // Python docstrings, double-quoted
  /"""\s*[\s\S]{0,20000}?\s*"""/g,
  // Python docstrings, single-quoted
  /'''\s*[\s\S]{0,20000}?\s*'''/g,
  // Ruby: =begin ... =end
  /=begin[\s\S]{0,20000}?=end/g,
];
|
|
57
|
+
|
|
58
|
+
/**
 * @typedef {Object} InjectionFinding
 * @property {"directive"|"unicode"|"comment_block"} type
 * @property {string} pattern - short label of what matched
 * @property {string} snippet - excerpt of the offending text (max 120 chars)
 * @property {number} [line] - approximate line number (1-based)
 */

/**
 * @typedef {Object} GuardResult
 * @property {boolean} clean - true if no threats detected
 * @property {InjectionFinding[]} findings
 * @property {string} summary - human-readable one-liner
 */
|
|
72
|
+
|
|
73
|
+
/**
 * Open/close delimiter pairs of the block-comment styles checked for
 * oversized payloads: C-style, HTML, Python docstrings (double and single
 * quoted) and Ruby `=begin`/`=end`.
 *
 * Literal delimiters are scanned with `indexOf` instead of the length-capped
 * COMMENT_BLOCK_RE regexes: those cannot match a block whose body exceeds
 * their 20000-character cap, so the largest blocks would never be reported.
 */
const COMMENT_DELIMITERS = [
  ["/*", "*/"],
  ["<!--", "-->"],
  ['"""', '"""'],
  ["'''", "'''"],
  ["=begin", "=end"],
];

/**
 * Find every non-overlapping `open ... close` block in `text`, left to right.
 * Blocks do not nest: each opener is paired with the first closer after it.
 *
 * @param {string} text
 * @param {string} open - opening delimiter
 * @param {string} close - closing delimiter
 * @returns {{index: number, length: number}[]} start offset and total length
 *   (delimiters included) of each block
 */
function findCommentBlocks(text, open, close) {
  const blocks = [];
  let from = 0;
  for (;;) {
    const start = text.indexOf(open, from);
    if (start === -1) break;
    const closeAt = text.indexOf(close, start + open.length);
    // No closer after this opener means none after any later opener either.
    if (closeAt === -1) break;
    const end = closeAt + close.length;
    blocks.push({ index: start, length: end - start });
    from = end;
  }
  return blocks;
}

/**
 * Scan a text (diff, PR description, comment) for prompt-injection signals.
 *
 * Three checks run in order, and findings are reported in that order:
 *  1. directive phrases (first occurrence of each DIRECTIVE_PATTERNS entry),
 *  2. invisible Unicode (first occurrence of each UNICODE_PATTERNS entry),
 *  3. every comment block longer than the configured limit, however large.
 *
 * @param {string} text - content to scan; empty or non-string input is
 *   reported as clean with the summary "Nothing to scan"
 * @param {Object} [opts]
 * @param {number} [opts.maxCommentBlock=2000] - flag comment blocks larger than this
 * @returns {GuardResult}
 */
export function scanForInjection(text, opts = {}) {
  if (!text || typeof text !== "string") {
    return { clean: true, findings: [], summary: "Nothing to scan" };
  }

  // `opts?.` so an explicit `null` options argument falls back to the default.
  const maxBlock = opts?.maxCommentBlock ?? MAX_COMMENT_BLOCK;
  /** @type {InjectionFinding[]} */
  const findings = [];

  // 1. Directive patterns
  for (const re of DIRECTIVE_PATTERNS) {
    const match = re.exec(text);
    if (match) {
      findings.push({
        type: "directive",
        pattern: re.source.slice(0, 60),
        snippet: excerpt(text, match.index),
        line: lineAt(text, match.index),
      });
    }
  }

  // 2. Invisible Unicode
  for (const re of UNICODE_PATTERNS) {
    const match = re.exec(text);
    if (match) {
      const charCode = match[0].codePointAt(0).toString(16).toUpperCase();
      findings.push({
        type: "unicode",
        pattern: `U+${charCode}`,
        snippet: excerpt(text, match.index),
        line: lineAt(text, match.index),
      });
    }
  }

  // 3. Oversized comment blocks
  for (const [open, close] of COMMENT_DELIMITERS) {
    for (const block of findCommentBlocks(text, open, close)) {
      if (block.length > maxBlock) {
        findings.push({
          type: "comment_block",
          pattern: `block ${block.length} chars (max ${maxBlock})`,
          snippet: excerpt(text, block.index),
          line: lineAt(text, block.index),
        });
      }
    }
  }

  const clean = findings.length === 0;
  const summary = clean
    ? "No injection patterns detected"
    : `${findings.length} potential injection(s): ${[...new Set(findings.map((f) => f.type))].join(", ")}`;

  return { clean, findings, summary };
}
|
|
139
|
+
|
|
140
|
+
/**
 * Unified-diff hunk header: `@@ -start[,count] +start[,count] @@`.
 * Captures the old and new line counts; an omitted count means 1.
 */
const HUNK_HEADER_RE = /^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@/;

/**
 * Collect the content of every added line of a unified diff, joined by "\n"
 * and with the leading "+" marker removed.
 *
 * Inside a hunk the line counts announced by the `@@` header decide what is
 * hunk content, so an added line whose own text starts with "++" (rendered
 * as "+++ ..." in the diff) is kept instead of being mistaken for a
 * `+++ b/file` header. Outside any hunk, "+" lines are kept unless they
 * start with "+++", which also covers header-less fragments.
 *
 * @param {string} diff - unified diff content
 * @returns {string}
 */
function extractAddedLines(diff) {
  const added = [];
  let oldLeft = 0; // old-side lines still expected in the current hunk
  let newLeft = 0; // new-side lines still expected in the current hunk

  for (const line of diff.split("\n")) {
    // Hunk body lines start with " ", "+", "-" or "\", so a line that starts
    // with "@@ -" is always a header, even if the previous hunk ran short.
    const header = HUNK_HEADER_RE.exec(line);
    if (header) {
      oldLeft = Number(header[1] ?? 1);
      newLeft = Number(header[2] ?? 1);
      continue;
    }

    const marker = line.charAt(0);
    const inHunk = oldLeft > 0 || newLeft > 0;

    if (inHunk && (marker === "" || " +-\\".includes(marker))) {
      if (marker === "+") {
        added.push(line.slice(1));
        newLeft -= 1;
      } else if (marker === "-") {
        oldLeft -= 1;
      } else if (marker !== "\\") {
        // Context line; an empty line is a context line whose single
        // leading space was stripped. "\ No newline..." counts for nothing.
        oldLeft -= 1;
        newLeft -= 1;
      }
      continue;
    }

    // Outside a hunk (or the hunk was cut short by an unexpected line).
    oldLeft = 0;
    newLeft = 0;
    if (marker === "+" && !line.startsWith("+++")) {
      added.push(line.slice(1));
    }
  }

  return added.join("\n");
}

/**
 * Scan a git diff, focusing only on added lines (lines starting with +).
 * Removed lines cannot inject into the AI prompt.
 *
 * Line numbers in the findings are relative to the extracted added lines,
 * not to the diff itself.
 *
 * @param {string} diff - unified diff content; empty or non-string input is
 *   reported as clean with the summary "Empty diff"
 * @param {Object} [opts] - forwarded unchanged to scanForInjection
 * @returns {GuardResult}
 */
export function scanDiff(diff, opts = {}) {
  if (!diff || typeof diff !== "string") {
    return { clean: true, findings: [], summary: "Empty diff" };
  }

  return scanForInjection(extractAddedLines(diff), opts);
}
|
|
160
|
+
|
|
161
|
+
// --- Helpers ---
|
|
162
|
+
|
|
163
|
+
/**
 * Build a short single-line snippet of `text` around `index`.
 *
 * The window covers up to 20 characters before `index` and up to 100 from
 * it, clamped to the text. Each newline is shown as the two characters `\n`,
 * and the result is cut to 120 characters.
 *
 * @param {string} text - full text the match was found in
 * @param {number} index - offset of the match within `text`
 * @returns {string}
 */
function excerpt(text, index) {
  const windowStart = Math.max(0, index - 20);
  const windowEnd = Math.min(text.length, index + 100);
  const raw = text.slice(windowStart, windowEnd);

  // Escaping lengthens the string, so trim again afterwards.
  const oneLine = raw.split("\n").join("\\n");
  return oneLine.slice(0, 120);
}
|
|
168
|
+
|
|
169
|
+
/**
 * 1-based line number of the character at `index` in `text`:
 * one plus the number of newlines in the text before `index`.
 *
 * @param {string} text
 * @param {number} index - offset within `text`
 * @returns {number}
 */
function lineAt(text, index) {
  const before = text.slice(0, index);
  let lineNumber = 1;
  let pos = before.indexOf("\n");
  while (pos !== -1) {
    lineNumber += 1;
    pos = before.indexOf("\n", pos + 1);
  }
  return lineNumber;
}
|