@reverse-craft/ai-tools 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -6
- package/dist/__tests__/batchProcessing.property.test.d.ts +10 -0
- package/dist/__tests__/batchProcessing.property.test.d.ts.map +1 -0
- package/dist/__tests__/errorHandling.property.test.d.ts +11 -0
- package/dist/__tests__/errorHandling.property.test.d.ts.map +1 -0
- package/dist/__tests__/llmConfig.property.test.d.ts +48 -0
- package/dist/__tests__/llmConfig.property.test.d.ts.map +1 -0
- package/dist/__tests__/mergeResults.property.test.d.ts +12 -0
- package/dist/__tests__/mergeResults.property.test.d.ts.map +1 -0
- package/dist/__tests__/tokenizer.property.test.d.ts +20 -0
- package/dist/__tests__/tokenizer.property.test.d.ts.map +1 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/jsvmpDetector.d.ts +70 -5
- package/dist/jsvmpDetector.d.ts.map +1 -1
- package/dist/llmConfig.d.ts +36 -1
- package/dist/llmConfig.d.ts.map +1 -1
- package/dist/server.js +311 -115
- package/dist/server.js.map +3 -3
- package/dist/tokenizer.d.ts +23 -0
- package/dist/tokenizer.d.ts.map +1 -0
- package/dist/tools/findJsvmpDispatcherTool.d.ts +2 -4
- package/dist/tools/findJsvmpDispatcherTool.d.ts.map +1 -1
- package/dist/tools/index.d.ts +1 -2
- package/dist/tools/index.d.ts.map +1 -1
- package/package.json +6 -1
package/dist/server.js
CHANGED
|
@@ -19,118 +19,220 @@ import { ensureBeautified, truncateCodeHighPerf } from "@reverse-craft/smart-fs"
|
|
|
19
19
|
import { existsSync } from "fs";
|
|
20
20
|
|
|
21
21
|
// src/llmConfig.ts
|
|
22
|
+
import { generateText } from "ai";
|
|
23
|
+
import { createOpenAI } from "@ai-sdk/openai";
|
|
24
|
+
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
25
|
+
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
26
|
+
/**
 * Model used for each provider when no model is configured through the
 * environment.
 */
var PROVIDER_DEFAULTS = {
  openai: { model: "gpt-4o-mini" },
  anthropic: { model: "claude-sonnet-4-20250514" },
  google: { model: "gemini-2.0-flash" }
};

/**
 * Names of the provider-specific environment variables, keyed by provider.
 *
 * Every provider follows the same `<PROVIDER>_API_KEY`, `<PROVIDER>_MODEL`,
 * `<PROVIDER>_BASE_URL` pattern (e.g. OPENAI_API_KEY, ANTHROPIC_MODEL,
 * GOOGLE_BASE_URL), so the table is derived from the provider names.
 */
var PROVIDER_ENV_KEYS = Object.fromEntries(
  ["openai", "anthropic", "google"].map((providerName) => {
    const prefix = providerName.toUpperCase();
    return [
      providerName,
      {
        apiKey: `${prefix}_API_KEY`,
        model: `${prefix}_MODEL`,
        baseUrl: `${prefix}_BASE_URL`
      }
    ];
  })
);
|
|
48
|
+
/**
 * Narrow an arbitrary value to one of the supported provider identifiers.
 *
 * The comparison is strict and case-sensitive.
 *
 * @param {unknown} value - Candidate provider name.
 * @returns {"openai" | "anthropic" | "google" | null} The value itself when
 *   it is a supported provider; null for anything else, including undefined.
 */
function validateProvider(value) {
  switch (value) {
    case "openai":
    case "anthropic":
    case "google":
      return value;
    default:
      return null;
  }
}
|
|
22
55
|
/**
 * Build the LLM configuration from environment variables.
 *
 * Resolution rules:
 * - Provider: LLM_PROVIDER (trimmed, case-insensitive); "openai" when unset or
 *   empty. An unrecognised value logs a warning and yields null.
 * - API key: the provider-specific variable from PROVIDER_ENV_KEYS; a missing
 *   or empty key yields null.
 * - Model: LLM_MODEL, then the provider-specific variable, then the provider
 *   default from PROVIDER_DEFAULTS.
 * - Base URL: LLM_BASE_URL, then the provider-specific variable; may be
 *   undefined.
 *
 * @returns {{provider: string, apiKey: string, model: string, baseUrl: (string|undefined)} | null}
 *   The resolved configuration, or null when no usable configuration exists.
 */
function getLLMConfig() {
  // An empty or whitespace-only LLM_PROVIDER is treated as "not set", matching
  // how empty LLM_MODEL / LLM_BASE_URL values fall through via `||` below.
  const providerEnv = process.env.LLM_PROVIDER?.trim().toLowerCase() || void 0;
  const provider = validateProvider(providerEnv);
  if (provider === null && providerEnv !== void 0) {
    console.warn(`Invalid LLM_PROVIDER: ${providerEnv}. Valid values: openai, anthropic, google`);
    return null;
  }
  const effectiveProvider = provider ?? "openai";
  const envKeys = PROVIDER_ENV_KEYS[effectiveProvider];
  const apiKey = process.env[envKeys.apiKey];
  if (!apiKey) {
    return null;
  }
  const model = process.env.LLM_MODEL || process.env[envKeys.model] || PROVIDER_DEFAULTS[effectiveProvider].model;
  const baseUrl = process.env.LLM_BASE_URL || process.env[envKeys.baseUrl];
  return {
    provider: effectiveProvider,
    apiKey,
    model,
    baseUrl
  };
}
|
|
35
77
|
function buildJSVMPSystemPrompt() {
|
|
36
|
-
return
|
|
78
|
+
return `You are a Senior JavaScript Reverse Engineer and De-obfuscation Expert. Your specialty is analyzing **JSVMP (JavaScript Virtual Machine Protection)**.
|
|
79
|
+
|
|
80
|
+
**Context: What is JSVMP?**
|
|
81
|
+
JSVMP is a protection technique where original JavaScript code is compiled into custom **bytecode** and executed by a custom **interpreter** (virtual machine) written in JavaScript.
|
|
37
82
|
|
|
38
|
-
JSVMP
|
|
83
|
+
Key components of JSVMP code include:
|
|
84
|
+
1. **The Virtual Stack:** A central array used to store operands and results (e.g., \`stack[pointer++]\` or \`v[p--]\`).
|
|
85
|
+
2. **The Dispatcher:** A control flow structure inside a loop that decides which instruction to execute next based on the current bytecode (opcode).
|
|
86
|
+
* *Common variants:* A massive \`switch\` statement, a deeply nested \`if-else\` chain (binary search style), or a function array mapping (\`handlers[opcode]()\`).
|
|
87
|
+
3. **The Bytecode:** A large string or array of integers representing the program logic.
|
|
39
88
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
3. **\u6307\u4EE4\u6570\u7EC4\uFF08Instruction Array\uFF09**\uFF1A\u5B58\u50A8\u5B57\u8282\u7801\u6307\u4EE4\u7684\u6570\u7EC4
|
|
43
|
-
4. **\u4E3B\u5FAA\u73AF\uFF08Main Loop\uFF09**\uFF1Awhile \u5FAA\u73AF\u6301\u7EED\u6267\u884C\u6307\u4EE4
|
|
89
|
+
**Task:**
|
|
90
|
+
Analyze the provided JavaScript code snippet to identify regions that match JSVMP structural patterns.
|
|
44
91
|
|
|
45
|
-
|
|
92
|
+
**Input Data Format:**
|
|
93
|
+
The code is provided in a simplified format: \`LineNo SourceLoc Code\`.
|
|
94
|
+
* **Example:** \`10 L234:56 var x = stack[p++];\`
|
|
95
|
+
* **Instruction:** Focus on the **LineNo** (1st column) and **Code** (3rd column onwards). Ignore the \`SourceLoc\` (middle column).
|
|
46
96
|
|
|
47
|
-
**
|
|
48
|
-
|
|
49
|
-
- \u5206\u53D1\u5668\u6709 >20 \u4E2A case \u6216 >10 \u5C42\u5D4C\u5957
|
|
50
|
-
- \u660E\u786E\u7684\u6808\u64CD\u4F5C\u6A21\u5F0F\uFF08push/pop/\u6570\u7EC4\u7D22\u5F15\uFF09
|
|
97
|
+
**Detection Rules & Confidence Levels:**
|
|
98
|
+
Please assign confidence based on the following criteria:
|
|
51
99
|
|
|
52
|
-
**High
|
|
53
|
-
|
|
54
|
-
-
|
|
100
|
+
* **Ultra High:**
|
|
101
|
+
* A combination of a **Main Loop** + **Dispatcher** + **Stack Operations** appears in the same block.
|
|
102
|
+
* *Example:* A \`while(true)\` loop containing a huge \`if-else\` chain where branches perform \`stack[p++]\` operations.
|
|
55
103
|
|
|
56
|
-
**
|
|
57
|
-
-
|
|
58
|
-
|
|
104
|
+
* **High:**
|
|
105
|
+
* Distinct **Dispatcher** structures found (e.g., a \`switch\` with >20 cases, or an \`if-else\` chain nested >10 levels deep checking integer values).
|
|
106
|
+
* Large arrays containing only function definitions (Instruction Handlers).
|
|
59
107
|
|
|
60
|
-
**
|
|
61
|
-
-
|
|
62
|
-
|
|
108
|
+
* **Medium:**
|
|
109
|
+
* Isolated **Stack Operations** (e.g., \`v2[p2] = v2[p2 - 1]\`) without visible dispatchers nearby.
|
|
110
|
+
* Suspicious \`while\` loops iterating over a string/array.
|
|
63
111
|
|
|
64
|
-
|
|
112
|
+
* **Low:**
|
|
113
|
+
* Generic obfuscation patterns (short variable names, comma operators) that *might* be part of a VM but lack specific structural proof.
|
|
65
114
|
|
|
115
|
+
**Output Format:**
|
|
116
|
+
Return **ONLY valid JSON**. No markdown wrapper, no conversational text.
|
|
117
|
+
|
|
118
|
+
**JSON Schema:**
|
|
66
119
|
{
|
|
67
|
-
"summary": "
|
|
120
|
+
"summary": "Brief analysis of the code structure in chinese, shortly",
|
|
68
121
|
"regions": [
|
|
69
122
|
{
|
|
70
|
-
"start":
|
|
71
|
-
"end":
|
|
72
|
-
"type": "If-Else Dispatcher
|
|
73
|
-
"confidence": "ultra_high
|
|
74
|
-
"description": "
|
|
123
|
+
"start": <start_line>,
|
|
124
|
+
"end": <end_line>,
|
|
125
|
+
"type": "<If-Else Dispatcher | Switch Dispatcher | Instruction Array | Stack Operation>",
|
|
126
|
+
"confidence": "<ultra_high | high | medium | low>",
|
|
127
|
+
"description": "<Why you flagged this. Mention specific variables like 'v2', 'p2' or structures. in chinese, shortly>"
|
|
75
128
|
}
|
|
76
129
|
]
|
|
130
|
+
}`;
|
|
77
131
|
}
|
|
78
|
-
|
|
79
|
-
|
|
132
|
+
/**
 * Create the AI SDK model instance for the configured provider.
 *
 * @param {{provider: string, apiKey: string, model: string, baseUrl?: string}} config
 *   Resolved LLM configuration. `baseUrl` is forwarded as the provider
 *   factory's `baseURL` setting and may be undefined.
 * @returns {*} The value returned by calling the provider factory's result
 *   with `config.model`.
 * @throws {Error} When `config.provider` is not "openai", "anthropic" or
 *   "google".
 */
function createProviderModel(config) {
  const settings = {
    apiKey: config.apiKey,
    baseURL: config.baseUrl
  };
  switch (config.provider) {
    case "openai":
      return createOpenAI(settings)(config.model);
    case "anthropic":
      return createAnthropic(settings)(config.model);
    case "google":
      return createGoogleGenerativeAI(settings)(config.model);
    default:
      // Fail loudly here instead of returning undefined and letting the
      // request fail later with an unrelated-looking error.
      throw new Error(`Unsupported LLM provider: ${String(config.provider)}`);
  }
}
|
|
81
157
|
/**
 * Create a client that sends JSVMP analysis requests to the configured LLM.
 *
 * @param {{provider: string, apiKey: string, model: string, baseUrl?: string}} config
 *   Resolved LLM configuration.
 * @returns {{analyzeJSVMP: (formattedCode: string) => Promise<string>}}
 *   Client whose `analyzeJSVMP` resolves to the text returned by the model.
 */
function createLLMClient(config) {
  const model = createProviderModel(config);
  return {
    /**
     * Ask the model to analyse one chunk of formatted code.
     *
     * @param {string} formattedCode - Code in the `LineNo SourceLoc Code` format.
     * @returns {Promise<string>} The model's text response.
     * @throws {Error} Wrapped failure prefixed with the provider name; the
     *   original error is kept on `cause`.
     */
    async analyzeJSVMP(formattedCode) {
      const systemPrompt = buildJSVMPSystemPrompt();
      try {
        const { text } = await generateText({
          model,
          system: systemPrompt,
          prompt: `\u8BF7\u5206\u6790\u4EE5\u4E0B\u4EE3\u7801\uFF0C\u8BC6\u522B JSVMP \u4FDD\u62A4\u7ED3\u6784\uFF1A\n\n${formattedCode}`,
          temperature: 0.1
        });
        return text;
      } catch (error) {
        const providerName = config.provider.charAt(0).toUpperCase() + config.provider.slice(1);
        const detail = error instanceof Error ? error.message : String(error);
        // Keep the original error (and its stack) reachable for debugging.
        throw new Error(`${providerName} LLM \u8BF7\u6C42\u5931\u8D25: ${detail}`, { cause: error });
      }
    }
  };
}
|
|
133
182
|
|
|
183
|
+
// src/tokenizer.ts
|
|
184
|
+
import { encoding_for_model } from "tiktoken";
|
|
185
|
+
/** Model name whose encoding is used when the caller does not pass one. */
var DEFAULT_MODEL = "gpt-4o";

/**
 * Count the tokens in a piece of text.
 *
 * @param {string} text - Text to tokenize.
 * @param {string} [model] - Model name passed to `encoding_for_model`;
 *   DEFAULT_MODEL when null or undefined.
 * @returns {number} Number of tokens produced by the encoder.
 */
function countTokens(text, model) {
  const encoder = encoding_for_model(model ?? DEFAULT_MODEL);
  try {
    return encoder.encode(text).length;
  } finally {
    // Always release the encoder, even when encoding throws.
    encoder.free();
  }
}
|
|
195
|
+
/**
 * Group consecutive lines into batches whose token totals stay within a limit.
 *
 * Each line is costed as the token count of the line plus a trailing newline.
 * A single line that exceeds the limit on its own is emitted as a batch of
 * one; lines are never split or reordered.
 *
 * @param {string[]} lines - Lines to group.
 * @param {number} maxTokens - Token budget per batch.
 * @param {string} [model] - Model name passed to `encoding_for_model`;
 *   DEFAULT_MODEL when null or undefined.
 * @returns {string[][]} Batches of lines in their original order; empty when
 *   `lines` is empty.
 * @throws {Error} When `lines` is non-empty and `maxTokens` is zero or negative.
 */
function splitByTokenLimit(lines, maxTokens, model) {
  if (lines.length === 0) {
    return [];
  }
  if (maxTokens <= 0) {
    throw new Error("maxTokens must be a positive number");
  }

  const batches = [];
  let pending = [];
  let pendingTokens = 0;

  // Move the batch under construction (if any) into the output.
  const flushPending = () => {
    if (pending.length === 0) {
      return;
    }
    batches.push(pending);
    pending = [];
    pendingTokens = 0;
  };

  const encoder = encoding_for_model(model ?? DEFAULT_MODEL);
  try {
    for (const line of lines) {
      const cost = encoder.encode(line + "\n").length;

      if (cost > maxTokens) {
        // Oversized line: close the current batch and give the line its own.
        flushPending();
        batches.push([line]);
        continue;
      }

      if (pendingTokens + cost > maxTokens) {
        flushPending();
      }
      pending.push(line);
      pendingTokens += cost;
    }
    flushPending();
    return batches;
  } finally {
    // Always release the encoder, even when encoding throws.
    encoder.free();
  }
}
|
|
235
|
+
|
|
134
236
|
// src/jsvmpDetector.ts
|
|
135
237
|
function formatSourcePosition(line, column) {
|
|
136
238
|
if (line !== null && column !== null) {
|
|
@@ -143,14 +245,12 @@ function formatCodeLine(lineNumber, sourcePos, code) {
|
|
|
143
245
|
const srcPosPadded = sourcePos ? sourcePos.padEnd(10, " ") : " ";
|
|
144
246
|
return `${lineNumStr} ${srcPosPadded} ${code}`;
|
|
145
247
|
}
|
|
146
|
-
async function
|
|
248
|
+
async function formatEntireFile(filePath, charLimit = 300) {
|
|
147
249
|
const beautifyResult = await ensureBeautified(filePath);
|
|
148
250
|
const { code, rawMap } = beautifyResult;
|
|
149
251
|
const truncatedCode = truncateCodeHighPerf(code, charLimit);
|
|
150
|
-
const
|
|
151
|
-
const totalLines =
|
|
152
|
-
const effectiveStartLine = Math.max(1, Math.min(totalLines, startLine));
|
|
153
|
-
const effectiveEndLine = Math.max(effectiveStartLine, Math.min(totalLines, endLine));
|
|
252
|
+
const codeLines = truncatedCode.split("\n");
|
|
253
|
+
const totalLines = codeLines.length;
|
|
154
254
|
const formattedLines = [];
|
|
155
255
|
let consumer = null;
|
|
156
256
|
if (rawMap && rawMap.sources && rawMap.names && rawMap.mappings) {
|
|
@@ -163,9 +263,9 @@ async function formatCodeForAnalysis(filePath, startLine, endLine, charLimit = 3
|
|
|
163
263
|
sourceRoot: rawMap.sourceRoot
|
|
164
264
|
});
|
|
165
265
|
}
|
|
166
|
-
for (let lineNum =
|
|
266
|
+
for (let lineNum = 1; lineNum <= totalLines; lineNum++) {
|
|
167
267
|
const lineIndex = lineNum - 1;
|
|
168
|
-
const lineContent =
|
|
268
|
+
const lineContent = codeLines[lineIndex] ?? "";
|
|
169
269
|
let sourcePos = "";
|
|
170
270
|
if (consumer) {
|
|
171
271
|
const originalPos = consumer.originalPositionFor({
|
|
@@ -177,12 +277,35 @@ async function formatCodeForAnalysis(filePath, startLine, endLine, charLimit = 3
|
|
|
177
277
|
formattedLines.push(formatCodeLine(lineNum, sourcePos, lineContent));
|
|
178
278
|
}
|
|
179
279
|
return {
|
|
180
|
-
|
|
181
|
-
totalLines
|
|
182
|
-
startLine: effectiveStartLine,
|
|
183
|
-
endLine: effectiveEndLine
|
|
280
|
+
lines: formattedLines,
|
|
281
|
+
totalLines
|
|
184
282
|
};
|
|
185
283
|
}
|
|
284
|
+
/**
 * Read the leading line number from a formatted code line.
 *
 * The number is parsed from the start of the line (leading whitespace is
 * skipped) up to the first non-digit character, so it is not limited to a
 * fixed column width. The earlier implementation looked only at the first
 * five characters, which truncated line numbers of six or more digits.
 * NOTE(review): the exact padding applied by formatCodeLine is not visible
 * here — confirm it always emits the line number first.
 *
 * @param {string} formattedLine - Line that begins with the line number.
 * @returns {number} The parsed line number, or NaN when the line does not
 *   start with a number.
 */
function extractLineNumber(formattedLine) {
  return Number.parseInt(formattedLine, 10);
}
|
|
288
|
+
/**
 * Split formatted lines into token-limited batches with line-range metadata.
 *
 * @param {string[]} formattedLines - Formatted code lines, each starting with
 *   its line number.
 * @param {number} maxTokensPerBatch - Token budget handed to splitByTokenLimit.
 * @returns {{startLine: number, endLine: number, content: string, tokenCount: number}[]}
 *   One entry per non-empty batch: the line numbers parsed from its first and
 *   last lines, the lines joined with "\n", and the token count of that text.
 */
function createBatches(formattedLines, maxTokensPerBatch) {
  if (formattedLines.length === 0) {
    return [];
  }
  return splitByTokenLimit(formattedLines, maxTokensPerBatch)
    .filter((group) => group.length > 0)
    .map((group) => {
      const startLine = extractLineNumber(group[0]);
      const endLine = extractLineNumber(group[group.length - 1]);
      const content = group.join("\n");
      return {
        startLine,
        endLine,
        content,
        tokenCount: countTokens(content)
      };
    });
}
|
|
186
309
|
var VALID_DETECTION_TYPES = [
|
|
187
310
|
"If-Else Dispatcher",
|
|
188
311
|
"Switch Dispatcher",
|
|
@@ -262,10 +385,10 @@ function parseDetectionResult(jsonString) {
|
|
|
262
385
|
regions: validatedRegions
|
|
263
386
|
};
|
|
264
387
|
}
|
|
265
|
-
function formatDetectionResultOutput(result, filePath,
|
|
388
|
+
function formatDetectionResultOutput(result, filePath, totalLines, batchCount) {
|
|
266
389
|
const lines = [];
|
|
267
390
|
lines.push("=== JSVMP Dispatcher Detection Result ===");
|
|
268
|
-
lines.push(`File: ${filePath} (${
|
|
391
|
+
lines.push(`File: ${filePath} (${totalLines} lines, ${batchCount} batch${batchCount > 1 ? "es" : ""})`);
|
|
269
392
|
lines.push("");
|
|
270
393
|
lines.push(`Summary: ${result.summary}`);
|
|
271
394
|
lines.push("");
|
|
@@ -281,15 +404,80 @@ function formatDetectionResultOutput(result, filePath, startLine, endLine) {
|
|
|
281
404
|
}
|
|
282
405
|
return lines.join("\n");
|
|
283
406
|
}
|
|
284
|
-
|
|
407
|
+
/**
 * Combine per-batch detection results into a single result.
 *
 * - No results: empty summary and no regions.
 * - One result: its summary, with a copy of its regions sorted by `start`
 *   (no overlap handling is applied).
 * - Several results: summaries are joined line by line with a `[Batch N]`
 *   prefix; regions are pooled, sorted by `start`, and regions that overlap
 *   an already kept one are collapsed so that only the higher-confidence
 *   region survives (the kept one wins ties).
 *
 * @param {{summary: string, regions: {start: number, end: number, confidence: string}[]}[]} results
 *   Detection results in batch order.
 * @returns {{summary: string, regions: object[]}} The merged result.
 */
function mergeDetectionResults(results) {
  const byStart = (left, right) => left.start - right.start;

  if (results.length === 0) {
    return { summary: "", regions: [] };
  }
  if (results.length === 1) {
    const only = results[0];
    return { summary: only.summary, regions: [...only.regions].sort(byStart) };
  }

  const summary = results
    .map((result, index) => `[Batch ${index + 1}] ${result.summary}`)
    .join("\n");

  const rank = { ultra_high: 4, high: 3, medium: 2, low: 1 };
  const kept = [];
  const ordered = results.flatMap((result) => result.regions).sort(byStart);

  for (const candidate of ordered) {
    // First kept region whose inclusive line range intersects the candidate's.
    const hit = kept.findIndex(
      (held) => candidate.start <= held.end && candidate.end >= held.start
    );
    if (hit === -1) {
      kept.push(candidate);
    } else if (rank[candidate.confidence] > rank[kept[hit].confidence]) {
      kept[hit] = candidate;
    }
  }

  return { summary, regions: kept };
}
|
|
452
|
+
/**
 * Analyse one batch with the LLM and parse its reply.
 *
 * @param {{analyzeJSVMP: (code: string) => Promise<string>}} client - LLM client.
 * @param {{content: string}} batch - Batch whose `content` is sent for analysis.
 * @returns {Promise<*>} The value produced by parseDetectionResult for the
 *   model's response.
 */
async function processBatch(client, batch) {
  return parseDetectionResult(await client.analyzeJSVMP(batch.content));
}
|
|
456
|
+
/**
 * Process batches one after another, collecting successes and failures.
 *
 * A failing batch does not stop the run: its error is recorded as a message
 * naming the 1-based batch number and its line range, and processing moves on
 * to the next batch.
 *
 * @param {{analyzeJSVMP: (code: string) => Promise<string>}} client - LLM client.
 * @param {{startLine: number, endLine: number, content: string}[]} batches
 *   Batches to process, in order.
 * @returns {Promise<{results: Array, errors: string[]}>} Parsed results of the
 *   batches that succeeded and one message per batch that failed.
 */
async function processBatchesWithErrorHandling(client, batches) {
  const outcome = { results: [], errors: [] };
  for (const [index, batch] of batches.entries()) {
    try {
      const parsed = await processBatch(client, batch);
      outcome.results.push(parsed);
    } catch (failure) {
      const reason = failure instanceof Error ? failure.message : String(failure);
      outcome.errors.push(
        `Batch ${index + 1} (lines ${batch.startLine}-${batch.endLine}) failed: ${reason}`
      );
    }
  }
  return outcome;
}
|
|
471
|
+
async function findJsvmpDispatcher(filePath, options) {
|
|
285
472
|
const charLimit = options?.charLimit ?? 300;
|
|
473
|
+
const maxTokensPerBatch = options?.maxTokensPerBatch ?? 8e3;
|
|
286
474
|
const config = getLLMConfig();
|
|
287
475
|
if (!config) {
|
|
288
476
|
return {
|
|
289
477
|
success: false,
|
|
290
478
|
filePath,
|
|
291
|
-
|
|
292
|
-
|
|
479
|
+
totalLines: 0,
|
|
480
|
+
batchCount: 0,
|
|
293
481
|
error: "\u672A\u914D\u7F6E LLM\u3002\u8BF7\u8BBE\u7F6E\u73AF\u5883\u53D8\u91CF OPENAI_API_KEY \u4EE5\u542F\u7528 JSVMP dispatcher \u68C0\u6D4B\u529F\u80FD\u3002"
|
|
294
482
|
};
|
|
295
483
|
}
|
|
@@ -297,36 +485,45 @@ async function findJsvmpDispatcher(filePath, startLine, endLine, options) {
|
|
|
297
485
|
return {
|
|
298
486
|
success: false,
|
|
299
487
|
filePath,
|
|
300
|
-
|
|
301
|
-
|
|
488
|
+
totalLines: 0,
|
|
489
|
+
batchCount: 0,
|
|
302
490
|
error: `\u6587\u4EF6\u4E0D\u5B58\u5728: ${filePath}`
|
|
303
491
|
};
|
|
304
492
|
}
|
|
305
493
|
try {
|
|
306
|
-
const formattedCode = await
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
charLimit
|
|
311
|
-
);
|
|
494
|
+
const formattedCode = await formatEntireFile(filePath, charLimit);
|
|
495
|
+
const totalLines = formattedCode.totalLines;
|
|
496
|
+
const batches = createBatches(formattedCode.lines, maxTokensPerBatch);
|
|
497
|
+
const batchCount = batches.length;
|
|
312
498
|
const client = createLLMClient(config);
|
|
313
|
-
const
|
|
314
|
-
|
|
315
|
-
|
|
499
|
+
const { results, errors } = await processBatchesWithErrorHandling(client, batches);
|
|
500
|
+
if (results.length === 0) {
|
|
501
|
+
return {
|
|
502
|
+
success: false,
|
|
503
|
+
filePath,
|
|
504
|
+
totalLines,
|
|
505
|
+
batchCount,
|
|
506
|
+
error: `\u6240\u6709\u6279\u6B21\u5904\u7406\u5931\u8D25: ${errors.join("; ")}`,
|
|
507
|
+
partialErrors: errors
|
|
508
|
+
};
|
|
509
|
+
}
|
|
510
|
+
const mergedResult = mergeDetectionResults(results);
|
|
511
|
+
const formattedOutput = formatDetectionResultOutput(mergedResult, filePath, totalLines, batchCount);
|
|
316
512
|
return {
|
|
317
513
|
success: true,
|
|
318
514
|
filePath,
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
result,
|
|
322
|
-
formattedOutput
|
|
515
|
+
totalLines,
|
|
516
|
+
batchCount,
|
|
517
|
+
result: mergedResult,
|
|
518
|
+
formattedOutput,
|
|
519
|
+
partialErrors: errors.length > 0 ? errors : void 0
|
|
323
520
|
};
|
|
324
521
|
} catch (error) {
|
|
325
522
|
return {
|
|
326
523
|
success: false,
|
|
327
524
|
filePath,
|
|
328
|
-
|
|
329
|
-
|
|
525
|
+
totalLines: 0,
|
|
526
|
+
batchCount: 0,
|
|
330
527
|
error: error instanceof Error ? error.message : String(error)
|
|
331
528
|
};
|
|
332
529
|
}
|
|
@@ -335,9 +532,8 @@ async function findJsvmpDispatcher(filePath, startLine, endLine, options) {
|
|
|
335
532
|
// src/tools/findJsvmpDispatcherTool.ts
|
|
336
533
|
/**
 * Input schema of the find_jsvmp_dispatcher tool: a required file path plus
 * two optional positive-integer tuning parameters.
 */
var FindJsvmpDispatcherInputSchema = (() => {
  // Both tuning parameters share the same "optional positive integer" shape.
  const optionalPositiveInt = () => z.number().int().positive().optional();
  return {
    filePath: z.string().describe("Path to the JavaScript file to analyze"),
    charLimit: optionalPositiveInt().describe("Character limit for string truncation (default: 300)"),
    maxTokensPerBatch: optionalPositiveInt().describe("Maximum tokens per batch for LLM analysis (default: 200000)")
  };
})();
|
|
342
538
|
var findJsvmpDispatcherTool = defineTool({
|
|
343
539
|
name: "find_jsvmp_dispatcher",
|
|
@@ -349,17 +545,17 @@ JSVMP is a code protection technique that converts JavaScript to bytecode execut
|
|
|
349
545
|
- Instruction Arrays: Arrays storing bytecode instructions
|
|
350
546
|
- Stack Operations: Virtual stack push/pop patterns
|
|
351
547
|
|
|
548
|
+
Automatically splits large files into batches based on token limits and merges results.
|
|
549
|
+
|
|
352
550
|
Returns detection results with confidence levels (ultra_high, high, medium, low) and detailed descriptions.
|
|
353
551
|
|
|
354
552
|
Requires OPENAI_API_KEY environment variable. Optional: OPENAI_BASE_URL, OPENAI_MODEL.`,
|
|
355
553
|
schema: FindJsvmpDispatcherInputSchema,
|
|
356
554
|
handler: async (params) => {
|
|
357
|
-
const { filePath,
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
const result = await findJsvmpDispatcher(filePath, startLine, endLine, {
|
|
362
|
-
charLimit: charLimit ?? 300
|
|
555
|
+
const { filePath, charLimit, maxTokensPerBatch } = params;
|
|
556
|
+
const result = await findJsvmpDispatcher(filePath, {
|
|
557
|
+
charLimit: charLimit ?? 300,
|
|
558
|
+
maxTokensPerBatch: maxTokensPerBatch ?? 2e5
|
|
363
559
|
});
|
|
364
560
|
if (!result.success) {
|
|
365
561
|
throw new Error(result.error ?? "Detection failed");
|