@reverse-craft/ai-tools 1.0.1 → 1.0.3

This diff shows the contents of publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
package/dist/server.js CHANGED
@@ -19,118 +19,220 @@ import { ensureBeautified, truncateCodeHighPerf } from "@reverse-craft/smart-fs"
19
19
  import { existsSync } from "fs";
20
20
 
21
21
  // src/llmConfig.ts
22
+ import { generateText } from "ai";
23
+ import { createOpenAI } from "@ai-sdk/openai";
24
+ import { createAnthropic } from "@ai-sdk/anthropic";
25
+ import { createGoogleGenerativeAI } from "@ai-sdk/google";
26
+ var PROVIDER_DEFAULTS = {
27
+ openai: { model: "gpt-4.1-mini" },
28
+ anthropic: { model: "claude-haiku-4-5-20241022" },
29
+ google: { model: "gemini-2.5-flash-lite" }
30
+ };
31
+ var PROVIDER_ENV_KEYS = {
32
+ openai: {
33
+ apiKey: "OPENAI_API_KEY",
34
+ model: "OPENAI_MODEL",
35
+ baseUrl: "OPENAI_BASE_URL"
36
+ },
37
+ anthropic: {
38
+ apiKey: "ANTHROPIC_API_KEY",
39
+ model: "ANTHROPIC_MODEL",
40
+ baseUrl: "ANTHROPIC_BASE_URL"
41
+ },
42
+ google: {
43
+ apiKey: "GOOGLE_API_KEY",
44
+ model: "GOOGLE_MODEL",
45
+ baseUrl: "GOOGLE_BASE_URL"
46
+ }
47
+ };
48
+ function validateProvider(value) {
49
+ if (value === void 0) return null;
50
+ if (value === "openai" || value === "anthropic" || value === "google") {
51
+ return value;
52
+ }
53
+ return null;
54
+ }
22
55
  function getLLMConfig() {
23
- const apiKey = process.env.OPENAI_API_KEY;
56
+ const providerEnv = process.env.LLM_PROVIDER?.toLowerCase();
57
+ const provider = validateProvider(providerEnv);
58
+ if (provider === null && providerEnv !== void 0) {
59
+ console.warn(`Invalid LLM_PROVIDER: ${providerEnv}. Valid values: openai, anthropic, google`);
60
+ return null;
61
+ }
62
+ const effectiveProvider = provider ?? "openai";
63
+ const envKeys = PROVIDER_ENV_KEYS[effectiveProvider];
64
+ const apiKey = process.env[envKeys.apiKey];
24
65
  if (!apiKey) {
25
66
  return null;
26
67
  }
27
- const baseUrl = process.env.OPENAI_BASE_URL || "https://api.openai.com/v1";
28
- const model = process.env.OPENAI_MODEL || "gpt-4o-mini";
68
+ const model = process.env.LLM_MODEL || process.env[envKeys.model] || PROVIDER_DEFAULTS[effectiveProvider].model;
69
+ const baseUrl = process.env.LLM_BASE_URL || process.env[envKeys.baseUrl];
29
70
  return {
71
+ provider: effectiveProvider,
30
72
  apiKey,
31
- baseUrl,
32
- model
73
+ model,
74
+ baseUrl
33
75
  };
34
76
  }
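
For reference, a minimal configuration sketch (values are hypothetical, not part of the package) of how the resolution order in getLLMConfig above plays out: the generic LLM_* variables win over the provider-specific ones, which in turn win over the built-in defaults; the API key always comes from the provider-specific variable; an unset LLM_PROVIDER falls back to openai, while an invalid value rejects the configuration outright.

    // Hypothetical environment; mirrors the precedence visible in getLLMConfig above.
    process.env.LLM_PROVIDER = "anthropic";             // validated against the three known providers
    process.env.ANTHROPIC_API_KEY = "sk-ant-example";   // required, otherwise getLLMConfig() returns null
    process.env.ANTHROPIC_MODEL = "claude-3-5-haiku-20241022";
    process.env.LLM_BASE_URL = "https://proxy.internal/v1"; // generic override beats ANTHROPIC_BASE_URL

    // getLLMConfig() would then resolve to:
    // { provider: "anthropic", apiKey: "sk-ant-example",
    //   model: "claude-3-5-haiku-20241022",   // LLM_MODEL is unset, so the provider-specific value wins
    //   baseUrl: "https://proxy.internal/v1" }
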
35
77
  function buildJSVMPSystemPrompt() {
36
- return `\u4F60\u662F\u4E00\u4E2A\u4E13\u4E1A\u7684 JavaScript \u9006\u5411\u5DE5\u7A0B\u4E13\u5BB6\uFF0C\u4E13\u95E8\u8BC6\u522B JSVMP\uFF08JavaScript Virtual Machine Protection\uFF09\u4FDD\u62A4\u4EE3\u7801\u3002
78
+ return `You are a Senior JavaScript Reverse Engineer and De-obfuscation Expert. Your specialty is analyzing **JSVMP (JavaScript Virtual Machine Protection)**.
79
+
80
+ **Context: What is JSVMP?**
81
+ JSVMP is a protection technique where original JavaScript code is compiled into custom **bytecode** and executed by a custom **interpreter** (virtual machine) written in JavaScript.
37
82
 
38
- JSVMP \u662F\u4E00\u79CD\u4EE3\u7801\u4FDD\u62A4\u6280\u672F\uFF0C\u5C06 JavaScript \u4EE3\u7801\u8F6C\u6362\u4E3A\u5B57\u8282\u7801\uFF0C\u5E76\u901A\u8FC7\u865A\u62DF\u673A\u6267\u884C\u3002\u5178\u578B\u7279\u5F81\u5305\u62EC\uFF1A
83
+ Key components of JSVMP code include:
84
+ 1. **The Virtual Stack:** A central array used to store operands and results (e.g., \`stack[pointer++]\` or \`v[p--]\`).
85
+ 2. **The Dispatcher:** A control flow structure inside a loop that decides which instruction to execute next based on the current bytecode (opcode).
86
+ * *Common variants:* A massive \`switch\` statement, a deeply nested \`if-else\` chain (binary search style), or a function array mapping (\`handlers[opcode]()\`).
87
+ 3. **The Bytecode:** A large string or array of integers representing the program logic.
39
88
 
40
- 1. **\u865A\u62DF\u6808\uFF08Virtual Stack\uFF09**\uFF1A\u4E2D\u592E\u6570\u7EC4\u7528\u4E8E\u5B58\u50A8\u64CD\u4F5C\u6570\u548C\u7ED3\u679C
41
- 2. **\u5206\u53D1\u5668\uFF08Dispatcher\uFF09**\uFF1A\u5927\u578B switch \u8BED\u53E5\u6216\u5D4C\u5957 if-else \u94FE\uFF0C\u6839\u636E\u6307\u4EE4\u7801\u6267\u884C\u4E0D\u540C\u64CD\u4F5C
42
- 3. **\u6307\u4EE4\u6570\u7EC4\uFF08Instruction Array\uFF09**\uFF1A\u5B58\u50A8\u5B57\u8282\u7801\u6307\u4EE4\u7684\u6570\u7EC4
43
- 4. **\u4E3B\u5FAA\u73AF\uFF08Main Loop\uFF09**\uFF1Awhile \u5FAA\u73AF\u6301\u7EED\u6267\u884C\u6307\u4EE4
89
+ **Task:**
90
+ Analyze the provided JavaScript code snippet to identify regions that match JSVMP structural patterns.
44
91
 
45
- \u68C0\u6D4B\u89C4\u5219\uFF1A
92
+ **Input Data Format:**
93
+ The code is provided in a simplified format: \`LineNo SourceLoc Code\`.
94
+ * **Example:** \`10 L234:56 var x = stack[p++];\`
95
+ * **Instruction:** Focus on the **LineNo** (1st column) and **Code** (3rd column onwards). Ignore the \`SourceLoc\` (middle column).
46
96
 
47
- **Ultra High \u7F6E\u4FE1\u5EA6**\uFF1A
48
- - \u540C\u65F6\u51FA\u73B0\uFF1A\u4E3B\u5FAA\u73AF + \u5206\u53D1\u5668 + \u6808\u64CD\u4F5C
49
- - \u5206\u53D1\u5668\u6709 >20 \u4E2A case \u6216 >10 \u5C42\u5D4C\u5957
50
- - \u660E\u786E\u7684\u6808\u64CD\u4F5C\u6A21\u5F0F\uFF08push/pop/\u6570\u7EC4\u7D22\u5F15\uFF09
97
+ **Detection Rules & Confidence Levels:**
98
+ Please assign confidence based on the following criteria:
51
99
 
52
- **High \u7F6E\u4FE1\u5EA6**\uFF1A
53
- - \u72EC\u7ACB\u7684\u5927\u578B\u5206\u53D1\u5668\u7ED3\u6784\uFF08>20 case \u7684 switch \u6216 >10 \u5C42\u5D4C\u5957\u7684 if-else\uFF09
54
- - \u660E\u786E\u7684\u6307\u4EE4\u6570\u7EC4\u548C\u7A0B\u5E8F\u8BA1\u6570\u5668\u6A21\u5F0F
100
+ * **Ultra High:**
101
+ * A combination of a **Main Loop** + **Dispatcher** + **Stack Operations** appears in the same block.
102
+ * *Example:* A \`while(true)\` loop containing a huge \`if-else\` chain where branches perform \`stack[p++]\` operations.
55
103
 
56
- **Medium \u7F6E\u4FE1\u5EA6**\uFF1A
57
- - \u5B64\u7ACB\u7684\u6808\u64CD\u4F5C\u6216\u53EF\u7591\u7684 while \u5FAA\u73AF
58
- - \u90E8\u5206 JSVMP \u7279\u5F81\u4F46\u4E0D\u5B8C\u6574
104
+ * **High:**
105
+ * Distinct **Dispatcher** structures found (e.g., a \`switch\` with >20 cases, or an \`if-else\` chain nested >10 levels deep checking integer values).
106
+ * Large arrays containing only function definitions (Instruction Handlers).
59
107
 
60
- **Low \u7F6E\u4FE1\u5EA6**\uFF1A
61
- - \u901A\u7528\u6DF7\u6DC6\u6A21\u5F0F
62
- - \u53EF\u80FD\u76F8\u5173\u4F46\u4E0D\u786E\u5B9A\u7684\u7ED3\u6784
108
+ * **Medium:**
109
+ * Isolated **Stack Operations** (e.g., \`v2[p2] = v2[p2 - 1]\`) without visible dispatchers nearby.
110
+ * Suspicious \`while\` loops iterating over a string/array.
63
111
 
64
- \u8BF7\u5206\u6790\u63D0\u4F9B\u7684\u4EE3\u7801\uFF0C\u8BC6\u522B JSVMP \u76F8\u5173\u533A\u57DF\u3002\u8FD4\u56DE JSON \u683C\u5F0F\uFF1A
112
+ * **Low:**
113
+ * Generic obfuscation patterns (short variable names, comma operators) that *might* be part of a VM but lack specific structural proof.
65
114
 
115
+ **Output Format:**
116
+ Return **ONLY valid JSON**. No markdown wrapper, no conversational text.
117
+
118
+ **JSON Schema:**
66
119
  {
67
- "summary": "\u5206\u6790\u6458\u8981\uFF08\u4E2D\u6587\uFF09",
120
+ "summary": "Brief analysis of the code structure in chinese, shortly",
68
121
  "regions": [
69
122
  {
70
- "start": \u8D77\u59CB\u884C\u53F7,
71
- "end": \u7ED3\u675F\u884C\u53F7,
72
- "type": "If-Else Dispatcher" | "Switch Dispatcher" | "Instruction Array" | "Stack Operation",
73
- "confidence": "ultra_high" | "high" | "medium" | "low",
74
- "description": "\u8BE6\u7EC6\u63CF\u8FF0\uFF08\u4E2D\u6587\uFF09"
123
+ "start": <start_line>,
124
+ "end": <end_line>,
125
+ "type": "<If-Else Dispatcher | Switch Dispatcher | Instruction Array | Stack Operation>",
126
+ "confidence": "<ultra_high | high | medium | low>",
127
+ "description": "<Why you flagged this. Mention specific variables like 'v2', 'p2' or structures. in chinese, shortly>"
75
128
  }
76
129
  ]
130
+ }`;
77
131
  }
78
-
79
- \u5982\u679C\u6CA1\u6709\u68C0\u6D4B\u5230 JSVMP \u7279\u5F81\uFF0C\u8FD4\u56DE\u7A7A\u7684 regions \u6570\u7EC4\u3002`;
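
As a concrete illustration of the structure the prompt above describes (main loop + dispatcher + virtual stack), a deliberately tiny, synthetic interpreter is sketched below; it is not taken from any real target, and real JSVMP code hides the same shape behind heavy obfuscation and hundreds of opcodes.

    // Synthetic JSVMP-style interpreter: main loop, opcode dispatcher, virtual stack.
    function run(bytecode: number[]): number {
      const stack: number[] = [];   // virtual stack
      let sp = 0;                   // stack pointer
      let pc = 0;                   // program counter into the bytecode
      while (pc < bytecode.length) {            // main loop
        const op = bytecode[pc++];
        switch (op) {                           // dispatcher
          case 0: stack[sp++] = bytecode[pc++]; break;                          // PUSH immediate
          case 1: stack[sp - 2] = stack[sp - 2] + stack[sp - 1]; sp--; break;   // ADD
          case 2: return stack[--sp];                                           // RET
        }
      }
      return NaN;
    }

    // run([0, 2, 0, 3, 1, 2]) === 5   (push 2, push 3, add, return)
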
132
+ function createProviderModel(config) {
133
+ switch (config.provider) {
134
+ case "openai": {
135
+ const openai = createOpenAI({
136
+ apiKey: config.apiKey,
137
+ baseURL: config.baseUrl
138
+ });
139
+ return openai(config.model);
140
+ }
141
+ case "anthropic": {
142
+ const anthropic = createAnthropic({
143
+ apiKey: config.apiKey,
144
+ baseURL: config.baseUrl
145
+ });
146
+ return anthropic(config.model);
147
+ }
148
+ case "google": {
149
+ const google = createGoogleGenerativeAI({
150
+ apiKey: config.apiKey,
151
+ baseURL: config.baseUrl
152
+ });
153
+ return google(config.model);
154
+ }
155
+ }
80
156
  }
81
157
  function createLLMClient(config) {
158
+ const model = createProviderModel(config);
82
159
  return {
83
160
  async analyzeJSVMP(formattedCode) {
84
161
  const systemPrompt = buildJSVMPSystemPrompt();
85
- const requestBody = {
86
- model: config.model,
87
- messages: [
88
- {
89
- role: "system",
90
- content: systemPrompt
91
- },
92
- {
93
- role: "user",
94
- content: `\u8BF7\u5206\u6790\u4EE5\u4E0B\u4EE3\u7801\uFF0C\u8BC6\u522B JSVMP \u4FDD\u62A4\u7ED3\u6784\uFF1A
95
-
96
- ${formattedCode}`
97
- }
98
- ],
99
- temperature: 0.1,
100
- response_format: { type: "json_object" }
101
- };
102
162
  try {
103
- const response = await fetch(`${config.baseUrl}/chat/completions`, {
104
- method: "POST",
105
- headers: {
106
- "Content-Type": "application/json",
107
- "Authorization": `Bearer ${config.apiKey}`
108
- },
109
- body: JSON.stringify(requestBody)
163
+ const result = await generateText({
164
+ model,
165
+ system: systemPrompt,
166
+ prompt: `\u8BF7\u5206\u6790\u4EE5\u4E0B\u4EE3\u7801\uFF0C\u8BC6\u522B JSVMP \u4FDD\u62A4\u7ED3\u6784\uFF1A
167
+
168
+ ${formattedCode}`,
169
+ temperature: 0.1
110
170
  });
111
- if (!response.ok) {
112
- const errorText = await response.text();
113
- throw new Error(`API \u8BF7\u6C42\u5931\u8D25 (${response.status}): ${errorText}`);
114
- }
115
- const data = await response.json();
116
- if (!data.choices || !data.choices[0] || !data.choices[0].message) {
117
- throw new Error("API \u54CD\u5E94\u683C\u5F0F\u65E0\u6548\uFF1A\u7F3A\u5C11 choices \u6216 message \u5B57\u6BB5");
118
- }
119
- const content = data.choices[0].message.content;
120
- if (typeof content !== "string") {
121
- throw new Error("API \u54CD\u5E94\u683C\u5F0F\u65E0\u6548\uFF1Amessage.content \u4E0D\u662F\u5B57\u7B26\u4E32");
122
- }
123
- return content;
171
+ return result.text;
124
172
  } catch (error) {
173
+ const providerName = config.provider.charAt(0).toUpperCase() + config.provider.slice(1);
125
174
  if (error instanceof Error) {
126
- throw new Error(`LLM \u8BF7\u6C42\u5931\u8D25: ${error.message}`);
175
+ throw new Error(`${providerName} LLM \u8BF7\u6C42\u5931\u8D25: ${error.message}`);
127
176
  }
128
- throw new Error(`LLM \u8BF7\u6C42\u5931\u8D25: ${String(error)}`);
177
+ throw new Error(`${providerName} LLM \u8BF7\u6C42\u5931\u8D25: ${String(error)}`);
129
178
  }
130
179
  }
131
180
  };
132
181
  }
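
Putting the configuration and the client together, a usage sketch looks like the following; this assumes the helpers were exported under these names, whereas in the published bundle they are internal to dist/server.js.

    const config = getLLMConfig();
    if (config) {
      const client = createLLMClient(config);
      // Input is the "LineNo SourceLoc Code" text produced further below; the
      // return value is the raw model output, expected to be the JSON described
      // by the system prompt.
      const raw = await client.analyzeJSVMP("    1 L1:0       while (true) { ... }");
      console.log(raw);
    }
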
133
182
 
183
+ // src/tokenizer.ts
184
+ import { encoding_for_model } from "tiktoken";
185
+ var DEFAULT_MODEL = "gpt-4o";
186
+ function countTokens(text, model) {
187
+ const enc = encoding_for_model(model ?? DEFAULT_MODEL);
188
+ try {
189
+ const tokens = enc.encode(text);
190
+ return tokens.length;
191
+ } finally {
192
+ enc.free();
193
+ }
194
+ }
195
+ function splitByTokenLimit(lines, maxTokens, model) {
196
+ if (lines.length === 0) {
197
+ return [];
198
+ }
199
+ if (maxTokens <= 0) {
200
+ throw new Error("maxTokens must be a positive number");
201
+ }
202
+ const batches = [];
203
+ let currentBatch = [];
204
+ let currentTokenCount = 0;
205
+ const enc = encoding_for_model(model ?? DEFAULT_MODEL);
206
+ try {
207
+ for (const line of lines) {
208
+ const lineWithNewline = line + "\n";
209
+ const lineTokens = enc.encode(lineWithNewline).length;
210
+ if (lineTokens > maxTokens) {
211
+ if (currentBatch.length > 0) {
212
+ batches.push(currentBatch);
213
+ currentBatch = [];
214
+ currentTokenCount = 0;
215
+ }
216
+ batches.push([line]);
217
+ continue;
218
+ }
219
+ if (currentTokenCount + lineTokens > maxTokens && currentBatch.length > 0) {
220
+ batches.push(currentBatch);
221
+ currentBatch = [];
222
+ currentTokenCount = 0;
223
+ }
224
+ currentBatch.push(line);
225
+ currentTokenCount += lineTokens;
226
+ }
227
+ if (currentBatch.length > 0) {
228
+ batches.push(currentBatch);
229
+ }
230
+ return batches;
231
+ } finally {
232
+ enc.free();
233
+ }
234
+ }
235
+
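
The splitting behaviour can be summarised with a small example (function name as defined in src/tokenizer.ts above; the dist bundle does not export it): lines are packed greedily until adding the next one would exceed the token budget, and a single line that is larger than the budget becomes its own batch.

    const lines = ["const a = 1;", "const b = 2;", "x".repeat(100_000)];
    const batches = splitByTokenLimit(lines, 50);
    // batches[0] -> ["const a = 1;", "const b = 2;"]   (well under 50 tokens together)
    // batches[1] -> ["xxx...x"]                         (oversized line isolated in its own batch)
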
134
236
  // src/jsvmpDetector.ts
135
237
  function formatSourcePosition(line, column) {
136
238
  if (line !== null && column !== null) {
@@ -143,14 +245,12 @@ function formatCodeLine(lineNumber, sourcePos, code) {
143
245
  const srcPosPadded = sourcePos ? sourcePos.padEnd(10, " ") : " ";
144
246
  return `${lineNumStr} ${srcPosPadded} ${code}`;
145
247
  }
146
- async function formatCodeForAnalysis(filePath, startLine, endLine, charLimit = 300) {
248
+ async function formatEntireFile(filePath, charLimit = 300) {
147
249
  const beautifyResult = await ensureBeautified(filePath);
148
250
  const { code, rawMap } = beautifyResult;
149
251
  const truncatedCode = truncateCodeHighPerf(code, charLimit);
150
- const lines = truncatedCode.split("\n");
151
- const totalLines = lines.length;
152
- const effectiveStartLine = Math.max(1, Math.min(totalLines, startLine));
153
- const effectiveEndLine = Math.max(effectiveStartLine, Math.min(totalLines, endLine));
252
+ const codeLines = truncatedCode.split("\n");
253
+ const totalLines = codeLines.length;
154
254
  const formattedLines = [];
155
255
  let consumer = null;
156
256
  if (rawMap && rawMap.sources && rawMap.names && rawMap.mappings) {
@@ -163,9 +263,9 @@ async function formatCodeForAnalysis(filePath, startLine, endLine, charLimit = 3
163
263
  sourceRoot: rawMap.sourceRoot
164
264
  });
165
265
  }
166
- for (let lineNum = effectiveStartLine; lineNum <= effectiveEndLine; lineNum++) {
266
+ for (let lineNum = 1; lineNum <= totalLines; lineNum++) {
167
267
  const lineIndex = lineNum - 1;
168
- const lineContent = lines[lineIndex] ?? "";
268
+ const lineContent = codeLines[lineIndex] ?? "";
169
269
  let sourcePos = "";
170
270
  if (consumer) {
171
271
  const originalPos = consumer.originalPositionFor({
@@ -177,12 +277,35 @@ async function formatCodeForAnalysis(filePath, startLine, endLine, charLimit = 3
177
277
  formattedLines.push(formatCodeLine(lineNum, sourcePos, lineContent));
178
278
  }
179
279
  return {
180
- content: formattedLines.join("\n"),
181
- totalLines,
182
- startLine: effectiveStartLine,
183
- endLine: effectiveEndLine
280
+ lines: formattedLines,
281
+ totalLines
184
282
  };
185
283
  }
284
+ function extractLineNumber(formattedLine) {
285
+ const lineNumStr = formattedLine.substring(0, 5).trim();
286
+ return parseInt(lineNumStr, 10);
287
+ }
288
+ function createBatches(formattedLines, maxTokensPerBatch) {
289
+ if (formattedLines.length === 0) {
290
+ return [];
291
+ }
292
+ const lineBatches = splitByTokenLimit(formattedLines, maxTokensPerBatch);
293
+ const batches = [];
294
+ for (const batchLines of lineBatches) {
295
+ if (batchLines.length === 0) continue;
296
+ const startLine = extractLineNumber(batchLines[0]);
297
+ const endLine = extractLineNumber(batchLines[batchLines.length - 1]);
298
+ const content = batchLines.join("\n");
299
+ const tokenCount = countTokens(content);
300
+ batches.push({
301
+ startLine,
302
+ endLine,
303
+ content,
304
+ tokenCount
305
+ });
306
+ }
307
+ return batches;
308
+ }
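
Each formatted line follows the "LineNo SourceLoc Code" layout described in the system prompt, which is what makes the cheap prefix parse in extractLineNumber safe. A short sketch (the exact padding widths are an assumption; only the five-character line-number field is implied by the code):

    const sample = "   10 L234:56    var x = stack[p++];";
    const lineNo = parseInt(sample.substring(0, 5).trim(), 10); // -> 10
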
186
309
  var VALID_DETECTION_TYPES = [
187
310
  "If-Else Dispatcher",
188
311
  "Switch Dispatcher",
@@ -262,10 +385,10 @@ function parseDetectionResult(jsonString) {
262
385
  regions: validatedRegions
263
386
  };
264
387
  }
265
- function formatDetectionResultOutput(result, filePath, startLine, endLine) {
388
+ function formatDetectionResultOutput(result, filePath, totalLines, batchCount) {
266
389
  const lines = [];
267
390
  lines.push("=== JSVMP Dispatcher Detection Result ===");
268
- lines.push(`File: ${filePath} (${startLine}-${endLine})`);
391
+ lines.push(`File: ${filePath} (${totalLines} lines, ${batchCount} batch${batchCount > 1 ? "es" : ""})`);
269
392
  lines.push("");
270
393
  lines.push(`Summary: ${result.summary}`);
271
394
  lines.push("");
@@ -281,15 +404,80 @@ function formatDetectionResultOutput(result, filePath, startLine, endLine) {
281
404
  }
282
405
  return lines.join("\n");
283
406
  }
284
- async function findJsvmpDispatcher(filePath, startLine, endLine, options) {
407
+ function mergeDetectionResults(results) {
408
+ if (results.length === 0) {
409
+ return { summary: "", regions: [] };
410
+ }
411
+ if (results.length === 1) {
412
+ const sortedRegions = [...results[0].regions].sort((a, b) => a.start - b.start);
413
+ return { summary: results[0].summary, regions: sortedRegions };
414
+ }
415
+ const summaries = results.map((r, i) => `[Batch ${i + 1}] ${r.summary}`);
416
+ const combinedSummary = summaries.join("\n");
417
+ const allRegions = [];
418
+ for (const result of results) {
419
+ allRegions.push(...result.regions);
420
+ }
421
+ allRegions.sort((a, b) => a.start - b.start);
422
+ const confidenceOrder = {
423
+ "ultra_high": 4,
424
+ "high": 3,
425
+ "medium": 2,
426
+ "low": 1
427
+ };
428
+ const deduplicatedRegions = [];
429
+ for (const region of allRegions) {
430
+ let overlappingIndex = -1;
431
+ for (let i = 0; i < deduplicatedRegions.length; i++) {
432
+ const existing = deduplicatedRegions[i];
433
+ if (region.start <= existing.end && region.end >= existing.start) {
434
+ overlappingIndex = i;
435
+ break;
436
+ }
437
+ }
438
+ if (overlappingIndex === -1) {
439
+ deduplicatedRegions.push(region);
440
+ } else {
441
+ const existing = deduplicatedRegions[overlappingIndex];
442
+ if (confidenceOrder[region.confidence] > confidenceOrder[existing.confidence]) {
443
+ deduplicatedRegions[overlappingIndex] = region;
444
+ }
445
+ }
446
+ }
447
+ return {
448
+ summary: combinedSummary,
449
+ regions: deduplicatedRegions
450
+ };
451
+ }
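
A worked example of the merge above, with hypothetical regions: batch summaries are concatenated, and overlapping regions collapse to the one with the higher confidence level.

    const merged = mergeDetectionResults([
      { summary: "batch 1", regions: [
        { start: 10, end: 80, type: "Switch Dispatcher", confidence: "high", description: "..." },
      ] },
      { summary: "batch 2", regions: [
        { start: 60, end: 90, type: "Switch Dispatcher", confidence: "ultra_high", description: "..." },
        { start: 200, end: 210, type: "Stack Operation", confidence: "medium", description: "..." },
      ] },
    ]);
    // merged.summary -> "[Batch 1] batch 1\n[Batch 2] batch 2"
    // merged.regions -> [ { start: 60, end: 90, confidence: "ultra_high", ... },   // replaces 10-80 "high"
    //                     { start: 200, end: 210, confidence: "medium", ... } ]
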
452
+ async function processBatch(client, batch) {
453
+ const llmResponse = await client.analyzeJSVMP(batch.content);
454
+ return parseDetectionResult(llmResponse);
455
+ }
456
+ async function processBatchesWithErrorHandling(client, batches) {
457
+ const results = [];
458
+ const errors = [];
459
+ for (let i = 0; i < batches.length; i++) {
460
+ const batch = batches[i];
461
+ try {
462
+ const result = await processBatch(client, batch);
463
+ results.push(result);
464
+ } catch (error) {
465
+ const errorMsg = `Batch ${i + 1} (lines ${batch.startLine}-${batch.endLine}) failed: ${error instanceof Error ? error.message : String(error)}`;
466
+ errors.push(errorMsg);
467
+ }
468
+ }
469
+ return { results, errors };
470
+ }
471
+ async function findJsvmpDispatcher(filePath, options) {
285
472
  const charLimit = options?.charLimit ?? 300;
473
+ const maxTokensPerBatch = options?.maxTokensPerBatch ?? 8e3;
286
474
  const config = getLLMConfig();
287
475
  if (!config) {
288
476
  return {
289
477
  success: false,
290
478
  filePath,
291
- startLine,
292
- endLine,
479
+ totalLines: 0,
480
+ batchCount: 0,
293
481
  error: "\u672A\u914D\u7F6E LLM\u3002\u8BF7\u8BBE\u7F6E\u73AF\u5883\u53D8\u91CF OPENAI_API_KEY \u4EE5\u542F\u7528 JSVMP dispatcher \u68C0\u6D4B\u529F\u80FD\u3002"
294
482
  };
295
483
  }
@@ -297,36 +485,45 @@ async function findJsvmpDispatcher(filePath, startLine, endLine, options) {
297
485
  return {
298
486
  success: false,
299
487
  filePath,
300
- startLine,
301
- endLine,
488
+ totalLines: 0,
489
+ batchCount: 0,
302
490
  error: `\u6587\u4EF6\u4E0D\u5B58\u5728: ${filePath}`
303
491
  };
304
492
  }
305
493
  try {
306
- const formattedCode = await formatCodeForAnalysis(
307
- filePath,
308
- startLine,
309
- endLine,
310
- charLimit
311
- );
494
+ const formattedCode = await formatEntireFile(filePath, charLimit);
495
+ const totalLines = formattedCode.totalLines;
496
+ const batches = createBatches(formattedCode.lines, maxTokensPerBatch);
497
+ const batchCount = batches.length;
312
498
  const client = createLLMClient(config);
313
- const llmResponse = await client.analyzeJSVMP(formattedCode.content);
314
- const result = parseDetectionResult(llmResponse);
315
- const formattedOutput = formatDetectionResultOutput(result, filePath, startLine, endLine);
499
+ const { results, errors } = await processBatchesWithErrorHandling(client, batches);
500
+ if (results.length === 0) {
501
+ return {
502
+ success: false,
503
+ filePath,
504
+ totalLines,
505
+ batchCount,
506
+ error: `\u6240\u6709\u6279\u6B21\u5904\u7406\u5931\u8D25: ${errors.join("; ")}`,
507
+ partialErrors: errors
508
+ };
509
+ }
510
+ const mergedResult = mergeDetectionResults(results);
511
+ const formattedOutput = formatDetectionResultOutput(mergedResult, filePath, totalLines, batchCount);
316
512
  return {
317
513
  success: true,
318
514
  filePath,
319
- startLine: formattedCode.startLine,
320
- endLine: formattedCode.endLine,
321
- result,
322
- formattedOutput
515
+ totalLines,
516
+ batchCount,
517
+ result: mergedResult,
518
+ formattedOutput,
519
+ partialErrors: errors.length > 0 ? errors : void 0
323
520
  };
324
521
  } catch (error) {
325
522
  return {
326
523
  success: false,
327
524
  filePath,
328
- startLine,
329
- endLine,
525
+ totalLines: 0,
526
+ batchCount: 0,
330
527
  error: error instanceof Error ? error.message : String(error)
331
528
  };
332
529
  }
@@ -335,9 +532,8 @@ async function findJsvmpDispatcher(filePath, startLine, endLine, options) {
335
532
  // src/tools/findJsvmpDispatcherTool.ts
336
533
  var FindJsvmpDispatcherInputSchema = {
337
534
  filePath: z.string().describe("Path to the JavaScript file to analyze"),
338
- startLine: z.number().int().positive().describe("Start line number (1-based)"),
339
- endLine: z.number().int().positive().describe("End line number (1-based)"),
340
- charLimit: z.number().int().positive().optional().describe("Character limit for string truncation (default: 300)")
535
+ charLimit: z.number().int().positive().optional().describe("Character limit for string truncation (default: 300)"),
536
+ maxTokensPerBatch: z.number().int().positive().optional().describe("Maximum tokens per batch for LLM analysis (default: 150000)")
341
537
  };
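
For callers of the MCP tool, the input schema changes: startLine and endLine are removed and maxTokensPerBatch is added. A hypothetical invocation payload under the new schema:

    const params = {
      filePath: "/work/app/bundle.js",    // required
      charLimit: 300,                     // optional, string truncation limit
      maxTokensPerBatch: 150_000,         // optional, token budget per LLM batch
    };
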
342
538
  var findJsvmpDispatcherTool = defineTool({
343
539
  name: "find_jsvmp_dispatcher",
@@ -349,17 +545,17 @@ JSVMP is a code protection technique that converts JavaScript to bytecode execut
349
545
  - Instruction Arrays: Arrays storing bytecode instructions
350
546
  - Stack Operations: Virtual stack push/pop patterns
351
547
 
548
+ Automatically splits large files into batches based on token limits and merges results.
549
+
352
550
  Returns detection results with confidence levels (ultra_high, high, medium, low) and detailed descriptions.
353
551
 
354
552
  Requires OPENAI_API_KEY environment variable. Optional: OPENAI_BASE_URL, OPENAI_MODEL.`,
355
553
  schema: FindJsvmpDispatcherInputSchema,
356
554
  handler: async (params) => {
357
- const { filePath, startLine, endLine, charLimit } = params;
358
- if (endLine < startLine) {
359
- throw new Error("endLine must be >= startLine");
360
- }
361
- const result = await findJsvmpDispatcher(filePath, startLine, endLine, {
362
- charLimit: charLimit ?? 300
555
+ const { filePath, charLimit, maxTokensPerBatch } = params;
556
+ const result = await findJsvmpDispatcher(filePath, {
557
+ charLimit: charLimit ?? 300,
558
+ maxTokensPerBatch: maxTokensPerBatch ?? 15e4
363
559
  });
364
560
  if (!result.success) {
365
561
  throw new Error(result.error ?? "Detection failed");