pulsemcp-cms-admin-mcp-server 0.7.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/shared/src/exam-result-store.js +132 -20
- package/build/shared/src/tools/get-exam-result.js +1 -1
- package/build/shared/src/tools/run-exam-for-mirror.js +11 -8
- package/build/shared/src/tools/save-results-for-mirror.js +29 -10
- package/package.json +1 -1
- package/shared/exam-result-store.d.ts +38 -7
- package/shared/exam-result-store.js +132 -20
- package/shared/tools/get-exam-result.js +1 -1
- package/shared/tools/run-exam-for-mirror.js +11 -8
- package/shared/tools/save-results-for-mirror.d.ts +1 -1
- package/shared/tools/save-results-for-mirror.js +29 -10
|
@@ -1,18 +1,50 @@
|
|
|
1
1
|
import { randomUUID } from 'crypto';
|
|
2
|
+
import { readFileSync, writeFileSync, unlinkSync, readdirSync, mkdirSync, existsSync } from 'fs';
|
|
3
|
+
import { join } from 'path';
|
|
4
|
+
import { tmpdir } from 'os';
|
|
2
5
|
/**
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
+
* Extract exam_id from a proctor exam stream line, checking both the
|
|
7
|
+
* data payload and top-level fields. The API may place exam_id in
|
|
8
|
+
* either location depending on the exam type.
|
|
9
|
+
*/
|
|
10
|
+
export function extractExamId(line) {
|
|
11
|
+
const data = line.data;
|
|
12
|
+
return (data?.exam_id ||
|
|
13
|
+
line.exam_id ||
|
|
14
|
+
data?.exam_type ||
|
|
15
|
+
line.exam_type ||
|
|
16
|
+
'unknown');
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Extract status from a proctor exam stream line, checking both the
|
|
20
|
+
* data payload and top-level fields.
|
|
21
|
+
*/
|
|
22
|
+
export function extractStatus(line) {
|
|
23
|
+
const data = line.data;
|
|
24
|
+
return data?.status || line.status || 'unknown';
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Maximum number of results to keep on disk. Oldest results are evicted
|
|
28
|
+
* when this limit is reached (FIFO by insertion order).
|
|
6
29
|
*/
|
|
7
30
|
const MAX_RESULTS = 100;
|
|
31
|
+
const STORE_DIR = join(tmpdir(), 'pulsemcp-exam-results');
|
|
32
|
+
const FILE_SUFFIX = '.json';
|
|
8
33
|
/**
|
|
9
|
-
*
|
|
34
|
+
* File-based store for proctor exam results.
|
|
10
35
|
*
|
|
11
|
-
* When `run_exam_for_mirror` completes, the full result is
|
|
12
|
-
* and a UUID `result_id` is returned. This avoids
|
|
13
|
-
* (~60KB+ for servers with many tools) into the
|
|
36
|
+
* When `run_exam_for_mirror` completes, the full result is written to a
|
|
37
|
+
* JSON file in /tmp/ and a UUID `result_id` is returned. This avoids
|
|
38
|
+
* dumping large payloads (~60KB+ for servers with many tools) into the
|
|
39
|
+
* LLM context, and survives across tool calls without relying on
|
|
40
|
+
* in-memory state.
|
|
14
41
|
*
|
|
15
|
-
*
|
|
42
|
+
* Files are named with a zero-padded sequence number prefix so that
|
|
43
|
+
* lexicographic sorting preserves insertion order for FIFO eviction.
|
|
44
|
+
* The sequence counter is initialized from existing files on disk so
|
|
45
|
+
* that new entries sort after old ones even across process restarts.
|
|
46
|
+
*
|
|
47
|
+
* Eviction: When the store exceeds MAX_RESULTS files, the oldest result
|
|
16
48
|
* is evicted (FIFO). Results are also deleted after successful save via
|
|
17
49
|
* `save_results_for_mirror`.
|
|
18
50
|
*
|
|
@@ -21,38 +53,118 @@ const MAX_RESULTS = 100;
|
|
|
21
53
|
* - Pass `result_id` to `save_results_for_mirror` instead of the full payload
|
|
22
54
|
*/
|
|
23
55
|
class ExamResultStore {
|
|
24
|
-
|
|
56
|
+
seq;
|
|
57
|
+
constructor() {
|
|
58
|
+
this.seq = this.initSeqFromDisk();
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Scan existing files to find the highest sequence number and start
|
|
62
|
+
* one past it. This ensures new files always sort after existing ones,
|
|
63
|
+
* even across process restarts.
|
|
64
|
+
*/
|
|
65
|
+
initSeqFromDisk() {
|
|
66
|
+
this.ensureDir();
|
|
67
|
+
const files = readdirSync(STORE_DIR)
|
|
68
|
+
.filter((f) => f.endsWith(FILE_SUFFIX) && f.length > FILE_SUFFIX.length)
|
|
69
|
+
.sort();
|
|
70
|
+
if (files.length === 0)
|
|
71
|
+
return 0;
|
|
72
|
+
const lastFile = files[files.length - 1];
|
|
73
|
+
const seqStr = lastFile.slice(0, 10);
|
|
74
|
+
const parsed = parseInt(seqStr, 10);
|
|
75
|
+
return isNaN(parsed) ? 0 : parsed + 1;
|
|
76
|
+
}
|
|
77
|
+
ensureDir() {
|
|
78
|
+
if (!existsSync(STORE_DIR)) {
|
|
79
|
+
mkdirSync(STORE_DIR, { recursive: true, mode: 0o700 });
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
/** Filename format: {seq}-{uuid}.json — seq is zero-padded for lexicographic ordering */
|
|
83
|
+
fileName(seq, resultId) {
|
|
84
|
+
return `${String(seq).padStart(10, '0')}-${resultId}${FILE_SUFFIX}`;
|
|
85
|
+
}
|
|
86
|
+
extractResultId(fileName) {
|
|
87
|
+
// Format: 0000000001-<uuid>.json
|
|
88
|
+
return fileName.slice(11, -FILE_SUFFIX.length);
|
|
89
|
+
}
|
|
90
|
+
listResultFiles() {
|
|
91
|
+
this.ensureDir();
|
|
92
|
+
return readdirSync(STORE_DIR)
|
|
93
|
+
.filter((f) => f.endsWith(FILE_SUFFIX) && f.length > FILE_SUFFIX.length)
|
|
94
|
+
.sort(); // Lexicographic sort gives insertion order via zero-padded seq
|
|
95
|
+
}
|
|
96
|
+
findFileForResult(resultId) {
|
|
97
|
+
const files = this.listResultFiles();
|
|
98
|
+
return files.find((f) => this.extractResultId(f) === resultId);
|
|
99
|
+
}
|
|
25
100
|
store(mirrorIds, runtimeId, examType, lines) {
|
|
101
|
+
this.ensureDir();
|
|
26
102
|
const resultId = randomUUID();
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
103
|
+
const seqNum = this.seq++;
|
|
104
|
+
// Evict oldest entries if at capacity (files are sorted by seq prefix)
|
|
105
|
+
const files = this.listResultFiles();
|
|
106
|
+
const toEvict = files.length - MAX_RESULTS + 1;
|
|
107
|
+
for (let i = 0; i < toEvict; i++) {
|
|
108
|
+
try {
|
|
109
|
+
unlinkSync(join(STORE_DIR, files[i]));
|
|
110
|
+
}
|
|
111
|
+
catch {
|
|
112
|
+
// ignore
|
|
32
113
|
}
|
|
33
114
|
}
|
|
34
|
-
|
|
115
|
+
const stored = {
|
|
35
116
|
result_id: resultId,
|
|
36
117
|
mirror_ids: mirrorIds,
|
|
37
118
|
runtime_id: runtimeId,
|
|
38
119
|
exam_type: examType,
|
|
39
120
|
lines,
|
|
40
121
|
stored_at: new Date().toISOString(),
|
|
122
|
+
};
|
|
123
|
+
writeFileSync(join(STORE_DIR, this.fileName(seqNum, resultId)), JSON.stringify(stored), {
|
|
124
|
+
encoding: 'utf-8',
|
|
125
|
+
mode: 0o600,
|
|
41
126
|
});
|
|
42
127
|
return resultId;
|
|
43
128
|
}
|
|
44
129
|
get(resultId) {
|
|
45
|
-
|
|
130
|
+
const file = this.findFileForResult(resultId);
|
|
131
|
+
if (!file)
|
|
132
|
+
return undefined;
|
|
133
|
+
try {
|
|
134
|
+
const content = readFileSync(join(STORE_DIR, file), 'utf-8');
|
|
135
|
+
return JSON.parse(content);
|
|
136
|
+
}
|
|
137
|
+
catch {
|
|
138
|
+
return undefined;
|
|
139
|
+
}
|
|
46
140
|
}
|
|
47
141
|
delete(resultId) {
|
|
48
|
-
|
|
142
|
+
const file = this.findFileForResult(resultId);
|
|
143
|
+
if (!file)
|
|
144
|
+
return false;
|
|
145
|
+
try {
|
|
146
|
+
unlinkSync(join(STORE_DIR, file));
|
|
147
|
+
return true;
|
|
148
|
+
}
|
|
149
|
+
catch {
|
|
150
|
+
return false;
|
|
151
|
+
}
|
|
49
152
|
}
|
|
50
153
|
get size() {
|
|
51
|
-
return this.
|
|
154
|
+
return this.listResultFiles().length;
|
|
52
155
|
}
|
|
53
|
-
/** For testing only */
|
|
156
|
+
/** For testing only — removes all result files and resets the sequence counter */
|
|
54
157
|
clear() {
|
|
55
|
-
this.
|
|
158
|
+
this.ensureDir();
|
|
159
|
+
for (const file of this.listResultFiles()) {
|
|
160
|
+
try {
|
|
161
|
+
unlinkSync(join(STORE_DIR, file));
|
|
162
|
+
}
|
|
163
|
+
catch {
|
|
164
|
+
// ignore
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
this.seq = 0;
|
|
56
168
|
}
|
|
57
169
|
}
|
|
58
170
|
/** Singleton instance shared across all tool factories */
|
|
@@ -49,7 +49,7 @@ Typical usage:
|
|
|
49
49
|
content: [
|
|
50
50
|
{
|
|
51
51
|
type: 'text',
|
|
52
|
-
text: `No stored result found for result_id "${validatedArgs.result_id}".
|
|
52
|
+
text: `No stored result found for result_id "${validatedArgs.result_id}". The result file may have been cleaned up or the /tmp directory cleared.`,
|
|
53
53
|
},
|
|
54
54
|
],
|
|
55
55
|
isError: true,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import { examResultStore } from '../exam-result-store.js';
|
|
2
|
+
import { examResultStore, extractExamId, extractStatus } from '../exam-result-store.js';
|
|
3
3
|
const PARAM_DESCRIPTIONS = {
|
|
4
4
|
mirror_ids: 'Array of unofficial mirror IDs to run exams against. Mirrors without saved mcp_json configs will be skipped.',
|
|
5
5
|
runtime_id: 'The Fly Machines runtime ID to use for running the exam containers (e.g., "fly-machines-v1")',
|
|
@@ -66,7 +66,7 @@ Available exam types:
|
|
|
66
66
|
|
|
67
67
|
Mirrors without saved mcp_json configurations are automatically skipped.
|
|
68
68
|
|
|
69
|
-
Results are stored server-side and a \`result_id\` UUID is returned. The response includes a truncated summary (status, tool names/counts, errors) that fits within MCP size limits. Use \`get_exam_result\` to drill into full details, or pass the \`result_id\` directly to \`save_results_for_mirror\`.
|
|
69
|
+
Results are stored server-side in a local file and a \`result_id\` UUID is returned. The response includes a truncated summary (status, tool names/counts, errors) that fits within MCP size limits. Use \`get_exam_result\` to drill into full details, or pass the \`result_id\` directly to \`save_results_for_mirror\`.
|
|
70
70
|
|
|
71
71
|
Use cases:
|
|
72
72
|
- Test if an unofficial mirror's MCP server is working correctly before linking it
|
|
@@ -119,15 +119,18 @@ Use cases:
|
|
|
119
119
|
case 'log':
|
|
120
120
|
content += `[LOG] ${line.message || JSON.stringify(line)}\n`;
|
|
121
121
|
break;
|
|
122
|
-
case 'exam_result':
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
content +=
|
|
126
|
-
|
|
127
|
-
|
|
122
|
+
case 'exam_result': {
|
|
123
|
+
const data = line.data;
|
|
124
|
+
const mirrorId = line.mirror_id ?? data?.mirror_id ?? 'unknown';
|
|
125
|
+
content += `\n**Exam Result** (Mirror: ${mirrorId})\n`;
|
|
126
|
+
content += ` Exam: ${extractExamId(line)}\n`;
|
|
127
|
+
content += ` Status: ${extractStatus(line)}\n`;
|
|
128
|
+
if (data) {
|
|
129
|
+
const truncatedData = truncateExamResultData(data);
|
|
128
130
|
content += ` Data: ${JSON.stringify(truncatedData, null, 2)}\n`;
|
|
129
131
|
}
|
|
130
132
|
break;
|
|
133
|
+
}
|
|
131
134
|
case 'summary':
|
|
132
135
|
content += `\n**Summary**\n`;
|
|
133
136
|
content += ` Total: ${line.total || 0}\n`;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import { examResultStore } from '../exam-result-store.js';
|
|
2
|
+
import { examResultStore, extractExamId, extractStatus } from '../exam-result-store.js';
|
|
3
3
|
const PARAM_DESCRIPTIONS = {
|
|
4
4
|
mirror_id: 'The ID of the unofficial mirror to save results for',
|
|
5
5
|
runtime_id: 'The runtime ID that was used to run the exams',
|
|
6
|
-
result_id: 'The UUID returned by run_exam_for_mirror. When provided, the server retrieves the full result from the
|
|
6
|
+
result_id: 'The UUID returned by run_exam_for_mirror. When provided, the server retrieves the full result from the local file store — no need to pass the results array. This is the preferred approach.',
|
|
7
7
|
results: 'Array of exam results to save. Each result must include exam_id, status, and optional data. Only needed if result_id is not provided.',
|
|
8
8
|
exam_id: 'The exam identifier (e.g., "auth-check", "init-tools-list")',
|
|
9
9
|
status: 'The result status (e.g., "pass", "fail", "error", "skip")',
|
|
@@ -32,7 +32,7 @@ export function saveResultsForMirror(_server, clientFactory) {
|
|
|
32
32
|
name: 'save_results_for_mirror',
|
|
33
33
|
description: `Save proctor exam results for an unofficial mirror.
|
|
34
34
|
|
|
35
|
-
**Preferred**: Pass the \`result_id\` returned by \`run_exam_for_mirror\`. The full result is retrieved from the
|
|
35
|
+
**Preferred**: Pass the \`result_id\` returned by \`run_exam_for_mirror\`. The full result is retrieved from the local file store server-side — no need to pass the large results payload through the LLM context.
|
|
36
36
|
|
|
37
37
|
**Fallback**: Pass results directly (as before) if result_id is not available.
|
|
38
38
|
|
|
@@ -88,20 +88,39 @@ Typical workflow:
|
|
|
88
88
|
content: [
|
|
89
89
|
{
|
|
90
90
|
type: 'text',
|
|
91
|
-
text: `No stored result found for result_id "${validatedArgs.result_id}".
|
|
91
|
+
text: `No stored result found for result_id "${validatedArgs.result_id}". The result file may have been cleaned up or the /tmp directory cleared. Pass the results array directly instead.`,
|
|
92
92
|
},
|
|
93
93
|
],
|
|
94
94
|
isError: true,
|
|
95
95
|
};
|
|
96
96
|
}
|
|
97
|
-
// Extract exam_result lines from stored data
|
|
97
|
+
// Extract exam_result lines from stored data.
|
|
98
|
+
// The exam_id may live at the top level of the stream line OR inside
|
|
99
|
+
// line.data (the actual result payload). Prefer the data payload to
|
|
100
|
+
// avoid reading from potentially incomplete display metadata.
|
|
101
|
+
//
|
|
102
|
+
// The real proctor API returns line.data as a metadata wrapper:
|
|
103
|
+
// { mirror_id, exam_id, status, result: { status, output: {...} } }
|
|
104
|
+
// The actual output lives inside line.data.result. When we pass the
|
|
105
|
+
// entire line.data as `data`, the output ends up nested too deeply
|
|
106
|
+
// (result.data.result.output) and the backend saves empty output.
|
|
107
|
+
// Use line.data.result when present so that `output` is at the
|
|
108
|
+
// expected depth (result.data.output).
|
|
98
109
|
results = stored.lines
|
|
99
110
|
.filter((line) => line.type === 'exam_result')
|
|
100
|
-
.map((line) =>
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
111
|
+
.map((line) => {
|
|
112
|
+
const data = line.data;
|
|
113
|
+
// Prefer the nested result object (contains output, input, etc.)
|
|
114
|
+
// over the full data wrapper (contains metadata like mirror_id)
|
|
115
|
+
const resultData = data?.result && typeof data.result === 'object' && !Array.isArray(data.result)
|
|
116
|
+
? data.result
|
|
117
|
+
: data;
|
|
118
|
+
return {
|
|
119
|
+
exam_id: extractExamId(line),
|
|
120
|
+
status: extractStatus(line),
|
|
121
|
+
...(resultData ? { data: resultData } : {}),
|
|
122
|
+
};
|
|
123
|
+
});
|
|
105
124
|
if (!runtimeId) {
|
|
106
125
|
runtimeId = stored.runtime_id;
|
|
107
126
|
}
|
package/package.json
CHANGED
|
@@ -8,13 +8,31 @@ export interface StoredExamResult {
|
|
|
8
8
|
stored_at: string;
|
|
9
9
|
}
|
|
10
10
|
/**
|
|
11
|
-
*
|
|
11
|
+
* Extract exam_id from a proctor exam stream line, checking both the
|
|
12
|
+
* data payload and top-level fields. The API may place exam_id in
|
|
13
|
+
* either location depending on the exam type.
|
|
14
|
+
*/
|
|
15
|
+
export declare function extractExamId(line: ProctorExamStreamLine): string;
|
|
16
|
+
/**
|
|
17
|
+
* Extract status from a proctor exam stream line, checking both the
|
|
18
|
+
* data payload and top-level fields.
|
|
19
|
+
*/
|
|
20
|
+
export declare function extractStatus(line: ProctorExamStreamLine): string;
|
|
21
|
+
/**
|
|
22
|
+
* File-based store for proctor exam results.
|
|
23
|
+
*
|
|
24
|
+
* When `run_exam_for_mirror` completes, the full result is written to a
|
|
25
|
+
* JSON file in /tmp/ and a UUID `result_id` is returned. This avoids
|
|
26
|
+
* dumping large payloads (~60KB+ for servers with many tools) into the
|
|
27
|
+
* LLM context, and survives across tool calls without relying on
|
|
28
|
+
* in-memory state.
|
|
12
29
|
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
30
|
+
* Files are named with a zero-padded sequence number prefix so that
|
|
31
|
+
* lexicographic sorting preserves insertion order for FIFO eviction.
|
|
32
|
+
* The sequence counter is initialized from existing files on disk so
|
|
33
|
+
* that new entries sort after old ones even across process restarts.
|
|
16
34
|
*
|
|
17
|
-
* Eviction: When the store exceeds MAX_RESULTS
|
|
35
|
+
* Eviction: When the store exceeds MAX_RESULTS files, the oldest result
|
|
18
36
|
* is evicted (FIFO). Results are also deleted after successful save via
|
|
19
37
|
* `save_results_for_mirror`.
|
|
20
38
|
*
|
|
@@ -23,12 +41,25 @@ export interface StoredExamResult {
|
|
|
23
41
|
* - Pass `result_id` to `save_results_for_mirror` instead of the full payload
|
|
24
42
|
*/
|
|
25
43
|
declare class ExamResultStore {
|
|
26
|
-
private
|
|
44
|
+
private seq;
|
|
45
|
+
constructor();
|
|
46
|
+
/**
|
|
47
|
+
* Scan existing files to find the highest sequence number and start
|
|
48
|
+
* one past it. This ensures new files always sort after existing ones,
|
|
49
|
+
* even across process restarts.
|
|
50
|
+
*/
|
|
51
|
+
private initSeqFromDisk;
|
|
52
|
+
private ensureDir;
|
|
53
|
+
/** Filename format: {seq}-{uuid}.json — seq is zero-padded for lexicographic ordering */
|
|
54
|
+
private fileName;
|
|
55
|
+
private extractResultId;
|
|
56
|
+
private listResultFiles;
|
|
57
|
+
private findFileForResult;
|
|
27
58
|
store(mirrorIds: number[], runtimeId: string, examType: string, lines: ProctorExamStreamLine[]): string;
|
|
28
59
|
get(resultId: string): StoredExamResult | undefined;
|
|
29
60
|
delete(resultId: string): boolean;
|
|
30
61
|
get size(): number;
|
|
31
|
-
/** For testing only */
|
|
62
|
+
/** For testing only — removes all result files and resets the sequence counter */
|
|
32
63
|
clear(): void;
|
|
33
64
|
}
|
|
34
65
|
/** Singleton instance shared across all tool factories */
|
|
@@ -1,18 +1,50 @@
|
|
|
1
1
|
import { randomUUID } from 'crypto';
|
|
2
|
+
import { readFileSync, writeFileSync, unlinkSync, readdirSync, mkdirSync, existsSync } from 'fs';
|
|
3
|
+
import { join } from 'path';
|
|
4
|
+
import { tmpdir } from 'os';
|
|
2
5
|
/**
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
+
* Extract exam_id from a proctor exam stream line, checking both the
|
|
7
|
+
* data payload and top-level fields. The API may place exam_id in
|
|
8
|
+
* either location depending on the exam type.
|
|
9
|
+
*/
|
|
10
|
+
export function extractExamId(line) {
|
|
11
|
+
const data = line.data;
|
|
12
|
+
return (data?.exam_id ||
|
|
13
|
+
line.exam_id ||
|
|
14
|
+
data?.exam_type ||
|
|
15
|
+
line.exam_type ||
|
|
16
|
+
'unknown');
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Extract status from a proctor exam stream line, checking both the
|
|
20
|
+
* data payload and top-level fields.
|
|
21
|
+
*/
|
|
22
|
+
export function extractStatus(line) {
|
|
23
|
+
const data = line.data;
|
|
24
|
+
return data?.status || line.status || 'unknown';
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Maximum number of results to keep on disk. Oldest results are evicted
|
|
28
|
+
* when this limit is reached (FIFO by insertion order).
|
|
6
29
|
*/
|
|
7
30
|
const MAX_RESULTS = 100;
|
|
31
|
+
const STORE_DIR = join(tmpdir(), 'pulsemcp-exam-results');
|
|
32
|
+
const FILE_SUFFIX = '.json';
|
|
8
33
|
/**
|
|
9
|
-
*
|
|
34
|
+
* File-based store for proctor exam results.
|
|
10
35
|
*
|
|
11
|
-
* When `run_exam_for_mirror` completes, the full result is
|
|
12
|
-
* and a UUID `result_id` is returned. This avoids
|
|
13
|
-
* (~60KB+ for servers with many tools) into the
|
|
36
|
+
* When `run_exam_for_mirror` completes, the full result is written to a
|
|
37
|
+
* JSON file in /tmp/ and a UUID `result_id` is returned. This avoids
|
|
38
|
+
* dumping large payloads (~60KB+ for servers with many tools) into the
|
|
39
|
+
* LLM context, and survives across tool calls without relying on
|
|
40
|
+
* in-memory state.
|
|
14
41
|
*
|
|
15
|
-
*
|
|
42
|
+
* Files are named with a zero-padded sequence number prefix so that
|
|
43
|
+
* lexicographic sorting preserves insertion order for FIFO eviction.
|
|
44
|
+
* The sequence counter is initialized from existing files on disk so
|
|
45
|
+
* that new entries sort after old ones even across process restarts.
|
|
46
|
+
*
|
|
47
|
+
* Eviction: When the store exceeds MAX_RESULTS files, the oldest result
|
|
16
48
|
* is evicted (FIFO). Results are also deleted after successful save via
|
|
17
49
|
* `save_results_for_mirror`.
|
|
18
50
|
*
|
|
@@ -21,38 +53,118 @@ const MAX_RESULTS = 100;
|
|
|
21
53
|
* - Pass `result_id` to `save_results_for_mirror` instead of the full payload
|
|
22
54
|
*/
|
|
23
55
|
class ExamResultStore {
|
|
24
|
-
|
|
56
|
+
seq;
|
|
57
|
+
constructor() {
|
|
58
|
+
this.seq = this.initSeqFromDisk();
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Scan existing files to find the highest sequence number and start
|
|
62
|
+
* one past it. This ensures new files always sort after existing ones,
|
|
63
|
+
* even across process restarts.
|
|
64
|
+
*/
|
|
65
|
+
initSeqFromDisk() {
|
|
66
|
+
this.ensureDir();
|
|
67
|
+
const files = readdirSync(STORE_DIR)
|
|
68
|
+
.filter((f) => f.endsWith(FILE_SUFFIX) && f.length > FILE_SUFFIX.length)
|
|
69
|
+
.sort();
|
|
70
|
+
if (files.length === 0)
|
|
71
|
+
return 0;
|
|
72
|
+
const lastFile = files[files.length - 1];
|
|
73
|
+
const seqStr = lastFile.slice(0, 10);
|
|
74
|
+
const parsed = parseInt(seqStr, 10);
|
|
75
|
+
return isNaN(parsed) ? 0 : parsed + 1;
|
|
76
|
+
}
|
|
77
|
+
ensureDir() {
|
|
78
|
+
if (!existsSync(STORE_DIR)) {
|
|
79
|
+
mkdirSync(STORE_DIR, { recursive: true, mode: 0o700 });
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
/** Filename format: {seq}-{uuid}.json — seq is zero-padded for lexicographic ordering */
|
|
83
|
+
fileName(seq, resultId) {
|
|
84
|
+
return `${String(seq).padStart(10, '0')}-${resultId}${FILE_SUFFIX}`;
|
|
85
|
+
}
|
|
86
|
+
extractResultId(fileName) {
|
|
87
|
+
// Format: 0000000001-<uuid>.json
|
|
88
|
+
return fileName.slice(11, -FILE_SUFFIX.length);
|
|
89
|
+
}
|
|
90
|
+
listResultFiles() {
|
|
91
|
+
this.ensureDir();
|
|
92
|
+
return readdirSync(STORE_DIR)
|
|
93
|
+
.filter((f) => f.endsWith(FILE_SUFFIX) && f.length > FILE_SUFFIX.length)
|
|
94
|
+
.sort(); // Lexicographic sort gives insertion order via zero-padded seq
|
|
95
|
+
}
|
|
96
|
+
findFileForResult(resultId) {
|
|
97
|
+
const files = this.listResultFiles();
|
|
98
|
+
return files.find((f) => this.extractResultId(f) === resultId);
|
|
99
|
+
}
|
|
25
100
|
store(mirrorIds, runtimeId, examType, lines) {
|
|
101
|
+
this.ensureDir();
|
|
26
102
|
const resultId = randomUUID();
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
103
|
+
const seqNum = this.seq++;
|
|
104
|
+
// Evict oldest entries if at capacity (files are sorted by seq prefix)
|
|
105
|
+
const files = this.listResultFiles();
|
|
106
|
+
const toEvict = files.length - MAX_RESULTS + 1;
|
|
107
|
+
for (let i = 0; i < toEvict; i++) {
|
|
108
|
+
try {
|
|
109
|
+
unlinkSync(join(STORE_DIR, files[i]));
|
|
110
|
+
}
|
|
111
|
+
catch {
|
|
112
|
+
// ignore
|
|
32
113
|
}
|
|
33
114
|
}
|
|
34
|
-
|
|
115
|
+
const stored = {
|
|
35
116
|
result_id: resultId,
|
|
36
117
|
mirror_ids: mirrorIds,
|
|
37
118
|
runtime_id: runtimeId,
|
|
38
119
|
exam_type: examType,
|
|
39
120
|
lines,
|
|
40
121
|
stored_at: new Date().toISOString(),
|
|
122
|
+
};
|
|
123
|
+
writeFileSync(join(STORE_DIR, this.fileName(seqNum, resultId)), JSON.stringify(stored), {
|
|
124
|
+
encoding: 'utf-8',
|
|
125
|
+
mode: 0o600,
|
|
41
126
|
});
|
|
42
127
|
return resultId;
|
|
43
128
|
}
|
|
44
129
|
get(resultId) {
|
|
45
|
-
|
|
130
|
+
const file = this.findFileForResult(resultId);
|
|
131
|
+
if (!file)
|
|
132
|
+
return undefined;
|
|
133
|
+
try {
|
|
134
|
+
const content = readFileSync(join(STORE_DIR, file), 'utf-8');
|
|
135
|
+
return JSON.parse(content);
|
|
136
|
+
}
|
|
137
|
+
catch {
|
|
138
|
+
return undefined;
|
|
139
|
+
}
|
|
46
140
|
}
|
|
47
141
|
delete(resultId) {
|
|
48
|
-
|
|
142
|
+
const file = this.findFileForResult(resultId);
|
|
143
|
+
if (!file)
|
|
144
|
+
return false;
|
|
145
|
+
try {
|
|
146
|
+
unlinkSync(join(STORE_DIR, file));
|
|
147
|
+
return true;
|
|
148
|
+
}
|
|
149
|
+
catch {
|
|
150
|
+
return false;
|
|
151
|
+
}
|
|
49
152
|
}
|
|
50
153
|
get size() {
|
|
51
|
-
return this.
|
|
154
|
+
return this.listResultFiles().length;
|
|
52
155
|
}
|
|
53
|
-
/** For testing only */
|
|
156
|
+
/** For testing only — removes all result files and resets the sequence counter */
|
|
54
157
|
clear() {
|
|
55
|
-
this.
|
|
158
|
+
this.ensureDir();
|
|
159
|
+
for (const file of this.listResultFiles()) {
|
|
160
|
+
try {
|
|
161
|
+
unlinkSync(join(STORE_DIR, file));
|
|
162
|
+
}
|
|
163
|
+
catch {
|
|
164
|
+
// ignore
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
this.seq = 0;
|
|
56
168
|
}
|
|
57
169
|
}
|
|
58
170
|
/** Singleton instance shared across all tool factories */
|
|
@@ -49,7 +49,7 @@ Typical usage:
|
|
|
49
49
|
content: [
|
|
50
50
|
{
|
|
51
51
|
type: 'text',
|
|
52
|
-
text: `No stored result found for result_id "${validatedArgs.result_id}".
|
|
52
|
+
text: `No stored result found for result_id "${validatedArgs.result_id}". The result file may have been cleaned up or the /tmp directory cleared.`,
|
|
53
53
|
},
|
|
54
54
|
],
|
|
55
55
|
isError: true,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import { examResultStore } from '../exam-result-store.js';
|
|
2
|
+
import { examResultStore, extractExamId, extractStatus } from '../exam-result-store.js';
|
|
3
3
|
const PARAM_DESCRIPTIONS = {
|
|
4
4
|
mirror_ids: 'Array of unofficial mirror IDs to run exams against. Mirrors without saved mcp_json configs will be skipped.',
|
|
5
5
|
runtime_id: 'The Fly Machines runtime ID to use for running the exam containers (e.g., "fly-machines-v1")',
|
|
@@ -66,7 +66,7 @@ Available exam types:
|
|
|
66
66
|
|
|
67
67
|
Mirrors without saved mcp_json configurations are automatically skipped.
|
|
68
68
|
|
|
69
|
-
Results are stored server-side and a \`result_id\` UUID is returned. The response includes a truncated summary (status, tool names/counts, errors) that fits within MCP size limits. Use \`get_exam_result\` to drill into full details, or pass the \`result_id\` directly to \`save_results_for_mirror\`.
|
|
69
|
+
Results are stored server-side in a local file and a \`result_id\` UUID is returned. The response includes a truncated summary (status, tool names/counts, errors) that fits within MCP size limits. Use \`get_exam_result\` to drill into full details, or pass the \`result_id\` directly to \`save_results_for_mirror\`.
|
|
70
70
|
|
|
71
71
|
Use cases:
|
|
72
72
|
- Test if an unofficial mirror's MCP server is working correctly before linking it
|
|
@@ -119,15 +119,18 @@ Use cases:
|
|
|
119
119
|
case 'log':
|
|
120
120
|
content += `[LOG] ${line.message || JSON.stringify(line)}\n`;
|
|
121
121
|
break;
|
|
122
|
-
case 'exam_result':
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
content +=
|
|
126
|
-
|
|
127
|
-
|
|
122
|
+
case 'exam_result': {
|
|
123
|
+
const data = line.data;
|
|
124
|
+
const mirrorId = line.mirror_id ?? data?.mirror_id ?? 'unknown';
|
|
125
|
+
content += `\n**Exam Result** (Mirror: ${mirrorId})\n`;
|
|
126
|
+
content += ` Exam: ${extractExamId(line)}\n`;
|
|
127
|
+
content += ` Status: ${extractStatus(line)}\n`;
|
|
128
|
+
if (data) {
|
|
129
|
+
const truncatedData = truncateExamResultData(data);
|
|
128
130
|
content += ` Data: ${JSON.stringify(truncatedData, null, 2)}\n`;
|
|
129
131
|
}
|
|
130
132
|
break;
|
|
133
|
+
}
|
|
131
134
|
case 'summary':
|
|
132
135
|
content += `\n**Summary**\n`;
|
|
133
136
|
content += ` Total: ${line.total || 0}\n`;
|
|
@@ -17,7 +17,7 @@ export declare function saveResultsForMirror(_server: Server, clientFactory: Cli
|
|
|
17
17
|
result_id: {
|
|
18
18
|
type: string;
|
|
19
19
|
format: string;
|
|
20
|
-
description: "The UUID returned by run_exam_for_mirror. When provided, the server retrieves the full result from the
|
|
20
|
+
description: "The UUID returned by run_exam_for_mirror. When provided, the server retrieves the full result from the local file store — no need to pass the results array. This is the preferred approach.";
|
|
21
21
|
};
|
|
22
22
|
results: {
|
|
23
23
|
type: string;
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import { examResultStore } from '../exam-result-store.js';
|
|
2
|
+
import { examResultStore, extractExamId, extractStatus } from '../exam-result-store.js';
|
|
3
3
|
const PARAM_DESCRIPTIONS = {
|
|
4
4
|
mirror_id: 'The ID of the unofficial mirror to save results for',
|
|
5
5
|
runtime_id: 'The runtime ID that was used to run the exams',
|
|
6
|
-
result_id: 'The UUID returned by run_exam_for_mirror. When provided, the server retrieves the full result from the
|
|
6
|
+
result_id: 'The UUID returned by run_exam_for_mirror. When provided, the server retrieves the full result from the local file store — no need to pass the results array. This is the preferred approach.',
|
|
7
7
|
results: 'Array of exam results to save. Each result must include exam_id, status, and optional data. Only needed if result_id is not provided.',
|
|
8
8
|
exam_id: 'The exam identifier (e.g., "auth-check", "init-tools-list")',
|
|
9
9
|
status: 'The result status (e.g., "pass", "fail", "error", "skip")',
|
|
@@ -32,7 +32,7 @@ export function saveResultsForMirror(_server, clientFactory) {
|
|
|
32
32
|
name: 'save_results_for_mirror',
|
|
33
33
|
description: `Save proctor exam results for an unofficial mirror.
|
|
34
34
|
|
|
35
|
-
**Preferred**: Pass the \`result_id\` returned by \`run_exam_for_mirror\`. The full result is retrieved from the
|
|
35
|
+
**Preferred**: Pass the \`result_id\` returned by \`run_exam_for_mirror\`. The full result is retrieved from the local file store server-side — no need to pass the large results payload through the LLM context.
|
|
36
36
|
|
|
37
37
|
**Fallback**: Pass results directly (as before) if result_id is not available.
|
|
38
38
|
|
|
@@ -88,20 +88,39 @@ Typical workflow:
|
|
|
88
88
|
content: [
|
|
89
89
|
{
|
|
90
90
|
type: 'text',
|
|
91
|
-
text: `No stored result found for result_id "${validatedArgs.result_id}".
|
|
91
|
+
text: `No stored result found for result_id "${validatedArgs.result_id}". The result file may have been cleaned up or the /tmp directory cleared. Pass the results array directly instead.`,
|
|
92
92
|
},
|
|
93
93
|
],
|
|
94
94
|
isError: true,
|
|
95
95
|
};
|
|
96
96
|
}
|
|
97
|
-
// Extract exam_result lines from stored data
|
|
97
|
+
// Extract exam_result lines from stored data.
|
|
98
|
+
// The exam_id may live at the top level of the stream line OR inside
|
|
99
|
+
// line.data (the actual result payload). Prefer the data payload to
|
|
100
|
+
// avoid reading from potentially incomplete display metadata.
|
|
101
|
+
//
|
|
102
|
+
// The real proctor API returns line.data as a metadata wrapper:
|
|
103
|
+
// { mirror_id, exam_id, status, result: { status, output: {...} } }
|
|
104
|
+
// The actual output lives inside line.data.result. When we pass the
|
|
105
|
+
// entire line.data as `data`, the output ends up nested too deeply
|
|
106
|
+
// (result.data.result.output) and the backend saves empty output.
|
|
107
|
+
// Use line.data.result when present so that `output` is at the
|
|
108
|
+
// expected depth (result.data.output).
|
|
98
109
|
results = stored.lines
|
|
99
110
|
.filter((line) => line.type === 'exam_result')
|
|
100
|
-
.map((line) =>
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
111
|
+
.map((line) => {
|
|
112
|
+
const data = line.data;
|
|
113
|
+
// Prefer the nested result object (contains output, input, etc.)
|
|
114
|
+
// over the full data wrapper (contains metadata like mirror_id)
|
|
115
|
+
const resultData = data?.result && typeof data.result === 'object' && !Array.isArray(data.result)
|
|
116
|
+
? data.result
|
|
117
|
+
: data;
|
|
118
|
+
return {
|
|
119
|
+
exam_id: extractExamId(line),
|
|
120
|
+
status: extractStatus(line),
|
|
121
|
+
...(resultData ? { data: resultData } : {}),
|
|
122
|
+
};
|
|
123
|
+
});
|
|
105
124
|
if (!runtimeId) {
|
|
106
125
|
runtimeId = stored.runtime_id;
|
|
107
126
|
}
|