pulsemcp-cms-admin-mcp-server 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,50 @@
1
1
  import { randomUUID } from 'crypto';
2
+ import { readFileSync, writeFileSync, unlinkSync, readdirSync, mkdirSync, existsSync } from 'fs';
3
+ import { join } from 'path';
4
+ import { tmpdir } from 'os';
2
5
  /**
3
- * Maximum number of results to keep in memory. Oldest results are evicted
4
- * when this limit is reached (FIFO). Each result can be 60KB+ for servers
5
- * with many tools, so 100 entries ≈ 6MB worst case.
6
+ * Extract exam_id from a proctor exam stream line, checking both the
7
+ * data payload and top-level fields. The API may place exam_id in
8
+ * either location depending on the exam type.
9
+ */
10
+ export function extractExamId(line) {
11
+ const data = line.data;
12
+ return (data?.exam_id ||
13
+ line.exam_id ||
14
+ data?.exam_type ||
15
+ line.exam_type ||
16
+ 'unknown');
17
+ }
18
+ /**
19
+ * Extract status from a proctor exam stream line, checking both the
20
+ * data payload and top-level fields.
21
+ */
22
+ export function extractStatus(line) {
23
+ const data = line.data;
24
+ return data?.status || line.status || 'unknown';
25
+ }
26
+ /**
27
+ * Maximum number of results to keep on disk. Oldest results are evicted
28
+ * when this limit is reached (FIFO by insertion order).
6
29
  */
7
30
  const MAX_RESULTS = 100;
31
+ const STORE_DIR = join(tmpdir(), 'pulsemcp-exam-results');
32
+ const FILE_SUFFIX = '.json';
8
33
  /**
9
- * In-memory store for proctor exam results.
34
+ * File-based store for proctor exam results.
10
35
  *
11
- * When `run_exam_for_mirror` completes, the full result is stored here
12
- * and a UUID `result_id` is returned. This avoids dumping large payloads
13
- * (~60KB+ for servers with many tools) into the LLM context.
36
+ * When `run_exam_for_mirror` completes, the full result is written to a
37
+ * JSON file in /tmp/ and a UUID `result_id` is returned. This avoids
38
+ * dumping large payloads (~60KB+ for servers with many tools) into the
39
+ * LLM context, and survives across tool calls without relying on
40
+ * in-memory state.
14
41
  *
15
- * Eviction: When the store exceeds MAX_RESULTS entries, the oldest result
42
+ * Files are named with a zero-padded sequence number prefix so that
43
+ * lexicographic sorting preserves insertion order for FIFO eviction.
44
+ * The sequence counter is initialized from existing files on disk so
45
+ * that new entries sort after old ones even across process restarts.
46
+ *
47
+ * Eviction: When the store exceeds MAX_RESULTS files, the oldest result
16
48
  * is evicted (FIFO). Results are also deleted after successful save via
17
49
  * `save_results_for_mirror`.
18
50
  *
@@ -21,38 +53,118 @@ const MAX_RESULTS = 100;
21
53
  * - Pass `result_id` to `save_results_for_mirror` instead of the full payload
22
54
  */
23
55
  class ExamResultStore {
24
- results = new Map();
56
+ seq;
57
+ constructor() {
58
+ this.seq = this.initSeqFromDisk();
59
+ }
60
+ /**
61
+ * Scan existing files to find the highest sequence number and start
62
+ * one past it. This ensures new files always sort after existing ones,
63
+ * even across process restarts.
64
+ */
65
+ initSeqFromDisk() {
66
+ this.ensureDir();
67
+ const files = readdirSync(STORE_DIR)
68
+ .filter((f) => f.endsWith(FILE_SUFFIX) && f.length > FILE_SUFFIX.length)
69
+ .sort();
70
+ if (files.length === 0)
71
+ return 0;
72
+ const lastFile = files[files.length - 1];
73
+ const seqStr = lastFile.slice(0, 10);
74
+ const parsed = parseInt(seqStr, 10);
75
+ return isNaN(parsed) ? 0 : parsed + 1;
76
+ }
77
+ ensureDir() {
78
+ if (!existsSync(STORE_DIR)) {
79
+ mkdirSync(STORE_DIR, { recursive: true, mode: 0o700 });
80
+ }
81
+ }
82
+ /** Filename format: {seq}-{uuid}.json — seq is zero-padded for lexicographic ordering */
83
+ fileName(seq, resultId) {
84
+ return `${String(seq).padStart(10, '0')}-${resultId}${FILE_SUFFIX}`;
85
+ }
86
+ extractResultId(fileName) {
87
+ // Format: 0000000001-<uuid>.json
88
+ return fileName.slice(11, -FILE_SUFFIX.length);
89
+ }
90
+ listResultFiles() {
91
+ this.ensureDir();
92
+ return readdirSync(STORE_DIR)
93
+ .filter((f) => f.endsWith(FILE_SUFFIX) && f.length > FILE_SUFFIX.length)
94
+ .sort(); // Lexicographic sort gives insertion order via zero-padded seq
95
+ }
96
+ findFileForResult(resultId) {
97
+ const files = this.listResultFiles();
98
+ return files.find((f) => this.extractResultId(f) === resultId);
99
+ }
25
100
  store(mirrorIds, runtimeId, examType, lines) {
101
+ this.ensureDir();
26
102
  const resultId = randomUUID();
27
- // Evict oldest entries if at capacity (Map preserves insertion order)
28
- while (this.results.size >= MAX_RESULTS) {
29
- const oldestKey = this.results.keys().next().value;
30
- if (oldestKey !== undefined) {
31
- this.results.delete(oldestKey);
103
+ const seqNum = this.seq++;
104
+ // Evict oldest entries if at capacity (files are sorted by seq prefix)
105
+ const files = this.listResultFiles();
106
+ const toEvict = files.length - MAX_RESULTS + 1;
107
+ for (let i = 0; i < toEvict; i++) {
108
+ try {
109
+ unlinkSync(join(STORE_DIR, files[i]));
110
+ }
111
+ catch {
112
+ // ignore
32
113
  }
33
114
  }
34
- this.results.set(resultId, {
115
+ const stored = {
35
116
  result_id: resultId,
36
117
  mirror_ids: mirrorIds,
37
118
  runtime_id: runtimeId,
38
119
  exam_type: examType,
39
120
  lines,
40
121
  stored_at: new Date().toISOString(),
122
+ };
123
+ writeFileSync(join(STORE_DIR, this.fileName(seqNum, resultId)), JSON.stringify(stored), {
124
+ encoding: 'utf-8',
125
+ mode: 0o600,
41
126
  });
42
127
  return resultId;
43
128
  }
44
129
  get(resultId) {
45
- return this.results.get(resultId);
130
+ const file = this.findFileForResult(resultId);
131
+ if (!file)
132
+ return undefined;
133
+ try {
134
+ const content = readFileSync(join(STORE_DIR, file), 'utf-8');
135
+ return JSON.parse(content);
136
+ }
137
+ catch {
138
+ return undefined;
139
+ }
46
140
  }
47
141
  delete(resultId) {
48
- return this.results.delete(resultId);
142
+ const file = this.findFileForResult(resultId);
143
+ if (!file)
144
+ return false;
145
+ try {
146
+ unlinkSync(join(STORE_DIR, file));
147
+ return true;
148
+ }
149
+ catch {
150
+ return false;
151
+ }
49
152
  }
50
153
  get size() {
51
- return this.results.size;
154
+ return this.listResultFiles().length;
52
155
  }
53
- /** For testing only */
156
+ /** For testing only — removes all result files and resets the sequence counter */
54
157
  clear() {
55
- this.results.clear();
158
+ this.ensureDir();
159
+ for (const file of this.listResultFiles()) {
160
+ try {
161
+ unlinkSync(join(STORE_DIR, file));
162
+ }
163
+ catch {
164
+ // ignore
165
+ }
166
+ }
167
+ this.seq = 0;
56
168
  }
57
169
  }
58
170
  /** Singleton instance shared across all tool factories */
@@ -49,7 +49,7 @@ Typical usage:
49
49
  content: [
50
50
  {
51
51
  type: 'text',
52
- text: `No stored result found for result_id "${validatedArgs.result_id}". Results are stored in-memory and may have been lost if the server restarted.`,
52
+ text: `No stored result found for result_id "${validatedArgs.result_id}". The result file may have been cleaned up or the /tmp directory cleared.`,
53
53
  },
54
54
  ],
55
55
  isError: true,
@@ -1,5 +1,5 @@
1
1
  import { z } from 'zod';
2
- import { examResultStore } from '../exam-result-store.js';
2
+ import { examResultStore, extractExamId, extractStatus } from '../exam-result-store.js';
3
3
  const PARAM_DESCRIPTIONS = {
4
4
  mirror_ids: 'Array of unofficial mirror IDs to run exams against. Mirrors without saved mcp_json configs will be skipped.',
5
5
  runtime_id: 'The Fly Machines runtime ID to use for running the exam containers (e.g., "fly-machines-v1")',
@@ -66,7 +66,7 @@ Available exam types:
66
66
 
67
67
  Mirrors without saved mcp_json configurations are automatically skipped.
68
68
 
69
- Results are stored server-side and a \`result_id\` UUID is returned. The response includes a truncated summary (status, tool names/counts, errors) that fits within MCP size limits. Use \`get_exam_result\` to drill into full details, or pass the \`result_id\` directly to \`save_results_for_mirror\`.
69
+ Results are stored server-side in a local file and a \`result_id\` UUID is returned. The response includes a truncated summary (status, tool names/counts, errors) that fits within MCP size limits. Use \`get_exam_result\` to drill into full details, or pass the \`result_id\` directly to \`save_results_for_mirror\`.
70
70
 
71
71
  Use cases:
72
72
  - Test if an unofficial mirror's MCP server is working correctly before linking it
@@ -119,15 +119,18 @@ Use cases:
119
119
  case 'log':
120
120
  content += `[LOG] ${line.message || JSON.stringify(line)}\n`;
121
121
  break;
122
- case 'exam_result':
123
- content += `\n**Exam Result** (Mirror: ${line.mirror_id || 'unknown'})\n`;
124
- content += ` Exam: ${line.exam_id || line.exam_type || 'unknown'}\n`;
125
- content += ` Status: ${line.status || 'unknown'}\n`;
126
- if (line.data) {
127
- const truncatedData = truncateExamResultData(line.data);
122
+ case 'exam_result': {
123
+ const data = line.data;
124
+ const mirrorId = line.mirror_id ?? data?.mirror_id ?? 'unknown';
125
+ content += `\n**Exam Result** (Mirror: ${mirrorId})\n`;
126
+ content += ` Exam: ${extractExamId(line)}\n`;
127
+ content += ` Status: ${extractStatus(line)}\n`;
128
+ if (data) {
129
+ const truncatedData = truncateExamResultData(data);
128
130
  content += ` Data: ${JSON.stringify(truncatedData, null, 2)}\n`;
129
131
  }
130
132
  break;
133
+ }
131
134
  case 'summary':
132
135
  content += `\n**Summary**\n`;
133
136
  content += ` Total: ${line.total || 0}\n`;
@@ -1,9 +1,9 @@
1
1
  import { z } from 'zod';
2
- import { examResultStore } from '../exam-result-store.js';
2
+ import { examResultStore, extractExamId, extractStatus } from '../exam-result-store.js';
3
3
  const PARAM_DESCRIPTIONS = {
4
4
  mirror_id: 'The ID of the unofficial mirror to save results for',
5
5
  runtime_id: 'The runtime ID that was used to run the exams',
6
- result_id: 'The UUID returned by run_exam_for_mirror. When provided, the server retrieves the full result from the in-memory store — no need to pass the results array. This is the preferred approach.',
6
+ result_id: 'The UUID returned by run_exam_for_mirror. When provided, the server retrieves the full result from the local file store — no need to pass the results array. This is the preferred approach.',
7
7
  results: 'Array of exam results to save. Each result must include exam_id, status, and optional data. Only needed if result_id is not provided.',
8
8
  exam_id: 'The exam identifier (e.g., "auth-check", "init-tools-list")',
9
9
  status: 'The result status (e.g., "pass", "fail", "error", "skip")',
@@ -32,7 +32,7 @@ export function saveResultsForMirror(_server, clientFactory) {
32
32
  name: 'save_results_for_mirror',
33
33
  description: `Save proctor exam results for an unofficial mirror.
34
34
 
35
- **Preferred**: Pass the \`result_id\` returned by \`run_exam_for_mirror\`. The full result is retrieved from the in-memory store server-side — no need to pass the large results payload through the LLM context.
35
+ **Preferred**: Pass the \`result_id\` returned by \`run_exam_for_mirror\`. The full result is retrieved from the local file store server-side — no need to pass the large results payload through the LLM context.
36
36
 
37
37
  **Fallback**: Pass results directly (as before) if result_id is not available.
38
38
 
@@ -88,20 +88,39 @@ Typical workflow:
88
88
  content: [
89
89
  {
90
90
  type: 'text',
91
- text: `No stored result found for result_id "${validatedArgs.result_id}". Results are stored in-memory and may have been lost if the server restarted. Pass the results array directly instead.`,
91
+ text: `No stored result found for result_id "${validatedArgs.result_id}". The result file may have been cleaned up or the /tmp directory cleared. Pass the results array directly instead.`,
92
92
  },
93
93
  ],
94
94
  isError: true,
95
95
  };
96
96
  }
97
- // Extract exam_result lines from stored data
97
+ // Extract exam_result lines from stored data.
98
+ // The exam_id may live at the top level of the stream line OR inside
99
+ // line.data (the actual result payload). Prefer the data payload to
100
+ // avoid reading from potentially incomplete display metadata.
101
+ //
102
+ // The real proctor API returns line.data as a metadata wrapper:
103
+ // { mirror_id, exam_id, status, result: { status, output: {...} } }
104
+ // The actual output lives inside line.data.result. When we pass the
105
+ // entire line.data as `data`, the output ends up nested too deeply
106
+ // (result.data.result.output) and the backend saves empty output.
107
+ // Use line.data.result when present so that `output` is at the
108
+ // expected depth (result.data.output).
98
109
  results = stored.lines
99
110
  .filter((line) => line.type === 'exam_result')
100
- .map((line) => ({
101
- exam_id: (line.exam_id || line.exam_type || 'unknown'),
102
- status: (line.status || 'unknown'),
103
- ...(line.data ? { data: line.data } : {}),
104
- }));
111
+ .map((line) => {
112
+ const data = line.data;
113
+ // Prefer the nested result object (contains output, input, etc.)
114
+ // over the full data wrapper (contains metadata like mirror_id)
115
+ const resultData = data?.result && typeof data.result === 'object' && !Array.isArray(data.result)
116
+ ? data.result
117
+ : data;
118
+ return {
119
+ exam_id: extractExamId(line),
120
+ status: extractStatus(line),
121
+ ...(resultData ? { data: resultData } : {}),
122
+ };
123
+ });
105
124
  if (!runtimeId) {
106
125
  runtimeId = stored.runtime_id;
107
126
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pulsemcp-cms-admin-mcp-server",
3
- "version": "0.7.0",
3
+ "version": "0.7.2",
4
4
  "description": "Local implementation of PulseMCP CMS Admin MCP server",
5
5
  "mcpName": "com.pulsemcp.servers/pulsemcp-cms-admin",
6
6
  "main": "build/index.js",
@@ -8,13 +8,31 @@ export interface StoredExamResult {
8
8
  stored_at: string;
9
9
  }
10
10
  /**
11
- * In-memory store for proctor exam results.
11
+ * Extract exam_id from a proctor exam stream line, checking both the
12
+ * data payload and top-level fields. The API may place exam_id in
13
+ * either location depending on the exam type.
14
+ */
15
+ export declare function extractExamId(line: ProctorExamStreamLine): string;
16
+ /**
17
+ * Extract status from a proctor exam stream line, checking both the
18
+ * data payload and top-level fields.
19
+ */
20
+ export declare function extractStatus(line: ProctorExamStreamLine): string;
21
+ /**
22
+ * File-based store for proctor exam results.
23
+ *
24
+ * When `run_exam_for_mirror` completes, the full result is written to a
25
+ * JSON file in /tmp/ and a UUID `result_id` is returned. This avoids
26
+ * dumping large payloads (~60KB+ for servers with many tools) into the
27
+ * LLM context, and survives across tool calls without relying on
28
+ * in-memory state.
12
29
  *
13
- * When `run_exam_for_mirror` completes, the full result is stored here
14
- * and a UUID `result_id` is returned. This avoids dumping large payloads
15
- * (~60KB+ for servers with many tools) into the LLM context.
30
+ * Files are named with a zero-padded sequence number prefix so that
31
+ * lexicographic sorting preserves insertion order for FIFO eviction.
32
+ * The sequence counter is initialized from existing files on disk so
33
+ * that new entries sort after old ones even across process restarts.
16
34
  *
17
- * Eviction: When the store exceeds MAX_RESULTS entries, the oldest result
35
+ * Eviction: When the store exceeds MAX_RESULTS files, the oldest result
18
36
  * is evicted (FIFO). Results are also deleted after successful save via
19
37
  * `save_results_for_mirror`.
20
38
  *
@@ -23,12 +41,25 @@ export interface StoredExamResult {
23
41
  * - Pass `result_id` to `save_results_for_mirror` instead of the full payload
24
42
  */
25
43
  declare class ExamResultStore {
26
- private results;
44
+ private seq;
45
+ constructor();
46
+ /**
47
+ * Scan existing files to find the highest sequence number and start
48
+ * one past it. This ensures new files always sort after existing ones,
49
+ * even across process restarts.
50
+ */
51
+ private initSeqFromDisk;
52
+ private ensureDir;
53
+ /** Filename format: {seq}-{uuid}.json — seq is zero-padded for lexicographic ordering */
54
+ private fileName;
55
+ private extractResultId;
56
+ private listResultFiles;
57
+ private findFileForResult;
27
58
  store(mirrorIds: number[], runtimeId: string, examType: string, lines: ProctorExamStreamLine[]): string;
28
59
  get(resultId: string): StoredExamResult | undefined;
29
60
  delete(resultId: string): boolean;
30
61
  get size(): number;
31
- /** For testing only */
62
+ /** For testing only — removes all result files and resets the sequence counter */
32
63
  clear(): void;
33
64
  }
34
65
  /** Singleton instance shared across all tool factories */
@@ -1,18 +1,50 @@
1
1
  import { randomUUID } from 'crypto';
2
+ import { readFileSync, writeFileSync, unlinkSync, readdirSync, mkdirSync, existsSync } from 'fs';
3
+ import { join } from 'path';
4
+ import { tmpdir } from 'os';
2
5
  /**
3
- * Maximum number of results to keep in memory. Oldest results are evicted
4
- * when this limit is reached (FIFO). Each result can be 60KB+ for servers
5
- * with many tools, so 100 entries ≈ 6MB worst case.
6
+ * Extract exam_id from a proctor exam stream line, checking both the
7
+ * data payload and top-level fields. The API may place exam_id in
8
+ * either location depending on the exam type.
9
+ */
10
+ export function extractExamId(line) {
11
+ const data = line.data;
12
+ return (data?.exam_id ||
13
+ line.exam_id ||
14
+ data?.exam_type ||
15
+ line.exam_type ||
16
+ 'unknown');
17
+ }
18
+ /**
19
+ * Extract status from a proctor exam stream line, checking both the
20
+ * data payload and top-level fields.
21
+ */
22
+ export function extractStatus(line) {
23
+ const data = line.data;
24
+ return data?.status || line.status || 'unknown';
25
+ }
26
+ /**
27
+ * Maximum number of results to keep on disk. Oldest results are evicted
28
+ * when this limit is reached (FIFO by insertion order).
6
29
  */
7
30
  const MAX_RESULTS = 100;
31
+ const STORE_DIR = join(tmpdir(), 'pulsemcp-exam-results');
32
+ const FILE_SUFFIX = '.json';
8
33
  /**
9
- * In-memory store for proctor exam results.
34
+ * File-based store for proctor exam results.
10
35
  *
11
- * When `run_exam_for_mirror` completes, the full result is stored here
12
- * and a UUID `result_id` is returned. This avoids dumping large payloads
13
- * (~60KB+ for servers with many tools) into the LLM context.
36
+ * When `run_exam_for_mirror` completes, the full result is written to a
37
+ * JSON file in /tmp/ and a UUID `result_id` is returned. This avoids
38
+ * dumping large payloads (~60KB+ for servers with many tools) into the
39
+ * LLM context, and survives across tool calls without relying on
40
+ * in-memory state.
14
41
  *
15
- * Eviction: When the store exceeds MAX_RESULTS entries, the oldest result
42
+ * Files are named with a zero-padded sequence number prefix so that
43
+ * lexicographic sorting preserves insertion order for FIFO eviction.
44
+ * The sequence counter is initialized from existing files on disk so
45
+ * that new entries sort after old ones even across process restarts.
46
+ *
47
+ * Eviction: When the store exceeds MAX_RESULTS files, the oldest result
16
48
  * is evicted (FIFO). Results are also deleted after successful save via
17
49
  * `save_results_for_mirror`.
18
50
  *
@@ -21,38 +53,118 @@ const MAX_RESULTS = 100;
21
53
  * - Pass `result_id` to `save_results_for_mirror` instead of the full payload
22
54
  */
23
55
  class ExamResultStore {
24
- results = new Map();
56
+ seq;
57
+ constructor() {
58
+ this.seq = this.initSeqFromDisk();
59
+ }
60
+ /**
61
+ * Scan existing files to find the highest sequence number and start
62
+ * one past it. This ensures new files always sort after existing ones,
63
+ * even across process restarts.
64
+ */
65
+ initSeqFromDisk() {
66
+ this.ensureDir();
67
+ const files = readdirSync(STORE_DIR)
68
+ .filter((f) => f.endsWith(FILE_SUFFIX) && f.length > FILE_SUFFIX.length)
69
+ .sort();
70
+ if (files.length === 0)
71
+ return 0;
72
+ const lastFile = files[files.length - 1];
73
+ const seqStr = lastFile.slice(0, 10);
74
+ const parsed = parseInt(seqStr, 10);
75
+ return isNaN(parsed) ? 0 : parsed + 1;
76
+ }
77
+ ensureDir() {
78
+ if (!existsSync(STORE_DIR)) {
79
+ mkdirSync(STORE_DIR, { recursive: true, mode: 0o700 });
80
+ }
81
+ }
82
+ /** Filename format: {seq}-{uuid}.json — seq is zero-padded for lexicographic ordering */
83
+ fileName(seq, resultId) {
84
+ return `${String(seq).padStart(10, '0')}-${resultId}${FILE_SUFFIX}`;
85
+ }
86
+ extractResultId(fileName) {
87
+ // Format: 0000000001-<uuid>.json
88
+ return fileName.slice(11, -FILE_SUFFIX.length);
89
+ }
90
+ listResultFiles() {
91
+ this.ensureDir();
92
+ return readdirSync(STORE_DIR)
93
+ .filter((f) => f.endsWith(FILE_SUFFIX) && f.length > FILE_SUFFIX.length)
94
+ .sort(); // Lexicographic sort gives insertion order via zero-padded seq
95
+ }
96
+ findFileForResult(resultId) {
97
+ const files = this.listResultFiles();
98
+ return files.find((f) => this.extractResultId(f) === resultId);
99
+ }
25
100
  store(mirrorIds, runtimeId, examType, lines) {
101
+ this.ensureDir();
26
102
  const resultId = randomUUID();
27
- // Evict oldest entries if at capacity (Map preserves insertion order)
28
- while (this.results.size >= MAX_RESULTS) {
29
- const oldestKey = this.results.keys().next().value;
30
- if (oldestKey !== undefined) {
31
- this.results.delete(oldestKey);
103
+ const seqNum = this.seq++;
104
+ // Evict oldest entries if at capacity (files are sorted by seq prefix)
105
+ const files = this.listResultFiles();
106
+ const toEvict = files.length - MAX_RESULTS + 1;
107
+ for (let i = 0; i < toEvict; i++) {
108
+ try {
109
+ unlinkSync(join(STORE_DIR, files[i]));
110
+ }
111
+ catch {
112
+ // ignore
32
113
  }
33
114
  }
34
- this.results.set(resultId, {
115
+ const stored = {
35
116
  result_id: resultId,
36
117
  mirror_ids: mirrorIds,
37
118
  runtime_id: runtimeId,
38
119
  exam_type: examType,
39
120
  lines,
40
121
  stored_at: new Date().toISOString(),
122
+ };
123
+ writeFileSync(join(STORE_DIR, this.fileName(seqNum, resultId)), JSON.stringify(stored), {
124
+ encoding: 'utf-8',
125
+ mode: 0o600,
41
126
  });
42
127
  return resultId;
43
128
  }
44
129
  get(resultId) {
45
- return this.results.get(resultId);
130
+ const file = this.findFileForResult(resultId);
131
+ if (!file)
132
+ return undefined;
133
+ try {
134
+ const content = readFileSync(join(STORE_DIR, file), 'utf-8');
135
+ return JSON.parse(content);
136
+ }
137
+ catch {
138
+ return undefined;
139
+ }
46
140
  }
47
141
  delete(resultId) {
48
- return this.results.delete(resultId);
142
+ const file = this.findFileForResult(resultId);
143
+ if (!file)
144
+ return false;
145
+ try {
146
+ unlinkSync(join(STORE_DIR, file));
147
+ return true;
148
+ }
149
+ catch {
150
+ return false;
151
+ }
49
152
  }
50
153
  get size() {
51
- return this.results.size;
154
+ return this.listResultFiles().length;
52
155
  }
53
- /** For testing only */
156
+ /** For testing only — removes all result files and resets the sequence counter */
54
157
  clear() {
55
- this.results.clear();
158
+ this.ensureDir();
159
+ for (const file of this.listResultFiles()) {
160
+ try {
161
+ unlinkSync(join(STORE_DIR, file));
162
+ }
163
+ catch {
164
+ // ignore
165
+ }
166
+ }
167
+ this.seq = 0;
56
168
  }
57
169
  }
58
170
  /** Singleton instance shared across all tool factories */
@@ -49,7 +49,7 @@ Typical usage:
49
49
  content: [
50
50
  {
51
51
  type: 'text',
52
- text: `No stored result found for result_id "${validatedArgs.result_id}". Results are stored in-memory and may have been lost if the server restarted.`,
52
+ text: `No stored result found for result_id "${validatedArgs.result_id}". The result file may have been cleaned up or the /tmp directory cleared.`,
53
53
  },
54
54
  ],
55
55
  isError: true,
@@ -1,5 +1,5 @@
1
1
  import { z } from 'zod';
2
- import { examResultStore } from '../exam-result-store.js';
2
+ import { examResultStore, extractExamId, extractStatus } from '../exam-result-store.js';
3
3
  const PARAM_DESCRIPTIONS = {
4
4
  mirror_ids: 'Array of unofficial mirror IDs to run exams against. Mirrors without saved mcp_json configs will be skipped.',
5
5
  runtime_id: 'The Fly Machines runtime ID to use for running the exam containers (e.g., "fly-machines-v1")',
@@ -66,7 +66,7 @@ Available exam types:
66
66
 
67
67
  Mirrors without saved mcp_json configurations are automatically skipped.
68
68
 
69
- Results are stored server-side and a \`result_id\` UUID is returned. The response includes a truncated summary (status, tool names/counts, errors) that fits within MCP size limits. Use \`get_exam_result\` to drill into full details, or pass the \`result_id\` directly to \`save_results_for_mirror\`.
69
+ Results are stored server-side in a local file and a \`result_id\` UUID is returned. The response includes a truncated summary (status, tool names/counts, errors) that fits within MCP size limits. Use \`get_exam_result\` to drill into full details, or pass the \`result_id\` directly to \`save_results_for_mirror\`.
70
70
 
71
71
  Use cases:
72
72
  - Test if an unofficial mirror's MCP server is working correctly before linking it
@@ -119,15 +119,18 @@ Use cases:
119
119
  case 'log':
120
120
  content += `[LOG] ${line.message || JSON.stringify(line)}\n`;
121
121
  break;
122
- case 'exam_result':
123
- content += `\n**Exam Result** (Mirror: ${line.mirror_id || 'unknown'})\n`;
124
- content += ` Exam: ${line.exam_id || line.exam_type || 'unknown'}\n`;
125
- content += ` Status: ${line.status || 'unknown'}\n`;
126
- if (line.data) {
127
- const truncatedData = truncateExamResultData(line.data);
122
+ case 'exam_result': {
123
+ const data = line.data;
124
+ const mirrorId = line.mirror_id ?? data?.mirror_id ?? 'unknown';
125
+ content += `\n**Exam Result** (Mirror: ${mirrorId})\n`;
126
+ content += ` Exam: ${extractExamId(line)}\n`;
127
+ content += ` Status: ${extractStatus(line)}\n`;
128
+ if (data) {
129
+ const truncatedData = truncateExamResultData(data);
128
130
  content += ` Data: ${JSON.stringify(truncatedData, null, 2)}\n`;
129
131
  }
130
132
  break;
133
+ }
131
134
  case 'summary':
132
135
  content += `\n**Summary**\n`;
133
136
  content += ` Total: ${line.total || 0}\n`;
@@ -17,7 +17,7 @@ export declare function saveResultsForMirror(_server: Server, clientFactory: Cli
17
17
  result_id: {
18
18
  type: string;
19
19
  format: string;
20
- description: "The UUID returned by run_exam_for_mirror. When provided, the server retrieves the full result from the in-memory store — no need to pass the results array. This is the preferred approach.";
20
+ description: "The UUID returned by run_exam_for_mirror. When provided, the server retrieves the full result from the local file store — no need to pass the results array. This is the preferred approach.";
21
21
  };
22
22
  results: {
23
23
  type: string;
@@ -1,9 +1,9 @@
1
1
  import { z } from 'zod';
2
- import { examResultStore } from '../exam-result-store.js';
2
+ import { examResultStore, extractExamId, extractStatus } from '../exam-result-store.js';
3
3
  const PARAM_DESCRIPTIONS = {
4
4
  mirror_id: 'The ID of the unofficial mirror to save results for',
5
5
  runtime_id: 'The runtime ID that was used to run the exams',
6
- result_id: 'The UUID returned by run_exam_for_mirror. When provided, the server retrieves the full result from the in-memory store — no need to pass the results array. This is the preferred approach.',
6
+ result_id: 'The UUID returned by run_exam_for_mirror. When provided, the server retrieves the full result from the local file store — no need to pass the results array. This is the preferred approach.',
7
7
  results: 'Array of exam results to save. Each result must include exam_id, status, and optional data. Only needed if result_id is not provided.',
8
8
  exam_id: 'The exam identifier (e.g., "auth-check", "init-tools-list")',
9
9
  status: 'The result status (e.g., "pass", "fail", "error", "skip")',
@@ -32,7 +32,7 @@ export function saveResultsForMirror(_server, clientFactory) {
32
32
  name: 'save_results_for_mirror',
33
33
  description: `Save proctor exam results for an unofficial mirror.
34
34
 
35
- **Preferred**: Pass the \`result_id\` returned by \`run_exam_for_mirror\`. The full result is retrieved from the in-memory store server-side — no need to pass the large results payload through the LLM context.
35
+ **Preferred**: Pass the \`result_id\` returned by \`run_exam_for_mirror\`. The full result is retrieved from the local file store server-side — no need to pass the large results payload through the LLM context.
36
36
 
37
37
  **Fallback**: Pass results directly (as before) if result_id is not available.
38
38
 
@@ -88,20 +88,39 @@ Typical workflow:
88
88
  content: [
89
89
  {
90
90
  type: 'text',
91
- text: `No stored result found for result_id "${validatedArgs.result_id}". Results are stored in-memory and may have been lost if the server restarted. Pass the results array directly instead.`,
91
+ text: `No stored result found for result_id "${validatedArgs.result_id}". The result file may have been cleaned up or the /tmp directory cleared. Pass the results array directly instead.`,
92
92
  },
93
93
  ],
94
94
  isError: true,
95
95
  };
96
96
  }
97
- // Extract exam_result lines from stored data
97
+ // Extract exam_result lines from stored data.
98
+ // The exam_id may live at the top level of the stream line OR inside
99
+ // line.data (the actual result payload). Prefer the data payload to
100
+ // avoid reading from potentially incomplete display metadata.
101
+ //
102
+ // The real proctor API returns line.data as a metadata wrapper:
103
+ // { mirror_id, exam_id, status, result: { status, output: {...} } }
104
+ // The actual output lives inside line.data.result. When we pass the
105
+ // entire line.data as `data`, the output ends up nested too deeply
106
+ // (result.data.result.output) and the backend saves empty output.
107
+ // Use line.data.result when present so that `output` is at the
108
+ // expected depth (result.data.output).
98
109
  results = stored.lines
99
110
  .filter((line) => line.type === 'exam_result')
100
- .map((line) => ({
101
- exam_id: (line.exam_id || line.exam_type || 'unknown'),
102
- status: (line.status || 'unknown'),
103
- ...(line.data ? { data: line.data } : {}),
104
- }));
111
+ .map((line) => {
112
+ const data = line.data;
113
+ // Prefer the nested result object (contains output, input, etc.)
114
+ // over the full data wrapper (contains metadata like mirror_id)
115
+ const resultData = data?.result && typeof data.result === 'object' && !Array.isArray(data.result)
116
+ ? data.result
117
+ : data;
118
+ return {
119
+ exam_id: extractExamId(line),
120
+ status: extractStatus(line),
121
+ ...(resultData ? { data: resultData } : {}),
122
+ };
123
+ });
105
124
  if (!runtimeId) {
106
125
  runtimeId = stored.runtime_id;
107
126
  }