4runr-os 2.10.65 → 2.10.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/gateway/dist/apps/gateway/src/index.js +2 -0
- package/apps/gateway/dist/apps/gateway/src/index.js.map +1 -1
- package/apps/gateway/dist/apps/gateway/src/routes/sentinel-policies.d.ts.map +1 -1
- package/apps/gateway/dist/apps/gateway/src/routes/sentinel-policies.js +8 -3
- package/apps/gateway/dist/apps/gateway/src/routes/sentinel-policies.js.map +1 -1
- package/apps/gateway/dist/apps/gateway/src/security/sentinel-config-store.d.ts +14 -0
- package/apps/gateway/dist/apps/gateway/src/security/sentinel-config-store.d.ts.map +1 -0
- package/apps/gateway/dist/apps/gateway/src/security/sentinel-config-store.js +168 -0
- package/apps/gateway/dist/apps/gateway/src/security/sentinel-config-store.js.map +1 -0
- package/apps/gateway/package-lock.json +125 -125
- package/apps/gateway/src/__tests__/run-kill.test.ts +80 -80
- package/apps/gateway/src/__tests__/sentinel-events.test.ts +95 -95
- package/apps/gateway/src/__tests__/sentinel-execute-watch.test.ts +45 -45
- package/apps/gateway/src/__tests__/sentinel-policies.test.ts +90 -90
- package/apps/gateway/src/__tests__/sentinel-publish-kill.test.ts +30 -30
- package/apps/gateway/src/__tests__/sentinel-run-failure.test.ts +89 -89
- package/apps/gateway/src/adapters/gateway-cancel-adapter.ts +32 -32
- package/apps/gateway/src/queue/sentinel-execute-watch.ts +42 -42
- package/apps/gateway/src/routes/sentinel-policies.ts +2 -2
- package/apps/gateway/src/runs/run-kill.ts +117 -117
- package/apps/gateway/src/security/sentinel-config-store.ts +53 -5
- package/apps/gateway/src/security/sentinel-run-failure.ts +85 -85
- package/mk3-tui/src/app.rs +4 -19
- package/mk3-tui/src/ui/sentinel_config.rs +86 -113
- package/package.json +4 -4
- package/scripts/os-tools-smoke.cjs +460 -460
|
@@ -1,89 +1,89 @@
|
|
|
1
|
-
import { describe, it, expect } from '@jest/globals';
|
|
2
|
-
import {
|
|
3
|
-
appendRunLog,
|
|
4
|
-
buildSentinelFailureLog,
|
|
5
|
-
buildSentinelFailureOutput,
|
|
6
|
-
extractSentinelKillReason,
|
|
7
|
-
formatSentinelKillError,
|
|
8
|
-
isSentinelKillError,
|
|
9
|
-
parsePolicyFromKillReason,
|
|
10
|
-
SENTINEL_KILL_ERROR_PREFIX,
|
|
11
|
-
} from '../security/sentinel-run-failure.js';
|
|
12
|
-
|
|
13
|
-
describe('sentinel-run-failure', () => {
|
|
14
|
-
describe('parsePolicyFromKillReason', () => {
|
|
15
|
-
it('extracts policy name from policy_violation prefix', () => {
|
|
16
|
-
expect(parsePolicyFromKillReason('policy_violation:timeout')).toBe('timeout');
|
|
17
|
-
expect(parsePolicyFromKillReason('policy_violation:token_cap')).toBe('token_cap');
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
it('returns undefined for non-policy reasons', () => {
|
|
21
|
-
expect(parsePolicyFromKillReason('manual_cancellation')).toBeUndefined();
|
|
22
|
-
expect(parsePolicyFromKillReason('operator_stop')).toBeUndefined();
|
|
23
|
-
});
|
|
24
|
-
});
|
|
25
|
-
|
|
26
|
-
describe('buildSentinelFailureOutput', () => {
|
|
27
|
-
it('returns stable deny shape for policy violations', () => {
|
|
28
|
-
expect(buildSentinelFailureOutput('policy_violation:idle')).toEqual({
|
|
29
|
-
error: 'Run terminated by Sentinel',
|
|
30
|
-
source: 'sentinel',
|
|
31
|
-
action: 'deny',
|
|
32
|
-
reason: 'policy_violation:idle',
|
|
33
|
-
policy: 'idle',
|
|
34
|
-
});
|
|
35
|
-
});
|
|
36
|
-
|
|
37
|
-
it('returns stable deny shape for manual cancellation', () => {
|
|
38
|
-
expect(buildSentinelFailureOutput('manual_cancellation')).toEqual({
|
|
39
|
-
error: 'Run terminated by Sentinel',
|
|
40
|
-
source: 'sentinel',
|
|
41
|
-
action: 'deny',
|
|
42
|
-
reason: 'manual_cancellation',
|
|
43
|
-
});
|
|
44
|
-
});
|
|
45
|
-
});
|
|
46
|
-
|
|
47
|
-
describe('buildSentinelFailureLog', () => {
|
|
48
|
-
it('uses consistent log prefix', () => {
|
|
49
|
-
expect(buildSentinelFailureLog('policy_violation:cost')).toBe(
|
|
50
|
-
'Sentinel denied run: policy_violation:cost'
|
|
51
|
-
);
|
|
52
|
-
});
|
|
53
|
-
});
|
|
54
|
-
|
|
55
|
-
describe('extractSentinelKillReason', () => {
|
|
56
|
-
it('parses processor throw message', () => {
|
|
57
|
-
expect(
|
|
58
|
-
extractSentinelKillReason('Run killed by Sentinel: policy_violation:timeout')
|
|
59
|
-
).toBe('policy_violation:timeout');
|
|
60
|
-
});
|
|
61
|
-
|
|
62
|
-
it('returns null for unrelated errors', () => {
|
|
63
|
-
expect(extractSentinelKillReason('Agent not found')).toBeNull();
|
|
64
|
-
expect(isSentinelKillError('Agent not found')).toBe(false);
|
|
65
|
-
});
|
|
66
|
-
|
|
67
|
-
it('returns null when reason after prefix is empty', () => {
|
|
68
|
-
expect(extractSentinelKillReason(SENTINEL_KILL_ERROR_PREFIX)).toBeNull();
|
|
69
|
-
expect(extractSentinelKillReason(`${SENTINEL_KILL_ERROR_PREFIX} `)).toBeNull();
|
|
70
|
-
});
|
|
71
|
-
});
|
|
72
|
-
|
|
73
|
-
describe('formatSentinelKillError', () => {
|
|
74
|
-
it('round-trips with extractSentinelKillReason', () => {
|
|
75
|
-
const reason = 'policy_violation:idle';
|
|
76
|
-
const message = formatSentinelKillError(reason);
|
|
77
|
-
expect(extractSentinelKillReason(message)).toBe(reason);
|
|
78
|
-
});
|
|
79
|
-
});
|
|
80
|
-
|
|
81
|
-
describe('appendRunLog', () => {
|
|
82
|
-
it('preserves existing entries', () => {
|
|
83
|
-
const first = { timestamp: 't1', level: 'info', message: 'started' };
|
|
84
|
-
const second = { timestamp: 't2', level: 'error', message: 'denied' };
|
|
85
|
-
expect(appendRunLog([first], second)).toEqual([first, second]);
|
|
86
|
-
expect(appendRunLog(undefined, second)).toEqual([second]);
|
|
87
|
-
});
|
|
88
|
-
});
|
|
89
|
-
});
|
|
1
|
+
import { describe, it, expect } from '@jest/globals';
|
|
2
|
+
import {
|
|
3
|
+
appendRunLog,
|
|
4
|
+
buildSentinelFailureLog,
|
|
5
|
+
buildSentinelFailureOutput,
|
|
6
|
+
extractSentinelKillReason,
|
|
7
|
+
formatSentinelKillError,
|
|
8
|
+
isSentinelKillError,
|
|
9
|
+
parsePolicyFromKillReason,
|
|
10
|
+
SENTINEL_KILL_ERROR_PREFIX,
|
|
11
|
+
} from '../security/sentinel-run-failure.js';
|
|
12
|
+
|
|
13
|
+
describe('sentinel-run-failure', () => {
|
|
14
|
+
describe('parsePolicyFromKillReason', () => {
|
|
15
|
+
it('extracts policy name from policy_violation prefix', () => {
|
|
16
|
+
expect(parsePolicyFromKillReason('policy_violation:timeout')).toBe('timeout');
|
|
17
|
+
expect(parsePolicyFromKillReason('policy_violation:token_cap')).toBe('token_cap');
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
it('returns undefined for non-policy reasons', () => {
|
|
21
|
+
expect(parsePolicyFromKillReason('manual_cancellation')).toBeUndefined();
|
|
22
|
+
expect(parsePolicyFromKillReason('operator_stop')).toBeUndefined();
|
|
23
|
+
});
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
describe('buildSentinelFailureOutput', () => {
|
|
27
|
+
it('returns stable deny shape for policy violations', () => {
|
|
28
|
+
expect(buildSentinelFailureOutput('policy_violation:idle')).toEqual({
|
|
29
|
+
error: 'Run terminated by Sentinel',
|
|
30
|
+
source: 'sentinel',
|
|
31
|
+
action: 'deny',
|
|
32
|
+
reason: 'policy_violation:idle',
|
|
33
|
+
policy: 'idle',
|
|
34
|
+
});
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
it('returns stable deny shape for manual cancellation', () => {
|
|
38
|
+
expect(buildSentinelFailureOutput('manual_cancellation')).toEqual({
|
|
39
|
+
error: 'Run terminated by Sentinel',
|
|
40
|
+
source: 'sentinel',
|
|
41
|
+
action: 'deny',
|
|
42
|
+
reason: 'manual_cancellation',
|
|
43
|
+
});
|
|
44
|
+
});
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
describe('buildSentinelFailureLog', () => {
|
|
48
|
+
it('uses consistent log prefix', () => {
|
|
49
|
+
expect(buildSentinelFailureLog('policy_violation:cost')).toBe(
|
|
50
|
+
'Sentinel denied run: policy_violation:cost'
|
|
51
|
+
);
|
|
52
|
+
});
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
describe('extractSentinelKillReason', () => {
|
|
56
|
+
it('parses processor throw message', () => {
|
|
57
|
+
expect(
|
|
58
|
+
extractSentinelKillReason('Run killed by Sentinel: policy_violation:timeout')
|
|
59
|
+
).toBe('policy_violation:timeout');
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
it('returns null for unrelated errors', () => {
|
|
63
|
+
expect(extractSentinelKillReason('Agent not found')).toBeNull();
|
|
64
|
+
expect(isSentinelKillError('Agent not found')).toBe(false);
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
it('returns null when reason after prefix is empty', () => {
|
|
68
|
+
expect(extractSentinelKillReason(SENTINEL_KILL_ERROR_PREFIX)).toBeNull();
|
|
69
|
+
expect(extractSentinelKillReason(`${SENTINEL_KILL_ERROR_PREFIX} `)).toBeNull();
|
|
70
|
+
});
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
describe('formatSentinelKillError', () => {
|
|
74
|
+
it('round-trips with extractSentinelKillReason', () => {
|
|
75
|
+
const reason = 'policy_violation:idle';
|
|
76
|
+
const message = formatSentinelKillError(reason);
|
|
77
|
+
expect(extractSentinelKillReason(message)).toBe(reason);
|
|
78
|
+
});
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
describe('appendRunLog', () => {
|
|
82
|
+
it('preserves existing entries', () => {
|
|
83
|
+
const first = { timestamp: 't1', level: 'info', message: 'started' };
|
|
84
|
+
const second = { timestamp: 't2', level: 'error', message: 'denied' };
|
|
85
|
+
expect(appendRunLog([first], second)).toEqual([first, second]);
|
|
86
|
+
expect(appendRunLog(undefined, second)).toEqual([second]);
|
|
87
|
+
});
|
|
88
|
+
});
|
|
89
|
+
});
|
|
@@ -1,32 +1,32 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* In-process cancel adapter for @4runr/sentinel — avoids authenticated HTTP loopback.
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import type { CancelAdapter, CancelResult } from '@4runr/sentinel';
|
|
6
|
-
import { applyRunKill } from '../runs/run-kill.js';
|
|
7
|
-
|
|
8
|
-
export function createGatewayCancelAdapter(): CancelAdapter {
|
|
9
|
-
return {
|
|
10
|
-
async cancelRun(runId: string, reason: string): Promise<CancelResult> {
|
|
11
|
-
try {
|
|
12
|
-
const result = await applyRunKill(runId, reason);
|
|
13
|
-
return {
|
|
14
|
-
success: result.applied,
|
|
15
|
-
reason: result.idempotent
|
|
16
|
-
? 'Run already terminal'
|
|
17
|
-
: result.applied
|
|
18
|
-
? 'Run killed in store'
|
|
19
|
-
: 'Run kill not applied',
|
|
20
|
-
timestamp: Date.now(),
|
|
21
|
-
};
|
|
22
|
-
} catch (error) {
|
|
23
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
24
|
-
return {
|
|
25
|
-
success: false,
|
|
26
|
-
reason: message,
|
|
27
|
-
timestamp: Date.now(),
|
|
28
|
-
};
|
|
29
|
-
}
|
|
30
|
-
},
|
|
31
|
-
};
|
|
32
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* In-process cancel adapter for @4runr/sentinel — avoids authenticated HTTP loopback.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { CancelAdapter, CancelResult } from '@4runr/sentinel';
|
|
6
|
+
import { applyRunKill } from '../runs/run-kill.js';
|
|
7
|
+
|
|
8
|
+
export function createGatewayCancelAdapter(): CancelAdapter {
|
|
9
|
+
return {
|
|
10
|
+
async cancelRun(runId: string, reason: string): Promise<CancelResult> {
|
|
11
|
+
try {
|
|
12
|
+
const result = await applyRunKill(runId, reason);
|
|
13
|
+
return {
|
|
14
|
+
success: result.applied,
|
|
15
|
+
reason: result.idempotent
|
|
16
|
+
? 'Run already terminal'
|
|
17
|
+
: result.applied
|
|
18
|
+
? 'Run killed in store'
|
|
19
|
+
: 'Run kill not applied',
|
|
20
|
+
timestamp: Date.now(),
|
|
21
|
+
};
|
|
22
|
+
} catch (error) {
|
|
23
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
24
|
+
return {
|
|
25
|
+
success: false,
|
|
26
|
+
reason: message,
|
|
27
|
+
timestamp: Date.now(),
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
};
|
|
32
|
+
}
|
|
@@ -1,42 +1,42 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Poll Sentinel buffer during blocking agent.execute().
|
|
3
|
-
* Does not abort the agent process — returns/rejects as soon as the run is marked killed.
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import { formatSentinelKillError } from '../security/sentinel-run-failure.js';
|
|
7
|
-
|
|
8
|
-
export const SENTINEL_KILL_CHECK_INTERVAL_MS = 250;
|
|
9
|
-
|
|
10
|
-
export type SentinelRunStatusSnapshot = {
|
|
11
|
-
status?: string;
|
|
12
|
-
killReason?: string;
|
|
13
|
-
};
|
|
14
|
-
|
|
15
|
-
/**
|
|
16
|
-
* Race execute() against periodic Sentinel kill checks.
|
|
17
|
-
* Agent work may continue in the background until the executor finishes.
|
|
18
|
-
*/
|
|
19
|
-
export async function executeWithSentinelWatch<T>(
|
|
20
|
-
runId: string,
|
|
21
|
-
execute: () => Promise<T>,
|
|
22
|
-
getRunStatus: (runId: string) => SentinelRunStatusSnapshot | undefined
|
|
23
|
-
): Promise<T> {
|
|
24
|
-
let timer: ReturnType<typeof setInterval> | undefined;
|
|
25
|
-
|
|
26
|
-
const watchPromise = new Promise<never>((_, reject) => {
|
|
27
|
-
timer = setInterval(() => {
|
|
28
|
-
const state = getRunStatus(runId);
|
|
29
|
-
if (state?.status === 'killed') {
|
|
30
|
-
reject(new Error(formatSentinelKillError(state.killReason ?? 'unknown')));
|
|
31
|
-
}
|
|
32
|
-
}, SENTINEL_KILL_CHECK_INTERVAL_MS);
|
|
33
|
-
});
|
|
34
|
-
|
|
35
|
-
try {
|
|
36
|
-
return await Promise.race([execute(), watchPromise]);
|
|
37
|
-
} finally {
|
|
38
|
-
if (timer !== undefined) {
|
|
39
|
-
clearInterval(timer);
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* Poll Sentinel buffer during blocking agent.execute().
|
|
3
|
+
* Does not abort the agent process — returns/rejects as soon as the run is marked killed.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { formatSentinelKillError } from '../security/sentinel-run-failure.js';
|
|
7
|
+
|
|
8
|
+
export const SENTINEL_KILL_CHECK_INTERVAL_MS = 250;
|
|
9
|
+
|
|
10
|
+
export type SentinelRunStatusSnapshot = {
|
|
11
|
+
status?: string;
|
|
12
|
+
killReason?: string;
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Race execute() against periodic Sentinel kill checks.
|
|
17
|
+
* Agent work may continue in the background until the executor finishes.
|
|
18
|
+
*/
|
|
19
|
+
export async function executeWithSentinelWatch<T>(
|
|
20
|
+
runId: string,
|
|
21
|
+
execute: () => Promise<T>,
|
|
22
|
+
getRunStatus: (runId: string) => SentinelRunStatusSnapshot | undefined
|
|
23
|
+
): Promise<T> {
|
|
24
|
+
let timer: ReturnType<typeof setInterval> | undefined;
|
|
25
|
+
|
|
26
|
+
const watchPromise = new Promise<never>((_, reject) => {
|
|
27
|
+
timer = setInterval(() => {
|
|
28
|
+
const state = getRunStatus(runId);
|
|
29
|
+
if (state?.status === 'killed') {
|
|
30
|
+
reject(new Error(formatSentinelKillError(state.killReason ?? 'unknown')));
|
|
31
|
+
}
|
|
32
|
+
}, SENTINEL_KILL_CHECK_INTERVAL_MS);
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
try {
|
|
36
|
+
return await Promise.race([execute(), watchPromise]);
|
|
37
|
+
} finally {
|
|
38
|
+
if (timer !== undefined) {
|
|
39
|
+
clearInterval(timer);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
@@ -175,7 +175,7 @@ export async function sentinelPolicyRoutes(fastify: FastifyInstance) {
|
|
|
175
175
|
|
|
176
176
|
try {
|
|
177
177
|
sentinel.updateConfig(config);
|
|
178
|
-
const savedTo = persistSentinelConfigAfterApply(sentinel, config);
|
|
178
|
+
const savedTo = persistSentinelConfigAfterApply(sentinel, config, fastify.log);
|
|
179
179
|
return {
|
|
180
180
|
success: true,
|
|
181
181
|
template: templateName,
|
|
@@ -235,7 +235,7 @@ export async function sentinelPolicyRoutes(fastify: FastifyInstance) {
|
|
|
235
235
|
|
|
236
236
|
try {
|
|
237
237
|
sentinel.updateConfig(config);
|
|
238
|
-
const savedTo = persistSentinelConfigAfterApply(sentinel, config);
|
|
238
|
+
const savedTo = persistSentinelConfigAfterApply(sentinel, config, fastify.log);
|
|
239
239
|
return {
|
|
240
240
|
success: true,
|
|
241
241
|
config,
|
|
@@ -1,117 +1,117 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* In-process run kill — used by HTTP cancel and Sentinel policy kill (via cancel adapter).
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import { getRunStore } from './index.js';
|
|
6
|
-
import { removeRunExecutionJob } from '../queue/index.js';
|
|
7
|
-
import {
|
|
8
|
-
appendRunLog,
|
|
9
|
-
buildSentinelFailureLog,
|
|
10
|
-
buildSentinelFailureOutput,
|
|
11
|
-
emitSentinelKilledSse,
|
|
12
|
-
} from '../security/sentinel-run-failure.js';
|
|
13
|
-
import { publishSentinelRunKillFireAndForget } from '../adapters/redis-sentinel-publisher.js';
|
|
14
|
-
import type { Run, RunStatus } from './types.js';
|
|
15
|
-
import { createLogger } from '@4runr/shared';
|
|
16
|
-
|
|
17
|
-
const logger = createLogger('Gateway:RunKill');
|
|
18
|
-
|
|
19
|
-
const TERMINAL_STATUSES: RunStatus[] = ['killed', 'completed', 'failed'];
|
|
20
|
-
|
|
21
|
-
export interface RunKillSideEffects {
|
|
22
|
-
decRunsActive?: () => void;
|
|
23
|
-
getSseConnections?: (runId: string) => Set<{ write: (chunk: string) => void }> | undefined;
|
|
24
|
-
onSseMessage?: () => void;
|
|
25
|
-
deleteEventStream?: (runId: string) => void;
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
export interface ApplyRunKillResult {
|
|
29
|
-
applied: boolean;
|
|
30
|
-
idempotent: boolean;
|
|
31
|
-
runId: string;
|
|
32
|
-
status: RunStatus;
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
let sideEffects: RunKillSideEffects = {};
|
|
36
|
-
|
|
37
|
-
export function setRunKillSideEffects(effects: RunKillSideEffects): void {
|
|
38
|
-
sideEffects = effects;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
export function isTerminalRunStatus(status: RunStatus): boolean {
|
|
42
|
-
return TERMINAL_STATUSES.includes(status);
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
/**
|
|
46
|
-
* Persist killed status, append deny log, notify SSE, remove queued job.
|
|
47
|
-
* Idempotent when the run is already terminal.
|
|
48
|
-
*/
|
|
49
|
-
export async function applyRunKill(
|
|
50
|
-
runId: string,
|
|
51
|
-
reason: string
|
|
52
|
-
): Promise<ApplyRunKillResult> {
|
|
53
|
-
const store = getRunStore();
|
|
54
|
-
const run = await store.getRunById(runId);
|
|
55
|
-
|
|
56
|
-
if (!run) {
|
|
57
|
-
throw new Error(`Run not found: ${runId}`);
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
if (isTerminalRunStatus(run.status)) {
|
|
61
|
-
logger.info('Run kill skipped — already terminal', { runId, status: run.status });
|
|
62
|
-
return {
|
|
63
|
-
applied: false,
|
|
64
|
-
idempotent: true,
|
|
65
|
-
runId,
|
|
66
|
-
status: run.status,
|
|
67
|
-
};
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
if (run.status === 'queued') {
|
|
71
|
-
try {
|
|
72
|
-
await removeRunExecutionJob(runId);
|
|
73
|
-
} catch (error) {
|
|
74
|
-
logger.warn('Failed to remove queued job during kill', {
|
|
75
|
-
runId,
|
|
76
|
-
error: error instanceof Error ? error.message : String(error),
|
|
77
|
-
});
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
const logEntry = {
|
|
82
|
-
timestamp: new Date().toISOString(),
|
|
83
|
-
level: 'error',
|
|
84
|
-
message: buildSentinelFailureLog(reason),
|
|
85
|
-
};
|
|
86
|
-
|
|
87
|
-
await store.updateRunStatus(runId, 'killed', {
|
|
88
|
-
completedAt: new Date().toISOString(),
|
|
89
|
-
output: buildSentinelFailureOutput(reason),
|
|
90
|
-
logs: appendRunLog(run.logs, logEntry),
|
|
91
|
-
});
|
|
92
|
-
|
|
93
|
-
sideEffects.decRunsActive?.();
|
|
94
|
-
|
|
95
|
-
const connections = sideEffects.getSseConnections?.(runId);
|
|
96
|
-
if (connections) {
|
|
97
|
-
connections.forEach((connection) => {
|
|
98
|
-
emitSentinelKilledSse(connection, runId, reason);
|
|
99
|
-
sideEffects.onSseMessage?.();
|
|
100
|
-
});
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
sideEffects.deleteEventStream?.(runId);
|
|
104
|
-
|
|
105
|
-
publishSentinelRunKillFireAndForget(runId, reason);
|
|
106
|
-
|
|
107
|
-
logger.info('Run killed in store', { runId, reason });
|
|
108
|
-
|
|
109
|
-
return {
|
|
110
|
-
applied: true,
|
|
111
|
-
idempotent: false,
|
|
112
|
-
runId,
|
|
113
|
-
status: 'killed',
|
|
114
|
-
};
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
export type { Run };
|
|
1
|
+
/**
|
|
2
|
+
* In-process run kill — used by HTTP cancel and Sentinel policy kill (via cancel adapter).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { getRunStore } from './index.js';
|
|
6
|
+
import { removeRunExecutionJob } from '../queue/index.js';
|
|
7
|
+
import {
|
|
8
|
+
appendRunLog,
|
|
9
|
+
buildSentinelFailureLog,
|
|
10
|
+
buildSentinelFailureOutput,
|
|
11
|
+
emitSentinelKilledSse,
|
|
12
|
+
} from '../security/sentinel-run-failure.js';
|
|
13
|
+
import { publishSentinelRunKillFireAndForget } from '../adapters/redis-sentinel-publisher.js';
|
|
14
|
+
import type { Run, RunStatus } from './types.js';
|
|
15
|
+
import { createLogger } from '@4runr/shared';
|
|
16
|
+
|
|
17
|
+
const logger = createLogger('Gateway:RunKill');
|
|
18
|
+
|
|
19
|
+
const TERMINAL_STATUSES: RunStatus[] = ['killed', 'completed', 'failed'];
|
|
20
|
+
|
|
21
|
+
export interface RunKillSideEffects {
|
|
22
|
+
decRunsActive?: () => void;
|
|
23
|
+
getSseConnections?: (runId: string) => Set<{ write: (chunk: string) => void }> | undefined;
|
|
24
|
+
onSseMessage?: () => void;
|
|
25
|
+
deleteEventStream?: (runId: string) => void;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export interface ApplyRunKillResult {
|
|
29
|
+
applied: boolean;
|
|
30
|
+
idempotent: boolean;
|
|
31
|
+
runId: string;
|
|
32
|
+
status: RunStatus;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
let sideEffects: RunKillSideEffects = {};
|
|
36
|
+
|
|
37
|
+
export function setRunKillSideEffects(effects: RunKillSideEffects): void {
|
|
38
|
+
sideEffects = effects;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export function isTerminalRunStatus(status: RunStatus): boolean {
|
|
42
|
+
return TERMINAL_STATUSES.includes(status);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Persist killed status, append deny log, notify SSE, remove queued job.
|
|
47
|
+
* Idempotent when the run is already terminal.
|
|
48
|
+
*/
|
|
49
|
+
export async function applyRunKill(
|
|
50
|
+
runId: string,
|
|
51
|
+
reason: string
|
|
52
|
+
): Promise<ApplyRunKillResult> {
|
|
53
|
+
const store = getRunStore();
|
|
54
|
+
const run = await store.getRunById(runId);
|
|
55
|
+
|
|
56
|
+
if (!run) {
|
|
57
|
+
throw new Error(`Run not found: ${runId}`);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
if (isTerminalRunStatus(run.status)) {
|
|
61
|
+
logger.info('Run kill skipped — already terminal', { runId, status: run.status });
|
|
62
|
+
return {
|
|
63
|
+
applied: false,
|
|
64
|
+
idempotent: true,
|
|
65
|
+
runId,
|
|
66
|
+
status: run.status,
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
if (run.status === 'queued') {
|
|
71
|
+
try {
|
|
72
|
+
await removeRunExecutionJob(runId);
|
|
73
|
+
} catch (error) {
|
|
74
|
+
logger.warn('Failed to remove queued job during kill', {
|
|
75
|
+
runId,
|
|
76
|
+
error: error instanceof Error ? error.message : String(error),
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
const logEntry = {
|
|
82
|
+
timestamp: new Date().toISOString(),
|
|
83
|
+
level: 'error',
|
|
84
|
+
message: buildSentinelFailureLog(reason),
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
await store.updateRunStatus(runId, 'killed', {
|
|
88
|
+
completedAt: new Date().toISOString(),
|
|
89
|
+
output: buildSentinelFailureOutput(reason),
|
|
90
|
+
logs: appendRunLog(run.logs, logEntry),
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
sideEffects.decRunsActive?.();
|
|
94
|
+
|
|
95
|
+
const connections = sideEffects.getSseConnections?.(runId);
|
|
96
|
+
if (connections) {
|
|
97
|
+
connections.forEach((connection) => {
|
|
98
|
+
emitSentinelKilledSse(connection, runId, reason);
|
|
99
|
+
sideEffects.onSseMessage?.();
|
|
100
|
+
});
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
sideEffects.deleteEventStream?.(runId);
|
|
104
|
+
|
|
105
|
+
publishSentinelRunKillFireAndForget(runId, reason);
|
|
106
|
+
|
|
107
|
+
logger.info('Run killed in store', { runId, reason });
|
|
108
|
+
|
|
109
|
+
return {
|
|
110
|
+
applied: true,
|
|
111
|
+
idempotent: false,
|
|
112
|
+
runId,
|
|
113
|
+
status: 'killed',
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
export type { Run };
|
|
@@ -119,7 +119,7 @@ export function mergePersistedSentinelConfig(sentinel: Sentinel): boolean {
|
|
|
119
119
|
|
|
120
120
|
for (const { field } of ENV_FIELD_KEYS) {
|
|
121
121
|
if (envDefinesField(field)) continue;
|
|
122
|
-
(merged as
|
|
122
|
+
(merged as any)[field] = saved[field];
|
|
123
123
|
}
|
|
124
124
|
|
|
125
125
|
try {
|
|
@@ -137,12 +137,49 @@ export function applyPersistedSentinelConfig(sentinel: Sentinel, logger?: {
|
|
|
137
137
|
info: (msg: string, meta?: Record<string, unknown>) => void;
|
|
138
138
|
warn: (msg: string, meta?: Record<string, unknown>) => void;
|
|
139
139
|
}): boolean {
|
|
140
|
+
const filePath = getSentinelLimitsFilePath();
|
|
141
|
+
const saved = loadSentinelLimitsFromDisk();
|
|
142
|
+
|
|
143
|
+
if (!saved) {
|
|
144
|
+
logger?.info('No Sentinel limits file found on disk (will use env defaults)', {
|
|
145
|
+
expectedPath: filePath,
|
|
146
|
+
});
|
|
147
|
+
return false;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// Check which env vars are set and will override the disk values
|
|
151
|
+
const envOverrides: string[] = [];
|
|
152
|
+
const diskApplied: string[] = [];
|
|
153
|
+
|
|
154
|
+
for (const { field, envKeys } of ENV_FIELD_KEYS) {
|
|
155
|
+
if (envDefinesField(field)) {
|
|
156
|
+
const activeEnv = envKeys.find(k => process.env[k] !== undefined && process.env[k]?.trim() !== '');
|
|
157
|
+
if (activeEnv) {
|
|
158
|
+
envOverrides.push(`${field} (via ${activeEnv}=${process.env[activeEnv]})`);
|
|
159
|
+
}
|
|
160
|
+
} else {
|
|
161
|
+
diskApplied.push(`${field}=${saved[field]}`);
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
140
165
|
const ok = mergePersistedSentinelConfig(sentinel);
|
|
166
|
+
|
|
141
167
|
if (ok) {
|
|
142
|
-
|
|
143
|
-
|
|
168
|
+
const finalConfig = sentinel.getConfig();
|
|
169
|
+
logger?.info('✅ Sentinel limits loaded from disk and merged', {
|
|
170
|
+
diskPath: filePath,
|
|
171
|
+
diskValues: saved,
|
|
172
|
+
envOverrides: envOverrides.length > 0 ? envOverrides : 'none',
|
|
173
|
+
diskAppliedFields: diskApplied.length > 0 ? diskApplied : 'none (all env-overridden)',
|
|
174
|
+
finalActiveConfig: finalConfig,
|
|
175
|
+
});
|
|
176
|
+
} else {
|
|
177
|
+
logger?.warn('Failed to apply Sentinel limits from disk', {
|
|
178
|
+
diskPath: filePath,
|
|
179
|
+
savedConfig: saved,
|
|
144
180
|
});
|
|
145
181
|
}
|
|
182
|
+
|
|
146
183
|
return ok;
|
|
147
184
|
}
|
|
148
185
|
|
|
@@ -153,8 +190,19 @@ export function hydrateSentinelConfigFromDisk(sentinel: Sentinel): void {
|
|
|
153
190
|
|
|
154
191
|
export function persistSentinelConfigAfterApply(
|
|
155
192
|
sentinel: Sentinel,
|
|
156
|
-
config: SentinelConfig
|
|
193
|
+
config: SentinelConfig,
|
|
194
|
+
logger?: {
|
|
195
|
+
info: (msg: string, meta?: Record<string, unknown>) => void;
|
|
196
|
+
}
|
|
157
197
|
): string {
|
|
198
|
+
const filePath = getSentinelLimitsFilePath();
|
|
158
199
|
saveSentinelLimitsToDisk(config);
|
|
159
|
-
|
|
200
|
+
|
|
201
|
+
logger?.info('✅ Sentinel limits saved to disk', {
|
|
202
|
+
path: filePath,
|
|
203
|
+
config: config,
|
|
204
|
+
note: 'Survives Gateway restart. Env vars in docker-compose override individual fields when set.',
|
|
205
|
+
});
|
|
206
|
+
|
|
207
|
+
return filePath;
|
|
160
208
|
}
|