@nosana/node 1.1.9-rc → 1.1.10-rc
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +1 -1
- package/dist/src/NodeManager/node/api/ApiHandler.js +5 -3
- package/dist/src/NodeManager/node/api/eventsource/index.js +1 -0
- package/dist/src/NodeManager/node/api/middlewares/ws/verifyWSJobOwnerSignatureMiddleware.d.ts +2 -5
- package/dist/src/NodeManager/node/api/middlewares/ws/verifyWSJobOwnerSignatureMiddleware.js +4 -4
- package/dist/src/NodeManager/node/api/middlewares/ws/verifyWSMiddleware.d.ts +2 -5
- package/dist/src/NodeManager/node/api/middlewares/ws/verifyWSMiddleware.js +3 -3
- package/dist/src/NodeManager/node/api/middlewares/ws/verifyWSNodeOrJobOwnerSignatureMiddleware.d.ts +2 -5
- package/dist/src/NodeManager/node/api/middlewares/ws/verifyWSNodeOrJobOwnerSignatureMiddleware.js +4 -4
- package/dist/src/NodeManager/node/api/routes/get/index.d.ts +2 -0
- package/dist/src/NodeManager/node/api/routes/get/index.js +2 -0
- package/dist/src/NodeManager/node/api/routes/get/job-stats-stream.d.ts +5 -0
- package/dist/src/NodeManager/node/api/routes/get/job-stats-stream.js +36 -0
- package/dist/src/NodeManager/node/api/routes/get/job-stats.d.ts +5 -0
- package/dist/src/NodeManager/node/api/routes/get/job-stats.js +22 -0
- package/dist/src/NodeManager/node/api/types/index.d.ts +3 -0
- package/dist/src/NodeManager/node/api/utils/interval.d.ts +9 -0
- package/dist/src/NodeManager/node/api/utils/interval.js +15 -0
- package/dist/src/NodeManager/node/task/TaskManager.d.ts +40 -0
- package/dist/src/NodeManager/node/task/TaskManager.js +20 -0
- package/dist/src/NodeManager/node/task/loggers/StatsBuffer.d.ts +11 -0
- package/dist/src/NodeManager/node/task/loggers/StatsBuffer.js +64 -0
- package/dist/src/NodeManager/node/task/loggers/statsManager.d.ts +9 -0
- package/dist/src/NodeManager/node/task/loggers/statsManager.js +42 -0
- package/dist/src/NodeManager/node/task/operations/runTaskManagerOperation.js +8 -0
- package/dist/src/NodeManager/provider/ContainerStateManager.d.ts +3 -0
- package/dist/src/NodeManager/provider/ContainerStateManager.js +58 -14
- package/dist/src/NodeManager/provider/utils/parseDockerStat.d.ts +3 -0
- package/dist/src/NodeManager/provider/utils/parseDockerStat.js +62 -0
- package/npm-shrinkwrap.json +2 -2
- package/package.json +1 -1
package/dist/package.json
CHANGED
|
@@ -6,8 +6,8 @@ import { configs } from '../../configs/configs.js';
|
|
|
6
6
|
import { sleep } from '../../utils/utils.js';
|
|
7
7
|
import { stateStreaming } from '../../monitoring/streaming/StateStreamer.js';
|
|
8
8
|
import { applyLoggingProxyToClass } from '../../monitoring/proxy/loggingProxy.js';
|
|
9
|
-
import { verifyBackendSignatureMiddleware, verifyJobOwnerSignatureMiddleware, verifyWSJobOwnerSignatureMiddleware,
|
|
10
|
-
import { getNodeInfoRoute, getJobResultsRoute, getServiceUrlRoute, postJobDefinitionRoute, postServiceStopRoute, postNodeValidation, wssLogRoute, wssStatusRoute, getCurrentGroupStatusHandler, getGroupStatusHandler, getOperationsStatusHandler, getOperationStatusHandler, restartGroupOperationHandler, restartOperationHandler, stopGroupOperationHandler, stopOperationHandler, wssTaskManagerLogRoute, moveGroupOperationHandler, getJobDefinitionRoute, getJobInfoRoute, } from './routes/index.js';
|
|
9
|
+
import { verifyBackendSignatureMiddleware, verifyJobOwnerSignatureMiddleware, verifyWSJobOwnerSignatureMiddleware, verifyWSNodeOrJobOwnerSignatureMiddleware, } from './middlewares/index.js';
|
|
10
|
+
import { getNodeInfoRoute, getJobResultsRoute, getServiceUrlRoute, postJobDefinitionRoute, postServiceStopRoute, postNodeValidation, wssLogRoute, wssStatusRoute, getCurrentGroupStatusHandler, getGroupStatusHandler, getOperationsStatusHandler, getOperationStatusHandler, restartGroupOperationHandler, restartOperationHandler, stopGroupOperationHandler, stopOperationHandler, wssTaskManagerLogRoute, moveGroupOperationHandler, getJobDefinitionRoute, getJobInfoRoute, getJobStatsRoute, getJobStatsStreamRoute, } from './routes/index.js';
|
|
11
11
|
import { NodeAlreadyActiveError } from '../../errors/NodeAlreadyActiveError.js';
|
|
12
12
|
export class ApiHandler {
|
|
13
13
|
constructor(sdk, repository, provider, port) {
|
|
@@ -88,7 +88,7 @@ export class ApiHandler {
|
|
|
88
88
|
await verifyWSJobOwnerSignatureMiddleware(ws, header, body, wssLogRoute);
|
|
89
89
|
break;
|
|
90
90
|
case '/flog':
|
|
91
|
-
await
|
|
91
|
+
await verifyWSJobOwnerSignatureMiddleware(ws, header, body, wssTaskManagerLogRoute);
|
|
92
92
|
break;
|
|
93
93
|
case '/status':
|
|
94
94
|
await verifyWSNodeOrJobOwnerSignatureMiddleware(ws, header, body, wssStatusRoute);
|
|
@@ -143,6 +143,8 @@ export class ApiHandler {
|
|
|
143
143
|
this.api.get('/job/:jobId/group/current', getCurrentGroupStatusHandler);
|
|
144
144
|
this.api.get('/job/:jobId/group/:group', getGroupStatusHandler);
|
|
145
145
|
this.api.get('/job/:jobId/endpoints', getServiceUrlRoute);
|
|
146
|
+
this.api.get('/job/:jobId/stats', getJobStatsRoute);
|
|
147
|
+
this.api.get('/job/:jobId/stats/stream', getJobStatsStreamRoute);
|
|
146
148
|
// POST Routes
|
|
147
149
|
this.api.post('/node/validate', verifyBackendSignatureMiddleware, postNodeValidation);
|
|
148
150
|
this.api.post('/job/:jobId/job-definition', postJobDefinitionRoute);
|
package/dist/src/NodeManager/node/api/middlewares/ws/verifyWSJobOwnerSignatureMiddleware.d.ts
CHANGED
|
@@ -1,6 +1,3 @@
|
|
|
1
1
|
import WebSocket from 'ws';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
}, nextFunction: (ws: WebSocket, headers: string, body: {
|
|
5
|
-
jobAddress: string;
|
|
6
|
-
}) => void): Promise<void>;
|
|
2
|
+
import { WSBody } from '../../types/index.js';
|
|
3
|
+
export declare function verifyWSJobOwnerSignatureMiddleware(ws: WebSocket, headers: string, body: WSBody, nextFunction: (ws: WebSocket, headers: string, body: WSBody) => void): Promise<void>;
|
|
@@ -4,13 +4,13 @@ export async function verifyWSJobOwnerSignatureMiddleware(ws, headers, body, nex
|
|
|
4
4
|
const sdk = getSDK();
|
|
5
5
|
const jobId = body.jobAddress;
|
|
6
6
|
if (!jobId) {
|
|
7
|
-
ws.close(
|
|
7
|
+
ws.close(1008, 'Expected body to contain jobAddress.');
|
|
8
8
|
return;
|
|
9
9
|
}
|
|
10
10
|
try {
|
|
11
11
|
const job = await sdk.jobs.get(jobId);
|
|
12
12
|
if (!job) {
|
|
13
|
-
ws.close(
|
|
13
|
+
ws.close(1008, `Could not find job with id ${jobId}`);
|
|
14
14
|
return;
|
|
15
15
|
}
|
|
16
16
|
try {
|
|
@@ -21,10 +21,10 @@ export async function verifyWSJobOwnerSignatureMiddleware(ws, headers, body, nex
|
|
|
21
21
|
}
|
|
22
22
|
}
|
|
23
23
|
catch (_) {
|
|
24
|
-
ws.close(
|
|
24
|
+
ws.close(4001, 'Unauthorized Request');
|
|
25
25
|
}
|
|
26
26
|
}
|
|
27
27
|
catch (error) {
|
|
28
|
-
ws.close(
|
|
28
|
+
ws.close(4001, `Unauthorized Request: ${error.message}`);
|
|
29
29
|
}
|
|
30
30
|
}
|
|
@@ -1,6 +1,3 @@
|
|
|
1
1
|
import WebSocket from 'ws';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
}, nextFunction: (ws: WebSocket, headers: string, body: {
|
|
5
|
-
jobAddress: string;
|
|
6
|
-
}) => void): Promise<void>;
|
|
2
|
+
import { WSBody } from '../../types/index.js';
|
|
3
|
+
export declare function verifyWSMiddleware(ws: WebSocket, headers: string, body: WSBody, nextFunction: (ws: WebSocket, headers: string, body: WSBody) => void): Promise<void>;
|
|
@@ -3,18 +3,18 @@ export async function verifyWSMiddleware(ws, headers, body, nextFunction) {
|
|
|
3
3
|
const sdk = getSDK();
|
|
4
4
|
const jobId = body.jobAddress;
|
|
5
5
|
if (!jobId) {
|
|
6
|
-
ws.close(
|
|
6
|
+
ws.close(1008, 'Expected body to contain jobAddress.');
|
|
7
7
|
return;
|
|
8
8
|
}
|
|
9
9
|
try {
|
|
10
10
|
const job = await sdk.jobs.get(jobId);
|
|
11
11
|
if (!job) {
|
|
12
|
-
ws.close(
|
|
12
|
+
ws.close(1008, `Could not find job with id ${jobId}`);
|
|
13
13
|
return;
|
|
14
14
|
}
|
|
15
15
|
nextFunction(ws, headers, body);
|
|
16
16
|
}
|
|
17
17
|
catch (error) {
|
|
18
|
-
ws.close(
|
|
18
|
+
ws.close(4001, `Unauthorized Request: ${error.message}`);
|
|
19
19
|
}
|
|
20
20
|
}
|
package/dist/src/NodeManager/node/api/middlewares/ws/verifyWSNodeOrJobOwnerSignatureMiddleware.d.ts
CHANGED
|
@@ -1,6 +1,3 @@
|
|
|
1
1
|
import WebSocket from 'ws';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
}, nextFunction: (ws: WebSocket, headers: string, body: {
|
|
5
|
-
jobAddress: string;
|
|
6
|
-
}) => void): Promise<void>;
|
|
2
|
+
import { WSBody } from '../../types/index.js';
|
|
3
|
+
export declare function verifyWSNodeOrJobOwnerSignatureMiddleware(ws: WebSocket, headers: string, body: WSBody, nextFunction: (ws: WebSocket, headers: string, body: WSBody) => void): Promise<void>;
|
package/dist/src/NodeManager/node/api/middlewares/ws/verifyWSNodeOrJobOwnerSignatureMiddleware.js
CHANGED
|
@@ -4,13 +4,13 @@ export async function verifyWSNodeOrJobOwnerSignatureMiddleware(ws, headers, bod
|
|
|
4
4
|
const sdk = getSDK();
|
|
5
5
|
const jobId = body.jobAddress;
|
|
6
6
|
if (!jobId) {
|
|
7
|
-
ws.close(
|
|
7
|
+
ws.close(1008, 'Expected body to contain jobAddress.');
|
|
8
8
|
return;
|
|
9
9
|
}
|
|
10
10
|
try {
|
|
11
11
|
const job = await sdk.jobs.get(jobId);
|
|
12
12
|
if (!job) {
|
|
13
|
-
ws.close(
|
|
13
|
+
ws.close(1008, `Could not find job with id ${jobId}`);
|
|
14
14
|
return;
|
|
15
15
|
}
|
|
16
16
|
try {
|
|
@@ -24,10 +24,10 @@ export async function verifyWSNodeOrJobOwnerSignatureMiddleware(ws, headers, bod
|
|
|
24
24
|
}
|
|
25
25
|
}
|
|
26
26
|
catch (_) {
|
|
27
|
-
ws.close(
|
|
27
|
+
ws.close(4001, 'Unauthorized Request');
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
30
|
catch (error) {
|
|
31
|
-
ws.close(
|
|
31
|
+
ws.close(4001, `Unauthorized Request: ${error.message}`);
|
|
32
32
|
}
|
|
33
33
|
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { parseInterval } from '../../utils/interval.js';
|
|
2
|
+
import { createEventSource } from '../../eventsource/index.js';
|
|
3
|
+
import { TaskManagerRegistry } from '../../../task/TaskManagerRegistry.js';
|
|
4
|
+
/**
 * GET /job/:jobId/stats/stream
 *
 * Opens an event source on the response and, every `interval` seconds,
 * pushes the latest stat of each operation that is newer than the last
 * batch already sent. The stream is torn down when the client disconnects
 * or when the task's flow reports `status:end` / `status:failed`.
 *
 * Responds 400 (via parseInterval) for an unsupported interval and 404 when
 * no task is registered for the job id.
 */
export function getJobStatsStreamRoute(req, res) {
    const { jobId } = req.params;
    const intervalParam = parseInterval(req, res);
    // parseInterval has already written the error response in this case.
    if (intervalParam === null)
        return;
    const task = TaskManagerRegistry.getInstance().get(jobId);
    if (!task) {
        res.status(404).send('Invalid job address');
        return;
    }
    const { sendIfChanged, closeEventSource } = createEventSource(req, res);
    const intervalMs = intervalParam * 1000;
    // Highest timestamp already delivered; only newer stats are sent.
    let lastTimestamp = 0;
    const statsInterval = setInterval(() => {
        const stats = task.getLatestStatPerOp(lastTimestamp);
        if (stats.length > 0) {
            sendIfChanged(stats);
            lastTimestamp = Math.max(...stats.map((s) => s.timestamp));
        }
    }, intervalMs);
    // cleanup can be reached from both the flow event and the request
    // 'close' event (closing the event source may itself end the request),
    // so make sure the teardown only runs once.
    let cleanedUp = false;
    const cleanup = () => {
        if (cleanedUp)
            return;
        cleanedUp = true;
        clearInterval(statsInterval);
        task.getEventsEmitter().off('flow:updated', onFlowUpdated);
        closeEventSource();
    };
    const onFlowUpdated = ({ type }) => {
        if (type === 'status:end' || type === 'status:failed') {
            cleanup();
        }
    };
    task.getEventsEmitter().on('flow:updated', onFlowUpdated);
    req.on('close', cleanup);
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { parseInterval } from '../../utils/interval.js';
|
|
2
|
+
import { TaskManagerRegistry } from '../../../task/TaskManagerRegistry.js';
|
|
3
|
+
/**
 * GET /job/:jobId/stats
 *
 * Returns the stored stats of a job as JSON, optionally restricted to the
 * `start` / `end` query timestamps (milliseconds) and sampled at the
 * requested `interval` (seconds).
 *
 * Responds 400 for an unsupported interval (written by parseInterval) or
 * for non-numeric `start` / `end`, and 404 when no task is registered for
 * the job id.
 */
export function getJobStatsRoute(req, res) {
    const { jobId } = req.params;
    const intervalParam = parseInterval(req, res);
    if (intervalParam === null)
        return;
    const task = TaskManagerRegistry.getInstance().get(jobId);
    if (!task) {
        res.status(404).send('Invalid job address');
        return;
    }
    // An absent (or empty) query value means "no bound" on that side.
    const toTimestamp = (value) => (value ? Number(value) : undefined);
    const isInvalid = (timestamp) => timestamp !== undefined && isNaN(timestamp);
    const start = toTimestamp(req.query.start);
    const end = toTimestamp(req.query.end);
    if (isInvalid(start) || isInvalid(end)) {
        res.status(400).send('start and end must be valid timestamps (ms)');
        return;
    }
    res.json(task.queryStats(start, end, intervalParam * 1000));
}
|
|
@@ -3,6 +3,9 @@ import { PublicKey } from '@solana/web3.js';
|
|
|
3
3
|
import { NodeRepository } from '../../../repository/NodeRepository.js';
|
|
4
4
|
import ApiEventEmitter from '../ApiEventEmitter.js';
|
|
5
5
|
import { Provider } from '../../../provider/Provider.js';
|
|
6
|
+
export type WSBody = {
|
|
7
|
+
jobAddress: string;
|
|
8
|
+
};
|
|
6
9
|
export type NodeAPIRequest<Params = {}, Body = {}> = Request<Params, {}, Body> & {
|
|
7
10
|
address?: PublicKey;
|
|
8
11
|
eventEmitter?: ApiEventEmitter;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { Response } from 'express';
|
|
2
|
+
import { NodeAPIRequest } from '../types/index.js';
|
|
3
|
+
export declare const DEFAULT_INTERVAL = 5;
|
|
4
|
+
export declare const ALLOWED_INTERVALS: readonly [5, 10, 30, 60, 300, 1800];
|
|
5
|
+
export type Interval = (typeof ALLOWED_INTERVALS)[number];
|
|
6
|
+
export declare function validateInterval(value: number): value is Interval;
|
|
7
|
+
export declare function parseInterval(req: NodeAPIRequest<{
|
|
8
|
+
jobId: string;
|
|
9
|
+
}>, res: Response): Interval | null;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
export const DEFAULT_INTERVAL = 5;
|
|
2
|
+
export const ALLOWED_INTERVALS = [5, 10, 30, 60, 300, 1800];
|
|
3
|
+
export function validateInterval(value) {
|
|
4
|
+
return ALLOWED_INTERVALS.includes(value);
|
|
5
|
+
}
|
|
6
|
+
export function parseInterval(req, res) {
|
|
7
|
+
const raw = Number(req.query.interval ?? DEFAULT_INTERVAL);
|
|
8
|
+
if (!validateInterval(raw)) {
|
|
9
|
+
res
|
|
10
|
+
.status(400)
|
|
11
|
+
.send(`Invalid interval. Allowed values (seconds): ${ALLOWED_INTERVALS.join(', ')}`);
|
|
12
|
+
return null;
|
|
13
|
+
}
|
|
14
|
+
return raw;
|
|
15
|
+
}
|
|
@@ -5,6 +5,7 @@ import { JobDefinition, Operation, OperationType } from '@nosana/sdk';
|
|
|
5
5
|
import { NodeRepository } from '../../repository/NodeRepository.js';
|
|
6
6
|
import { Provider } from '../../provider/Provider.js';
|
|
7
7
|
import { Flow } from '@nosana/sdk';
|
|
8
|
+
import { StatsBuffer } from './loggers/StatsBuffer.js';
|
|
8
9
|
export type TaskManagerOps = Array<Operation<OperationType>>;
|
|
9
10
|
export type ExecutionContext = {
|
|
10
11
|
group: string;
|
|
@@ -41,6 +42,26 @@ export declare const OperationProgressStatuses: {
|
|
|
41
42
|
readonly INIT: "init";
|
|
42
43
|
};
|
|
43
44
|
export type LogType = 'container' | 'info' | 'error';
|
|
45
|
+
export interface TaskStat {
|
|
46
|
+
opId: string;
|
|
47
|
+
timestamp: number;
|
|
48
|
+
cpu: {
|
|
49
|
+
cpu_percent: number;
|
|
50
|
+
};
|
|
51
|
+
memory: {
|
|
52
|
+
memory_usage: number;
|
|
53
|
+
memory_limit: number;
|
|
54
|
+
memory_percent: number;
|
|
55
|
+
};
|
|
56
|
+
disk: {
|
|
57
|
+
read: number;
|
|
58
|
+
write: number;
|
|
59
|
+
};
|
|
60
|
+
network: {
|
|
61
|
+
received: number;
|
|
62
|
+
sent: number;
|
|
63
|
+
};
|
|
64
|
+
}
|
|
44
65
|
export interface TaskLog {
|
|
45
66
|
opId: string;
|
|
46
67
|
group: string;
|
|
@@ -153,6 +174,10 @@ export default class TaskManager {
|
|
|
153
174
|
* save log buffer for streaming logs
|
|
154
175
|
*/
|
|
155
176
|
protected opLogBuffers: Map<string, TaskLog[]>;
|
|
177
|
+
/**
|
|
178
|
+
* save stat buffer for streaming container stats
|
|
179
|
+
*/
|
|
180
|
+
protected opStatBuffers: Map<string, StatsBuffer>;
|
|
156
181
|
/**
|
|
157
182
|
* this list of ws sub to the task managers events
|
|
158
183
|
*/
|
|
@@ -161,6 +186,14 @@ export default class TaskManager {
|
|
|
161
186
|
* stores filters
|
|
162
187
|
*/
|
|
163
188
|
protected logMatchers: Map<WebSocket, (log: TaskLog) => boolean>;
|
|
189
|
+
/**
|
|
190
|
+
* ws subscribers for stats streaming
|
|
191
|
+
*/
|
|
192
|
+
protected statSubscribers: Set<WebSocket>;
|
|
193
|
+
/**
|
|
194
|
+
* stores stat filters
|
|
195
|
+
*/
|
|
196
|
+
protected statMatchers: Map<WebSocket, (stat: TaskStat) => boolean>;
|
|
164
197
|
protected TOTAL_LOGS_COUNT: number;
|
|
165
198
|
/**
|
|
166
199
|
* Lifecycle status of the task manager.
|
|
@@ -195,6 +228,13 @@ export default class TaskManager {
|
|
|
195
228
|
getAllLogs: () => TaskLog[];
|
|
196
229
|
subscribe: (ws: WebSocket, matcher: (log: TaskLog) => boolean) => void;
|
|
197
230
|
unsubscribe: (ws: WebSocket) => void;
|
|
231
|
+
addStat: (stat: TaskStat) => void;
|
|
232
|
+
getStatsByOp: (opId: string) => TaskStat[];
|
|
233
|
+
getAllStats: () => TaskStat[];
|
|
234
|
+
queryStats: (start?: number, end?: number, intervalMs?: number) => TaskStat[];
|
|
235
|
+
getLatestStatPerOp: (since: number) => TaskStat[];
|
|
236
|
+
subscribeStats: (ws: WebSocket, matcher: (stat: TaskStat) => boolean) => void;
|
|
237
|
+
unsubscribeStats: (ws: WebSocket) => void;
|
|
198
238
|
setResult: (opId: string, key: string, value: any) => void;
|
|
199
239
|
setResults: (opId: string, values: Record<string, any>) => void;
|
|
200
240
|
setHost: (opId: string, host: string) => void;
|
|
@@ -12,6 +12,7 @@ import { createInitialFlow } from './helpers/createInitialFlow.js';
|
|
|
12
12
|
import { createDependencyMap } from './executions/createDependencyMap.js';
|
|
13
13
|
import { getCurrentGroup, getCurrentGroupStatus, getGroupStatus, getOperationsStatus, getOperationStatus, } from './operations/getOperationsInfos.js';
|
|
14
14
|
import { addLog, getAllLogs, getLogsByGroup, getLogsByOp, subscribe, unsubscribe, } from './loggers/logManager.js';
|
|
15
|
+
import { addStat, getAllStats, getStatsByOp, queryStats, getLatestStatPerOp, subscribeStats, unsubscribeStats, } from './loggers/statsManager.js';
|
|
15
16
|
import { moveTaskManagerGroupOperations } from './operations/moveTaskManagerGroupOperation.js';
|
|
16
17
|
import { setResult, setResults, setHost, setDefaults, setContainerIp, rehydrateEndpointsForOperation, getByPath, resolveLiteralsInString, interpolate, interpolateOperation, transformCollections, } from './globalStore/index.js';
|
|
17
18
|
import { configs } from '../../configs/configs.js';
|
|
@@ -106,6 +107,10 @@ export default class TaskManager {
|
|
|
106
107
|
* save log buffer for streaming logs
|
|
107
108
|
*/
|
|
108
109
|
this.opLogBuffers = new Map();
|
|
110
|
+
/**
|
|
111
|
+
* save stat buffer for streaming container stats
|
|
112
|
+
*/
|
|
113
|
+
this.opStatBuffers = new Map();
|
|
109
114
|
/**
|
|
110
115
|
* this list of ws sub to the task managers events
|
|
111
116
|
*/
|
|
@@ -114,6 +119,14 @@ export default class TaskManager {
|
|
|
114
119
|
* stores filters
|
|
115
120
|
*/
|
|
116
121
|
this.logMatchers = new Map();
|
|
122
|
+
/**
|
|
123
|
+
* ws subscribers for stats streaming
|
|
124
|
+
*/
|
|
125
|
+
this.statSubscribers = new Set();
|
|
126
|
+
/**
|
|
127
|
+
* stores stat filters
|
|
128
|
+
*/
|
|
129
|
+
this.statMatchers = new Map();
|
|
117
130
|
this.TOTAL_LOGS_COUNT = 0;
|
|
118
131
|
/**
|
|
119
132
|
* Lifecycle status of the task manager.
|
|
@@ -152,6 +165,13 @@ export default class TaskManager {
|
|
|
152
165
|
this.getAllLogs = getAllLogs.bind(this);
|
|
153
166
|
this.subscribe = subscribe.bind(this);
|
|
154
167
|
this.unsubscribe = unsubscribe.bind(this);
|
|
168
|
+
this.addStat = addStat.bind(this);
|
|
169
|
+
this.getStatsByOp = getStatsByOp.bind(this);
|
|
170
|
+
this.getAllStats = getAllStats.bind(this);
|
|
171
|
+
this.queryStats = queryStats.bind(this);
|
|
172
|
+
this.getLatestStatPerOp = getLatestStatPerOp.bind(this);
|
|
173
|
+
this.subscribeStats = subscribeStats.bind(this);
|
|
174
|
+
this.unsubscribeStats = unsubscribeStats.bind(this);
|
|
155
175
|
this.setResult = setResult.bind(this);
|
|
156
176
|
this.setResults = setResults.bind(this);
|
|
157
177
|
this.setHost = setHost.bind(this);
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { TaskStat } from '../TaskManager.js';
|
|
2
|
+
export declare class StatsBuffer {
|
|
3
|
+
private readonly buffer;
|
|
4
|
+
push(stat: TaskStat): void;
|
|
5
|
+
get length(): number;
|
|
6
|
+
at(i: number): TaskStat | undefined;
|
|
7
|
+
latest(): TaskStat | undefined;
|
|
8
|
+
query(start?: number, end?: number, intervalMs?: number): TaskStat[];
|
|
9
|
+
private bsearch;
|
|
10
|
+
toArray(): TaskStat[];
|
|
11
|
+
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
export class StatsBuffer {
|
|
2
|
+
constructor() {
|
|
3
|
+
this.buffer = new Array((24 * 60 * 60) / 5); // 24 hours of stats at 5s intervals
|
|
4
|
+
}
|
|
5
|
+
push(stat) {
|
|
6
|
+
this.buffer.push(stat);
|
|
7
|
+
}
|
|
8
|
+
get length() {
|
|
9
|
+
return this.buffer.length;
|
|
10
|
+
}
|
|
11
|
+
at(i) {
|
|
12
|
+
return this.buffer[i];
|
|
13
|
+
}
|
|
14
|
+
latest() {
|
|
15
|
+
return this.buffer[this.buffer.length - 1];
|
|
16
|
+
}
|
|
17
|
+
query(start, end, intervalMs) {
|
|
18
|
+
if (this.buffer.length === 0)
|
|
19
|
+
return [];
|
|
20
|
+
let slice = this.buffer;
|
|
21
|
+
if (start !== undefined || end !== undefined) {
|
|
22
|
+
const fromIdx = start !== undefined
|
|
23
|
+
? this.bsearch(start, false)
|
|
24
|
+
: 0;
|
|
25
|
+
if (fromIdx >= this.buffer.length)
|
|
26
|
+
return [];
|
|
27
|
+
const toIdx = end !== undefined
|
|
28
|
+
? this.bsearch(end, true)
|
|
29
|
+
: this.buffer.length;
|
|
30
|
+
if (toIdx <= fromIdx)
|
|
31
|
+
return [];
|
|
32
|
+
slice = this.buffer.slice(fromIdx, toIdx);
|
|
33
|
+
}
|
|
34
|
+
if (!intervalMs)
|
|
35
|
+
return slice;
|
|
36
|
+
const result = [];
|
|
37
|
+
let lastTimestamp = 0;
|
|
38
|
+
for (const stat of slice) {
|
|
39
|
+
if (stat.timestamp - lastTimestamp >= intervalMs) {
|
|
40
|
+
lastTimestamp = stat.timestamp;
|
|
41
|
+
result.push(stat);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
return result;
|
|
45
|
+
}
|
|
46
|
+
bsearch(timestamp, inclusive) {
|
|
47
|
+
let lo = 0;
|
|
48
|
+
let hi = this.buffer.length;
|
|
49
|
+
while (lo < hi) {
|
|
50
|
+
const mid = (lo + hi) >>> 1;
|
|
51
|
+
const cmp = inclusive
|
|
52
|
+
? this.buffer[mid].timestamp <= timestamp
|
|
53
|
+
: this.buffer[mid].timestamp < timestamp;
|
|
54
|
+
if (cmp)
|
|
55
|
+
lo = mid + 1;
|
|
56
|
+
else
|
|
57
|
+
hi = mid;
|
|
58
|
+
}
|
|
59
|
+
return lo;
|
|
60
|
+
}
|
|
61
|
+
toArray() {
|
|
62
|
+
return [...this.buffer];
|
|
63
|
+
}
|
|
64
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import WebSocket from 'ws';
|
|
2
|
+
import TaskManager, { type TaskStat } from '../TaskManager.js';
|
|
3
|
+
export declare function addStat(this: TaskManager, stat: TaskStat): void;
|
|
4
|
+
export declare function getStatsByOp(this: TaskManager, opId: string): TaskStat[];
|
|
5
|
+
export declare function getAllStats(this: TaskManager): TaskStat[];
|
|
6
|
+
export declare function queryStats(this: TaskManager, start?: number, end?: number, intervalMs?: number): TaskStat[];
|
|
7
|
+
export declare function getLatestStatPerOp(this: TaskManager, since: number): TaskStat[];
|
|
8
|
+
export declare function subscribeStats(this: TaskManager, ws: WebSocket, matcher: (stat: TaskStat) => boolean): void;
|
|
9
|
+
export declare function unsubscribeStats(this: TaskManager, ws: WebSocket): void;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { StatsBuffer } from './StatsBuffer.js';
|
|
2
|
+
/**
 * Stores a stat sample in the buffer of its operation (creating the buffer
 * on first use) and forwards it to every stats subscriber whose matcher
 * accepts it, as a JSON message of the form `{ path: 'fstat', data: stat }`.
 *
 * Must be called with a TaskManager as `this`.
 */
export function addStat(stat) {
    let opBuffer = this.opStatBuffers.get(stat.opId);
    if (!this.opStatBuffers.has(stat.opId)) {
        opBuffer = new StatsBuffer();
        this.opStatBuffers.set(stat.opId, opBuffer);
    }
    opBuffer.push(stat);
    for (const ws of this.statSubscribers) {
        const accepts = this.statMatchers.get(ws);
        if (!accepts || !accepts(stat)) {
            continue;
        }
        try {
            ws.send(JSON.stringify({ path: 'fstat', data: stat }));
        }
        catch (_) {
            // Best effort: a failing subscriber must not stop the fan-out.
        }
    }
}
|
|
18
|
+
/**
 * Returns a copy of all stats stored for operation `opId`, or an empty
 * array when nothing has been recorded for it.
 *
 * Must be called with a TaskManager as `this`.
 */
export function getStatsByOp(opId) {
    const opBuffer = this.opStatBuffers.get(opId);
    if (opBuffer == null) {
        return [];
    }
    return opBuffer.toArray() || [];
}
|
|
21
|
+
export function getAllStats() {
|
|
22
|
+
return Array.from(this.opStatBuffers.values()).flatMap((b) => b.toArray());
|
|
23
|
+
}
|
|
24
|
+
export function queryStats(start, end, intervalMs) {
|
|
25
|
+
return Array.from(this.opStatBuffers.values()).flatMap((b) => b.query(start, end, intervalMs));
|
|
26
|
+
}
|
|
27
|
+
export function getLatestStatPerOp(since) {
|
|
28
|
+
return Array.from(this.opStatBuffers.values())
|
|
29
|
+
.map((b) => b.latest())
|
|
30
|
+
.filter((s) => s !== undefined && s.timestamp > since);
|
|
31
|
+
}
|
|
32
|
+
/**
 * Registers `ws` as a stats subscriber together with the matcher that
 * decides which stats it receives, and unsubscribes it automatically when
 * the socket emits 'close'.
 *
 * Must be called with a TaskManager as `this`.
 */
export function subscribeStats(ws, matcher) {
    const manager = this;
    manager.statSubscribers.add(ws);
    manager.statMatchers.set(ws, matcher);
    const onClose = () => {
        unsubscribeStats.call(manager, ws);
    };
    ws.on('close', onClose);
}
|
|
39
|
+
/**
 * Removes `ws` from the stats subscribers and drops its matcher.
 * Does nothing when `ws` was not subscribed.
 *
 * Must be called with a TaskManager as `this`.
 */
export function unsubscribeStats(ws) {
    const { statSubscribers, statMatchers } = this;
    statSubscribers.delete(ws);
    statMatchers.delete(ws);
}
|
|
@@ -115,6 +115,14 @@ export async function runTaskManagerOperation(flow, op, dependent) {
|
|
|
115
115
|
this.repository.displayLog(log);
|
|
116
116
|
}
|
|
117
117
|
});
|
|
118
|
+
/**
|
|
119
|
+
* Subscribes to 'stat' events emitted by the container stats stream.
|
|
120
|
+
* Each stat snapshot is stored in the TaskManager's stat buffer and
|
|
121
|
+
* fanned out to any active WebSocket subscribers.
|
|
122
|
+
*/
|
|
123
|
+
emitter.on('stat', (stat) => {
|
|
124
|
+
this.addStat({ ...stat, opId: op.id });
|
|
125
|
+
});
|
|
118
126
|
/**
|
|
119
127
|
* Subscribes to 'updateOpState' events that allow partial mutation of the operation's state.
|
|
120
128
|
*
|
|
@@ -13,6 +13,7 @@ export declare class ContainerStateManager {
|
|
|
13
13
|
private lastLogTimestamp;
|
|
14
14
|
private readonly EXITED_CHECKS_REQUIRED;
|
|
15
15
|
private currentLogStream;
|
|
16
|
+
private currentStatsStream;
|
|
16
17
|
private pollingInterval;
|
|
17
18
|
constructor(container: Dockerode.Container, controller: AbortController, emitter: EventEmitter, restartPolicy: RestartPolicy | undefined);
|
|
18
19
|
getState(): ContainerState;
|
|
@@ -20,5 +21,7 @@ export declare class ContainerStateManager {
|
|
|
20
21
|
private attachLogStream;
|
|
21
22
|
waitForExit(): Promise<void>;
|
|
22
23
|
stopMonitoring(): void;
|
|
24
|
+
private readonly STATS_INTERVAL_MS;
|
|
25
|
+
private attachStatsStream;
|
|
23
26
|
}
|
|
24
27
|
export {};
|
|
@@ -1,4 +1,15 @@
|
|
|
1
|
+
import { Readable } from 'stream';
|
|
2
|
+
import { createInterface } from 'readline';
|
|
1
3
|
import { parseBuffer } from './utils/parseBuffer.js';
|
|
4
|
+
import { parseDockerStat } from './utils/parseDockerStat.js';
|
|
5
|
+
function destroyStream(stream) {
|
|
6
|
+
if (!stream)
|
|
7
|
+
return;
|
|
8
|
+
stream.removeAllListeners();
|
|
9
|
+
if (stream instanceof Readable) {
|
|
10
|
+
stream.destroy();
|
|
11
|
+
}
|
|
12
|
+
}
|
|
2
13
|
export class ContainerStateManager {
|
|
3
14
|
constructor(container, controller, emitter, restartPolicy) {
|
|
4
15
|
this.container = container;
|
|
@@ -10,13 +21,15 @@ export class ContainerStateManager {
|
|
|
10
21
|
this.lastLogTimestamp = 0; // Unix timestamp in seconds from actual container logs
|
|
11
22
|
this.EXITED_CHECKS_REQUIRED = 3; // Require 3 consecutive checks before confirming exit
|
|
12
23
|
this.currentLogStream = null;
|
|
24
|
+
this.currentStatsStream = null;
|
|
13
25
|
this.pollingInterval = null;
|
|
26
|
+
this.STATS_INTERVAL_MS = 5000;
|
|
14
27
|
}
|
|
15
28
|
getState() {
|
|
16
29
|
return this.state;
|
|
17
30
|
}
|
|
18
31
|
async startMonitoring() {
|
|
19
|
-
await this.attachLogStream();
|
|
32
|
+
await Promise.all([this.attachLogStream(), this.attachStatsStream()]);
|
|
20
33
|
if (!this.restartPolicy) {
|
|
21
34
|
this.container
|
|
22
35
|
.wait({ abortSignal: this.controller.signal })
|
|
@@ -77,12 +90,7 @@ export class ContainerStateManager {
|
|
|
77
90
|
});
|
|
78
91
|
// Clean up on abort
|
|
79
92
|
const abortHandler = () => {
|
|
80
|
-
this.currentLogStream
|
|
81
|
-
if (this.currentLogStream &&
|
|
82
|
-
'destroy' in this.currentLogStream &&
|
|
83
|
-
typeof this.currentLogStream.destroy === 'function') {
|
|
84
|
-
this.currentLogStream.destroy();
|
|
85
|
-
}
|
|
93
|
+
destroyStream(this.currentLogStream);
|
|
86
94
|
this.currentLogStream = null;
|
|
87
95
|
};
|
|
88
96
|
this.controller.signal.addEventListener('abort', abortHandler, {
|
|
@@ -110,13 +118,49 @@ export class ContainerStateManager {
|
|
|
110
118
|
clearInterval(this.pollingInterval);
|
|
111
119
|
this.pollingInterval = null;
|
|
112
120
|
}
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
121
|
+
destroyStream(this.currentLogStream);
|
|
122
|
+
this.currentLogStream = null;
|
|
123
|
+
destroyStream(this.currentStatsStream);
|
|
124
|
+
this.currentStatsStream = null;
|
|
125
|
+
}
|
|
126
|
+
async attachStatsStream() {
|
|
127
|
+
if (this.controller.signal.aborted)
|
|
128
|
+
return;
|
|
129
|
+
try {
|
|
130
|
+
this.currentStatsStream = await this.container.stats({ stream: true });
|
|
131
|
+
let peakStat = null;
|
|
132
|
+
const rl = createInterface({ input: this.currentStatsStream });
|
|
133
|
+
rl.on('line', (line) => {
|
|
134
|
+
try {
|
|
135
|
+
const raw = JSON.parse(line);
|
|
136
|
+
const stat = parseDockerStat(raw);
|
|
137
|
+
if (!stat)
|
|
138
|
+
return;
|
|
139
|
+
if (!peakStat || stat.cpu.cpu_percent > peakStat.cpu.cpu_percent) {
|
|
140
|
+
peakStat = stat;
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
catch { }
|
|
144
|
+
});
|
|
145
|
+
const statsInterval = setInterval(() => {
|
|
146
|
+
if (peakStat) {
|
|
147
|
+
this.emitter.emit('stat', peakStat);
|
|
148
|
+
peakStat = null;
|
|
149
|
+
}
|
|
150
|
+
}, this.STATS_INTERVAL_MS);
|
|
151
|
+
const cleanup = () => {
|
|
152
|
+
clearInterval(statsInterval);
|
|
153
|
+
this.currentStatsStream = null;
|
|
154
|
+
};
|
|
155
|
+
this.currentStatsStream.on('close', cleanup);
|
|
156
|
+
this.currentStatsStream.on('error', cleanup);
|
|
157
|
+
const abortHandler = () => {
|
|
158
|
+
clearInterval(statsInterval);
|
|
159
|
+
destroyStream(this.currentStatsStream);
|
|
160
|
+
this.currentStatsStream = null;
|
|
161
|
+
};
|
|
162
|
+
this.controller.signal.addEventListener('abort', abortHandler, { once: true });
|
|
120
163
|
}
|
|
164
|
+
catch { }
|
|
121
165
|
}
|
|
122
166
|
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
const MB = 1024 * 1024;
|
|
2
|
+
const GB = 1024 * 1024 * 1024;
|
|
3
|
+
function cpuPercent(raw) {
|
|
4
|
+
const cpuDelta = raw.cpu_stats.cpu_usage.total_usage - raw.precpu_stats.cpu_usage.total_usage;
|
|
5
|
+
const systemDelta = raw.cpu_stats.system_cpu_usage - raw.precpu_stats.system_cpu_usage;
|
|
6
|
+
const numCpus = raw.cpu_stats.online_cpus ?? raw.cpu_stats.cpu_usage.percpu_usage?.length ?? 1;
|
|
7
|
+
return systemDelta > 0 ? (cpuDelta / systemDelta) * numCpus * 100 : 0;
|
|
8
|
+
}
|
|
9
|
+
function networkBytes(raw) {
|
|
10
|
+
let rx = 0;
|
|
11
|
+
let tx = 0;
|
|
12
|
+
if (raw.networks) {
|
|
13
|
+
for (const iface of Object.values(raw.networks)) {
|
|
14
|
+
rx += iface.rx_bytes;
|
|
15
|
+
tx += iface.tx_bytes;
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
return { rx, tx };
|
|
19
|
+
}
|
|
20
|
+
function blockBytes(raw) {
|
|
21
|
+
let read = 0;
|
|
22
|
+
let write = 0;
|
|
23
|
+
for (const entry of raw.blkio_stats?.io_service_bytes_recursive ?? []) {
|
|
24
|
+
if (entry.op.toLowerCase() === 'read')
|
|
25
|
+
read += entry.value;
|
|
26
|
+
else if (entry.op.toLowerCase() === 'write')
|
|
27
|
+
write += entry.value;
|
|
28
|
+
}
|
|
29
|
+
return { read, write };
|
|
30
|
+
}
|
|
31
|
+
export function parseDockerStat(raw) {
|
|
32
|
+
try {
|
|
33
|
+
const cpu = cpuPercent(raw);
|
|
34
|
+
const memory_usage_bytes = raw.memory_stats.usage ?? 0;
|
|
35
|
+
const memory_limit_bytes = raw.memory_stats.limit ?? 0;
|
|
36
|
+
const memory_percent = memory_limit_bytes > 0 ? (memory_usage_bytes / memory_limit_bytes) * 100 : 0;
|
|
37
|
+
const net = networkBytes(raw);
|
|
38
|
+
const blk = blockBytes(raw);
|
|
39
|
+
return {
|
|
40
|
+
timestamp: new Date(raw.read).getTime() || Date.now(),
|
|
41
|
+
cpu: {
|
|
42
|
+
cpu_percent: parseFloat(Math.max(0, cpu).toFixed(2)),
|
|
43
|
+
},
|
|
44
|
+
memory: {
|
|
45
|
+
memory_usage: parseFloat((memory_usage_bytes / MB).toFixed(2)),
|
|
46
|
+
memory_limit: parseFloat((memory_limit_bytes / GB).toFixed(2)),
|
|
47
|
+
memory_percent: parseFloat(Math.max(0, memory_percent).toFixed(2)),
|
|
48
|
+
},
|
|
49
|
+
disk: {
|
|
50
|
+
read: parseFloat((blk.read / MB).toFixed(2)),
|
|
51
|
+
write: parseFloat((blk.write / MB).toFixed(2)),
|
|
52
|
+
},
|
|
53
|
+
network: {
|
|
54
|
+
received: parseFloat((net.rx / MB).toFixed(2)),
|
|
55
|
+
sent: parseFloat((net.tx / MB).toFixed(2)),
|
|
56
|
+
},
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
catch {
|
|
60
|
+
return null;
|
|
61
|
+
}
|
|
62
|
+
}
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nosana/node",
|
|
3
|
-
"version": "1.1.
|
|
3
|
+
"version": "1.1.10-rc",
|
|
4
4
|
"lockfileVersion": 3,
|
|
5
5
|
"requires": true,
|
|
6
6
|
"packages": {
|
|
7
7
|
"": {
|
|
8
8
|
"name": "@nosana/node",
|
|
9
|
-
"version": "1.1.
|
|
9
|
+
"version": "1.1.10-rc",
|
|
10
10
|
"license": "ISC",
|
|
11
11
|
"dependencies": {
|
|
12
12
|
"@coral-xyz/anchor": "^0.28.1-beta.1",
|