@poolzin/pool-bot 2026.3.9 → 2026.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/README.md +147 -69
- package/dist/.buildstamp +1 -1
- package/dist/agents/error-classifier.js +26 -77
- package/dist/agents/skills/security.js +1 -7
- package/dist/build-info.json +3 -3
- package/dist/cli/cron-cli/register.cron-dashboard.js +339 -0
- package/dist/cli/cron-cli/register.js +2 -0
- package/dist/cli/errors.js +187 -0
- package/dist/cli/program/command-registry.js +13 -0
- package/dist/cli/program/register.maintenance.js +21 -0
- package/dist/cli/program/register.subclis.js +9 -0
- package/dist/cli/swarm-cli/register.js +8 -0
- package/dist/cli/swarm-cli/register.swarm-status.js +488 -0
- package/dist/cli/telemetry-cli/register.js +10 -0
- package/dist/cli/telemetry-cli/register.telemetry-alerts.js +176 -0
- package/dist/cli/telemetry-cli/register.telemetry-metrics.js +323 -0
- package/dist/cli/telemetry-cli/register.telemetry-status.js +179 -0
- package/dist/commands/doctor-checks.js +498 -0
- package/dist/context-engine/index.js +1 -1
- package/dist/context-engine/legacy.js +1 -3
- package/dist/context-engine/summarizing.js +5 -8
- package/dist/cron/service/timer.js +18 -0
- package/dist/gateway/protocol/index.js +5 -2
- package/dist/gateway/protocol/schema/error-codes.js +1 -0
- package/dist/gateway/protocol/schema/swarm.js +80 -0
- package/dist/gateway/protocol/schema.js +1 -0
- package/dist/gateway/server-close.js +4 -0
- package/dist/gateway/server-constants.js +1 -0
- package/dist/gateway/server-cron.js +29 -0
- package/dist/gateway/server-maintenance.js +35 -2
- package/dist/gateway/server-methods/swarm.js +58 -0
- package/dist/gateway/server-methods/telemetry.js +71 -0
- package/dist/gateway/server-methods-list.js +8 -0
- package/dist/gateway/server-methods.js +9 -2
- package/dist/gateway/server.impl.js +33 -16
- package/dist/infra/abort-pattern.js +4 -4
- package/dist/infra/retry.js +3 -1
- package/dist/skills/commands.js +7 -25
- package/dist/skills/index.js +14 -17
- package/dist/skills/parser.js +12 -27
- package/dist/skills/registry.js +3 -6
- package/dist/skills/security.js +2 -8
- package/dist/swarm/service.js +247 -0
- package/dist/telemetry/alert-engine.js +258 -0
- package/dist/telemetry/cron-instrumentation.js +49 -0
- package/dist/telemetry/gateway-instrumentation.js +80 -0
- package/dist/telemetry/instrumentation.js +66 -0
- package/dist/telemetry/service.js +345 -0
- package/dist/tui/components/assistant-message.js +6 -2
- package/dist/tui/components/hyperlink-markdown.js +32 -0
- package/dist/tui/components/searchable-select-list.js +12 -1
- package/dist/tui/components/user-message.js +6 -2
- package/dist/tui/index.js +22 -6
- package/dist/tui/theme/theme-detection.js +226 -0
- package/dist/tui/tui-command-handlers.js +20 -0
- package/dist/tui/tui-formatters.js +4 -3
- package/dist/tui/utils/ctrl-c-handler.js +67 -0
- package/dist/tui/utils/osc8-hyperlinks.js +208 -0
- package/dist/tui/utils/safe-stop.js +180 -0
- package/dist/tui/utils/session-key-utils.js +81 -0
- package/dist/tui/utils/text-sanitization.js +284 -0
- package/dist/utils/lru-cache.js +116 -0
- package/dist/utils/performance.js +199 -0
- package/dist/utils/retry.js +240 -0
- package/docs/MELHORIAS_IMPLEMENTADAS.md +228 -0
- package/docs/MELHORIAS_PROFISSIONAIS.md +282 -0
- package/docs/PLANO_ACAO_TUI.md +357 -0
- package/docs/PROGRESSO_TUI.md +66 -0
- package/docs/RELATORIO_FINAL.md +217 -0
- package/docs/diagnostico-shell-completion.md +265 -0
- package/docs/features/advanced-memory.md +585 -0
- package/docs/features/discord-components-v2.md +277 -0
- package/docs/features/swarm.md +100 -0
- package/docs/features/telemetry.md +284 -0
- package/docs/integrations/INTEGRATION_PLAN.md +665 -345
- package/docs/models/provider-infrastructure.md +400 -0
- package/docs/security/exec-approvals.md +294 -0
- package/extensions/bluebubbles/package.json +1 -1
- package/extensions/copilot-proxy/package.json +1 -1
- package/extensions/diagnostics-otel/package.json +1 -1
- package/extensions/discord/package.json +1 -1
- package/extensions/feishu/package.json +1 -1
- package/extensions/google-antigravity-auth/package.json +1 -1
- package/extensions/google-gemini-cli-auth/package.json +1 -1
- package/extensions/googlechat/package.json +1 -1
- package/extensions/hexstrike-bridge/README.md +119 -0
- package/extensions/hexstrike-bridge/index.test.ts +247 -0
- package/extensions/hexstrike-bridge/index.ts +487 -0
- package/extensions/hexstrike-bridge/package.json +17 -0
- package/extensions/imessage/package.json +1 -1
- package/extensions/irc/package.json +1 -1
- package/extensions/line/package.json +1 -1
- package/extensions/llm-task/package.json +1 -1
- package/extensions/lobster/package.json +1 -1
- package/extensions/matrix/CHANGELOG.md +10 -0
- package/extensions/matrix/package.json +1 -1
- package/extensions/mattermost/package.json +1 -1
- package/extensions/mavalie/README.md +97 -0
- package/extensions/mavalie/package.json +15 -0
- package/extensions/mavalie/src/index.ts +62 -0
- package/extensions/mcp-server/index.ts +14 -0
- package/extensions/mcp-server/package.json +11 -0
- package/extensions/mcp-server/src/service.ts +540 -0
- package/extensions/memory-core/package.json +1 -1
- package/extensions/memory-lancedb/package.json +1 -1
- package/extensions/minimax-portal-auth/package.json +1 -1
- package/extensions/msteams/CHANGELOG.md +10 -0
- package/extensions/msteams/package.json +1 -1
- package/extensions/nextcloud-talk/package.json +1 -1
- package/extensions/nostr/CHANGELOG.md +10 -0
- package/extensions/nostr/package.json +1 -1
- package/extensions/open-prose/package.json +1 -1
- package/extensions/openai-codex-auth/package.json +1 -1
- package/extensions/signal/package.json +1 -1
- package/extensions/slack/package.json +1 -1
- package/extensions/telegram/package.json +1 -1
- package/extensions/tlon/package.json +1 -1
- package/extensions/twitch/CHANGELOG.md +10 -0
- package/extensions/twitch/package.json +1 -1
- package/extensions/voice-call/CHANGELOG.md +10 -0
- package/extensions/voice-call/package.json +1 -1
- package/extensions/whatsapp/package.json +1 -1
- package/extensions/zalo/CHANGELOG.md +10 -0
- package/extensions/zalo/package.json +1 -1
- package/extensions/zalouser/CHANGELOG.md +10 -0
- package/extensions/zalouser/package.json +1 -1
- package/package.json +8 -1
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { Type } from "@sinclair/typebox";
|
|
2
|
+
export const SwarmStrategySchema = Type.Union([
|
|
3
|
+
Type.Literal("round_robin"),
|
|
4
|
+
Type.Literal("least_busy"),
|
|
5
|
+
Type.Literal("capability_match"),
|
|
6
|
+
Type.Literal("priority_queue"),
|
|
7
|
+
]);
|
|
8
|
+
export const SwarmMemberSchema = Type.Object({
|
|
9
|
+
agentId: Type.String(),
|
|
10
|
+
sessionKey: Type.String(),
|
|
11
|
+
status: Type.Union([Type.Literal("idle"), Type.Literal("working"), Type.Literal("offline")]),
|
|
12
|
+
capabilities: Type.Array(Type.String()),
|
|
13
|
+
joinedAt: Type.Number(),
|
|
14
|
+
lastHeartbeatAt: Type.Number(),
|
|
15
|
+
completedTasks: Type.Number(),
|
|
16
|
+
failedTasks: Type.Number(),
|
|
17
|
+
currentTaskId: Type.Optional(Type.String()),
|
|
18
|
+
});
|
|
19
|
+
export const SwarmTaskSchema = Type.Object({
|
|
20
|
+
id: Type.String(),
|
|
21
|
+
description: Type.String(),
|
|
22
|
+
status: Type.Union([
|
|
23
|
+
Type.Literal("pending"),
|
|
24
|
+
Type.Literal("in_progress"),
|
|
25
|
+
Type.Literal("completed"),
|
|
26
|
+
Type.Literal("failed"),
|
|
27
|
+
]),
|
|
28
|
+
priority: Type.Number(),
|
|
29
|
+
createdAt: Type.Number(),
|
|
30
|
+
startedAt: Type.Optional(Type.Number()),
|
|
31
|
+
completedAt: Type.Optional(Type.Number()),
|
|
32
|
+
assignedTo: Type.Optional(Type.String()),
|
|
33
|
+
requiredCapabilities: Type.Array(Type.String()),
|
|
34
|
+
});
|
|
35
|
+
export const SwarmStateSchema = Type.Object({
|
|
36
|
+
id: Type.String(),
|
|
37
|
+
name: Type.String(),
|
|
38
|
+
description: Type.Optional(Type.String()),
|
|
39
|
+
createdAt: Type.Number(),
|
|
40
|
+
orchestratorAgentId: Type.String(),
|
|
41
|
+
members: Type.Array(SwarmMemberSchema),
|
|
42
|
+
tasks: Type.Array(SwarmTaskSchema),
|
|
43
|
+
strategy: SwarmStrategySchema,
|
|
44
|
+
status: Type.Union([Type.Literal("active"), Type.Literal("paused"), Type.Literal("shutdown")]),
|
|
45
|
+
});
|
|
46
|
+
export const SwarmListParamsSchema = Type.Object({});
|
|
47
|
+
export const SwarmStatusParamsSchema = Type.Object({
|
|
48
|
+
swarmId: Type.Optional(Type.String()),
|
|
49
|
+
});
|
|
50
|
+
export const SwarmCreateParamsSchema = Type.Object({
|
|
51
|
+
name: Type.String(),
|
|
52
|
+
description: Type.Optional(Type.String()),
|
|
53
|
+
strategy: SwarmStrategySchema,
|
|
54
|
+
orchestrator: Type.Optional(Type.String()),
|
|
55
|
+
});
|
|
56
|
+
export const SwarmListResultSchema = Type.Object({
|
|
57
|
+
swarms: Type.Array(Type.Object({
|
|
58
|
+
id: Type.String(),
|
|
59
|
+
name: Type.String(),
|
|
60
|
+
status: Type.Union([
|
|
61
|
+
Type.Literal("active"),
|
|
62
|
+
Type.Literal("paused"),
|
|
63
|
+
Type.Literal("shutdown"),
|
|
64
|
+
]),
|
|
65
|
+
members: Type.Number(),
|
|
66
|
+
tasks: Type.Number(),
|
|
67
|
+
})),
|
|
68
|
+
});
|
|
69
|
+
export const SwarmStatusResultSchema = Type.Object({
|
|
70
|
+
swarm: Type.Union([SwarmStateSchema, Type.Null()]),
|
|
71
|
+
});
|
|
72
|
+
export const SwarmCreateResultSchema = Type.Object({
|
|
73
|
+
id: Type.String(),
|
|
74
|
+
name: Type.String(),
|
|
75
|
+
description: Type.Optional(Type.String()),
|
|
76
|
+
strategy: SwarmStrategySchema,
|
|
77
|
+
orchestratorAgentId: Type.String(),
|
|
78
|
+
createdAt: Type.Number(),
|
|
79
|
+
status: Type.Union([Type.Literal("active"), Type.Literal("paused"), Type.Literal("shutdown")]),
|
|
80
|
+
});
|
|
@@ -13,5 +13,6 @@ export * from "./schema/push.js";
|
|
|
13
13
|
export * from "./schema/protocol-schemas.js";
|
|
14
14
|
export * from "./schema/sessions.js";
|
|
15
15
|
export * from "./schema/snapshot.js";
|
|
16
|
+
export * from "./schema/swarm.js";
|
|
16
17
|
export * from "./schema/types.js";
|
|
17
18
|
export * from "./schema/wizard.js";
|
|
@@ -54,6 +54,7 @@ export function createGatewayCloseHandler(params) {
|
|
|
54
54
|
clearInterval(params.tickInterval);
|
|
55
55
|
clearInterval(params.healthInterval);
|
|
56
56
|
clearInterval(params.dedupeCleanup);
|
|
57
|
+
clearInterval(params.telemetryInterval);
|
|
57
58
|
if (params.agentUnsub) {
|
|
58
59
|
try {
|
|
59
60
|
params.agentUnsub();
|
|
@@ -84,6 +85,9 @@ export function createGatewayCloseHandler(params) {
|
|
|
84
85
|
if (params.browserControl) {
|
|
85
86
|
await params.browserControl.stop().catch(() => { });
|
|
86
87
|
}
|
|
88
|
+
if (params.telemetryService) {
|
|
89
|
+
await params.telemetryService.shutdown().catch(() => { });
|
|
90
|
+
}
|
|
87
91
|
await new Promise((resolve) => params.wss.close(() => resolve()));
|
|
88
92
|
const servers = params.httpServers && params.httpServers.length > 0
|
|
89
93
|
? params.httpServers
|
|
@@ -29,5 +29,6 @@ export const getHandshakeTimeoutMs = () => {
|
|
|
29
29
|
};
|
|
30
30
|
export const TICK_INTERVAL_MS = 30_000;
|
|
31
31
|
export const HEALTH_REFRESH_INTERVAL_MS = 60_000;
|
|
32
|
+
export const TELEMETRY_METRICS_INTERVAL_MS = 30_000; // Broadcast telemetry metrics every 30s
|
|
32
33
|
export const DEDUPE_TTL_MS = 5 * 60_000;
|
|
33
34
|
export const DEDUPE_MAX = 1000;
|
|
@@ -338,6 +338,35 @@ export function buildGatewayCronService(params) {
|
|
|
338
338
|
});
|
|
339
339
|
}
|
|
340
340
|
},
|
|
341
|
+
onJobMetrics: (metrics) => {
|
|
342
|
+
// Import telemetry service dynamically to avoid circular dependencies
|
|
343
|
+
void import("../telemetry/service.js")
|
|
344
|
+
.then(({ getGlobalTelemetryService }) => {
|
|
345
|
+
const telemetry = getGlobalTelemetryService();
|
|
346
|
+
if (telemetry) {
|
|
347
|
+
telemetry.recordCounter("poolbot.cron.jobs_executed", 1, {
|
|
348
|
+
job_id: metrics.jobId,
|
|
349
|
+
job_name: metrics.jobName,
|
|
350
|
+
schedule: metrics.schedule,
|
|
351
|
+
status: metrics.success ? "success" : "failure",
|
|
352
|
+
});
|
|
353
|
+
telemetry.recordHistogram("poolbot.cron.job_duration_ms", metrics.durationMs, {
|
|
354
|
+
job_id: metrics.jobId,
|
|
355
|
+
job_name: metrics.jobName,
|
|
356
|
+
});
|
|
357
|
+
if (!metrics.success) {
|
|
358
|
+
telemetry.recordCounter("poolbot.cron.job_failures", 1, {
|
|
359
|
+
job_id: metrics.jobId,
|
|
360
|
+
job_name: metrics.jobName,
|
|
361
|
+
error_type: metrics.errorType ?? "unknown",
|
|
362
|
+
});
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
})
|
|
366
|
+
.catch(() => {
|
|
367
|
+
// Silently ignore telemetry errors
|
|
368
|
+
});
|
|
369
|
+
},
|
|
341
370
|
});
|
|
342
371
|
return { cron, storePath, cronEnabled };
|
|
343
372
|
}
|
|
@@ -1,7 +1,11 @@
|
|
|
1
|
+
import { createAlertEngine } from "../telemetry/alert-engine.js";
|
|
2
|
+
import { getGlobalTelemetryService } from "../telemetry/service.js";
|
|
1
3
|
import { abortChatRunById } from "./chat-abort.js";
|
|
2
|
-
import { DEDUPE_MAX, DEDUPE_TTL_MS, HEALTH_REFRESH_INTERVAL_MS, TICK_INTERVAL_MS, } from "./server-constants.js";
|
|
4
|
+
import { DEDUPE_MAX, DEDUPE_TTL_MS, HEALTH_REFRESH_INTERVAL_MS, TELEMETRY_METRICS_INTERVAL_MS, TICK_INTERVAL_MS, } from "./server-constants.js";
|
|
3
5
|
import { formatError } from "./server-utils.js";
|
|
4
6
|
import { setBroadcastHealthUpdate } from "./server/health-state.js";
|
|
7
|
+
import { createSubsystemLogger } from "../logging/subsystem.js";
|
|
8
|
+
const logTelemetry = createSubsystemLogger("telemetry:broadcast");
|
|
5
9
|
export function startGatewayMaintenanceTimers(params) {
|
|
6
10
|
setBroadcastHealthUpdate((snap) => {
|
|
7
11
|
params.broadcast("health", snap, {
|
|
@@ -28,6 +32,35 @@ export function startGatewayMaintenanceTimers(params) {
|
|
|
28
32
|
void params
|
|
29
33
|
.refreshGatewayHealthSnapshot({ probe: true })
|
|
30
34
|
.catch((err) => params.logHealth.error(`initial refresh failed: ${formatError(err)}`));
|
|
35
|
+
// Setup alert engine
|
|
36
|
+
const telemetry = getGlobalTelemetryService();
|
|
37
|
+
if (telemetry?.isEnabled()) {
|
|
38
|
+
const alertEngine = createAlertEngine();
|
|
39
|
+
telemetry.setAlertEngine(alertEngine);
|
|
40
|
+
logTelemetry.info(`Alert engine initialized with ${alertEngine.getRules().length} rules`);
|
|
41
|
+
}
|
|
42
|
+
// periodic telemetry metrics broadcast
|
|
43
|
+
const telemetryInterval = setInterval(() => {
|
|
44
|
+
const telemetry = getGlobalTelemetryService();
|
|
45
|
+
if (!telemetry?.isEnabled()) {
|
|
46
|
+
return;
|
|
47
|
+
}
|
|
48
|
+
const snapshot = telemetry.getSnapshot();
|
|
49
|
+
if (snapshot) {
|
|
50
|
+
const payload = { metrics: snapshot, ts: Date.now() };
|
|
51
|
+
params.broadcast("telemetry.metrics", payload, { dropIfSlow: true });
|
|
52
|
+
params.nodeSendToAllSubscribed("telemetry.metrics", payload);
|
|
53
|
+
logTelemetry.debug(`broadcasted ${snapshot.metrics.length} metrics, ${snapshot.spans.length} spans`);
|
|
54
|
+
// Evaluate alerts
|
|
55
|
+
const alerts = telemetry.evaluateAlerts(snapshot);
|
|
56
|
+
for (const alert of alerts) {
|
|
57
|
+
const alertPayload = { alert, ts: Date.now() };
|
|
58
|
+
params.broadcast("telemetry.alert", alertPayload, { dropIfSlow: false });
|
|
59
|
+
params.nodeSendToAllSubscribed("telemetry.alert", alertPayload);
|
|
60
|
+
logTelemetry.warn(`Alert triggered: ${alert.name} (${alert.severity}) - ${alert.message}`);
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
}, TELEMETRY_METRICS_INTERVAL_MS);
|
|
31
64
|
// dedupe cache cleanup
|
|
32
65
|
const dedupeCleanup = setInterval(() => {
|
|
33
66
|
const AGENT_RUN_SEQ_MAX = 10_000;
|
|
@@ -79,5 +112,5 @@ export function startGatewayMaintenanceTimers(params) {
|
|
|
79
112
|
params.chatDeltaSentAt.delete(runId);
|
|
80
113
|
}
|
|
81
114
|
}, 60_000);
|
|
82
|
-
return { tickInterval, healthInterval, dedupeCleanup };
|
|
115
|
+
return { tickInterval, healthInterval, dedupeCleanup, telemetryInterval };
|
|
83
116
|
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { ErrorCodes, errorShape, formatValidationErrors, validateSwarmCreateParams, validateSwarmListParams, validateSwarmStatusParams, } from "../protocol/index.js";
|
|
2
|
+
import { swarmService } from "../../swarm/service.js";
|
|
3
|
+
export const swarmHandlers = {
|
|
4
|
+
"swarm.list": async ({ params, respond }) => {
|
|
5
|
+
if (!validateSwarmListParams(params)) {
|
|
6
|
+
respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, `invalid swarm.list params: ${formatValidationErrors(validateSwarmListParams.errors)}`));
|
|
7
|
+
return;
|
|
8
|
+
}
|
|
9
|
+
try {
|
|
10
|
+
const result = await swarmService.list();
|
|
11
|
+
respond(true, result, undefined);
|
|
12
|
+
}
|
|
13
|
+
catch (err) {
|
|
14
|
+
respond(false, undefined, errorShape(ErrorCodes.INTERNAL_ERROR, `swarm.list failed: ${err}`));
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"swarm.status": async ({ params, respond }) => {
|
|
18
|
+
if (!validateSwarmStatusParams(params)) {
|
|
19
|
+
respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, `invalid swarm.status params: ${formatValidationErrors(validateSwarmStatusParams.errors)}`));
|
|
20
|
+
return;
|
|
21
|
+
}
|
|
22
|
+
try {
|
|
23
|
+
const p = params;
|
|
24
|
+
// If no swarmId provided, return default swarm
|
|
25
|
+
const swarmId = p.swarmId ?? "swarm-default";
|
|
26
|
+
let result = await swarmService.getStatus(swarmId);
|
|
27
|
+
// If swarm doesn't exist, create default one
|
|
28
|
+
if (!result.swarm) {
|
|
29
|
+
const defaultSwarm = await swarmService.getOrCreateDefaultSwarm();
|
|
30
|
+
result = { swarm: defaultSwarm };
|
|
31
|
+
}
|
|
32
|
+
respond(true, result, undefined);
|
|
33
|
+
}
|
|
34
|
+
catch (err) {
|
|
35
|
+
respond(false, undefined, errorShape(ErrorCodes.INTERNAL_ERROR, `swarm.status failed: ${err}`));
|
|
36
|
+
}
|
|
37
|
+
},
|
|
38
|
+
"swarm.create": async ({ params, respond }) => {
|
|
39
|
+
if (!validateSwarmCreateParams(params)) {
|
|
40
|
+
respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, `invalid swarm.create params: ${formatValidationErrors(validateSwarmCreateParams.errors)}`));
|
|
41
|
+
return;
|
|
42
|
+
}
|
|
43
|
+
try {
|
|
44
|
+
const p = params;
|
|
45
|
+
const result = await swarmService.create({
|
|
46
|
+
name: p.name,
|
|
47
|
+
description: p.description,
|
|
48
|
+
strategy: p.strategy ??
|
|
49
|
+
"capability_match",
|
|
50
|
+
orchestratorAgentId: p.orchestrator ?? "main",
|
|
51
|
+
});
|
|
52
|
+
respond(true, result, undefined);
|
|
53
|
+
}
|
|
54
|
+
catch (err) {
|
|
55
|
+
respond(false, undefined, errorShape(ErrorCodes.INTERNAL_ERROR, `swarm.create failed: ${err}`));
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
};
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { getGlobalTelemetryService } from "../../telemetry/service.js";
|
|
2
|
+
import { ErrorCodes, errorShape } from "../protocol/index.js";
|
|
3
|
+
export const telemetryHandlers = {
|
|
4
|
+
"telemetry.status": ({ respond }) => {
|
|
5
|
+
const telemetry = getGlobalTelemetryService();
|
|
6
|
+
if (!telemetry) {
|
|
7
|
+
respond(true, { enabled: false, initialized: false }, undefined);
|
|
8
|
+
return;
|
|
9
|
+
}
|
|
10
|
+
const config = telemetry.getConfig();
|
|
11
|
+
const snapshot = telemetry.getSnapshot();
|
|
12
|
+
respond(true, {
|
|
13
|
+
enabled: config.enabled,
|
|
14
|
+
initialized: true,
|
|
15
|
+
serviceName: config.serviceName,
|
|
16
|
+
exporter: config.tracing.exporter,
|
|
17
|
+
endpoint: config.tracing.otlpEndpoint,
|
|
18
|
+
tracing: config.tracing,
|
|
19
|
+
metrics: config.metrics,
|
|
20
|
+
sampleRate: config.tracing.sampleRate,
|
|
21
|
+
metricsSnapshot: snapshot,
|
|
22
|
+
}, undefined);
|
|
23
|
+
},
|
|
24
|
+
"telemetry.metrics": ({ respond }) => {
|
|
25
|
+
const telemetry = getGlobalTelemetryService();
|
|
26
|
+
if (!telemetry) {
|
|
27
|
+
respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, "Telemetry service not initialized"));
|
|
28
|
+
return;
|
|
29
|
+
}
|
|
30
|
+
const snapshot = telemetry.getSnapshot();
|
|
31
|
+
respond(true, { metrics: snapshot }, undefined);
|
|
32
|
+
},
|
|
33
|
+
"telemetry.config": ({ params, respond }) => {
|
|
34
|
+
const telemetry = getGlobalTelemetryService();
|
|
35
|
+
if (!telemetry) {
|
|
36
|
+
respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, "Telemetry service not initialized"));
|
|
37
|
+
return;
|
|
38
|
+
}
|
|
39
|
+
// Allow runtime updates to sample rate
|
|
40
|
+
if (typeof params.sampleRate === "number") {
|
|
41
|
+
// Note: This would require restarting the service with new config
|
|
42
|
+
// For now, we just return the current config
|
|
43
|
+
}
|
|
44
|
+
const config = telemetry.getConfig();
|
|
45
|
+
respond(true, {
|
|
46
|
+
enabled: config.enabled,
|
|
47
|
+
serviceName: config.serviceName,
|
|
48
|
+
tracing: config.tracing,
|
|
49
|
+
metrics: config.metrics,
|
|
50
|
+
}, undefined);
|
|
51
|
+
},
|
|
52
|
+
"telemetry.alerts.list": ({ respond }) => {
|
|
53
|
+
const telemetry = getGlobalTelemetryService();
|
|
54
|
+
if (!telemetry) {
|
|
55
|
+
respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, "Telemetry service not initialized"));
|
|
56
|
+
return;
|
|
57
|
+
}
|
|
58
|
+
const alerts = telemetry.evaluateAlerts();
|
|
59
|
+
respond(true, { alerts }, undefined);
|
|
60
|
+
},
|
|
61
|
+
"telemetry.alerts.test": ({ respond }) => {
|
|
62
|
+
const telemetry = getGlobalTelemetryService();
|
|
63
|
+
if (!telemetry) {
|
|
64
|
+
respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, "Telemetry service not initialized"));
|
|
65
|
+
return;
|
|
66
|
+
}
|
|
67
|
+
// Force alert evaluation and return any triggered alerts
|
|
68
|
+
const alerts = telemetry.evaluateAlerts();
|
|
69
|
+
respond(true, { triggered: alerts.length > 0, alerts }, undefined);
|
|
70
|
+
},
|
|
71
|
+
};
|
|
@@ -89,6 +89,12 @@ const BASE_METHODS = [
|
|
|
89
89
|
"chat.history",
|
|
90
90
|
"chat.abort",
|
|
91
91
|
"chat.send",
|
|
92
|
+
// Telemetry methods
|
|
93
|
+
"telemetry.status",
|
|
94
|
+
"telemetry.metrics",
|
|
95
|
+
"telemetry.config",
|
|
96
|
+
"telemetry.alerts.list",
|
|
97
|
+
"telemetry.alerts.test",
|
|
92
98
|
];
|
|
93
99
|
export function listGatewayMethods() {
|
|
94
100
|
const channelMethods = listChannelPlugins().flatMap((plugin) => plugin.gatewayMethods ?? []);
|
|
@@ -113,4 +119,6 @@ export const GATEWAY_EVENTS = [
|
|
|
113
119
|
"voicewake.changed",
|
|
114
120
|
"exec.approval.requested",
|
|
115
121
|
"exec.approval.resolved",
|
|
122
|
+
"telemetry.metrics",
|
|
123
|
+
"telemetry.alert",
|
|
116
124
|
];
|
|
@@ -8,7 +8,6 @@ import { configHandlers } from "./server-methods/config.js";
|
|
|
8
8
|
import { connectHandlers } from "./server-methods/connect.js";
|
|
9
9
|
import { cronHandlers } from "./server-methods/cron.js";
|
|
10
10
|
import { deviceHandlers } from "./server-methods/devices.js";
|
|
11
|
-
import { execApprovalsHandlers } from "./server-methods/exec-approvals.js";
|
|
12
11
|
import { healthHandlers } from "./server-methods/health.js";
|
|
13
12
|
import { logsHandlers } from "./server-methods/logs.js";
|
|
14
13
|
import { modelsHandlers } from "./server-methods/models.js";
|
|
@@ -16,7 +15,9 @@ import { nodeHandlers } from "./server-methods/nodes.js";
|
|
|
16
15
|
import { sendHandlers } from "./server-methods/send.js";
|
|
17
16
|
import { sessionsHandlers } from "./server-methods/sessions.js";
|
|
18
17
|
import { skillsHandlers } from "./server-methods/skills.js";
|
|
18
|
+
import { swarmHandlers } from "./server-methods/swarm.js";
|
|
19
19
|
import { systemHandlers } from "./server-methods/system.js";
|
|
20
|
+
import { telemetryHandlers } from "./server-methods/telemetry.js";
|
|
20
21
|
import { talkHandlers } from "./server-methods/talk.js";
|
|
21
22
|
import { ttsHandlers } from "./server-methods/tts.js";
|
|
22
23
|
import { updateHandlers } from "./server-methods/update.js";
|
|
@@ -68,6 +69,8 @@ const READ_METHODS = new Set([
|
|
|
68
69
|
"cron.list",
|
|
69
70
|
"cron.status",
|
|
70
71
|
"cron.runs",
|
|
72
|
+
"swarm.list",
|
|
73
|
+
"swarm.status",
|
|
71
74
|
"system-presence",
|
|
72
75
|
"last-heartbeat",
|
|
73
76
|
"node.list",
|
|
@@ -75,6 +78,9 @@ const READ_METHODS = new Set([
|
|
|
75
78
|
"chat.history",
|
|
76
79
|
"config.get",
|
|
77
80
|
"talk.config",
|
|
81
|
+
"telemetry.status",
|
|
82
|
+
"telemetry.metrics",
|
|
83
|
+
"telemetry.config",
|
|
78
84
|
]);
|
|
79
85
|
const WRITE_METHODS = new Set([
|
|
80
86
|
"send",
|
|
@@ -170,7 +176,7 @@ export const coreGatewayHandlers = {
|
|
|
170
176
|
...chatHandlers,
|
|
171
177
|
...cronHandlers,
|
|
172
178
|
...deviceHandlers,
|
|
173
|
-
...
|
|
179
|
+
...telemetryHandlers,
|
|
174
180
|
...webHandlers,
|
|
175
181
|
...modelsHandlers,
|
|
176
182
|
...configHandlers,
|
|
@@ -187,6 +193,7 @@ export const coreGatewayHandlers = {
|
|
|
187
193
|
...agentHandlers,
|
|
188
194
|
...agentsHandlers,
|
|
189
195
|
...browserHandlers,
|
|
196
|
+
...swarmHandlers,
|
|
190
197
|
};
|
|
191
198
|
export async function handleGatewayRequest(opts) {
|
|
192
199
|
const { req, respond, client, isWebchatConnect, context } = opts;
|
|
@@ -27,6 +27,7 @@ import { createSubsystemLogger, runtimeForLogger } from "../logging/subsystem.js
|
|
|
27
27
|
import { getGlobalHookRunner, runGlobalGatewayStopSafely } from "../plugins/hook-runner-global.js";
|
|
28
28
|
import { createEmptyPluginRegistry } from "../plugins/registry.js";
|
|
29
29
|
import { getTotalQueueSize } from "../process/command-queue.js";
|
|
30
|
+
import { createTelemetryService, setGlobalTelemetryService, telemetryConfigFromOtelConfig, } from "../telemetry/service.js";
|
|
30
31
|
import { runOnboardingWizard } from "../wizard/onboarding.js";
|
|
31
32
|
import { createAuthRateLimiter } from "./auth-rate-limit.js";
|
|
32
33
|
import { startChannelHealthMonitor } from "./channel-health-monitor.js";
|
|
@@ -148,6 +149,18 @@ export async function startGatewayServer(port = 18789, opts = {}) {
|
|
|
148
149
|
if (diagnosticsEnabled) {
|
|
149
150
|
startDiagnosticHeartbeat();
|
|
150
151
|
}
|
|
152
|
+
// Initialize telemetry service if enabled
|
|
153
|
+
let telemetryService = null;
|
|
154
|
+
const otelConfig = cfgAtStart.diagnostics?.otel;
|
|
155
|
+
if (otelConfig?.enabled) {
|
|
156
|
+
const telemetryConfig = telemetryConfigFromOtelConfig(otelConfig, {
|
|
157
|
+
defaultServiceName: "poolbot-gateway",
|
|
158
|
+
});
|
|
159
|
+
telemetryService = createTelemetryService(telemetryConfig);
|
|
160
|
+
setGlobalTelemetryService(telemetryService);
|
|
161
|
+
await telemetryService.start();
|
|
162
|
+
log.info(`telemetry: initialized with ${telemetryConfig.tracing.exporter} exporter`);
|
|
163
|
+
}
|
|
151
164
|
setGatewaySigusr1RestartPolicy({ allowExternal: isRestartEnabled(cfgAtStart) });
|
|
152
165
|
setPreRestartDeferralCheck(() => getTotalQueueSize() + getTotalPendingReplies() + getActiveEmbeddedRunCount());
|
|
153
166
|
initSubagentRegistry();
|
|
@@ -326,23 +339,25 @@ export async function startGatewayServer(port = 18789, opts = {}) {
|
|
|
326
339
|
let tickInterval = noopInterval();
|
|
327
340
|
let healthInterval = noopInterval();
|
|
328
341
|
let dedupeCleanup = noopInterval();
|
|
342
|
+
let telemetryInterval = noopInterval();
|
|
329
343
|
if (!minimalTestGateway) {
|
|
330
|
-
({ tickInterval, healthInterval, dedupeCleanup } =
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
344
|
+
({ tickInterval, healthInterval, dedupeCleanup, telemetryInterval } =
|
|
345
|
+
startGatewayMaintenanceTimers({
|
|
346
|
+
broadcast,
|
|
347
|
+
nodeSendToAllSubscribed,
|
|
348
|
+
getPresenceVersion,
|
|
349
|
+
getHealthVersion,
|
|
350
|
+
refreshGatewayHealthSnapshot,
|
|
351
|
+
logHealth,
|
|
352
|
+
dedupe,
|
|
353
|
+
chatAbortControllers,
|
|
354
|
+
chatRunState,
|
|
355
|
+
chatRunBuffers,
|
|
356
|
+
chatDeltaSentAt,
|
|
357
|
+
removeChatRun,
|
|
358
|
+
agentRunSeq,
|
|
359
|
+
nodeSendToSession,
|
|
360
|
+
}));
|
|
346
361
|
}
|
|
347
362
|
const agentUnsub = minimalTestGateway
|
|
348
363
|
? null
|
|
@@ -564,12 +579,14 @@ export async function startGatewayServer(port = 18789, opts = {}) {
|
|
|
564
579
|
tickInterval,
|
|
565
580
|
healthInterval,
|
|
566
581
|
dedupeCleanup,
|
|
582
|
+
telemetryInterval,
|
|
567
583
|
agentUnsub,
|
|
568
584
|
heartbeatUnsub,
|
|
569
585
|
chatRunState,
|
|
570
586
|
clients,
|
|
571
587
|
configReloader,
|
|
572
588
|
browserControl,
|
|
589
|
+
telemetryService,
|
|
573
590
|
wss,
|
|
574
591
|
httpServer,
|
|
575
592
|
httpServers,
|
|
@@ -33,10 +33,10 @@ export function createAbortRelay(controller) {
|
|
|
33
33
|
*/
|
|
34
34
|
export function linkAbortSignal(source, target) {
|
|
35
35
|
const handler = createAbortRelay(target);
|
|
36
|
-
source.addEventListener(
|
|
36
|
+
source.addEventListener("abort", handler, { once: true });
|
|
37
37
|
// Return cleanup function
|
|
38
38
|
return () => {
|
|
39
|
-
source.removeEventListener(
|
|
39
|
+
source.removeEventListener("abort", handler);
|
|
40
40
|
};
|
|
41
41
|
}
|
|
42
42
|
/**
|
|
@@ -71,8 +71,8 @@ export function raceAbortSignals(signals, controller) {
|
|
|
71
71
|
}
|
|
72
72
|
else if (!alreadyAborted) {
|
|
73
73
|
const handler = createAbortRelay(controller);
|
|
74
|
-
signal.addEventListener(
|
|
75
|
-
handlers.push(() => signal.removeEventListener(
|
|
74
|
+
signal.addEventListener("abort", handler, { once: true });
|
|
75
|
+
handlers.push(() => signal.removeEventListener("abort", handler));
|
|
76
76
|
}
|
|
77
77
|
}
|
|
78
78
|
return () => {
|
package/dist/infra/retry.js
CHANGED
|
@@ -95,7 +95,9 @@ export async function retryWithResult(fn, options = {}) {
|
|
|
95
95
|
const resolved = resolveRetryConfig(DEFAULT_RETRY_CONFIG, options);
|
|
96
96
|
const maxAttempts = resolved.attempts;
|
|
97
97
|
const minDelayMs = resolved.minDelayMs;
|
|
98
|
-
const maxDelayMs = Number.isFinite(resolved.maxDelayMs)
|
|
98
|
+
const maxDelayMs = Number.isFinite(resolved.maxDelayMs)
|
|
99
|
+
? resolved.maxDelayMs
|
|
100
|
+
: Number.POSITIVE_INFINITY;
|
|
99
101
|
const jitter = resolved.jitter;
|
|
100
102
|
const shouldRetry = options.shouldRetry ?? (() => true);
|
|
101
103
|
let lastErr;
|
package/dist/skills/commands.js
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
*
|
|
5
5
|
* @module skills/commands
|
|
6
6
|
*/
|
|
7
|
-
import { SkillError
|
|
7
|
+
import { SkillError } from "./types.js";
|
|
8
8
|
import { getRegistry } from "./registry.js";
|
|
9
9
|
import { getLoader } from "./loader.js";
|
|
10
10
|
import { formatFindings, getSecuritySummary } from "./security.js";
|
|
@@ -15,9 +15,7 @@ import { formatFindings, getSecuritySummary } from "./security.js";
|
|
|
15
15
|
* Register skills commands with CLI
|
|
16
16
|
*/
|
|
17
17
|
export function registerSkillsCommands(program) {
|
|
18
|
-
const mods = program
|
|
19
|
-
.command("mods")
|
|
20
|
-
.description("Manage capability modules (SKILL.md files)");
|
|
18
|
+
const mods = program.command("mods").description("Manage capability modules (SKILL.md files)");
|
|
21
19
|
// List command
|
|
22
20
|
mods
|
|
23
21
|
.command("list")
|
|
@@ -43,24 +41,12 @@ export function registerSkillsCommands(program) {
|
|
|
43
41
|
.option("-c, --category <category>", "Filter by category")
|
|
44
42
|
.action(searchCommand);
|
|
45
43
|
// Enable/disable commands
|
|
46
|
-
mods
|
|
47
|
-
|
|
48
|
-
.description("Enable a skill")
|
|
49
|
-
.action(enableCommand);
|
|
50
|
-
mods
|
|
51
|
-
.command("disable <id>")
|
|
52
|
-
.description("Disable a skill")
|
|
53
|
-
.action(disableCommand);
|
|
44
|
+
mods.command("enable <id>").description("Enable a skill").action(enableCommand);
|
|
45
|
+
mods.command("disable <id>").description("Disable a skill").action(disableCommand);
|
|
54
46
|
// Scan command
|
|
55
|
-
mods
|
|
56
|
-
.command("scan [path]")
|
|
57
|
-
.description("Scan for skills in directory")
|
|
58
|
-
.action(scanCommand);
|
|
47
|
+
mods.command("scan [path]").description("Scan for skills in directory").action(scanCommand);
|
|
59
48
|
// Stats command
|
|
60
|
-
mods
|
|
61
|
-
.command("stats")
|
|
62
|
-
.description("Show skill statistics")
|
|
63
|
-
.action(statsCommand);
|
|
49
|
+
mods.command("stats").description("Show skill statistics").action(statsCommand);
|
|
64
50
|
}
|
|
65
51
|
// ============================================================================
|
|
66
52
|
// Command Implementations
|
|
@@ -333,11 +319,7 @@ function printSkillTable(skills) {
|
|
|
333
319
|
// Rows
|
|
334
320
|
for (const skill of skills) {
|
|
335
321
|
const status = skill.enabled ? "✓" : "○";
|
|
336
|
-
const secIcon = skill.verification === "failed"
|
|
337
|
-
? "⚠"
|
|
338
|
-
: skill.verification === "warning"
|
|
339
|
-
? "!"
|
|
340
|
-
: " ";
|
|
322
|
+
const secIcon = skill.verification === "failed" ? "⚠" : skill.verification === "warning" ? "!" : " ";
|
|
341
323
|
console.log(`${skill.id.slice(0, idWidth).padEnd(idWidth)} ` +
|
|
342
324
|
`${skill.name.slice(0, nameWidth).padEnd(nameWidth)} ` +
|
|
343
325
|
`${skill.category.padEnd(catWidth)} ` +
|