@poolzin/pool-bot 2026.3.7 → 2026.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/CHANGELOG.md +40 -0
  2. package/README.md +147 -69
  3. package/dist/.buildstamp +1 -1
  4. package/dist/agents/error-classifier.js +251 -0
  5. package/dist/agents/skills/security.js +211 -0
  6. package/dist/build-info.json +3 -3
  7. package/dist/cli/cron-cli/register.cron-dashboard.js +339 -0
  8. package/dist/cli/cron-cli/register.js +2 -0
  9. package/dist/cli/errors.js +187 -0
  10. package/dist/cli/lazy-commands.example.js +113 -0
  11. package/dist/cli/lazy-commands.js +329 -0
  12. package/dist/cli/program/command-registry.js +26 -0
  13. package/dist/cli/program/register.maintenance.js +21 -0
  14. package/dist/cli/program/register.skills.js +4 -0
  15. package/dist/cli/program/register.subclis.js +9 -0
  16. package/dist/cli/swarm-cli/register.js +8 -0
  17. package/dist/cli/swarm-cli/register.swarm-status.js +488 -0
  18. package/dist/cli/telemetry-cli/register.js +10 -0
  19. package/dist/cli/telemetry-cli/register.telemetry-alerts.js +176 -0
  20. package/dist/cli/telemetry-cli/register.telemetry-metrics.js +323 -0
  21. package/dist/cli/telemetry-cli/register.telemetry-status.js +179 -0
  22. package/dist/commands/doctor-checks.js +498 -0
  23. package/dist/config/config.js +1 -0
  24. package/dist/config/secrets-integration.js +88 -0
  25. package/dist/context-engine/index.js +33 -0
  26. package/dist/context-engine/legacy.js +179 -0
  27. package/dist/context-engine/registry.js +86 -0
  28. package/dist/context-engine/summarizing.js +290 -0
  29. package/dist/context-engine/types.js +7 -0
  30. package/dist/cron/service/timer.js +18 -0
  31. package/dist/gateway/protocol/index.js +5 -2
  32. package/dist/gateway/protocol/schema/error-codes.js +1 -0
  33. package/dist/gateway/protocol/schema/swarm.js +80 -0
  34. package/dist/gateway/protocol/schema.js +1 -0
  35. package/dist/gateway/server-close.js +4 -0
  36. package/dist/gateway/server-constants.js +1 -0
  37. package/dist/gateway/server-cron.js +29 -0
  38. package/dist/gateway/server-maintenance.js +35 -2
  39. package/dist/gateway/server-methods/swarm.js +58 -0
  40. package/dist/gateway/server-methods/telemetry.js +71 -0
  41. package/dist/gateway/server-methods-list.js +8 -0
  42. package/dist/gateway/server-methods.js +9 -2
  43. package/dist/gateway/server.impl.js +33 -16
  44. package/dist/infra/abort-pattern.js +106 -0
  45. package/dist/infra/retry.js +96 -0
  46. package/dist/secrets/index.js +28 -0
  47. package/dist/secrets/resolver.js +185 -0
  48. package/dist/secrets/runtime.js +142 -0
  49. package/dist/secrets/types.js +11 -0
  50. package/dist/security/dangerous-tools.js +80 -0
  51. package/dist/security/types.js +12 -0
  52. package/dist/skills/commands.js +333 -0
  53. package/dist/skills/index.js +164 -0
  54. package/dist/skills/loader.js +282 -0
  55. package/dist/skills/parser.js +446 -0
  56. package/dist/skills/registry.js +394 -0
  57. package/dist/skills/security.js +312 -0
  58. package/dist/skills/types.js +21 -0
  59. package/dist/swarm/service.js +247 -0
  60. package/dist/telemetry/alert-engine.js +258 -0
  61. package/dist/telemetry/cron-instrumentation.js +49 -0
  62. package/dist/telemetry/gateway-instrumentation.js +80 -0
  63. package/dist/telemetry/instrumentation.js +66 -0
  64. package/dist/telemetry/service.js +345 -0
  65. package/dist/test-utils/index.js +219 -0
  66. package/dist/tui/components/assistant-message.js +6 -2
  67. package/dist/tui/components/hyperlink-markdown.js +32 -0
  68. package/dist/tui/components/searchable-select-list.js +12 -1
  69. package/dist/tui/components/user-message.js +6 -2
  70. package/dist/tui/index.js +611 -0
  71. package/dist/tui/theme/theme-detection.js +226 -0
  72. package/dist/tui/tui-command-handlers.js +20 -0
  73. package/dist/tui/tui-formatters.js +4 -3
  74. package/dist/tui/utils/ctrl-c-handler.js +67 -0
  75. package/dist/tui/utils/osc8-hyperlinks.js +208 -0
  76. package/dist/tui/utils/safe-stop.js +180 -0
  77. package/dist/tui/utils/session-key-utils.js +81 -0
  78. package/dist/tui/utils/text-sanitization.js +284 -0
  79. package/dist/utils/lru-cache.js +116 -0
  80. package/dist/utils/performance.js +199 -0
  81. package/dist/utils/retry.js +240 -0
  82. package/docs/INTEGRATION_PLAN.md +475 -0
  83. package/docs/INTEGRATION_SUMMARY.md +215 -0
  84. package/docs/MELHORIAS_IMPLEMENTADAS.md +228 -0
  85. package/docs/MELHORIAS_PROFISSIONAIS.md +282 -0
  86. package/docs/PLANO_ACAO_TUI.md +357 -0
  87. package/docs/PROGRESSO_TUI.md +66 -0
  88. package/docs/RELATORIO_FINAL.md +217 -0
  89. package/docs/diagnostico-shell-completion.md +265 -0
  90. package/docs/features/advanced-memory.md +585 -0
  91. package/docs/features/discord-components-v2.md +277 -0
  92. package/docs/features/swarm.md +100 -0
  93. package/docs/features/telemetry.md +284 -0
  94. package/docs/integrations/HEXSTRIKE_PLAN.md +796 -0
  95. package/docs/integrations/INTEGRATION_PLAN.md +744 -0
  96. package/docs/integrations/PAGE_AGENT_PLAN.md +370 -0
  97. package/docs/integrations/XYOPS_PLAN.md +978 -0
  98. package/docs/models/provider-infrastructure.md +400 -0
  99. package/docs/security/exec-approvals.md +294 -0
  100. package/docs/skills/IMPLEMENTATION_SUMMARY.md +145 -0
  101. package/docs/skills/SKILL.md +524 -0
  102. package/docs/skills.md +405 -0
  103. package/extensions/bluebubbles/package.json +1 -1
  104. package/extensions/copilot-proxy/package.json +1 -1
  105. package/extensions/diagnostics-otel/package.json +1 -1
  106. package/extensions/discord/package.json +1 -1
  107. package/extensions/feishu/package.json +1 -1
  108. package/extensions/google-antigravity-auth/package.json +1 -1
  109. package/extensions/google-gemini-cli-auth/package.json +1 -1
  110. package/extensions/googlechat/package.json +1 -1
  111. package/extensions/hexstrike-bridge/README.md +119 -0
  112. package/extensions/hexstrike-bridge/index.test.ts +247 -0
  113. package/extensions/hexstrike-bridge/index.ts +487 -0
  114. package/extensions/hexstrike-bridge/package.json +17 -0
  115. package/extensions/imessage/package.json +1 -1
  116. package/extensions/irc/package.json +1 -1
  117. package/extensions/line/package.json +1 -1
  118. package/extensions/llm-task/package.json +1 -1
  119. package/extensions/lobster/package.json +1 -1
  120. package/extensions/matrix/CHANGELOG.md +5 -0
  121. package/extensions/matrix/package.json +1 -1
  122. package/extensions/mattermost/package.json +1 -1
  123. package/extensions/mcp-server/index.ts +14 -0
  124. package/extensions/mcp-server/package.json +11 -0
  125. package/extensions/mcp-server/src/service.ts +540 -0
  126. package/extensions/memory-core/package.json +1 -1
  127. package/extensions/memory-lancedb/package.json +1 -1
  128. package/extensions/minimax-portal-auth/package.json +1 -1
  129. package/extensions/msteams/CHANGELOG.md +5 -0
  130. package/extensions/msteams/package.json +1 -1
  131. package/extensions/nextcloud-talk/package.json +1 -1
  132. package/extensions/nostr/CHANGELOG.md +5 -0
  133. package/extensions/nostr/package.json +1 -1
  134. package/extensions/open-prose/package.json +1 -1
  135. package/extensions/openai-codex-auth/package.json +1 -1
  136. package/extensions/signal/package.json +1 -1
  137. package/extensions/slack/package.json +1 -1
  138. package/extensions/telegram/package.json +1 -1
  139. package/extensions/tlon/package.json +1 -1
  140. package/extensions/twitch/CHANGELOG.md +5 -0
  141. package/extensions/twitch/package.json +1 -1
  142. package/extensions/voice-call/CHANGELOG.md +5 -0
  143. package/extensions/voice-call/package.json +1 -1
  144. package/extensions/whatsapp/package.json +1 -1
  145. package/extensions/zalo/CHANGELOG.md +5 -0
  146. package/extensions/zalo/package.json +1 -1
  147. package/extensions/zalouser/CHANGELOG.md +5 -0
  148. package/extensions/zalouser/package.json +1 -1
  149. package/package.json +8 -1
  150. package/skills/example-skill/SKILL.md +195 -0
@@ -0,0 +1,80 @@
1
+ import { Type } from "@sinclair/typebox";
2
/**
 * Task-distribution strategy names accepted for a swarm.
 */
export const SwarmStrategySchema = Type.Union([
    Type.Literal("round_robin"),
    Type.Literal("least_busy"),
    Type.Literal("capability_match"),
    Type.Literal("priority_queue"),
]);
/**
 * Swarm lifecycle status. Module-private: shared by the state, list-result
 * and create-result schemas so the three definitions cannot drift apart.
 */
const SwarmLifecycleStatusSchema = Type.Union([
    Type.Literal("active"),
    Type.Literal("paused"),
    Type.Literal("shutdown"),
]);
/**
 * One member (agent) of a swarm.
 */
export const SwarmMemberSchema = Type.Object({
    agentId: Type.String(),
    sessionKey: Type.String(),
    status: Type.Union([Type.Literal("idle"), Type.Literal("working"), Type.Literal("offline")]),
    capabilities: Type.Array(Type.String()),
    joinedAt: Type.Number(),
    lastHeartbeatAt: Type.Number(),
    completedTasks: Type.Number(),
    failedTasks: Type.Number(),
    currentTaskId: Type.Optional(Type.String()),
});
/**
 * One task tracked by a swarm.
 */
export const SwarmTaskSchema = Type.Object({
    id: Type.String(),
    description: Type.String(),
    status: Type.Union([
        Type.Literal("pending"),
        Type.Literal("in_progress"),
        Type.Literal("completed"),
        Type.Literal("failed"),
    ]),
    priority: Type.Number(),
    createdAt: Type.Number(),
    startedAt: Type.Optional(Type.Number()),
    completedAt: Type.Optional(Type.Number()),
    assignedTo: Type.Optional(Type.String()),
    requiredCapabilities: Type.Array(Type.String()),
});
/**
 * Full state of a swarm: identity, members, tasks, strategy and status.
 */
export const SwarmStateSchema = Type.Object({
    id: Type.String(),
    name: Type.String(),
    description: Type.Optional(Type.String()),
    createdAt: Type.Number(),
    orchestratorAgentId: Type.String(),
    members: Type.Array(SwarmMemberSchema),
    tasks: Type.Array(SwarmTaskSchema),
    strategy: SwarmStrategySchema,
    status: SwarmLifecycleStatusSchema,
});
/**
 * Params of `swarm.list` (no fields).
 */
export const SwarmListParamsSchema = Type.Object({});
/**
 * Params of `swarm.status`; `swarmId` may be omitted.
 */
export const SwarmStatusParamsSchema = Type.Object({
    swarmId: Type.Optional(Type.String()),
});
/**
 * Params of `swarm.create`.
 *
 * `strategy` is optional: the `swarm.create` handler substitutes
 * "capability_match" when it is absent, which a required field would make
 * unreachable. Requests that already send a strategy validate as before.
 */
export const SwarmCreateParamsSchema = Type.Object({
    name: Type.String(),
    description: Type.Optional(Type.String()),
    strategy: Type.Optional(SwarmStrategySchema),
    orchestrator: Type.Optional(Type.String()),
});
/**
 * Result of `swarm.list`: one summary per swarm, where `members` and `tasks`
 * are numbers rather than the full arrays.
 */
export const SwarmListResultSchema = Type.Object({
    swarms: Type.Array(Type.Object({
        id: Type.String(),
        name: Type.String(),
        status: SwarmLifecycleStatusSchema,
        members: Type.Number(),
        tasks: Type.Number(),
    })),
});
/**
 * Result of `swarm.status`: the swarm state, or null.
 */
export const SwarmStatusResultSchema = Type.Object({
    swarm: Type.Union([SwarmStateSchema, Type.Null()]),
});
/**
 * Result of `swarm.create`.
 */
export const SwarmCreateResultSchema = Type.Object({
    id: Type.String(),
    name: Type.String(),
    description: Type.Optional(Type.String()),
    strategy: SwarmStrategySchema,
    orchestratorAgentId: Type.String(),
    createdAt: Type.Number(),
    status: SwarmLifecycleStatusSchema,
});
@@ -13,5 +13,6 @@ export * from "./schema/push.js";
13
13
  export * from "./schema/protocol-schemas.js";
14
14
  export * from "./schema/sessions.js";
15
15
  export * from "./schema/snapshot.js";
16
+ export * from "./schema/swarm.js";
16
17
  export * from "./schema/types.js";
17
18
  export * from "./schema/wizard.js";
@@ -54,6 +54,7 @@ export function createGatewayCloseHandler(params) {
54
54
  clearInterval(params.tickInterval);
55
55
  clearInterval(params.healthInterval);
56
56
  clearInterval(params.dedupeCleanup);
57
+ clearInterval(params.telemetryInterval);
57
58
  if (params.agentUnsub) {
58
59
  try {
59
60
  params.agentUnsub();
@@ -84,6 +85,9 @@ export function createGatewayCloseHandler(params) {
84
85
  if (params.browserControl) {
85
86
  await params.browserControl.stop().catch(() => { });
86
87
  }
88
+ if (params.telemetryService) {
89
+ await params.telemetryService.shutdown().catch(() => { });
90
+ }
87
91
  await new Promise((resolve) => params.wss.close(() => resolve()));
88
92
  const servers = params.httpServers && params.httpServers.length > 0
89
93
  ? params.httpServers
@@ -29,5 +29,6 @@ export const getHandshakeTimeoutMs = () => {
29
29
  };
30
30
  export const TICK_INTERVAL_MS = 30_000;
31
31
  export const HEALTH_REFRESH_INTERVAL_MS = 60_000;
32
+ export const TELEMETRY_METRICS_INTERVAL_MS = 30_000; // Broadcast telemetry metrics every 30s
32
33
  export const DEDUPE_TTL_MS = 5 * 60_000;
33
34
  export const DEDUPE_MAX = 1000;
@@ -338,6 +338,35 @@ export function buildGatewayCronService(params) {
338
338
  });
339
339
  }
340
340
  },
341
+ onJobMetrics: (metrics) => {
342
+ // Import telemetry service dynamically to avoid circular dependencies
343
+ void import("../telemetry/service.js")
344
+ .then(({ getGlobalTelemetryService }) => {
345
+ const telemetry = getGlobalTelemetryService();
346
+ if (telemetry) {
347
+ telemetry.recordCounter("poolbot.cron.jobs_executed", 1, {
348
+ job_id: metrics.jobId,
349
+ job_name: metrics.jobName,
350
+ schedule: metrics.schedule,
351
+ status: metrics.success ? "success" : "failure",
352
+ });
353
+ telemetry.recordHistogram("poolbot.cron.job_duration_ms", metrics.durationMs, {
354
+ job_id: metrics.jobId,
355
+ job_name: metrics.jobName,
356
+ });
357
+ if (!metrics.success) {
358
+ telemetry.recordCounter("poolbot.cron.job_failures", 1, {
359
+ job_id: metrics.jobId,
360
+ job_name: metrics.jobName,
361
+ error_type: metrics.errorType ?? "unknown",
362
+ });
363
+ }
364
+ }
365
+ })
366
+ .catch(() => {
367
+ // Silently ignore telemetry errors
368
+ });
369
+ },
341
370
  });
342
371
  return { cron, storePath, cronEnabled };
343
372
  }
@@ -1,7 +1,11 @@
1
+ import { createAlertEngine } from "../telemetry/alert-engine.js";
2
+ import { getGlobalTelemetryService } from "../telemetry/service.js";
1
3
  import { abortChatRunById } from "./chat-abort.js";
2
- import { DEDUPE_MAX, DEDUPE_TTL_MS, HEALTH_REFRESH_INTERVAL_MS, TICK_INTERVAL_MS, } from "./server-constants.js";
4
+ import { DEDUPE_MAX, DEDUPE_TTL_MS, HEALTH_REFRESH_INTERVAL_MS, TELEMETRY_METRICS_INTERVAL_MS, TICK_INTERVAL_MS, } from "./server-constants.js";
3
5
  import { formatError } from "./server-utils.js";
4
6
  import { setBroadcastHealthUpdate } from "./server/health-state.js";
7
+ import { createSubsystemLogger } from "../logging/subsystem.js";
8
+ const logTelemetry = createSubsystemLogger("telemetry:broadcast");
5
9
  export function startGatewayMaintenanceTimers(params) {
6
10
  setBroadcastHealthUpdate((snap) => {
7
11
  params.broadcast("health", snap, {
@@ -28,6 +32,35 @@ export function startGatewayMaintenanceTimers(params) {
28
32
  void params
29
33
  .refreshGatewayHealthSnapshot({ probe: true })
30
34
  .catch((err) => params.logHealth.error(`initial refresh failed: ${formatError(err)}`));
35
+ // Setup alert engine
36
+ const telemetry = getGlobalTelemetryService();
37
+ if (telemetry?.isEnabled()) {
38
+ const alertEngine = createAlertEngine();
39
+ telemetry.setAlertEngine(alertEngine);
40
+ logTelemetry.info(`Alert engine initialized with ${alertEngine.getRules().length} rules`);
41
+ }
42
+ // periodic telemetry metrics broadcast
43
+ const telemetryInterval = setInterval(() => {
44
+ const telemetry = getGlobalTelemetryService();
45
+ if (!telemetry?.isEnabled()) {
46
+ return;
47
+ }
48
+ const snapshot = telemetry.getSnapshot();
49
+ if (snapshot) {
50
+ const payload = { metrics: snapshot, ts: Date.now() };
51
+ params.broadcast("telemetry.metrics", payload, { dropIfSlow: true });
52
+ params.nodeSendToAllSubscribed("telemetry.metrics", payload);
53
+ logTelemetry.debug(`broadcasted ${snapshot.metrics.length} metrics, ${snapshot.spans.length} spans`);
54
+ // Evaluate alerts
55
+ const alerts = telemetry.evaluateAlerts(snapshot);
56
+ for (const alert of alerts) {
57
+ const alertPayload = { alert, ts: Date.now() };
58
+ params.broadcast("telemetry.alert", alertPayload, { dropIfSlow: false });
59
+ params.nodeSendToAllSubscribed("telemetry.alert", alertPayload);
60
+ logTelemetry.warn(`Alert triggered: ${alert.name} (${alert.severity}) - ${alert.message}`);
61
+ }
62
+ }
63
+ }, TELEMETRY_METRICS_INTERVAL_MS);
31
64
  // dedupe cache cleanup
32
65
  const dedupeCleanup = setInterval(() => {
33
66
  const AGENT_RUN_SEQ_MAX = 10_000;
@@ -79,5 +112,5 @@ export function startGatewayMaintenanceTimers(params) {
79
112
  params.chatDeltaSentAt.delete(runId);
80
113
  }
81
114
  }, 60_000);
82
- return { tickInterval, healthInterval, dedupeCleanup };
115
+ return { tickInterval, healthInterval, dedupeCleanup, telemetryInterval };
83
116
  }
@@ -0,0 +1,58 @@
1
+ import { ErrorCodes, errorShape, formatValidationErrors, validateSwarmCreateParams, validateSwarmListParams, validateSwarmStatusParams, } from "../protocol/index.js";
2
+ import { swarmService } from "../../swarm/service.js";
3
/**
 * Gateway handlers for the `swarm.list`, `swarm.status` and `swarm.create`
 * methods.
 *
 * Each handler validates `params` with the matching protocol validator and
 * answers through `respond(ok, payload, error)`: INVALID_REQUEST when the
 * params are rejected, INTERNAL_ERROR when the swarm service throws.
 */
export const swarmHandlers = {
    /** List swarms as reported by `swarmService.list()`. */
    "swarm.list": async ({ params, respond }) => {
        if (!validateSwarmListParams(params)) {
            respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, `invalid swarm.list params: ${formatValidationErrors(validateSwarmListParams.errors)}`));
            return;
        }
        try {
            const result = await swarmService.list();
            respond(true, result, undefined);
        }
        catch (err) {
            respond(false, undefined, errorShape(ErrorCodes.INTERNAL_ERROR, `swarm.list failed: ${err}`));
        }
    },
    /**
     * Report the status of one swarm.
     *
     * With no `swarmId` (or the default id) the default swarm is looked up
     * and created on demand. An explicit id that does not exist is answered
     * with the service's own "not found" result instead of silently
     * substituting (and creating) the default swarm.
     */
    "swarm.status": async ({ params, respond }) => {
        if (!validateSwarmStatusParams(params)) {
            respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, `invalid swarm.status params: ${formatValidationErrors(validateSwarmStatusParams.errors)}`));
            return;
        }
        try {
            const defaultSwarmId = "swarm-default";
            const requestedId = params.swarmId;
            const wantsDefault = requestedId === undefined || requestedId === defaultSwarmId;
            let result = await swarmService.getStatus(requestedId ?? defaultSwarmId);
            if (!result.swarm && wantsDefault) {
                const defaultSwarm = await swarmService.getOrCreateDefaultSwarm();
                result = { swarm: defaultSwarm };
            }
            respond(true, result, undefined);
        }
        catch (err) {
            respond(false, undefined, errorShape(ErrorCodes.INTERNAL_ERROR, `swarm.status failed: ${err}`));
        }
    },
    /**
     * Create a swarm. A missing strategy falls back to "capability_match"
     * and a missing orchestrator to "main".
     */
    "swarm.create": async ({ params, respond }) => {
        if (!validateSwarmCreateParams(params)) {
            respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, `invalid swarm.create params: ${formatValidationErrors(validateSwarmCreateParams.errors)}`));
            return;
        }
        try {
            const result = await swarmService.create({
                name: params.name,
                description: params.description,
                strategy: params.strategy ?? "capability_match",
                orchestratorAgentId: params.orchestrator ?? "main",
            });
            respond(true, result, undefined);
        }
        catch (err) {
            respond(false, undefined, errorShape(ErrorCodes.INTERNAL_ERROR, `swarm.create failed: ${err}`));
        }
    },
};
@@ -0,0 +1,71 @@
1
+ import { getGlobalTelemetryService } from "../../telemetry/service.js";
2
+ import { ErrorCodes, errorShape } from "../protocol/index.js";
3
/**
 * Answer a request with the UNAVAILABLE error used by every handler below
 * when no global telemetry service is registered.
 */
function respondTelemetryUnavailable(respond) {
    respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, "Telemetry service not initialized"));
}
/**
 * Gateway handlers for the `telemetry.*` methods. All of them read the
 * service returned by `getGlobalTelemetryService()` and answer through
 * `respond(ok, payload, error)`.
 */
export const telemetryHandlers = {
    /**
     * Report configuration plus a metrics snapshot. Unlike the other
     * handlers, a missing service is a successful
     * `{ enabled: false, initialized: false }` answer.
     */
    "telemetry.status": ({ respond }) => {
        const telemetry = getGlobalTelemetryService();
        if (!telemetry) {
            respond(true, { enabled: false, initialized: false }, undefined);
            return;
        }
        const config = telemetry.getConfig();
        const snapshot = telemetry.getSnapshot();
        respond(true, {
            enabled: config.enabled,
            initialized: true,
            serviceName: config.serviceName,
            exporter: config.tracing.exporter,
            endpoint: config.tracing.otlpEndpoint,
            tracing: config.tracing,
            metrics: config.metrics,
            sampleRate: config.tracing.sampleRate,
            metricsSnapshot: snapshot,
        }, undefined);
    },
    /** Return the current snapshot as `{ metrics }`. */
    "telemetry.metrics": ({ respond }) => {
        const telemetry = getGlobalTelemetryService();
        if (!telemetry) {
            respondTelemetryUnavailable(respond);
            return;
        }
        const snapshot = telemetry.getSnapshot();
        respond(true, { metrics: snapshot }, undefined);
    },
    /**
     * Return the current configuration.
     *
     * Runtime updates (e.g. `params.sampleRate`) are not applied: doing so
     * would require restarting the service with a new config. Request params
     * are therefore ignored, so a request without `params` no longer throws.
     */
    "telemetry.config": ({ respond }) => {
        const telemetry = getGlobalTelemetryService();
        if (!telemetry) {
            respondTelemetryUnavailable(respond);
            return;
        }
        const config = telemetry.getConfig();
        respond(true, {
            enabled: config.enabled,
            serviceName: config.serviceName,
            tracing: config.tracing,
            metrics: config.metrics,
        }, undefined);
    },
    /** Evaluate alerts and return them as `{ alerts }`. */
    "telemetry.alerts.list": ({ respond }) => {
        const telemetry = getGlobalTelemetryService();
        if (!telemetry) {
            respondTelemetryUnavailable(respond);
            return;
        }
        // NOTE(review): the periodic metrics broadcast passes a snapshot to
        // evaluateAlerts(); confirm the zero-argument form is supported.
        const alerts = telemetry.evaluateAlerts();
        respond(true, { alerts }, undefined);
    },
    /** Force alert evaluation and report whether anything triggered. */
    "telemetry.alerts.test": ({ respond }) => {
        const telemetry = getGlobalTelemetryService();
        if (!telemetry) {
            respondTelemetryUnavailable(respond);
            return;
        }
        const alerts = telemetry.evaluateAlerts();
        respond(true, { triggered: alerts.length > 0, alerts }, undefined);
    },
};
@@ -89,6 +89,12 @@ const BASE_METHODS = [
89
89
  "chat.history",
90
90
  "chat.abort",
91
91
  "chat.send",
92
+ // Telemetry methods
93
+ "telemetry.status",
94
+ "telemetry.metrics",
95
+ "telemetry.config",
96
+ "telemetry.alerts.list",
97
+ "telemetry.alerts.test",
92
98
  ];
93
99
  export function listGatewayMethods() {
94
100
  const channelMethods = listChannelPlugins().flatMap((plugin) => plugin.gatewayMethods ?? []);
@@ -113,4 +119,6 @@ export const GATEWAY_EVENTS = [
113
119
  "voicewake.changed",
114
120
  "exec.approval.requested",
115
121
  "exec.approval.resolved",
122
+ "telemetry.metrics",
123
+ "telemetry.alert",
116
124
  ];
@@ -8,7 +8,6 @@ import { configHandlers } from "./server-methods/config.js";
8
8
  import { connectHandlers } from "./server-methods/connect.js";
9
9
  import { cronHandlers } from "./server-methods/cron.js";
10
10
  import { deviceHandlers } from "./server-methods/devices.js";
11
- import { execApprovalsHandlers } from "./server-methods/exec-approvals.js";
12
11
  import { healthHandlers } from "./server-methods/health.js";
13
12
  import { logsHandlers } from "./server-methods/logs.js";
14
13
  import { modelsHandlers } from "./server-methods/models.js";
@@ -16,7 +15,9 @@ import { nodeHandlers } from "./server-methods/nodes.js";
16
15
  import { sendHandlers } from "./server-methods/send.js";
17
16
  import { sessionsHandlers } from "./server-methods/sessions.js";
18
17
  import { skillsHandlers } from "./server-methods/skills.js";
18
+ import { swarmHandlers } from "./server-methods/swarm.js";
19
19
  import { systemHandlers } from "./server-methods/system.js";
20
+ import { telemetryHandlers } from "./server-methods/telemetry.js";
20
21
  import { talkHandlers } from "./server-methods/talk.js";
21
22
  import { ttsHandlers } from "./server-methods/tts.js";
22
23
  import { updateHandlers } from "./server-methods/update.js";
@@ -68,6 +69,8 @@ const READ_METHODS = new Set([
68
69
  "cron.list",
69
70
  "cron.status",
70
71
  "cron.runs",
72
+ "swarm.list",
73
+ "swarm.status",
71
74
  "system-presence",
72
75
  "last-heartbeat",
73
76
  "node.list",
@@ -75,6 +78,9 @@ const READ_METHODS = new Set([
75
78
  "chat.history",
76
79
  "config.get",
77
80
  "talk.config",
81
+ "telemetry.status",
82
+ "telemetry.metrics",
83
+ "telemetry.config",
78
84
  ]);
79
85
  const WRITE_METHODS = new Set([
80
86
  "send",
@@ -170,7 +176,7 @@ export const coreGatewayHandlers = {
170
176
  ...chatHandlers,
171
177
  ...cronHandlers,
172
178
  ...deviceHandlers,
173
- ...execApprovalsHandlers,
179
+ ...telemetryHandlers,
174
180
  ...webHandlers,
175
181
  ...modelsHandlers,
176
182
  ...configHandlers,
@@ -187,6 +193,7 @@ export const coreGatewayHandlers = {
187
193
  ...agentHandlers,
188
194
  ...agentsHandlers,
189
195
  ...browserHandlers,
196
+ ...swarmHandlers,
190
197
  };
191
198
  export async function handleGatewayRequest(opts) {
192
199
  const { req, respond, client, isWebchatConnect, context } = opts;
@@ -27,6 +27,7 @@ import { createSubsystemLogger, runtimeForLogger } from "../logging/subsystem.js
27
27
  import { getGlobalHookRunner, runGlobalGatewayStopSafely } from "../plugins/hook-runner-global.js";
28
28
  import { createEmptyPluginRegistry } from "../plugins/registry.js";
29
29
  import { getTotalQueueSize } from "../process/command-queue.js";
30
+ import { createTelemetryService, setGlobalTelemetryService, telemetryConfigFromOtelConfig, } from "../telemetry/service.js";
30
31
  import { runOnboardingWizard } from "../wizard/onboarding.js";
31
32
  import { createAuthRateLimiter } from "./auth-rate-limit.js";
32
33
  import { startChannelHealthMonitor } from "./channel-health-monitor.js";
@@ -148,6 +149,18 @@ export async function startGatewayServer(port = 18789, opts = {}) {
148
149
  if (diagnosticsEnabled) {
149
150
  startDiagnosticHeartbeat();
150
151
  }
152
+ // Initialize telemetry service if enabled
153
+ let telemetryService = null;
154
+ const otelConfig = cfgAtStart.diagnostics?.otel;
155
+ if (otelConfig?.enabled) {
156
+ const telemetryConfig = telemetryConfigFromOtelConfig(otelConfig, {
157
+ defaultServiceName: "poolbot-gateway",
158
+ });
159
+ telemetryService = createTelemetryService(telemetryConfig);
160
+ setGlobalTelemetryService(telemetryService);
161
+ await telemetryService.start();
162
+ log.info(`telemetry: initialized with ${telemetryConfig.tracing.exporter} exporter`);
163
+ }
151
164
  setGatewaySigusr1RestartPolicy({ allowExternal: isRestartEnabled(cfgAtStart) });
152
165
  setPreRestartDeferralCheck(() => getTotalQueueSize() + getTotalPendingReplies() + getActiveEmbeddedRunCount());
153
166
  initSubagentRegistry();
@@ -326,23 +339,25 @@ export async function startGatewayServer(port = 18789, opts = {}) {
326
339
  let tickInterval = noopInterval();
327
340
  let healthInterval = noopInterval();
328
341
  let dedupeCleanup = noopInterval();
342
+ let telemetryInterval = noopInterval();
329
343
  if (!minimalTestGateway) {
330
- ({ tickInterval, healthInterval, dedupeCleanup } = startGatewayMaintenanceTimers({
331
- broadcast,
332
- nodeSendToAllSubscribed,
333
- getPresenceVersion,
334
- getHealthVersion,
335
- refreshGatewayHealthSnapshot,
336
- logHealth,
337
- dedupe,
338
- chatAbortControllers,
339
- chatRunState,
340
- chatRunBuffers,
341
- chatDeltaSentAt,
342
- removeChatRun,
343
- agentRunSeq,
344
- nodeSendToSession,
345
- }));
344
+ ({ tickInterval, healthInterval, dedupeCleanup, telemetryInterval } =
345
+ startGatewayMaintenanceTimers({
346
+ broadcast,
347
+ nodeSendToAllSubscribed,
348
+ getPresenceVersion,
349
+ getHealthVersion,
350
+ refreshGatewayHealthSnapshot,
351
+ logHealth,
352
+ dedupe,
353
+ chatAbortControllers,
354
+ chatRunState,
355
+ chatRunBuffers,
356
+ chatDeltaSentAt,
357
+ removeChatRun,
358
+ agentRunSeq,
359
+ nodeSendToSession,
360
+ }));
346
361
  }
347
362
  const agentUnsub = minimalTestGateway
348
363
  ? null
@@ -564,12 +579,14 @@ export async function startGatewayServer(port = 18789, opts = {}) {
564
579
  tickInterval,
565
580
  healthInterval,
566
581
  dedupeCleanup,
582
+ telemetryInterval,
567
583
  agentUnsub,
568
584
  heartbeatUnsub,
569
585
  chatRunState,
570
586
  clients,
571
587
  configReloader,
572
588
  browserControl,
589
+ telemetryService,
573
590
  wss,
574
591
  httpServer,
575
592
  httpServers,
@@ -0,0 +1,106 @@
1
+ /**
2
+ * Abort Pattern Utilities
3
+ *
4
+ * Provides memory-leak-free abort signal handling.
5
+ *
6
+ * CRITICAL FIX: Uses `.bind()` instead of closures to prevent memory leaks.
7
+ * Issue #7174: Closure-based abort handlers capture scope and leak memory.
8
+ *
9
+ * @example
10
+ * ```typescript
11
+ * // BAD: Captures closure scope (leaks memory)
12
+ * signal.addEventListener('abort', () => controller.abort());
13
+ *
14
+ * // GOOD: No closure capture
15
+ * signal.addEventListener('abort', relayAbort.bind(controller));
16
+ * ```
17
+ */
18
/**
 * Abort the controller supplied as `this`.
 *
 * Meant to be attached via `relayAbort.bind(controller)` so the listener is
 * a bound function rather than an arrow function closing over its scope.
 */
export function relayAbort() {
    const boundController = this;
    boundController.abort();
}
25
/**
 * Build an "abort" listener for `controller` by binding `relayAbort` to it,
 * so no closure scope is captured.
 */
export function createAbortRelay(controller) {
    const relay = relayAbort.bind(controller);
    return relay;
}
31
/**
 * Abort `target` when `source` aborts.
 *
 * @param source AbortSignal to observe.
 * @param target AbortController to abort.
 * @returns Cleanup function that detaches the listener (a no-op when
 *   `source` was already aborted and nothing was attached).
 */
export function linkAbortSignal(source, target) {
    // An already-aborted signal never dispatches "abort" again, so a listener
    // would wait forever; relay the abort immediately instead.
    if (source.aborted) {
        target.abort();
        return () => { };
    }
    const handler = createAbortRelay(target);
    source.addEventListener("abort", handler, { once: true });
    // Return cleanup function
    return () => {
        source.removeEventListener("abort", handler);
    };
}
42
/**
 * Create an AbortController that aborts itself after `timeoutMs`.
 *
 * @param timeoutMs Delay in milliseconds; no timer is armed unless it is > 0.
 * @returns `{ controller, cleanup }`, where `cleanup` cancels the pending
 *   timer (or does nothing when none was armed).
 */
export function createTimeoutAbortController(timeoutMs) {
    const controller = new AbortController();
    let cleanup = () => { };
    if (timeoutMs > 0) {
        const timer = setTimeout(() => {
            controller.abort();
        }, timeoutMs);
        cleanup = () => clearTimeout(timer);
    }
    return { controller, cleanup };
}
61
/**
 * Abort `controller` as soon as any of `signals` aborts.
 *
 * @param signals Iterable of AbortSignals to observe.
 * @param controller AbortController to abort.
 * @returns Cleanup function that detaches every listener this call attached.
 */
export function raceAbortSignals(signals, controller) {
    const sources = Array.from(signals);
    // If any signal has already aborted the race is decided. Abort once and
    // attach nothing, so no listeners are left on signals that came earlier
    // in the list.
    if (sources.some((signal) => signal.aborted)) {
        controller.abort();
        return () => { };
    }
    const detachers = sources.map((signal) => {
        const handler = createAbortRelay(controller);
        signal.addEventListener("abort", handler, { once: true });
        return () => signal.removeEventListener("abort", handler);
    });
    return () => {
        for (const detach of detachers) {
            detach();
        }
    };
}
84
/**
 * `fetch` wrapper that aborts the request after `options.timeoutMs`
 * (default 30000) and also follows a caller-supplied `options.signal`.
 *
 * The request runs on an internal controller's signal. The caller's signal,
 * when present, is linked to that controller. Both the link and the timer
 * are released once `fetch` settles.
 *
 * @param url Request target, passed to `fetch` unchanged.
 * @param options `fetch` options plus the optional `timeoutMs`.
 * @returns The Response that `fetch` resolves with.
 */
export async function fetchWithTimeout(url, options = {}) {
    const { timeoutMs = 30000, signal: callerSignal, ...init } = options;
    const { controller, cleanup } = createTimeoutAbortController(timeoutMs);
    const unlink = callerSignal ? linkAbortSignal(callerSignal, controller) : undefined;
    try {
        return await fetch(url, { ...init, signal: controller.signal });
    }
    finally {
        if (unlink) {
            unlink();
        }
        cleanup();
    }
}