@tloncorp/openclaw 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2,20 +2,24 @@ import { spawn } from 'node:child_process';
2
2
  import { createRequire } from 'node:module';
3
3
  import { dirname } from 'node:path';
4
4
  import { fileURLToPath } from 'node:url';
5
- import { defineChannelPluginEntry } from 'openclaw/plugin-sdk/core';
5
+ import { defineChannelPluginEntry, } from 'openclaw/plugin-sdk/core';
6
+ import { onDiagnosticEvent, onInternalDiagnosticEvent, } from 'openclaw/plugin-sdk/diagnostic-runtime';
6
7
  import { tlonPlugin } from './src/channel.js';
8
+ import { installTlonDiagnosticSubscriptions, shouldInstallTlonDiagnosticSubscriptions, } from './src/diagnostic-subscriptions.js';
7
9
  import { sendGatewayStop } from './src/gateway-status.js';
8
10
  import { createGatewayStatusManager, setGatewayStatusManager, } from './src/gateway-status.js';
9
11
  import { resolveBridgeForCommand } from './src/monitor/command-auth.js';
12
+ import { isRouteDebugEnabled } from './src/monitor/session-routing.js';
10
13
  import { handleOwnerListenCommand } from './src/owner-listen-command.js';
11
14
  import { setTlonRuntime } from './src/runtime.js';
12
15
  import { getSessionRole } from './src/session-roles.js';
13
- import { recordToolCall } from './src/telemetry.js';
16
+ import { parseTlonTarget } from './src/targets.js';
17
+ import { formatTlonTelemetryErrorText, recordToolCall, reportHarnessError, reportOutboundRoute, reportPluginError, reportSessionDiagnostic, reportSessionLifecycle, reportSessionTurnCreated, reportTelemetryError, } from './src/telemetry.js';
14
18
  import { resolveTlonBinary } from './src/tlon-binary.js';
15
19
  import { checkBlockedSendOperation } from './src/tlon-tool-guard.js';
16
20
  import { formatToolTraceEvent, liveToolTraceContentsEnabled, shouldLogAfterToolTrace, } from './src/tool-trace.js';
17
21
  import { listTlonAccountIds, resolveTlonAccount } from './src/types.js';
18
- import { PLUGIN_COMMIT, PLUGIN_VERSION } from './src/version.generated.js';
22
+ import { formatTlonVersionIdentity, resolveTlonSkillVersion, setTlonSkillVersionResolver, } from './src/version.js';
19
23
  export { tlonPlugin } from './src/channel.js';
20
24
  export { setTlonRuntime } from './src/runtime.js';
21
25
  const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -115,7 +119,7 @@ function shellSplit(str) {
115
119
  /**
116
120
  * Run the tlon command and return the result
117
121
  */
118
- function runTlonCommand(binary, args, credentials) {
122
+ function runTlonCommand(binary, args, credentials, options) {
119
123
  return new Promise((resolve, reject) => {
120
124
  const env = { ...process.env };
121
125
  if (credentials) {
@@ -126,6 +130,15 @@ function runTlonCommand(binary, args, credentials) {
126
130
  const child = spawn(binary, args, { env });
127
131
  let stdout = '';
128
132
  let stderr = '';
133
+ let timedOut = false;
134
+ let timeout;
135
+ const timeoutMs = options?.timeoutMs;
136
+ const cleanup = () => {
137
+ if (timeout) {
138
+ clearTimeout(timeout);
139
+ timeout = undefined;
140
+ }
141
+ };
129
142
  child.stdout.on('data', (data) => {
130
143
  stdout += data.toString();
131
144
  });
@@ -133,10 +146,21 @@ function runTlonCommand(binary, args, credentials) {
133
146
  stderr += data.toString();
134
147
  });
135
148
  child.on('error', (err) => {
149
+ cleanup();
136
150
  reject(new Error(`Failed to run tlon: ${err.message}`));
137
151
  });
152
+ if (timeoutMs) {
153
+ timeout = setTimeout(() => {
154
+ timedOut = true;
155
+ child.kill('SIGTERM');
156
+ }, timeoutMs);
157
+ }
138
158
  child.on('close', (code) => {
139
- if (code !== 0) {
159
+ cleanup();
160
+ if (timedOut) {
161
+ reject(new Error(`tlon timed out after ${timeoutMs}ms`));
162
+ }
163
+ else if (code !== 0) {
140
164
  reject(new Error(stderr || `tlon exited with code ${code}`));
141
165
  }
142
166
  else {
@@ -145,6 +169,365 @@ function runTlonCommand(binary, args, credentials) {
145
169
  });
146
170
  });
147
171
  }
172
/**
 * Return the first line of `value` with surrounding whitespace removed.
 *
 * Leading blank lines are dropped by the initial trim, so the result is the
 * first line that actually carries text. Both `\n` and `\r\n` line endings are
 * recognised.
 *
 * @param {unknown} value - Text to inspect. `null`/`undefined` are treated as
 *   empty text and any other non-string is converted with `String()`, so the
 *   helper never throws on unexpected command output.
 * @returns {string} The trimmed first line, or `'unknown'` when no text remains.
 */
function firstLine(value) {
  // `== null` deliberately matches both null and undefined.
  const text = value == null ? '' : String(value);
  // split() always yields at least one element, so no optional chaining needed.
  const [head] = text.trim().split(/\r?\n/);
  return head.trim() || 'unknown';
}
175
/**
 * Condense a thrown value into a short single-line description.
 *
 * Error instances contribute their `message`; anything else is converted with
 * `String()`. Only the first line (as computed by `firstLine`) is kept, and
 * the result is capped at 180 characters.
 *
 * @param {unknown} error - The caught value.
 * @returns {string} At most 180 characters of the first line of the message.
 */
function summarizeError(error) {
  const maxLength = 180;
  let message;
  if (error instanceof Error) {
    message = error.message;
  } else {
    message = String(error);
  }
  const headline = firstLine(message);
  return headline.slice(0, maxLength);
}
179
/**
 * Ask the tlon binary for its version.
 *
 * Runs `<binary> --version` through `runTlonCommand` with no credentials and a
 * `timeoutMs` option of 5000, then returns the first line of the result.
 * Any error raised while running the command or reading its output is caught
 * and returned as the string `unavailable (<summary>)` instead of rejecting.
 *
 * @param {string} binary - Path of the tlon executable to invoke.
 * @returns {Promise<string>} The version line, or an `unavailable (...)` marker.
 */
async function readTlonSkillVersion(binary) {
  const versionArgs = ['--version'];
  const runOptions = { timeoutMs: 5_000 };
  try {
    const output = await runTlonCommand(binary, versionArgs, undefined, runOptions);
    return firstLine(output);
  } catch (failure) {
    const reason = summarizeError(failure);
    return `unavailable (${reason})`;
  }
}
189
/**
 * Tell whether a diagnostic event is one of the session stall/recovery types.
 *
 * @param {{ type?: unknown }} event - Diagnostic event; only `type` is read.
 * @returns {boolean} `true` for `session.stalled`, `session.stuck`,
 *   `session.recovery.requested` and `session.recovery.completed`; `false`
 *   for every other `type`.
 */
function isTlonSessionDiagnosticEvent(event) {
  // `switch` compares with strict equality, the same as a chain of `===`.
  switch (event.type) {
    case 'session.stalled':
    case 'session.stuck':
    case 'session.recovery.requested':
    case 'session.recovery.completed':
      return true;
    default:
      return false;
  }
}
195
/**
 * Read `event[key]` as a non-blank string.
 *
 * @param {object} event - Object to read from.
 * @param {string} key - Property name.
 * @returns {string | null} The value exactly as stored (it is not trimmed)
 *   when it is a string containing at least one non-whitespace character;
 *   otherwise `null`.
 */
function stringField(event, key) {
  const candidate = event[key];
  if (typeof candidate !== 'string') {
    return null;
  }
  if (candidate.trim() === '') {
    return null;
  }
  return candidate;
}
199
/**
 * Read `event[key]` as a finite number.
 *
 * @param {object} event - Object to read from.
 * @param {string} key - Property name.
 * @returns {number | null} The value when it is a number other than `NaN`
 *   or ±`Infinity`; otherwise `null`. Numeric strings are not converted.
 */
function numberField(event, key) {
  const candidate = event[key];
  // Number.isFinite never coerces, so it already rejects non-number values.
  if (Number.isFinite(candidate)) {
    return candidate;
  }
  return null;
}
203
/**
 * Pick the error description carried by a diagnostic event.
 *
 * @param {object} event - Diagnostic event.
 * @returns {string | null} The event's `error` string field when present,
 *   otherwise its `message` string field, otherwise `null` (both lookups go
 *   through `stringField`).
 */
function diagnosticErrorText(event) {
  const explicitError = stringField(event, 'error');
  // `!= null` mirrors `??`: fall through on null or undefined only.
  if (explicitError != null) {
    return explicitError;
  }
  return stringField(event, 'message');
}
206
/**
 * Read `event[key]` as a list of non-empty, trimmed strings.
 *
 * @param {object} event - Object to read from.
 * @param {string} key - Property name.
 * @returns {string[]} A new array holding the trimmed string entries of the
 *   value, in order. Non-string entries and entries that are empty after
 *   trimming are dropped. Returns `[]` when the value is not an array.
 */
function stringListField(event, key) {
  const raw = event[key];
  const cleaned = [];
  if (!Array.isArray(raw)) {
    return cleaned;
  }
  for (const entry of raw) {
    if (typeof entry !== 'string') {
      continue;
    }
    const trimmed = entry.trim();
    if (trimmed !== '') {
      cleaned.push(trimmed);
    }
  }
  return cleaned;
}
215
/**
 * Render `[key, value]` pairs as a space-separated `key=value` string.
 *
 * @param {Array<[string, unknown]>} parts - Pairs to render, in order.
 * @returns {string} The rendered pairs joined by single spaces. Pairs whose
 *   value is `null`, `undefined` or the empty string are left out; other
 *   falsy values such as `0` and `false` are kept. Empty string when nothing
 *   is left.
 */
function diagnosticSummary(parts) {
  const isAbsent = (value) => value === null || value === undefined || value === '';
  const rendered = parts.flatMap(([key, value]) => {
    if (isAbsent(value)) {
      return [];
    }
    return [`${key}=${String(value)}`];
  });
  return rendered.join(' ');
}
221
/**
 * Translate one internal diagnostic event into Tlon telemetry.
 *
 * Behaviour by `event.type`:
 * - no usable string `type`: nothing is reported;
 * - `session.turn.created`: forwarded to `reportSessionTurnCreated` with the
 *   session/run/agent identifiers;
 * - the types listed in the `switch` below: forwarded to `reportHarnessError`
 *   with a shared set of fields read from the event plus per-type overrides.
 *   Several "completed"/"processed" types are only reported when their
 *   `outcome` indicates a problem;
 * - any other type: nothing is reported.
 *
 * @param {object} event - Diagnostic event; fields are read defensively via
 *   `stringField` / `numberField` / `stringListField`.
 * @returns {void}
 */
function reportHarnessDiagnostic(event) {
  const text = (key) => stringField(event, key);
  const num = (key) => numberField(event, key);

  const type = text('type');
  if (!type) {
    return;
  }

  // Identifiers shared by every telemetry payload built here.
  const identity = {
    sessionKey: text('sessionKey'),
    sessionId: text('sessionId'),
    runId: text('runId'),
    agentId: text('agentId'),
  };

  if (type === 'session.turn.created') {
    reportSessionTurnCreated({ type, ...identity });
    return;
  }

  const common = {
    harnessEventType: type,
    ...identity,
    provider: text('provider'),
    model: text('model'),
    phase: text('phase'),
    outcome: text('outcome'),
    errorCategory: text('errorCategory'),
    failureKind: text('failureKind'),
    durationMs: num('durationMs'),
    errorText: diagnosticErrorText(event),
  };

  // Later keys win, so per-type overrides replace the shared defaults.
  const report = (overrides) => {
    reportHarnessError({ ...common, ...overrides });
  };

  switch (type) {
    case 'harness.run.error':
      report({ errorScope: 'harness' });
      return;

    case 'harness.run.completed':
      // A run that completed normally is not an error.
      if (common.outcome !== 'completed') {
        report({ errorScope: 'harness' });
      }
      return;

    case 'model.call.error':
      report({ errorScope: 'model' });
      return;

    case 'model.failover': {
      const reason = text('reason');
      const fromProvider = text('fromProvider');
      const fromModel = text('fromModel');
      const toProvider = text('toProvider');
      const toModel = text('toModel');
      report({
        errorScope: 'model',
        provider: fromProvider,
        model: fromModel,
        phase: text('lane'),
        outcome: 'failover',
        errorCategory: 'model_failover',
        failureKind: reason,
        errorText: diagnosticSummary([
          ['fromProvider', fromProvider],
          ['fromModel', fromModel],
          ['toProvider', toProvider],
          ['toModel', toModel],
          ['reason', reason],
          ['cascadeDepth', num('cascadeDepth')],
        ]),
      });
      return;
    }

    case 'tool.execution.error':
      report({
        errorScope: 'tool',
        toolName: text('toolName'),
      });
      return;

    case 'tool.execution.blocked': {
      const deniedReason = text('deniedReason');
      const reason = text('reason');
      report({
        errorScope: 'tool',
        toolName: text('toolName'),
        phase: text('toolSource'),
        outcome: 'blocked',
        errorCategory: 'tool_blocked',
        failureKind: deniedReason,
        errorText: reason ?? deniedReason,
      });
      return;
    }

    case 'tool.loop': {
      const level = text('level');
      const action = text('action');
      // Only critical loops, or loops that were blocked, are reported.
      const isReportable = level === 'critical' || action === 'block';
      if (!isReportable) {
        return;
      }
      const detector = text('detector');
      report({
        errorScope: 'tool',
        toolName: text('toolName'),
        phase: level,
        outcome: action,
        errorCategory: 'tool_loop',
        failureKind: detector,
        errorText:
          text('message') ??
          diagnosticSummary([
            ['level', level],
            ['action', action],
            ['detector', detector],
            ['count', num('count')],
          ]),
      });
      return;
    }

    case 'run.completed':
      if (common.outcome !== 'completed') {
        report({ errorScope: 'run' });
      }
      return;

    case 'message.delivery.error':
      report({
        errorScope: 'message_delivery',
        phase: text('deliveryKind'),
      });
      return;

    case 'message.dispatch.completed':
      if (common.outcome === 'error') {
        report({
          errorScope: 'message_dispatch',
          phase: text('source'),
        });
      }
      return;

    case 'message.processed':
      if (common.outcome === 'error') {
        report({
          errorScope: 'message_processing',
          phase: text('channel'),
        });
      }
      return;

    case 'diagnostic.async_queue.dropped':
      report({
        errorScope: 'diagnostics',
        outcome: 'dropped',
        errorCategory: 'diagnostic_async_queue_dropped',
        failureKind: 'queue_full',
        errorText: diagnosticSummary([
          ['droppedEvents', num('droppedEvents')],
          ['droppedTrustedEvents', num('droppedTrustedEvents')],
          ['droppedUntrustedEvents', num('droppedUntrustedEvents')],
          ['queueLength', num('queueLength')],
          ['maxQueueLength', num('maxQueueLength')],
        ]),
      });
      return;

    case 'diagnostic.liveness.warning': {
      const joinedReasons = stringListField(event, 'reasons').join(',');
      report({
        errorScope: 'runtime',
        phase: text('phase'),
        outcome: 'warning',
        errorCategory: 'liveness_warning',
        failureKind: joinedReasons || null,
        durationMs: num('intervalMs'),
        errorText: diagnosticSummary([
          ['reasons', joinedReasons],
          ['eventLoopDelayP99Ms', num('eventLoopDelayP99Ms')],
          ['eventLoopDelayMaxMs', num('eventLoopDelayMaxMs')],
          ['cpuCoreRatio', num('cpuCoreRatio')],
          ['active', num('active')],
          ['waiting', num('waiting')],
          ['queued', num('queued')],
        ]),
      });
      return;
    }

    case 'diagnostic.memory.pressure': {
      // Memory figures live on a nested `memory` object that may be absent.
      const memory = event.memory;
      const memoryNum = (key) => {
        const reading = memory?.[key];
        return Number.isFinite(reading) ? reading : null;
      };
      const level = text('level');
      const reason = text('reason');
      report({
        errorScope: 'runtime',
        outcome: level,
        errorCategory: 'memory_pressure',
        failureKind: reason,
        durationMs: num('windowMs'),
        errorText: diagnosticSummary([
          ['level', level],
          ['reason', reason],
          ['rssBytes', memoryNum('rssBytes')],
          ['heapUsedBytes', memoryNum('heapUsedBytes')],
          ['thresholdBytes', num('thresholdBytes')],
          ['rssGrowthBytes', num('rssGrowthBytes')],
        ]),
      });
      return;
    }

    case 'payload.large': {
      // Oversized payloads are only reported when they were rejected.
      if (text('action') !== 'rejected') {
        return;
      }
      const surface = text('surface');
      const reason = text('reason');
      report({
        errorScope: 'payload',
        phase: surface,
        outcome: 'rejected',
        errorCategory: 'payload_large',
        failureKind: reason,
        errorText: diagnosticSummary([
          ['surface', surface],
          ['channel', text('channel')],
          ['pluginId', text('pluginId')],
          ['bytes', num('bytes')],
          ['limitBytes', num('limitBytes')],
          ['count', num('count')],
          ['reason', reason],
        ]),
      });
      return;
    }
  }
}
472
/**
 * Run a telemetry callback without letting its failure escape to the caller.
 *
 * If `params.run()` throws, the failure is logged with `params.logger.warn`
 * and then reported through `reportTelemetryError`. If that report itself
 * throws, a second warning is logged and the error is dropped.
 *
 * Only synchronous throws are caught; a promise returned by `run` is not
 * awaited here.
 *
 * @param {object} params
 * @param {{ warn: (message: string) => void }} params.logger - Warning sink.
 * @param {() => void} params.run - The telemetry work to attempt.
 * @param {string} params.telemetrySource - Label for where the callback runs.
 * @param {string | null} [params.sourceEventName] - Optional event name,
 *   appended to the source label in the log message when truthy.
 * @param {*} [params.sessionKey] - Forwarded unchanged to the error report.
 * @param {*} [params.sessionId] - Forwarded unchanged to the error report.
 * @param {*} [params.runId] - Forwarded unchanged to the error report.
 * @param {*} [params.agentId] - Forwarded unchanged to the error report.
 * @returns {void}
 */
function safeTelemetryObserver(params) {
  try {
    params.run();
    return;
  } catch (error) {
    const { logger, telemetrySource, sourceEventName } = params;
    const eventSuffix = sourceEventName ? `:${sourceEventName}` : '';
    logger.warn(`[tlon] Telemetry observer failed (${telemetrySource}${eventSuffix}): ${String(error)}`);

    let errorKind;
    if (error instanceof Error) {
      errorKind = error.name;
    } else {
      errorKind = typeof error;
    }

    try {
      reportTelemetryError({
        telemetrySource,
        sourceEventName,
        sessionKey: params.sessionKey,
        sessionId: params.sessionId,
        runId: params.runId,
        agentId: params.agentId,
        errorKind,
        errorText: formatTlonTelemetryErrorText(error),
      });
    } catch (reportError) {
      // Reporting is best-effort: log and carry on.
      logger.warn(`[tlon] Telemetry error reporting failed: ${String(reportError)}`);
    }
  }
}
495
/**
 * Subscribe Tlon telemetry to the diagnostic event streams.
 *
 * Hands `installTlonDiagnosticSubscriptions` a setup callback that registers
 * two listeners, each wrapped in `safeTelemetryObserver`:
 * - `onDiagnosticEvent`: session stall/recovery events (as classified by
 *   `isTlonSessionDiagnosticEvent`) go to `reportSessionDiagnostic`;
 * - `onInternalDiagnosticEvent`: every event goes to `reportHarnessDiagnostic`.
 *
 * The setup callback returns a function that unsubscribes both listeners.
 *
 * @param {{ logger: object }} api - Plugin API; `api.logger` is read each time
 *   an event arrives.
 * @returns {*} Whatever `installTlonDiagnosticSubscriptions` returns —
 *   presumably a teardown function; confirm against that helper.
 */
function installTelemetryDiagnosticObservers(api) {
  const handleDiagnosticEvent = (event) => {
    safeTelemetryObserver({
      logger: api.logger,
      telemetrySource: 'diagnostic_session',
      sourceEventName: event.type,
      sessionKey: event.sessionKey,
      sessionId: event.sessionId,
      run() {
        if (!isTlonSessionDiagnosticEvent(event)) {
          return;
        }
        reportSessionDiagnostic(event);
      },
    });
  };

  const handleInternalDiagnosticEvent = (event) => {
    safeTelemetryObserver({
      logger: api.logger,
      telemetrySource: 'diagnostic_internal',
      sourceEventName: stringField(event, 'type'),
      sessionKey: stringField(event, 'sessionKey'),
      sessionId: stringField(event, 'sessionId'),
      runId: stringField(event, 'runId'),
      agentId: stringField(event, 'agentId'),
      run() {
        reportHarnessDiagnostic(event);
      },
    });
  };

  const subscribe = () => {
    const stopDiagnosticEvents = onDiagnosticEvent(handleDiagnosticEvent);
    const stopInternalDiagnosticEvents = onInternalDiagnosticEvent(handleInternalDiagnosticEvent);
    return () => {
      stopDiagnosticEvents();
      stopInternalDiagnosticEvents();
    };
  };

  return installTlonDiagnosticSubscriptions(subscribe);
}
148
531
  export default defineChannelPluginEntry({
149
532
  id: 'tlon',
150
533
  name: 'Tlon',
@@ -178,7 +561,14 @@ export default defineChannelPluginEntry({
178
561
  const gsManager = createGatewayStatusManager({
179
562
  logger: {
180
563
  log: (m) => api.logger.info(m),
181
- error: (m) => api.logger.warn(m),
564
+ error: (m) => {
565
+ reportPluginError({
566
+ pluginErrorSource: 'gateway_status_heartbeat',
567
+ errorKind: 'heartbeat',
568
+ errorText: m,
569
+ });
570
+ api.logger.warn(m);
571
+ },
182
572
  },
183
573
  });
184
574
  setGatewayStatusManager(gsManager);
@@ -221,21 +611,49 @@ export default defineChannelPluginEntry({
221
611
  });
222
612
  }
223
613
  // else: zero accounts configured — nothing to do
614
+ // Resolve the tlon tool binary once. The tool itself and version
615
+ // diagnostics share this path so telemetry reports what OpenClaw will
616
+ // actually execute.
617
+ const tlonBinary = resolveTlonBinary({
618
+ moduleDir: __dirname,
619
+ resolveModule: require.resolve,
620
+ log: (msg) => api.logger.debug?.(msg),
621
+ });
622
+ api.logger.info(`[tlon] Registering tlon tool, binary: ${tlonBinary}`);
623
+ setTlonSkillVersionResolver(() => readTlonSkillVersion(tlonBinary));
624
+ const renderTlonVersion = async () => ({
625
+ text: formatTlonVersionIdentity({
626
+ tlonSkillVersion: await resolveTlonSkillVersion(),
627
+ }),
628
+ });
629
+ void resolveTlonSkillVersion().then((version) => {
630
+ api.logger.info(`[tlon] Tlon skill version: ${version}`);
631
+ });
224
632
  // Register /tlon-version command
225
633
  api.registerCommand({
226
634
  name: 'tlon-version',
227
635
  description: 'Show Tlon plugin version.',
228
636
  handler: async () => {
229
- return { text: `Tlon plugin v${PLUGIN_VERSION} (${PLUGIN_COMMIT})` };
637
+ return renderTlonVersion();
230
638
  },
231
639
  });
232
- // Register the tlon tool
233
- const tlonBinary = resolveTlonBinary({
234
- moduleDir: __dirname,
235
- resolveModule: require.resolve,
236
- log: (msg) => api.logger.debug?.(msg),
640
+ api.registerCommand({
641
+ name: 'tlon',
642
+ description: 'Tlon plugin diagnostics. Usage: /tlon version',
643
+ acceptsArgs: true,
644
+ handler: async (ctx) => {
645
+ const args = (ctx.args ?? '').trim().toLowerCase();
646
+ if (args !== 'version') {
647
+ return { text: 'Usage: /tlon version' };
648
+ }
649
+ const result = resolveBridgeForCommand(ctx);
650
+ if ('error' in result) {
651
+ return { text: result.error };
652
+ }
653
+ return renderTlonVersion();
654
+ },
237
655
  });
238
- api.logger.info(`[tlon] Registering tlon tool, binary: ${tlonBinary}`);
656
+ // Register the tlon tool
239
657
  // Capture credentials from config at registration time
240
658
  const account = resolveTlonAccount(api.config);
241
659
  const credentials = account.configured && account.url && account.ship && account.code
@@ -356,11 +774,135 @@ export default defineChannelPluginEntry({
356
774
  },
357
775
  }));
358
776
  }
359
- recordToolCall({
777
+ safeTelemetryObserver({
778
+ logger: api.logger,
779
+ telemetrySource: 'after_tool_call',
780
+ sourceEventName: event.toolName,
781
+ sessionKey: ctx.sessionKey,
782
+ run: () => {
783
+ recordToolCall({
784
+ sessionKey: ctx.sessionKey,
785
+ toolName: event.toolName,
786
+ durationMs: event.durationMs,
787
+ error: event.error,
788
+ });
789
+ },
790
+ });
791
+ });
792
+ // ── Session lifecycle / watchdog telemetry ─────────────────────────
793
+ // These hooks are global to OpenClaw, so telemetry.ts filters them through
794
+ // session keys remembered from Tlon inbound replies before emitting.
795
+ api.on('session_start', (event, ctx) => {
796
+ safeTelemetryObserver({
797
+ logger: api.logger,
798
+ telemetrySource: 'session_start',
799
+ sourceEventName: 'session_start',
800
+ sessionKey: event.sessionKey ?? ctx.sessionKey,
801
+ sessionId: event.sessionId ?? ctx.sessionId,
802
+ agentId: ctx.agentId,
803
+ run: () => {
804
+ reportSessionLifecycle({
805
+ lifecycleEvent: 'session_start',
806
+ sessionKey: event.sessionKey ?? ctx.sessionKey,
807
+ sessionId: event.sessionId ?? ctx.sessionId,
808
+ agentId: ctx.agentId,
809
+ hasNextSession: false,
810
+ });
811
+ },
812
+ });
813
+ });
814
+ api.on('session_end', (event, ctx) => {
815
+ safeTelemetryObserver({
816
+ logger: api.logger,
817
+ telemetrySource: 'session_end',
818
+ sourceEventName: 'session_end',
819
+ sessionKey: event.sessionKey ?? ctx.sessionKey,
820
+ sessionId: event.sessionId ?? ctx.sessionId,
821
+ agentId: ctx.agentId,
822
+ run: () => {
823
+ reportSessionLifecycle({
824
+ lifecycleEvent: 'session_end',
825
+ sessionKey: event.sessionKey ?? ctx.sessionKey,
826
+ sessionId: event.sessionId ?? ctx.sessionId,
827
+ agentId: ctx.agentId,
828
+ reason: event.reason ?? null,
829
+ messageCount: event.messageCount,
830
+ durationMs: event.durationMs ?? null,
831
+ transcriptArchived: event.transcriptArchived ?? null,
832
+ hasNextSession: Boolean(event.nextSessionId ?? event.nextSessionKey),
833
+ });
834
+ },
835
+ });
836
+ });
837
+ if (shouldInstallTlonDiagnosticSubscriptions(api.registrationMode)) {
838
+ const unsubscribeDiagnosticEvents = installTelemetryDiagnosticObservers(api);
839
+ api.on('gateway_stop', unsubscribeDiagnosticEvents);
840
+ }
841
+ // ── Route diagnostics ───────────────────────────────────────────────
842
+ // Fires for every outbound send OpenClaw routes — the primary streamed
843
+ // reply (resolves to `tlon`) and route-dependent sends (the shared
844
+ // `message` tool, subagents, which can resolve elsewhere). `ctx.channelId`
845
+ // is where the send resolved; `routedToTlon: false` (e.g. `webchat`) is the
846
+ // leak this work targets. Read-only; never alters delivery.
847
+ //
848
+ // Two sinks: a PostHog event (the primary, fleet-wide signal — gated by the
849
+ // existing telemetry config, on in hosted prod) so we can count how often
850
+ // sends land off-Tlon; and a debug-gated local log for single-gateway
851
+ // triage.
852
+ api.on('message_sending', (event, ctx) => {
853
+ safeTelemetryObserver({
854
+ logger: api.logger,
855
+ telemetrySource: 'message_sending',
856
+ sourceEventName: 'message_sending',
360
857
  sessionKey: ctx.sessionKey,
361
- toolName: event.toolName,
362
- durationMs: event.durationMs,
363
- error: event.error,
858
+ runId: ctx.runId,
859
+ run: () => {
860
+ const resolvedChannel = ctx.channelId;
861
+ const routedToTlon = resolvedChannel === 'tlon';
862
+ // Only infer target kind for Tlon targets; a webchat target id is not
863
+ // a Tlon target and must not be misclassified.
864
+ const parsedTarget = routedToTlon ? parseTlonTarget(event.to) : null;
865
+ const targetKind = parsedTarget?.kind === 'dm'
866
+ ? 'dm'
867
+ : parsedTarget?.kind === 'channel'
868
+ ? 'group'
869
+ : 'unknown';
870
+ reportOutboundRoute({ resolvedChannel, routedToTlon, targetKind });
871
+ if (isRouteDebugEnabled()) {
872
+ api.logger.info(`[tlon][route-debug] message_sending ${JSON.stringify({
873
+ channelId: ctx.channelId,
874
+ to: event.to,
875
+ routedToTlon,
876
+ targetKind,
877
+ sessionKey: ctx.sessionKey ?? null,
878
+ conversationId: ctx.conversationId ?? null,
879
+ messageId: ctx.messageId ?? null,
880
+ threadId: event.threadId ?? null,
881
+ })}`);
882
+ }
883
+ },
884
+ });
885
+ });
886
+ api.on('message_sent', (event, ctx) => {
887
+ safeTelemetryObserver({
888
+ logger: api.logger,
889
+ telemetrySource: 'message_sent',
890
+ sourceEventName: 'message_sent',
891
+ sessionKey: event.sessionKey ?? ctx.sessionKey,
892
+ runId: event.runId ?? ctx.runId,
893
+ run: () => {
894
+ if (event.success !== false) {
895
+ return;
896
+ }
897
+ reportHarnessError({
898
+ harnessEventType: 'message_sent',
899
+ errorScope: 'message_delivery',
900
+ sessionKey: event.sessionKey ?? ctx.sessionKey,
901
+ runId: event.runId ?? ctx.runId,
902
+ errorText: event.error ?? null,
903
+ outcome: 'error',
904
+ });
905
+ },
364
906
  });
365
907
  });
366
908
  // ── Slash commands for approval & admin ────────────────────────────