openclaw-topic-shift-reset 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { randomUUID } from "node:crypto";
2
2
  import fs from "node:fs/promises";
3
3
  import path from "node:path";
4
- import type { OpenClawPluginApi, PluginHookMessageReceivedEvent } from "openclaw/plugin-sdk";
4
+ import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
5
5
  import {
6
6
  readJsonFileWithFallback,
7
7
  withFileLock,
@@ -11,7 +11,6 @@ import {
11
11
  type PresetName = "conservative" | "balanced" | "aggressive";
12
12
  type EmbeddingProvider = "auto" | "none" | "openai" | "ollama";
13
13
  type HandoffMode = "none" | "summary" | "verbatim_last_n";
14
- type HandoffPreference = "none" | "summary" | "verbatim";
15
14
 
16
15
  type EmbeddingConfig = {
17
16
  provider?: EmbeddingProvider;
@@ -21,6 +20,18 @@ type EmbeddingConfig = {
21
20
  timeoutMs?: number;
22
21
  };
23
22
 
23
+ type HandoffConfig = {
24
+ mode?: HandoffMode;
25
+ lastN?: number;
26
+ maxChars?: number;
27
+ };
28
+
29
+ type StripRulesConfig = {
30
+ dropLinePrefixPatterns?: string[];
31
+ dropExactLines?: string[];
32
+ dropFencedBlockAfterHeaderPatterns?: string[];
33
+ };
34
+
24
35
  type TopicShiftResetAdvancedConfig = {
25
36
  historyWindow?: number;
26
37
  minHistoryMessages?: number;
@@ -29,7 +40,12 @@ type TopicShiftResetAdvancedConfig = {
29
40
  minSignalChars?: number;
30
41
  minSignalTokenCount?: number;
31
42
  minSignalEntropy?: number;
43
+ minUniqueTokenRatio?: number;
44
+ shortMessageTokenLimit?: number;
45
+ embeddingTriggerMargin?: number;
32
46
  stripEnvelope?: boolean;
47
+ stripRules?: StripRulesConfig;
48
+ handoffTailReadMaxBytes?: number;
33
49
  softConsecutiveSignals?: number;
34
50
  cooldownMinutes?: number;
35
51
  ignoredProviders?: string[];
@@ -39,18 +55,13 @@ type TopicShiftResetAdvancedConfig = {
39
55
  hardSimilarityThreshold?: number;
40
56
  softNoveltyThreshold?: number;
41
57
  hardNoveltyThreshold?: number;
42
- handoff?: HandoffPreference | HandoffMode;
43
- handoffLastN?: number;
44
- handoffMaxChars?: number;
45
- embeddings?: EmbeddingProvider;
46
- embedding?: EmbeddingConfig;
47
58
  };
48
59
 
49
60
  type TopicShiftResetConfig = {
50
61
  enabled?: boolean;
51
62
  preset?: PresetName;
52
- embeddings?: EmbeddingProvider;
53
- handoff?: HandoffPreference;
63
+ embedding?: EmbeddingConfig;
64
+ handoff?: HandoffConfig;
54
65
  dryRun?: boolean;
55
66
  debug?: boolean;
56
67
  advanced?: TopicShiftResetAdvancedConfig;
@@ -65,7 +76,16 @@ type ResolvedConfig = {
65
76
  minSignalChars: number;
66
77
  minSignalTokenCount: number;
67
78
  minSignalEntropy: number;
79
+ minUniqueTokenRatio: number;
80
+ shortMessageTokenLimit: number;
81
+ embeddingTriggerMargin: number;
68
82
  stripEnvelope: boolean;
83
+ stripRules: {
84
+ dropLinePrefixPatterns: RegExp[];
85
+ dropExactLines: Set<string>;
86
+ dropFencedBlockAfterHeaderPatterns: RegExp[];
87
+ };
88
+ handoffTailReadMaxBytes: number;
69
89
  softConsecutiveSignals: number;
70
90
  cooldownMinutes: number;
71
91
  ignoredProviders: Set<string>;
@@ -75,9 +95,11 @@ type ResolvedConfig = {
75
95
  hardSimilarityThreshold: number;
76
96
  softNoveltyThreshold: number;
77
97
  hardNoveltyThreshold: number;
78
- handoffMode: HandoffMode;
79
- handoffLastN: number;
80
- handoffMaxChars: number;
98
+ handoff: {
99
+ mode: HandoffMode;
100
+ lastN: number;
101
+ maxChars: number;
102
+ };
81
103
  embedding: {
82
104
  provider: EmbeddingProvider;
83
105
  model?: string;
@@ -113,6 +135,9 @@ type SessionState = {
113
135
  pendingSoftSignals: number;
114
136
  pendingEntries: HistoryEntry[];
115
137
  lastResetAt?: number;
138
+ topicCentroid?: number[];
139
+ topicCount: number;
140
+ topicDim?: number;
116
141
  lastSeenAt: number;
117
142
  };
118
143
 
@@ -120,11 +145,21 @@ type ClassifierMetrics = {
120
145
  score: number;
121
146
  novelty: number;
122
147
  lexicalDistance: number;
148
+ uniqueTokenRatio: number;
149
+ entropy: number;
123
150
  similarity?: number;
124
151
  usedEmbedding: boolean;
125
152
  pendingSoftSignals: number;
126
153
  };
127
154
 
155
+ type LexicalFeatures = {
156
+ score: number;
157
+ novelty: number;
158
+ lexicalDistance: number;
159
+ uniqueTokenRatio: number;
160
+ entropy: number;
161
+ };
162
+
128
163
  type ClassificationDecision =
129
164
  | { kind: "warmup" | "stable" | "suspect"; metrics: ClassifierMetrics; reason: string }
130
165
  | { kind: "rotate-hard" | "rotate-soft"; metrics: ClassifierMetrics; reason: string };
@@ -134,9 +169,9 @@ type EmbeddingBackend = {
134
169
  embed: (text: string) => Promise<number[] | null>;
135
170
  };
136
171
 
137
- type ResolvedFastSession = {
138
- sessionKey: string;
139
- routeKind: "direct" | "group" | "thread";
172
+ type FastMessageEventLike = {
173
+ from?: string;
174
+ metadata?: Record<string, unknown>;
140
175
  };
141
176
 
142
177
  type TranscriptMessage = {
@@ -207,7 +242,7 @@ const PRESETS = {
207
242
  const DEFAULTS = {
208
243
  enabled: true,
209
244
  preset: "balanced" as PresetName,
210
- handoff: "summary" as HandoffPreference,
245
+ handoffMode: "summary" as HandoffMode,
211
246
  handoffLastN: 6,
212
247
  handoffMaxChars: 220,
213
248
  embeddingProvider: "auto" as EmbeddingProvider,
@@ -215,7 +250,18 @@ const DEFAULTS = {
215
250
  minSignalChars: 20,
216
251
  minSignalTokenCount: 3,
217
252
  minSignalEntropy: 1.2,
253
+ minUniqueTokenRatio: 0.34,
254
+ shortMessageTokenLimit: 6,
255
+ embeddingTriggerMargin: 0.08,
218
256
  stripEnvelope: true,
257
+ stripDropLinePrefixPatterns: [
258
+ "^[A-Za-z][A-Za-z _-]{0,30}:\\s*\\[",
259
+ ],
260
+ stripDropExactLines: [] as string[],
261
+ stripDropFencedBlockAfterHeaderPatterns: [
262
+ "^[A-Za-z][A-Za-z _-]{0,40}:\\s*\\([^)]*(metadata|context)[^)]*\\):?$",
263
+ ],
264
+ handoffTailReadMaxBytes: 512 * 1024,
219
265
  dryRun: false,
220
266
  debug: false,
221
267
  } as const;
@@ -264,15 +310,6 @@ function clampFloat(value: unknown, fallback: number, min: number, max: number):
264
310
  return value;
265
311
  }
266
312
 
267
- function pickDefined<T>(...values: Array<T | undefined>): T | undefined {
268
- for (const value of values) {
269
- if (value !== undefined) {
270
- return value;
271
- }
272
- }
273
- return undefined;
274
- }
275
-
276
313
  function normalizePreset(value: unknown): PresetName {
277
314
  if (typeof value !== "string") {
278
315
  return DEFAULTS.preset;
@@ -300,18 +337,94 @@ function normalizeEmbeddingProvider(value: unknown): EmbeddingProvider {
300
337
  return DEFAULTS.embeddingProvider;
301
338
  }
302
339
 
303
- function normalizeHandoffPreference(value: unknown): HandoffPreference {
340
+ function normalizeHandoffMode(value: unknown): HandoffMode {
304
341
  if (typeof value !== "string") {
305
- return DEFAULTS.handoff;
342
+ return DEFAULTS.handoffMode;
306
343
  }
307
344
  const normalized = value.trim().toLowerCase();
308
- if (normalized === "none" || normalized === "summary") {
309
- return normalized;
345
+ if (normalized === "none" || normalized === "summary" || normalized === "verbatim_last_n") {
346
+ return normalized as HandoffMode;
310
347
  }
311
- if (normalized === "verbatim" || normalized === "verbatim_last_n") {
312
- return "verbatim";
348
+ return DEFAULTS.handoffMode;
349
+ }
350
+
351
+ function compileRegexList(values: unknown, fallback: readonly string[]): RegExp[] {
352
+ const source = Array.isArray(values) ? values : fallback;
353
+ const out: RegExp[] = [];
354
+ for (const item of source) {
355
+ if (typeof item !== "string") {
356
+ continue;
357
+ }
358
+ const pattern = item.trim();
359
+ if (!pattern) {
360
+ continue;
361
+ }
362
+ try {
363
+ out.push(new RegExp(pattern));
364
+ } catch {
365
+ continue;
366
+ }
313
367
  }
314
- return DEFAULTS.handoff;
368
+ return out;
369
+ }
370
+
371
+ function normalizeStringList(values: unknown, fallback: readonly string[]): string[] {
372
+ const source = Array.isArray(values) ? values : fallback;
373
+ return source
374
+ .filter((item): item is string => typeof item === "string")
375
+ .map((item) => item.trim())
376
+ .filter(Boolean);
377
+ }
378
+
379
+ function normalizeProviderId(raw: string): string {
380
+ const provider = raw.trim().toLowerCase();
381
+ if (!provider) {
382
+ return "";
383
+ }
384
+ if (provider === "telegram" || provider.includes("telegram")) {
385
+ return "telegram";
386
+ }
387
+ if (
388
+ provider === "whatsapp" ||
389
+ provider.includes("whatsapp") ||
390
+ provider.includes("baileys")
391
+ ) {
392
+ return "whatsapp";
393
+ }
394
+ if (provider === "signal" || provider.includes("signal")) {
395
+ return "signal";
396
+ }
397
+ if (provider === "discord" || provider.includes("discord")) {
398
+ return "discord";
399
+ }
400
+ if (provider === "slack" || provider.includes("slack")) {
401
+ return "slack";
402
+ }
403
+ if (provider === "matrix" || provider.includes("matrix")) {
404
+ return "matrix";
405
+ }
406
+ if (provider === "msteams" || provider.includes("teams")) {
407
+ return "msteams";
408
+ }
409
+ if (provider === "imessage" || provider.includes("imessage") || provider.includes("bluebubbles")) {
410
+ return "imessage";
411
+ }
412
+ if (provider === "web" || provider.includes("webchat")) {
413
+ return "web";
414
+ }
415
+ if (provider === "voice" || provider.includes("voice")) {
416
+ return "voice";
417
+ }
418
+ if (provider.includes("openai")) {
419
+ return "openai";
420
+ }
421
+ if (provider.includes("anthropic")) {
422
+ return "anthropic";
423
+ }
424
+ if (provider.includes("ollama")) {
425
+ return "ollama";
426
+ }
427
+ return provider;
315
428
  }
316
429
 
317
430
  function resolveConfig(raw: unknown): ResolvedConfig {
@@ -320,9 +433,13 @@ function resolveConfig(raw: unknown): ResolvedConfig {
320
433
  obj.advanced && typeof obj.advanced === "object"
321
434
  ? (obj.advanced as TopicShiftResetAdvancedConfig)
322
435
  : {};
323
- const advancedEmbedding =
324
- advanced.embedding && typeof advanced.embedding === "object"
325
- ? (advanced.embedding as EmbeddingConfig)
436
+ const embedding =
437
+ obj.embedding && typeof obj.embedding === "object" ? (obj.embedding as EmbeddingConfig) : {};
438
+ const handoff =
439
+ obj.handoff && typeof obj.handoff === "object" ? (obj.handoff as HandoffConfig) : {};
440
+ const stripRules =
441
+ advanced.stripRules && typeof advanced.stripRules === "object"
442
+ ? (advanced.stripRules as StripRulesConfig)
326
443
  : {};
327
444
 
328
445
  const preset = normalizePreset(obj.preset);
@@ -331,15 +448,13 @@ function resolveConfig(raw: unknown): ResolvedConfig {
331
448
  const ignoredProviders = new Set(
332
449
  Array.isArray(advanced.ignoredProviders)
333
450
  ? advanced.ignoredProviders
334
- .map((value) => (typeof value === "string" ? value.trim().toLowerCase() : ""))
451
+ .map((value) =>
452
+ typeof value === "string" ? normalizeProviderId(value.trim().toLowerCase()) : "",
453
+ )
335
454
  .filter(Boolean)
336
455
  : [],
337
456
  );
338
457
 
339
- const handoffPreference = normalizeHandoffPreference(pickDefined(advanced.handoff, obj.handoff));
340
- const handoffMode: HandoffMode =
341
- handoffPreference === "verbatim" ? "verbatim_last_n" : handoffPreference;
342
-
343
458
  return {
344
459
  enabled: obj.enabled ?? DEFAULTS.enabled,
345
460
  historyWindow: clampInt(advanced.historyWindow, presetConfig.historyWindow, 2, 40),
@@ -369,7 +484,44 @@ function resolveConfig(raw: unknown): ResolvedConfig {
369
484
  0,
370
485
  8,
371
486
  ),
487
+ minUniqueTokenRatio: clampFloat(
488
+ advanced.minUniqueTokenRatio,
489
+ DEFAULTS.minUniqueTokenRatio,
490
+ 0,
491
+ 1,
492
+ ),
493
+ shortMessageTokenLimit: clampInt(
494
+ advanced.shortMessageTokenLimit,
495
+ DEFAULTS.shortMessageTokenLimit,
496
+ 1,
497
+ 40,
498
+ ),
499
+ embeddingTriggerMargin: clampFloat(
500
+ advanced.embeddingTriggerMargin,
501
+ DEFAULTS.embeddingTriggerMargin,
502
+ 0,
503
+ 0.5,
504
+ ),
372
505
  stripEnvelope: advanced.stripEnvelope ?? DEFAULTS.stripEnvelope,
506
+ stripRules: {
507
+ dropLinePrefixPatterns: compileRegexList(
508
+ stripRules.dropLinePrefixPatterns,
509
+ DEFAULTS.stripDropLinePrefixPatterns,
510
+ ),
511
+ dropExactLines: new Set(
512
+ normalizeStringList(stripRules.dropExactLines, DEFAULTS.stripDropExactLines),
513
+ ),
514
+ dropFencedBlockAfterHeaderPatterns: compileRegexList(
515
+ stripRules.dropFencedBlockAfterHeaderPatterns,
516
+ DEFAULTS.stripDropFencedBlockAfterHeaderPatterns,
517
+ ),
518
+ },
519
+ handoffTailReadMaxBytes: clampInt(
520
+ advanced.handoffTailReadMaxBytes,
521
+ DEFAULTS.handoffTailReadMaxBytes,
522
+ 64 * 1024,
523
+ 8 * 1024 * 1024,
524
+ ),
373
525
  softConsecutiveSignals: clampInt(
374
526
  advanced.softConsecutiveSignals,
375
527
  presetConfig.softConsecutiveSignals,
@@ -404,26 +556,26 @@ function resolveConfig(raw: unknown): ResolvedConfig {
404
556
  0,
405
557
  1,
406
558
  ),
407
- handoffMode,
408
- handoffLastN: clampInt(advanced.handoffLastN, DEFAULTS.handoffLastN, 1, 20),
409
- handoffMaxChars: clampInt(advanced.handoffMaxChars, DEFAULTS.handoffMaxChars, 60, 800),
559
+ handoff: {
560
+ mode: normalizeHandoffMode(handoff.mode),
561
+ lastN: clampInt(handoff.lastN, DEFAULTS.handoffLastN, 1, 20),
562
+ maxChars: clampInt(handoff.maxChars, DEFAULTS.handoffMaxChars, 60, 800),
563
+ },
410
564
  embedding: {
411
- provider: normalizeEmbeddingProvider(
412
- pickDefined(advanced.embeddings, advancedEmbedding.provider, obj.embeddings),
413
- ),
565
+ provider: normalizeEmbeddingProvider(embedding.provider),
414
566
  model: (() => {
415
- const rawModel = advancedEmbedding.model;
567
+ const rawModel = embedding.model;
416
568
  return typeof rawModel === "string" ? rawModel.trim() : undefined;
417
569
  })(),
418
570
  baseUrl: (() => {
419
- const rawBaseUrl = advancedEmbedding.baseUrl;
571
+ const rawBaseUrl = embedding.baseUrl;
420
572
  return typeof rawBaseUrl === "string" ? rawBaseUrl.trim() : undefined;
421
573
  })(),
422
574
  apiKey: (() => {
423
- const rawApiKey = advancedEmbedding.apiKey;
575
+ const rawApiKey = embedding.apiKey;
424
576
  return typeof rawApiKey === "string" ? rawApiKey.trim() : undefined;
425
577
  })(),
426
- timeoutMs: clampInt(advancedEmbedding.timeoutMs, DEFAULTS.embeddingTimeoutMs, 1000, 30_000),
578
+ timeoutMs: clampInt(embedding.timeoutMs, DEFAULTS.embeddingTimeoutMs, 1000, 30_000),
427
579
  },
428
580
  dryRun: obj.dryRun ?? DEFAULTS.dryRun,
429
581
  debug: obj.debug ?? DEFAULTS.debug,
@@ -484,13 +636,27 @@ function tokenEntropy(tokens: string[]): number {
484
636
  return entropy;
485
637
  }
486
638
 
487
- function stripClassifierEnvelope(text: string): string {
639
+ function matchesAny(patterns: RegExp[], value: string): boolean {
640
+ for (const pattern of patterns) {
641
+ if (pattern.test(value)) {
642
+ return true;
643
+ }
644
+ }
645
+ return false;
646
+ }
647
+
648
+ function stripClassifierEnvelope(
649
+ text: string,
650
+ rules: ResolvedConfig["stripRules"],
651
+ ): string {
488
652
  const lines = text.replace(/\r\n/g, "\n").split("\n");
489
653
  const kept: string[] = [];
490
654
  let skipFence = false;
491
655
  let expectingMetadataFence = false;
656
+ let sawSemanticContent = false;
492
657
 
493
- for (const line of lines) {
658
+ for (let index = 0; index < lines.length; index += 1) {
659
+ const line = lines[index] ?? "";
494
660
  const trimmed = line.trim();
495
661
  if (skipFence) {
496
662
  if (trimmed.startsWith("```")) {
@@ -499,14 +665,17 @@ function stripClassifierEnvelope(text: string): string {
499
665
  continue;
500
666
  }
501
667
 
502
- if (
503
- trimmed === "Conversation info (untrusted metadata):" ||
504
- trimmed === "Replied message (untrusted, for context):"
505
- ) {
668
+ if (matchesAny(rules.dropFencedBlockAfterHeaderPatterns, trimmed)) {
506
669
  expectingMetadataFence = true;
507
670
  continue;
508
671
  }
509
672
 
673
+ // Drop top fenced metadata envelopes even if header wording changes.
674
+ if (!sawSemanticContent && index < 12 && (trimmed === "```" || trimmed === "```json")) {
675
+ skipFence = true;
676
+ continue;
677
+ }
678
+
510
679
  if (expectingMetadataFence && trimmed.startsWith("```")) {
511
680
  skipFence = true;
512
681
  expectingMetadataFence = false;
@@ -515,17 +684,14 @@ function stripClassifierEnvelope(text: string): string {
515
684
 
516
685
  expectingMetadataFence = false;
517
686
 
518
- if (
519
- trimmed.startsWith("System: [") ||
520
- trimmed.startsWith("Current time:") ||
521
- trimmed.startsWith("Read HEARTBEAT.md if it exists") ||
522
- trimmed.startsWith("To send an image back, prefer the message tool") ||
523
- trimmed.startsWith("[media attached:")
524
- ) {
687
+ if (rules.dropExactLines.has(trimmed) || matchesAny(rules.dropLinePrefixPatterns, trimmed)) {
525
688
  continue;
526
689
  }
527
690
 
528
691
  kept.push(line);
692
+ if (trimmed) {
693
+ sawSemanticContent = true;
694
+ }
529
695
  }
530
696
 
531
697
  return kept.join("\n").replace(/\n{3,}/g, "\n\n").trim();
@@ -592,29 +758,36 @@ function cosineSimilarity(a: number[], b: number[]): number | undefined {
592
758
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
593
759
  }
594
760
 
595
- function centroid(vectors: number[][]): number[] | undefined {
596
- if (vectors.length === 0) {
597
- return undefined;
598
- }
599
- const dim = vectors[0]?.length ?? 0;
600
- if (!dim) {
601
- return undefined;
761
+ function seedTopicCentroid(state: SessionState, vector?: number[]): void {
762
+ if (!Array.isArray(vector) || vector.length === 0) {
763
+ state.topicCentroid = undefined;
764
+ state.topicCount = 0;
765
+ state.topicDim = undefined;
766
+ return;
602
767
  }
603
- for (const vector of vectors) {
604
- if (vector.length !== dim) {
605
- return undefined;
606
- }
768
+ state.topicCentroid = [...vector];
769
+ state.topicCount = 1;
770
+ state.topicDim = vector.length;
771
+ }
772
+
773
+ function updateTopicCentroid(state: SessionState, vector?: number[]): void {
774
+ if (!Array.isArray(vector) || vector.length === 0) {
775
+ return;
607
776
  }
608
- const out = new Array<number>(dim).fill(0);
609
- for (const vector of vectors) {
610
- for (let i = 0; i < dim; i += 1) {
611
- out[i] += vector[i];
612
- }
777
+ if (
778
+ !Array.isArray(state.topicCentroid) ||
779
+ state.topicCentroid.length !== vector.length ||
780
+ !state.topicCount
781
+ ) {
782
+ seedTopicCentroid(state, vector);
783
+ return;
613
784
  }
614
- for (let i = 0; i < dim; i += 1) {
615
- out[i] /= vectors.length;
785
+ const nextCount = state.topicCount + 1;
786
+ for (let i = 0; i < vector.length; i += 1) {
787
+ state.topicCentroid[i] += (vector[i] - state.topicCentroid[i]) / nextCount;
616
788
  }
617
- return out;
789
+ state.topicCount = nextCount;
790
+ state.topicDim = vector.length;
618
791
  }
619
792
 
620
793
  function trimHistory(entries: HistoryEntry[], limit: number): HistoryEntry[] {
@@ -645,7 +818,10 @@ function looksLikeGroup(value?: string): boolean {
645
818
  );
646
819
  }
647
820
 
648
- function inferFastPeer(event: PluginHookMessageReceivedEvent, ctx: { conversationId?: string }) {
821
+ function inferFastPeer(
822
+ event: FastMessageEventLike,
823
+ ctx: { conversationId?: string },
824
+ ): { kind: "direct" | "group" | "channel"; id: string } {
649
825
  const from = event.from?.trim() ?? "";
650
826
  const conversationId = ctx.conversationId?.trim() || from;
651
827
  const metadata = (event.metadata ?? {}) as Record<string, unknown>;
@@ -659,12 +835,13 @@ function inferFastPeer(event: PluginHookMessageReceivedEvent, ctx: { conversatio
659
835
 
660
836
  if (threadId) {
661
837
  return {
662
- kind: "thread" as const,
838
+ kind: "group",
663
839
  id: `${conversationId || from}:thread:${threadId}`,
664
840
  };
665
841
  }
666
842
 
667
- const kind = looksLikeGroup(conversationId) || looksLikeGroup(from) ? "group" : "direct";
843
+ const kind: "group" | "direct" =
844
+ looksLikeGroup(conversationId) || looksLikeGroup(from) ? "group" : "direct";
668
845
  return {
669
846
  kind,
670
847
  id: conversationId || from || "unknown",
@@ -763,45 +940,84 @@ function resolveEmbeddingBackend(cfg: ResolvedConfig): EmbeddingBackend | null {
763
940
  return createOllamaBackend(cfg);
764
941
  }
765
942
 
766
- function classifyMessage(params: {
943
+ function computeLexicalFeatures(params: {
767
944
  cfg: ResolvedConfig;
768
- state: SessionState;
769
945
  entry: HistoryEntry;
770
- now: number;
771
- }): ClassificationDecision {
772
- const { cfg, state, entry, now } = params;
773
- const baselineEntries = state.history;
774
- const baselineTokens = unionTokens(baselineEntries);
775
-
776
- const lexicalSimilarity = jaccardSimilarity(entry.tokens, baselineTokens);
946
+ tokenList: string[];
947
+ baselineTokens: Set<string>;
948
+ }): LexicalFeatures {
949
+ const lexicalSimilarity = jaccardSimilarity(params.entry.tokens, params.baselineTokens);
777
950
  const lexicalDistance = 1 - lexicalSimilarity;
778
- const novelty = noveltyRatio(entry.tokens, baselineTokens);
951
+ const novelty = noveltyRatio(params.entry.tokens, params.baselineTokens);
952
+ const uniqueTokenRatio =
953
+ params.tokenList.length > 0 ? params.entry.tokens.size / params.tokenList.length : 0;
954
+ const entropy = tokenEntropy(params.tokenList);
955
+
956
+ let score = 0.55 * lexicalDistance + 0.45 * novelty;
957
+ if (uniqueTokenRatio < params.cfg.minUniqueTokenRatio) {
958
+ score -= (params.cfg.minUniqueTokenRatio - uniqueTokenRatio) * 0.4;
959
+ }
960
+ if (params.tokenList.length <= params.cfg.shortMessageTokenLimit && entropy < params.cfg.minSignalEntropy) {
961
+ score -= (params.cfg.minSignalEntropy - entropy) * 0.06;
962
+ }
779
963
 
780
- const baseVectors = baselineEntries
781
- .map((item) => item.embedding)
782
- .filter((vector): vector is number[] => Array.isArray(vector) && vector.length > 0);
783
- const baseCentroid = centroid(baseVectors);
784
- const similarity =
785
- entry.embedding && baseCentroid ? cosineSimilarity(entry.embedding, baseCentroid) : undefined;
786
- const usedEmbedding = typeof similarity === "number";
964
+ return {
965
+ score: Math.max(0, Math.min(1, score)),
966
+ novelty,
967
+ lexicalDistance,
968
+ uniqueTokenRatio,
969
+ entropy,
970
+ };
971
+ }
787
972
 
788
- const score = usedEmbedding
789
- ? 0.7 * (1 - similarity) + 0.15 * lexicalDistance + 0.15 * novelty
790
- : 0.55 * lexicalDistance + 0.45 * novelty;
973
+ function shouldRequestEmbedding(params: {
974
+ cfg: ResolvedConfig;
975
+ backendAvailable: boolean;
976
+ lexical: LexicalFeatures;
977
+ warmup: boolean;
978
+ cooldownActive: boolean;
979
+ }): boolean {
980
+ if (!params.backendAvailable || params.warmup || params.cooldownActive) {
981
+ return false;
982
+ }
983
+ if (params.lexical.score >= params.cfg.softScoreThreshold - params.cfg.embeddingTriggerMargin) {
984
+ return true;
985
+ }
986
+ return (
987
+ params.lexical.novelty >= params.cfg.softNoveltyThreshold * 0.9 &&
988
+ params.lexical.lexicalDistance >= 0.35
989
+ );
990
+ }
991
+
992
+ function classifyMessage(params: {
993
+ cfg: ResolvedConfig;
994
+ state: SessionState;
995
+ baselineTokenCount: number;
996
+ lexical: LexicalFeatures;
997
+ similarity?: number;
998
+ usedEmbedding: boolean;
999
+ now: number;
1000
+ }): ClassificationDecision {
1001
+ const { cfg, state, now } = params;
1002
+ const score =
1003
+ params.usedEmbedding && typeof params.similarity === "number"
1004
+ ? 0.7 * (1 - params.similarity) +
1005
+ 0.15 * params.lexical.lexicalDistance +
1006
+ 0.15 * params.lexical.novelty
1007
+ : params.lexical.score;
791
1008
 
792
1009
  const metrics = {
793
1010
  score,
794
- novelty,
795
- lexicalDistance,
796
- similarity,
797
- usedEmbedding,
1011
+ novelty: params.lexical.novelty,
1012
+ lexicalDistance: params.lexical.lexicalDistance,
1013
+ uniqueTokenRatio: params.lexical.uniqueTokenRatio,
1014
+ entropy: params.lexical.entropy,
1015
+ similarity: params.similarity,
1016
+ usedEmbedding: params.usedEmbedding,
798
1017
  pendingSoftSignals: state.pendingSoftSignals,
799
1018
  } satisfies ClassifierMetrics;
800
1019
 
801
- if (
802
- baselineEntries.length < cfg.minHistoryMessages ||
803
- baselineTokens.size < cfg.minMeaningfulTokens
804
- ) {
1020
+ if (state.history.length < cfg.minHistoryMessages || params.baselineTokenCount < cfg.minMeaningfulTokens) {
805
1021
  return { kind: "warmup", metrics, reason: "warmup" };
806
1022
  }
807
1023
 
@@ -814,8 +1030,11 @@ function classifyMessage(params: {
814
1030
 
815
1031
  const hardSignal =
816
1032
  score >= cfg.hardScoreThreshold ||
817
- ((typeof similarity === "number" ? similarity <= cfg.hardSimilarityThreshold : false) &&
818
- novelty >= cfg.hardNoveltyThreshold);
1033
+ (params.usedEmbedding && typeof params.similarity === "number"
1034
+ ? params.similarity <= cfg.hardSimilarityThreshold &&
1035
+ params.lexical.novelty >= cfg.hardNoveltyThreshold
1036
+ : params.lexical.novelty >= cfg.hardNoveltyThreshold &&
1037
+ params.lexical.lexicalDistance >= 0.65);
819
1038
 
820
1039
  if (hardSignal) {
821
1040
  return { kind: "rotate-hard", metrics, reason: "hard-threshold" };
@@ -823,9 +1042,11 @@ function classifyMessage(params: {
823
1042
 
824
1043
  const softSignal =
825
1044
  score >= cfg.softScoreThreshold ||
826
- ((typeof similarity === "number" ? similarity <= cfg.softSimilarityThreshold : false) &&
827
- novelty >= cfg.softNoveltyThreshold) ||
828
- (!usedEmbedding && novelty >= cfg.softNoveltyThreshold && lexicalDistance >= 0.45);
1045
+ (params.usedEmbedding && typeof params.similarity === "number"
1046
+ ? params.similarity <= cfg.softSimilarityThreshold &&
1047
+ params.lexical.novelty >= cfg.softNoveltyThreshold
1048
+ : params.lexical.novelty >= cfg.softNoveltyThreshold &&
1049
+ params.lexical.lexicalDistance >= 0.45);
829
1050
 
830
1051
  if (!softSignal) {
831
1052
  return { kind: "stable", metrics, reason: "stable" };
@@ -885,15 +1106,10 @@ function resolveSessionFilePathFromEntry(params: {
885
1106
  return path.resolve(sessionsDir, `${sessionId}.jsonl`);
886
1107
  }
887
1108
 
888
- async function readTranscriptTail(params: {
889
- sessionFile: string;
890
- takeLast: number;
891
- }): Promise<TranscriptMessage[]> {
892
- const raw = await fs.readFile(params.sessionFile, "utf-8");
893
- const lines = raw.split("\n");
1109
+ function parseTranscriptTailLines(lines: string[], takeLast: number): TranscriptMessage[] {
894
1110
  const messages: TranscriptMessage[] = [];
895
-
896
- for (const line of lines) {
1111
+ for (let i = lines.length - 1; i >= 0; i -= 1) {
1112
+ const line = lines[i] ?? "";
897
1113
  const trimmed = line.trim();
898
1114
  if (!trimmed) {
899
1115
  continue;
@@ -924,12 +1140,67 @@ async function readTranscriptTail(params: {
924
1140
  continue;
925
1141
  }
926
1142
  messages.push({ role, text });
1143
+ if (messages.length >= takeLast) {
1144
+ break;
1145
+ }
1146
+ }
1147
+ messages.reverse();
1148
+ return messages;
1149
+ }
1150
+
1151
+ async function readTranscriptTail(params: {
1152
+ sessionFile: string;
1153
+ takeLast: number;
1154
+ maxBytes: number;
1155
+ onFallbackFullRead?: () => void;
1156
+ }): Promise<TranscriptMessage[]> {
1157
+ const fd = await fs.open(params.sessionFile, "r");
1158
+ let fallbackFullRead = false;
1159
+ try {
1160
+ const stat = await fd.stat();
1161
+ if (stat.size <= 0) {
1162
+ return [];
1163
+ }
1164
+
1165
+ let position = stat.size;
1166
+ let bytesReadTotal = 0;
1167
+ const chunks: string[] = [];
1168
+ while (position > 0 && bytesReadTotal < params.maxBytes) {
1169
+ const remainingBudget = params.maxBytes - bytesReadTotal;
1170
+ const toRead = Math.min(64 * 1024, position, remainingBudget);
1171
+ if (toRead <= 0) {
1172
+ break;
1173
+ }
1174
+ const nextPosition = position - toRead;
1175
+ const buffer = Buffer.allocUnsafe(toRead);
1176
+ const result = await fd.read(buffer, 0, toRead, nextPosition);
1177
+ if (result.bytesRead <= 0) {
1178
+ break;
1179
+ }
1180
+ chunks.unshift(buffer.subarray(0, result.bytesRead).toString("utf-8"));
1181
+ position = nextPosition;
1182
+ bytesReadTotal += result.bytesRead;
1183
+ if (chunks.join("").split("\n").length > params.takeLast * 30) {
1184
+ break;
1185
+ }
1186
+ }
1187
+
1188
+ const tail = parseTranscriptTailLines(chunks.join("").split("\n"), params.takeLast);
1189
+ if (tail.length >= params.takeLast || bytesReadTotal >= stat.size) {
1190
+ return tail;
1191
+ }
1192
+ fallbackFullRead = true;
1193
+ } finally {
1194
+ await fd.close();
927
1195
  }
928
1196
 
929
- if (messages.length <= params.takeLast) {
930
- return messages;
1197
+ if (!fallbackFullRead) {
1198
+ return [];
931
1199
  }
932
- return messages.slice(messages.length - params.takeLast);
1200
+
1201
+ params.onFallbackFullRead?.();
1202
+ const raw = await fs.readFile(params.sessionFile, "utf-8");
1203
+ return parseTranscriptTailLines(raw.split("\n"), params.takeLast);
933
1204
  }
934
1205
 
935
1206
  function truncateText(text: string, maxChars: number): string {
@@ -968,7 +1239,7 @@ async function buildHandoffEventFromPreviousSession(params: {
968
1239
  previousEntry?: SessionEntryLike;
969
1240
  logger: OpenClawPluginApi["logger"];
970
1241
  }): Promise<string | null> {
971
- if (params.cfg.handoffMode === "none") {
1242
+ if (params.cfg.handoff.mode === "none") {
972
1243
  return null;
973
1244
  }
974
1245
  const sessionFile = resolveSessionFilePathFromEntry({
@@ -982,12 +1253,18 @@ async function buildHandoffEventFromPreviousSession(params: {
982
1253
  try {
983
1254
  const tail = await readTranscriptTail({
984
1255
  sessionFile,
985
- takeLast: params.cfg.handoffLastN,
1256
+ takeLast: params.cfg.handoff.lastN,
1257
+ maxBytes: params.cfg.handoffTailReadMaxBytes,
1258
+ onFallbackFullRead: () => {
1259
+ params.logger.warn(
1260
+ `topic-shift-reset: handoff tail fallback full-read file=${sessionFile}`,
1261
+ );
1262
+ },
986
1263
  });
987
1264
  return formatHandoffEventText({
988
- mode: params.cfg.handoffMode,
1265
+ mode: params.cfg.handoff.mode,
989
1266
  messages: tail,
990
- maxChars: params.cfg.handoffMaxChars,
1267
+ maxChars: params.cfg.handoff.maxChars,
991
1268
  });
992
1269
  } catch (error) {
993
1270
  params.logger.warn(
@@ -1054,13 +1331,9 @@ async function rotateSessionEntry(params: {
1054
1331
  });
1055
1332
 
1056
1333
  if (params.cfg.dryRun) {
1057
- params.state.lastResetAt = Date.now();
1058
- params.state.pendingSoftSignals = 0;
1059
- params.state.pendingEntries = [];
1060
- params.state.history = trimHistory([params.entry], params.cfg.historyWindow);
1061
1334
  params.api.logger.info(
1062
1335
  [
1063
- `topic-shift-reset: dry-run rotate`,
1336
+ `topic-shift-reset: would-rotate`,
1064
1337
  `source=${params.source}`,
1065
1338
  `reason=${params.reason}`,
1066
1339
  `session=${params.sessionKey}`,
@@ -1129,8 +1402,9 @@ async function rotateSessionEntry(params: {
1129
1402
  params.state.pendingSoftSignals = 0;
1130
1403
  params.state.pendingEntries = [];
1131
1404
  params.state.history = trimHistory([params.entry], params.cfg.historyWindow);
1405
+ seedTopicCentroid(params.state, params.entry.embedding);
1132
1406
 
1133
- params.api.logger.warn(
1407
+ params.api.logger.info(
1134
1408
  [
1135
1409
  `topic-shift-reset: rotated`,
1136
1410
  `source=${params.source}`,
@@ -1164,7 +1438,7 @@ export default function register(api: OpenClawPluginApi): void {
1164
1438
  if (embeddingInitError) {
1165
1439
  api.logger.warn(`topic-shift-reset: embedding backend init failed: ${embeddingInitError}`);
1166
1440
  } else if (!embeddingBackend) {
1167
- api.logger.warn("topic-shift-reset: embedding backend unavailable, using lexical-only mode");
1441
+ api.logger.info("topic-shift-reset: embedding backend unavailable, using lexical-only mode");
1168
1442
  } else {
1169
1443
  api.logger.info(`topic-shift-reset: embedding backend ${embeddingBackend.name}`);
1170
1444
  }
@@ -1175,7 +1449,6 @@ export default function register(api: OpenClawPluginApi): void {
1175
1449
  text: string;
1176
1450
  messageProvider?: string;
1177
1451
  agentId?: string;
1178
- dedupeKey?: string;
1179
1452
  }) => {
1180
1453
  if (!cfg.enabled) {
1181
1454
  return;
@@ -1185,24 +1458,19 @@ export default function register(api: OpenClawPluginApi): void {
1185
1458
  return;
1186
1459
  }
1187
1460
 
1188
- const provider = params.messageProvider?.trim().toLowerCase();
1461
+ const provider = normalizeProviderId(params.messageProvider ?? "");
1189
1462
  if (provider && cfg.ignoredProviders.has(provider)) {
1190
1463
  return;
1191
1464
  }
1192
1465
 
1193
1466
  const rawText = params.text.trim();
1194
- const text = cfg.stripEnvelope ? stripClassifierEnvelope(rawText) : rawText;
1467
+ const text = cfg.stripEnvelope ? stripClassifierEnvelope(rawText, cfg.stripRules) : rawText;
1195
1468
  if (!text || text.startsWith("/")) {
1196
1469
  return;
1197
1470
  }
1198
1471
 
1199
1472
  const tokenList = tokenizeList(text, cfg.minTokenLength);
1200
- const signalEntropy = tokenEntropy(tokenList);
1201
- if (
1202
- text.length < cfg.minSignalChars ||
1203
- tokenList.length < cfg.minSignalTokenCount ||
1204
- signalEntropy < cfg.minSignalEntropy
1205
- ) {
1473
+ if (text.length < cfg.minSignalChars || tokenList.length < cfg.minSignalTokenCount) {
1206
1474
  if (cfg.debug) {
1207
1475
  api.logger.info(
1208
1476
  [
@@ -1211,7 +1479,6 @@ export default function register(api: OpenClawPluginApi): void {
1211
1479
  `session=${sessionKey}`,
1212
1480
  `chars=${text.length}`,
1213
1481
  `tokens=${tokenList.length}`,
1214
- `entropy=${signalEntropy.toFixed(3)}`,
1215
1482
  ].join(" "),
1216
1483
  );
1217
1484
  }
@@ -1219,9 +1486,6 @@ export default function register(api: OpenClawPluginApi): void {
1219
1486
  }
1220
1487
 
1221
1488
  const tokens = new Set(tokenList);
1222
- if (tokens.size < cfg.minMeaningfulTokens) {
1223
- return;
1224
- }
1225
1489
 
1226
1490
  const contentHash = hashString(normalizeTextForHash(text));
1227
1491
  const lastRotationAt = recentRotationBySession.get(`${sessionKey}:${contentHash}`);
@@ -1229,18 +1493,6 @@ export default function register(api: OpenClawPluginApi): void {
1229
1493
  return;
1230
1494
  }
1231
1495
 
1232
- let embedding: number[] | undefined;
1233
- if (embeddingBackend) {
1234
- try {
1235
- const vector = await embeddingBackend.embed(text);
1236
- if (Array.isArray(vector) && vector.length > 0) {
1237
- embedding = vector;
1238
- }
1239
- } catch (error) {
1240
- api.logger.warn(`topic-shift-reset: embeddings error backend=${embeddingBackend.name} err=${String(error)}`);
1241
- }
1242
- }
1243
-
1244
1496
  const now = Date.now();
1245
1497
  const state =
1246
1498
  sessionState.get(sessionKey) ??
@@ -1249,12 +1501,61 @@ export default function register(api: OpenClawPluginApi): void {
1249
1501
  pendingSoftSignals: 0,
1250
1502
  pendingEntries: [],
1251
1503
  lastResetAt: undefined,
1504
+ topicCentroid: undefined,
1505
+ topicCount: 0,
1506
+ topicDim: undefined,
1252
1507
  lastSeenAt: now,
1253
1508
  } satisfies SessionState);
1254
1509
  state.lastSeenAt = now;
1255
1510
 
1511
+ const baselineTokens = unionTokens(state.history);
1512
+ const lexical = computeLexicalFeatures({
1513
+ cfg,
1514
+ entry: { tokens, at: now },
1515
+ tokenList,
1516
+ baselineTokens,
1517
+ });
1518
+ const warmup =
1519
+ state.history.length < cfg.minHistoryMessages || baselineTokens.size < cfg.minMeaningfulTokens;
1520
+ const cooldownMs = cfg.cooldownMinutes * 60_000;
1521
+ const cooldownActive =
1522
+ cooldownMs > 0 && typeof state.lastResetAt === "number" && now - state.lastResetAt < cooldownMs;
1523
+
1524
+ let embedding: number[] | undefined;
1525
+ if (
1526
+ shouldRequestEmbedding({
1527
+ cfg,
1528
+ backendAvailable: !!embeddingBackend,
1529
+ lexical,
1530
+ warmup,
1531
+ cooldownActive,
1532
+ }) &&
1533
+ embeddingBackend
1534
+ ) {
1535
+ try {
1536
+ const vector = await embeddingBackend.embed(text);
1537
+ if (Array.isArray(vector) && vector.length > 0) {
1538
+ embedding = vector;
1539
+ }
1540
+ } catch (error) {
1541
+ api.logger.warn(`topic-shift-reset: embeddings error backend=${embeddingBackend.name} err=${String(error)}`);
1542
+ }
1543
+ }
1544
+
1256
1545
  const entry: HistoryEntry = { tokens, embedding, at: now };
1257
- const decision = classifyMessage({ cfg, state, entry, now });
1546
+ const similarity =
1547
+ entry.embedding && state.topicCentroid
1548
+ ? cosineSimilarity(entry.embedding, state.topicCentroid)
1549
+ : undefined;
1550
+ const decision = classifyMessage({
1551
+ cfg,
1552
+ state,
1553
+ baselineTokenCount: baselineTokens.size,
1554
+ lexical,
1555
+ similarity,
1556
+ usedEmbedding: typeof similarity === "number",
1557
+ now,
1558
+ });
1258
1559
 
1259
1560
  if (cfg.debug) {
1260
1561
  api.logger.info(
@@ -1267,6 +1568,8 @@ export default function register(api: OpenClawPluginApi): void {
1267
1568
  `score=${decision.metrics.score.toFixed(3)}`,
1268
1569
  `novelty=${decision.metrics.novelty.toFixed(3)}`,
1269
1570
  `lex=${decision.metrics.lexicalDistance.toFixed(3)}`,
1571
+ `uniq=${decision.metrics.uniqueTokenRatio.toFixed(3)}`,
1572
+ `entropy=${decision.metrics.entropy.toFixed(3)}`,
1270
1573
  `sim=${typeof decision.metrics.similarity === "number" ? decision.metrics.similarity.toFixed(3) : "n/a"}`,
1271
1574
  `embed=${decision.metrics.usedEmbedding ? "1" : "0"}`,
1272
1575
  `pending=${state.pendingSoftSignals}`,
@@ -1278,6 +1581,7 @@ export default function register(api: OpenClawPluginApi): void {
1278
1581
  state.pendingSoftSignals = 0;
1279
1582
  state.pendingEntries = [];
1280
1583
  state.history = trimHistory([...state.history, entry], cfg.historyWindow);
1584
+ updateTopicCentroid(state, entry.embedding);
1281
1585
  sessionState.set(sessionKey, state);
1282
1586
  pruneStateMaps(sessionState);
1283
1587
  return;
@@ -1285,6 +1589,9 @@ export default function register(api: OpenClawPluginApi): void {
1285
1589
 
1286
1590
  if (decision.kind === "stable") {
1287
1591
  const merged = [...state.history, ...state.pendingEntries, entry];
1592
+ for (const item of [...state.pendingEntries, entry]) {
1593
+ updateTopicCentroid(state, item.embedding);
1594
+ }
1288
1595
  state.pendingSoftSignals = 0;
1289
1596
  state.pendingEntries = [];
1290
1597
  state.history = trimHistory(merged, cfg.historyWindow);
@@ -1318,7 +1625,9 @@ export default function register(api: OpenClawPluginApi): void {
1318
1625
  });
1319
1626
 
1320
1627
  if (rotated) {
1321
- recentRotationBySession.set(`${sessionKey}:${contentHash}`, Date.now());
1628
+ if (!cfg.dryRun) {
1629
+ recentRotationBySession.set(`${sessionKey}:${contentHash}`, Date.now());
1630
+ }
1322
1631
  }
1323
1632
 
1324
1633
  sessionState.set(sessionKey, state);
@@ -1355,7 +1664,7 @@ export default function register(api: OpenClawPluginApi): void {
1355
1664
  recentFastEvents.set(fastEventKey, Date.now());
1356
1665
  pruneRecentMap(recentFastEvents, FAST_EVENT_TTL_MS, MAX_RECENT_FAST_EVENTS);
1357
1666
 
1358
- let resolved: ResolvedFastSession | null = null;
1667
+ let resolvedSessionKey = "";
1359
1668
  try {
1360
1669
  const route = api.runtime.channel.routing.resolveAgentRoute({
1361
1670
  cfg: api.config,
@@ -1363,10 +1672,7 @@ export default function register(api: OpenClawPluginApi): void {
1363
1672
  accountId: ctx.accountId,
1364
1673
  peer,
1365
1674
  });
1366
- resolved = {
1367
- sessionKey: route.sessionKey,
1368
- routeKind: peer.kind,
1369
- };
1675
+ resolvedSessionKey = route.sessionKey;
1370
1676
  } catch (error) {
1371
1677
  if (cfg.debug) {
1372
1678
  api.logger.info(
@@ -1378,10 +1684,9 @@ export default function register(api: OpenClawPluginApi): void {
1378
1684
 
1379
1685
  await classifyAndMaybeRotate({
1380
1686
  source: "fast",
1381
- sessionKey: resolved.sessionKey,
1687
+ sessionKey: resolvedSessionKey,
1382
1688
  text,
1383
1689
  messageProvider: channelId,
1384
- dedupeKey: fastEventKey,
1385
1690
  });
1386
1691
  });
1387
1692