@martian-engineering/lossless-claw 0.6.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +26 -6
  2. package/docs/agent-tools.md +16 -5
  3. package/docs/configuration.md +223 -214
  4. package/openclaw.plugin.json +123 -0
  5. package/package.json +1 -1
  6. package/skills/lossless-claw/SKILL.md +3 -2
  7. package/skills/lossless-claw/references/architecture.md +12 -0
  8. package/skills/lossless-claw/references/config.md +135 -3
  9. package/skills/lossless-claw/references/diagnostics.md +13 -0
  10. package/src/assembler.ts +17 -5
  11. package/src/compaction.ts +161 -53
  12. package/src/db/config.ts +102 -4
  13. package/src/db/connection.ts +35 -7
  14. package/src/db/features.ts +24 -5
  15. package/src/db/migration.ts +257 -78
  16. package/src/engine.ts +1007 -110
  17. package/src/estimate-tokens.ts +80 -0
  18. package/src/lcm-log.ts +37 -0
  19. package/src/plugin/index.ts +493 -101
  20. package/src/plugin/lcm-command.ts +288 -7
  21. package/src/plugin/lcm-doctor-apply.ts +1 -3
  22. package/src/plugin/lcm-doctor-cleaners.ts +655 -0
  23. package/src/plugin/shared-init.ts +59 -0
  24. package/src/prune.ts +391 -0
  25. package/src/retrieval.ts +8 -9
  26. package/src/startup-banner-log.ts +1 -0
  27. package/src/store/compaction-telemetry-store.ts +156 -0
  28. package/src/store/conversation-store.ts +6 -1
  29. package/src/store/fts5-sanitize.ts +25 -4
  30. package/src/store/full-text-sort.ts +21 -0
  31. package/src/store/index.ts +8 -0
  32. package/src/store/summary-store.ts +21 -14
  33. package/src/summarize.ts +55 -34
  34. package/src/tools/lcm-describe-tool.ts +9 -4
  35. package/src/tools/lcm-expand-query-tool.ts +609 -200
  36. package/src/tools/lcm-expand-tool.ts +9 -4
  37. package/src/tools/lcm-grep-tool.ts +22 -8
  38. package/src/types.ts +1 -0
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Shared token estimation utility.
3
+ *
4
+ * Uses code-point-aware weighting instead of `text.length / 4`:
5
+ * - CJK (Chinese/Japanese/Korean) characters: ~1.5 tokens/char
6
+ * - Emoji / Supplementary Plane: ~2 tokens/char
7
+ * - ASCII / Latin: ~0.25 tokens/char (≈ 4 chars/token)
8
+ *
9
+ * Why not `text.length / 4`?
10
+ * JavaScript `String.length` counts UTF-16 code units, not Unicode code points.
11
+ * CJK characters are 1 UTF-16 unit but ~1.5 tokens; emoji are 2 UTF-16 units
12
+ * (surrogate pairs) but ~2-4 tokens. The naive formula underestimates CJK by
13
+ * ~6× and emoji by ~4-8×, causing compaction to trigger far too late for
14
+ * non-English conversations.
15
+ */
16
+
17
/**
 * Detect code points that belong to CJK scripts or CJK-specific punctuation.
 *
 * Covers the unified ideograph blocks (base plus Extensions A-F), the
 * compatibility ideographs, kana, Bopomofo, Hangul syllables and jamo,
 * CJK punctuation, and the fullwidth/halfwidth forms block.
 *
 * @param cp - Unicode code point to classify.
 * @returns `true` when `cp` falls inside one of the ranges listed below.
 */
function isCjkCodePoint(cp: number): boolean {
  return (
    (cp >= 0x4e00 && cp <= 0x9fff) || // CJK Unified Ideographs
    (cp >= 0x3400 && cp <= 0x4dbf) || // CJK Extension A
    (cp >= 0x20000 && cp <= 0x2a6df) || // CJK Extension B
    (cp >= 0x2a700 && cp <= 0x2b73f) || // CJK Extension C
    (cp >= 0x2b740 && cp <= 0x2b81f) || // CJK Extension D
    (cp >= 0x2b820 && cp <= 0x2ceaf) || // CJK Extension E
    (cp >= 0x2ceb0 && cp <= 0x2ebef) || // CJK Extension F
    (cp >= 0xf900 && cp <= 0xfaff) || // CJK Compatibility Ideographs
    (cp >= 0x3000 && cp <= 0x303f) || // CJK Symbols and Punctuation
    (cp >= 0x3040 && cp <= 0x30ff) || // Hiragana + Katakana
    (cp >= 0x31f0 && cp <= 0x31ff) || // Katakana Phonetic Extensions
    (cp >= 0x3100 && cp <= 0x312f) || // Bopomofo
    (cp >= 0xac00 && cp <= 0xd7af) || // Hangul Syllables
    (cp >= 0x1100 && cp <= 0x11ff) || // Hangul Jamo
    (cp >= 0x3130 && cp <= 0x318f) || // Hangul Compatibility Jamo (e.g. "ㅋㅋㅋ")
    (cp >= 0xff00 && cp <= 0xffef) // Fullwidth Forms
  );
}
33
+
34
/**
 * Weight a single Unicode code point in estimated tokens.
 *
 * CJK code points cost 1.5, any other code point above U+FFFF costs 2,
 * and everything else costs 0.25.
 *
 * @param cp - Unicode code point to weigh.
 * @returns The estimated (possibly fractional) token cost.
 */
function estimateCodePointTokens(cp: number): number {
  // The CJK check comes first so supplementary-plane ideographs get the CJK weight.
  return isCjkCodePoint(cp) ? 1.5 : cp > 0xffff ? 2 : 0.25;
}
44
+
45
/**
 * Estimate how many tokens `text` consumes.
 *
 * Walks the string one Unicode code point at a time, sums the per-code-point
 * weights from `estimateCodePointTokens()`, and rounds the total up.
 *
 * @param text - Text to measure.
 * @returns The estimated token count as a whole number.
 */
export function estimateTokens(text: string): number {
  let total = 0;
  let index = 0;
  while (index < text.length) {
    const codePoint = text.codePointAt(index) ?? 0;
    total += estimateCodePointTokens(codePoint);
    // Code points above U+FFFF occupy two UTF-16 code units (a surrogate pair).
    index += codePoint > 0xffff ? 2 : 1;
  }
  return Math.ceil(total);
}
54
+
55
/**
 * Cut `text` down to the longest prefix whose estimated token count does not
 * exceed `maxTokens`.
 *
 * The prefix grows one Unicode code point at a time, so a surrogate pair is
 * never split, and each code point is weighted with the same
 * `estimateCodePointTokens()` model that `estimateTokens()` uses.
 *
 * @param text - Text to truncate.
 * @param maxTokens - Token budget; a value of zero or less yields `""`.
 * @returns The prefix of `text` that fits the budget (possibly all of it).
 */
export function truncateTextToEstimatedTokens(text: string, maxTokens: number): string {
  if (!text || maxTokens <= 0) {
    return "";
  }

  let spent = 0;
  let cut = 0;

  while (cut < text.length) {
    const codePoint = text.codePointAt(cut) ?? 0;
    const projected = spent + estimateCodePointTokens(codePoint);
    // Stop before the first code point that would push the rounded total over budget.
    if (Math.ceil(projected) > maxTokens) {
      break;
    }
    spent = projected;
    // Code points above U+FFFF occupy two UTF-16 code units (a surrogate pair).
    cut += codePoint > 0xffff ? 2 : 1;
  }

  return text.slice(0, cut);
}
package/src/lcm-log.ts ADDED
@@ -0,0 +1,37 @@
1
+ import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
2
+ import type { LcmDependencies } from "./types.js";
3
+
4
/** Logger shape used throughout LCM, taken from the `log` dependency. */
export type LcmLogger = LcmDependencies["log"];

/** Logger that discards every message; for callers that supply no sink. */
export const NOOP_LCM_LOGGER: LcmLogger = {
  info() {},
  warn() {},
  error() {},
  debug() {},
};
13
+
14
/**
 * Format an unknown failure into stable one-line log text.
 *
 * - `Error` instances contribute their `message`; when that is empty the
 *   error's `name` is used so the log line is never blank.
 * - Any other value is converted with `String()`. Values that cannot be
 *   converted to a primitive fall back to their `Object.prototype.toString`
 *   tag, so this helper never throws.
 * - Line breaks, and the whitespace around them, are collapsed into single
 *   spaces so the result always fits on one log line.
 *
 * @param error - The caught value; may be anything.
 * @returns A single-line description of `error`.
 */
export function describeLogError(error: unknown): string {
  let text: string;
  if (error instanceof Error) {
    text = error.message || error.name || "Error";
  } else {
    try {
      text = String(error);
    } catch {
      // String() throws for values with no usable toString/valueOf,
      // e.g. Object.create(null); logging must not crash on them.
      text = Object.prototype.toString.call(error);
    }
  }

  return text
    .split(/[\r\n]+/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0)
    .join(" ");
}
18
+
19
/**
 * Build the logger used by LCM.
 *
 * Asks the runtime for a child logger tagged `{ plugin: "lossless-claw" }`
 * and wraps it when one is returned; otherwise wraps `api.logger`. In both
 * cases `debug` is optional on the underlying sink and is skipped when absent.
 *
 * @param api - Plugin API handle that supplies the runtime and fallback loggers.
 * @returns A logger that forwards `info`/`warn`/`error`/`debug` to the chosen sink.
 */
export function createLcmLogger(api: OpenClawPluginApi): LcmLogger {
  const childLogger = api.runtime.logging?.getChildLogger?.({ plugin: "lossless-claw" });

  if (!childLogger) {
    // No runtime child logger available: forward to the plugin's own logger.
    return {
      info: (msg) => api.logger.info(msg),
      warn: (msg) => api.logger.warn(msg),
      error: (msg) => api.logger.error(msg),
      debug: (msg) => api.logger.debug?.(msg),
    };
  }

  return {
    info: (msg) => childLogger.info(msg),
    warn: (msg) => childLogger.warn(msg),
    error: (msg) => childLogger.error(msg),
    debug: (msg) => childLogger.debug?.(msg),
  };
}