@martian-engineering/lossless-claw 0.6.3 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -6
- package/docs/agent-tools.md +16 -5
- package/docs/configuration.md +223 -214
- package/openclaw.plugin.json +123 -0
- package/package.json +1 -1
- package/skills/lossless-claw/SKILL.md +3 -2
- package/skills/lossless-claw/references/architecture.md +12 -0
- package/skills/lossless-claw/references/config.md +135 -3
- package/skills/lossless-claw/references/diagnostics.md +13 -0
- package/src/assembler.ts +17 -5
- package/src/compaction.ts +161 -53
- package/src/db/config.ts +102 -4
- package/src/db/connection.ts +35 -7
- package/src/db/features.ts +24 -5
- package/src/db/migration.ts +257 -78
- package/src/engine.ts +1007 -110
- package/src/estimate-tokens.ts +80 -0
- package/src/lcm-log.ts +37 -0
- package/src/plugin/index.ts +493 -101
- package/src/plugin/lcm-command.ts +288 -7
- package/src/plugin/lcm-doctor-apply.ts +1 -3
- package/src/plugin/lcm-doctor-cleaners.ts +655 -0
- package/src/plugin/shared-init.ts +59 -0
- package/src/prune.ts +391 -0
- package/src/retrieval.ts +8 -9
- package/src/startup-banner-log.ts +1 -0
- package/src/store/compaction-telemetry-store.ts +156 -0
- package/src/store/conversation-store.ts +6 -1
- package/src/store/fts5-sanitize.ts +25 -4
- package/src/store/full-text-sort.ts +21 -0
- package/src/store/index.ts +8 -0
- package/src/store/summary-store.ts +21 -14
- package/src/summarize.ts +55 -34
- package/src/tools/lcm-describe-tool.ts +9 -4
- package/src/tools/lcm-expand-query-tool.ts +609 -200
- package/src/tools/lcm-expand-tool.ts +9 -4
- package/src/tools/lcm-grep-tool.ts +22 -8
- package/src/types.ts +1 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared token estimation utility.
|
|
3
|
+
*
|
|
4
|
+
* Uses code-point-aware weighting instead of `text.length / 4`:
|
|
5
|
+
* - CJK (Chinese/Japanese/Korean) characters: ~1.5 tokens/char
|
|
6
|
+
* - Emoji / Supplementary Plane: ~2 tokens/char
|
|
7
|
+
* - ASCII / Latin: ~0.25 tokens/char (≈ 4 chars/token)
|
|
8
|
+
*
|
|
9
|
+
* Why not `text.length / 4`?
|
|
10
|
+
* JavaScript `String.length` counts UTF-16 code units, not Unicode code points.
|
|
11
|
+
* CJK characters are 1 UTF-16 unit but ~1.5 tokens; emoji are 2 UTF-16 units
|
|
12
|
+
* (surrogate pairs) but ~2-4 tokens. The naive formula underestimates CJK by
|
|
13
|
+
* ~6× and emoji by ~4-8×, causing compaction to trigger far too late for
|
|
14
|
+
* non-English conversations.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/**
 * Inclusive `[first, last]` code point ranges that the estimator counts as CJK.
 *
 * Entries are sorted ascending and never overlap, which lets the lookup stop
 * as soon as it reaches a range that starts above the probed code point.
 */
const CJK_CODE_POINT_RANGES: ReadonlyArray<readonly [number, number]> = [
  [0x3000, 0x303f], // CJK Symbols and Punctuation
  [0x3040, 0x30ff], // Hiragana + Katakana
  [0x3400, 0x4dbf], // CJK Extension A
  [0x4e00, 0x9fff], // CJK Unified Ideographs
  [0xac00, 0xd7af], // Hangul Syllables
  [0xff00, 0xffef], // Fullwidth Forms
  [0x20000, 0x2a6df], // CJK Extension B
  [0x2a700, 0x2b73f], // CJK Extension C
  [0x2b740, 0x2b81f], // CJK Extension D
  [0x2b820, 0x2ceaf], // CJK Extension E
  [0x2ceb0, 0x2ebef], // CJK Extension F
];

/**
 * Report whether a code point falls inside one of the CJK ranges listed in
 * `CJK_CODE_POINT_RANGES`.
 *
 * @param cp - Unicode code point to classify.
 * @returns `true` when `cp` lies within any listed range (bounds inclusive).
 */
function isCjkCodePoint(cp: number): boolean {
  for (const [first, last] of CJK_CODE_POINT_RANGES) {
    if (cp < first) {
      // Sorted table: every remaining range starts even higher.
      return false;
    }
    if (cp <= last) {
      return true;
    }
  }
  return false;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Estimate the token cost of one Unicode code point.
 *
 * @param cp - Unicode code point to weigh.
 * @returns `1.5` for CJK code points, `2` for any other code point above
 *   U+FFFF, and `0.25` for everything else.
 */
function estimateCodePointTokens(cp: number): number {
  // The CJK test has to run first: CJK Extensions B-F sit above U+FFFF and
  // would otherwise be charged the supplementary-plane weight.
  if (isCjkCodePoint(cp)) {
    return 1.5;
  }
  return cp > 0xffff ? 2 : 0.25;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Estimate how many tokens a string costs using per-code-point weights.
 *
 * @param text - Text to measure.
 * @returns The summed per-code-point estimate, rounded up to a whole number;
 *   `0` for an empty string.
 */
export function estimateTokens(text: string): number {
  let total = 0;
  let index = 0;

  while (index < text.length) {
    const codePoint = text.codePointAt(index) ?? 0;
    total += estimateCodePointTokens(codePoint);
    // A code point above U+FFFF occupies two UTF-16 code units (a surrogate pair).
    index += codePoint > 0xffff ? 2 : 1;
  }

  return Math.ceil(total);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Truncate text so the estimated token count stays within `maxTokens`.
 *
 * Iterates by Unicode code point to avoid splitting surrogate pairs while
 * preserving the same weighting model as `estimateTokens()`.
 *
 * @param text - Text to shorten.
 * @param maxTokens - Upper bound for the estimated token count of the result.
 * @returns The longest prefix of `text` whose rounded-up estimate does not
 *   exceed `maxTokens`. Returns `""` when `text` is empty or when `maxTokens`
 *   is zero, negative, or `NaN`.
 */
export function truncateTextToEstimatedTokens(text: string, maxTokens: number): string {
  // `!(maxTokens > 0)` rejects NaN as well as non-positive budgets. With a
  // plain `<= 0` check a NaN budget slips through and, because every
  // comparison against NaN is false, the whole text would come back untruncated.
  if (!text || !(maxTokens > 0)) {
    return "";
  }

  let tokens = 0;
  let end = 0;

  for (const char of text) {
    const cp = char.codePointAt(0) ?? 0;
    const nextTokens = tokens + estimateCodePointTokens(cp);
    // Round up before comparing so the kept prefix agrees with estimateTokens().
    if (Math.ceil(nextTokens) > maxTokens) {
      break;
    }
    tokens = nextTokens;
    // `char` is one code point, i.e. one or two UTF-16 code units.
    end += char.length;
  }

  return text.slice(0, end);
}
|
package/src/lcm-log.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
|
|
2
|
+
import type { LcmDependencies } from "./types.js";
|
|
3
|
+
|
|
4
|
+
/** Logger shape used by LCM: the type of the `log` member of `LcmDependencies`. */
export type LcmLogger = LcmDependencies["log"];

/** Shared do-nothing sink backing every level of the silent logger. */
const discardLogMessage = (): void => {};

/** Silent logger used when a caller does not provide an explicit sink. */
export const NOOP_LCM_LOGGER: LcmLogger = {
  info: discardLogMessage,
  warn: discardLogMessage,
  error: discardLogMessage,
  debug: discardLogMessage,
};
|
|
13
|
+
|
|
14
|
+
/**
 * Format unknown failures into stable one-line log text.
 *
 * - `Error` instances yield their `message`, falling back to `name` when the
 *   message is empty so the log line is never blank.
 * - Primitives (including `null` and `undefined`) are converted with `String()`.
 * - Other objects use their own string conversion when it is informative;
 *   the generic `"[object Object]"` is replaced by a JSON rendering when one
 *   can be produced.
 * - Text containing line breaks is collapsed onto a single line.
 *
 * @param error - Any thrown or rejected value.
 * @returns A single-line description; this function does not throw.
 */
export function describeLogError(error: unknown): string {
  let text: string;
  if (error instanceof Error) {
    text = error.message || error.name || "Error";
  } else if (typeof error === "object" && error !== null) {
    text = describeThrownObject(error);
  } else {
    text = String(error);
  }

  // Text that is already one line is returned untouched.
  if (!/[\r\n\u2028\u2029]/.test(text)) {
    return text;
  }
  return text
    .split(/[\r\n\u2028\u2029]+/)
    .map((line) => line.trim())
    .filter((line) => line.length > 0)
    .join(" ");
}

/** Render a non-Error object for logging without ever throwing. */
function describeThrownObject(value: object): string {
  try {
    const text = String(value);
    if (text !== "[object Object]") {
      return text;
    }
  } catch {
    // String() throws for null-prototype objects or a broken toString();
    // fall through to the JSON rendering.
  }
  try {
    const json: unknown = JSON.stringify(value);
    // JSON.stringify can yield undefined (e.g. toJSON() returning undefined).
    if (typeof json === "string") {
      return json;
    }
  } catch {
    // Circular structures and BigInt members cannot be serialized.
  }
  return "[object Object]";
}
|
|
18
|
+
|
|
19
|
+
/**
 * Create the LCM logger, preferring OpenClaw's file-backed runtime logger.
 *
 * Requests a child logger tagged `{ plugin: "lossless-claw" }` from
 * `api.runtime.logging` when that facility and its `getChildLogger` method
 * exist. If no child logger is obtained, messages are forwarded to
 * `api.logger`, which is looked up again on every call.
 *
 * @param api - Plugin API handle supplied by OpenClaw.
 * @returns A logger whose `debug` is a no-op when the chosen sink has no
 *   `debug` method.
 */
export function createLcmLogger(api: OpenClawPluginApi): LcmLogger {
  const childLogger = api.runtime.logging?.getChildLogger?.({ plugin: "lossless-claw" });

  return childLogger
    ? {
        info: (message) => childLogger.info(message),
        warn: (message) => childLogger.warn(message),
        error: (message) => childLogger.error(message),
        debug: (message) => childLogger.debug?.(message),
      }
    : {
        info: (message) => api.logger.info(message),
        warn: (message) => api.logger.warn(message),
        error: (message) => api.logger.error(message),
        debug: (message) => api.logger.debug?.(message),
      };
}
|