@martian-engineering/lossless-claw 0.1.1 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # lossless-claw
2
2
 
3
+ > ⚠️ **Current requirement:** This plugin requires a custom OpenClaw build with [PR #22201](https://github.com/openclaw/openclaw/pull/22201) applied until that PR is merged upstream.
4
+
3
5
  Lossless Context Management plugin for [OpenClaw](https://github.com/openclaw/openclaw), based on the [LCM paper](https://voltropy.com/LCM). Replaces OpenClaw's built-in sliding-window compaction with a DAG-based summarization system that preserves every message while keeping active context within model token limits.
4
6
 
5
7
  ## What it does
@@ -26,45 +28,37 @@ Nothing is lost. Raw messages stay in the database. Summaries link back to their
26
28
 
27
29
  ### Install the plugin
28
30
 
29
- **From npm** (recommended):
31
+ Use OpenClaw's plugin installer (recommended):
30
32
 
31
33
  ```bash
32
- npm install @martian-engineering/lossless-claw
34
+ openclaw plugins install @martian-engineering/lossless-claw
33
35
  ```
34
36
 
35
- **From source** (for development):
37
+ If you're running from a local OpenClaw checkout, use:
36
38
 
37
39
  ```bash
38
- git clone https://github.com/Martian-Engineering/lossless-claw.git
39
- cd lossless-claw
40
- npm install
40
+ pnpm openclaw plugins install @martian-engineering/lossless-claw
41
41
  ```
42
42
 
43
- ### Configure OpenClaw
44
-
45
- Add the plugin to your OpenClaw config (`~/.openclaw/openclaw.json`):
43
+ For local plugin development, link your working copy instead of copying files:
46
44
 
47
- ```json
48
- {
49
- "plugins": {
50
- "paths": [
51
- "node_modules/@martian-engineering/lossless-claw"
52
- ],
53
- "slots": {
54
- "contextEngine": "lossless-claw"
55
- }
56
- }
57
- }
45
+ ```bash
46
+ openclaw plugins install --link /path/to/lossless-claw
47
+ # or from a local OpenClaw checkout:
48
+ # pnpm openclaw plugins install --link /path/to/lossless-claw
58
49
  ```
59
50
 
60
- If installed from source, use the absolute path to the cloned repo instead:
51
+ The install command records the plugin, enables it, and applies compatible slot selection (including `contextEngine` when applicable).
52
+
53
+ ### Configure OpenClaw
54
+
55
+ In most cases, no manual JSON edits are needed after `openclaw plugins install`.
56
+
57
+ If you need to set it manually, ensure the context engine slot points at lossless-claw:
61
58
 
62
59
  ```json
63
60
  {
64
61
  "plugins": {
65
- "paths": [
66
- "/path/to/lossless-claw"
67
- ],
68
62
  "slots": {
69
63
  "contextEngine": "lossless-claw"
70
64
  }
@@ -72,8 +66,6 @@ If installed from source, use the absolute path to the cloned repo instead:
72
66
  }
73
67
  ```
74
68
 
75
- The `slots.contextEngine` setting tells OpenClaw to route all context management through LCM instead of the built-in legacy engine.
76
-
77
69
  Restart OpenClaw after configuration changes.
78
70
 
79
71
  ## Configuration
@@ -67,8 +67,10 @@ The **leaf pass** converts raw messages into leaf summaries:
67
67
  3. Concatenate message content with timestamps.
68
68
  4. Resolve the most recent prior summary for continuity (passed as `previous_context` so the LLM avoids repeating known information).
69
69
  5. Send to the LLM with the leaf prompt.
70
- 6. If the summary is larger than the input (LLM failure), retry with the aggressive prompt. If still too large, fall back to deterministic truncation.
71
- 7. Persist the summary, link to source messages, and replace the message range in context_items.
70
+ 6. Normalize provider response blocks (Anthropic/OpenAI text, output_text, and nested content/summary shapes) into plain text.
71
+ 7. If normalization is empty, log provider/model/block-type diagnostics and fall back to deterministic truncation.
72
+ 8. If the summary is larger than the input (LLM failure), retry with the aggressive prompt. If still too large, fall back to deterministic truncation.
73
+ 9. Persist the summary, link to source messages, and replace the message range in context_items.
72
74
 
73
75
  ### Condensation
74
76
 
@@ -215,8 +217,8 @@ All mutating operations (ingest, compact) are serialized per-session using a pro
215
217
 
216
218
  LCM needs to call an LLM for summarization. It resolves credentials through a three-tier cascade:
217
219
 
218
- 1. **Explicit API key** — If provided in legacy params
220
+ 1. **Auth profiles** — OpenClaw's OAuth/token/API-key profile system (`auth-profiles.json`), checked in priority order
219
221
  2. **Environment variables** — Standard provider env vars (`ANTHROPIC_API_KEY`, etc.)
220
- 3. **Auth profiles** — OpenClaw's OAuth/token/API-key profile system (`auth-profiles.json`)
222
+ 3. **Custom provider key** — From models config (e.g., `models.json`)
221
223
 
222
224
  For OAuth providers (e.g., Anthropic via Claude Max), LCM handles token refresh and credential persistence automatically.
@@ -2,24 +2,25 @@
2
2
 
3
3
  ## Quick start
4
4
 
5
- Install the plugin and add it to your OpenClaw config:
5
+ Install the plugin with OpenClaw's plugin installer:
6
6
 
7
7
  ```bash
8
- npm install @martian-engineering/lossless-claw
8
+ openclaw plugins install @martian-engineering/lossless-claw
9
9
  ```
10
10
 
11
- ```json
12
- {
13
- "plugins": {
14
- "paths": ["node_modules/@martian-engineering/lossless-claw"],
15
- "slots": {
16
- "contextEngine": "lossless-claw"
17
- }
18
- }
19
- }
11
+ If you're running from a local OpenClaw checkout:
12
+
13
+ ```bash
14
+ pnpm openclaw plugins install @martian-engineering/lossless-claw
15
+ ```
16
+
17
+ For local development of this plugin, link your working copy:
18
+
19
+ ```bash
20
+ openclaw plugins install --link /path/to/lossless-claw
20
21
  ```
21
22
 
22
- If installed from source, use the absolute path to the repo instead of `node_modules/...`.
23
+ `openclaw plugins install` handles plugin registration/enabling and slot selection automatically.
23
24
 
24
25
  Set recommended environment variables:
25
26
 
package/docs/tui.md CHANGED
@@ -176,7 +176,7 @@ Lists files that exceeded the large file threshold (default 25k tokens) and were
176
176
  Re-summarizes a single summary node using the current depth-aware prompt templates. The process:
177
177
 
178
178
  1. **Preview** — shows the prompt that will be sent, including source material, target token count, previous context, and time range
179
- 2. **API call** — sends to Anthropic's API (Claude Sonnet by default)
179
+ 2. **API call** — sends to the configured provider API (Anthropic by default)
180
180
  3. **Review** — shows old and new content side-by-side with token delta. Toggle unified diff view with `d`. Scroll with `j`/`k`.
181
181
 
182
182
  | Key (Preview) | Action |
@@ -280,6 +280,9 @@ lcm-tui rewrite 44 --depth 0 --apply
280
280
  # Rewrite everything bottom-up
281
281
  lcm-tui rewrite 44 --all --apply --diff
282
282
 
283
+ # Rewrite with OpenAI Responses API
284
+ lcm-tui rewrite 44 --summary sum_abc123 --provider openai --model gpt-5.3-codex --apply
285
+
283
286
  # Use custom prompt templates
284
287
  lcm-tui rewrite 44 --all --apply --prompt-dir ~/.config/lcm-tui/prompts
285
288
  ```
@@ -292,7 +295,8 @@ lcm-tui rewrite 44 --all --apply --prompt-dir ~/.config/lcm-tui/prompts
292
295
  | `--apply` | Write changes to database |
293
296
  | `--dry-run` | Show before/after without writing (default) |
294
297
  | `--diff` | Show unified diff |
295
- | `--model <model>` | Anthropic model (default: `claude-sonnet-4-20250514`) |
298
+ | `--provider <id>` | API provider (inferred from `--model` when omitted) |
299
+ | `--model <model>` | API model (default depends on provider) |
296
300
  | `--prompt-dir <path>` | Custom prompt template directory |
297
301
  | `--timestamps` | Inject timestamps into source text (default: true) |
298
302
  | `--tz <timezone>` | Timezone for timestamps (default: system local) |
@@ -348,6 +352,56 @@ Everything runs in a single transaction.
348
352
  | `--apply` | Execute transplant |
349
353
  | `--dry-run` | Show what would be transplanted (default) |
350
354
 
355
+ ### `lcm-tui backfill`
356
+
357
+ Imports a pre-LCM JSONL session into `conversations/messages/context_items`, runs iterative depth-aware compaction with the configured provider and prompt templates, optionally forces a single-root fold, and can transplant the result to another conversation.
358
+
359
+ ```bash
360
+ # Preview import + compaction plan (no writes)
361
+ lcm-tui backfill my-agent session_abc123
362
+
363
+ # Import + compact
364
+ lcm-tui backfill my-agent session_abc123 --apply
365
+
366
+ # Re-run compaction for an already-imported session
367
+ lcm-tui backfill my-agent session_abc123 --apply --recompact
368
+
369
+ # Force a single summary root when possible
370
+ lcm-tui backfill my-agent session_abc123 --apply --recompact --single-root
371
+
372
+ # Import + compact + transplant into an active conversation
373
+ lcm-tui backfill my-agent session_abc123 --apply --transplant-to 653
374
+
375
+ # Backfill using OpenAI
376
+ lcm-tui backfill my-agent session_abc123 --apply --provider openai --model gpt-5.3-codex
377
+ ```
378
+
379
+ All write paths are transactional:
380
+ 1. Import transaction (conversation/messages/message_parts/context)
381
+ 2. Per-pass compaction transactions (leaf/condensed replacements)
382
+ 3. Optional transplant transaction (reuse of transplant command internals)
383
+
384
+ An idempotency guard prevents duplicate imports for the same `session_id`.
385
+
386
+ | Flag | Description |
387
+ |------|-------------|
388
+ | `--apply` | Execute import/compaction/transplant |
389
+ | `--dry-run` | Show what would run, without writes (default) |
390
+ | `--recompact` | Re-run compaction for already-imported sessions (message import remains idempotent) |
391
+ | `--single-root` | Force condensed folding until one summary remains when possible |
392
+ | `--transplant-to <conv_id>` | Transplant backfilled summaries into target conversation |
393
+ | `--title <text>` | Override imported conversation title |
394
+ | `--leaf-chunk-tokens <n>` | Max source tokens per leaf chunk |
395
+ | `--leaf-target-tokens <n>` | Target output tokens for leaf summaries |
396
+ | `--condensed-target-tokens <n>` | Target output tokens for condensed summaries |
397
+ | `--leaf-fanout <n>` | Min leaves required for d1 condensation |
398
+ | `--condensed-fanout <n>` | Min summaries required for d2+ condensation |
399
+ | `--hard-fanout <n>` | Min summaries for forced single-root passes |
400
+ | `--fresh-tail <n>` | Preserve freshest N raw messages from leaf compaction |
401
+ | `--provider <id>` | API provider (inferred from model when omitted) |
402
+ | `--model <id>` | API model (default depends on provider) |
403
+ | `--prompt-dir <path>` | Custom depth-prompt directory |
404
+
351
405
  ### `lcm-tui prompts`
352
406
 
353
407
  Manage and inspect depth-aware prompt templates. Templates control how the LLM summarizes at each depth level.
@@ -404,21 +458,31 @@ All templates end with an `"Expand for details about:"` footer listing topics av
404
458
 
405
459
  ## Authentication
406
460
 
407
- The TUI needs an Anthropic API key for rewrite and repair operations. It resolves credentials in this order:
461
+ The TUI resolves API keys by provider for rewrite, repair, and backfill compaction operations.
462
+
463
+ - Anthropic: `ANTHROPIC_API_KEY`
464
+ - OpenAI: `OPENAI_API_KEY`
408
465
 
409
- 1. `ANTHROPIC_API_KEY` environment variable
410
- 2. OpenClaw config (`~/.openclaw/openclaw.json`) reads the `anthropic:default` auth profile mode
466
+ Resolution order:
467
+ 1. Provider API key environment variable
468
+ 2. OpenClaw config (`~/.openclaw/openclaw.json`) — checks the matching provider's auth profile mode
411
469
  3. OpenClaw env file
412
470
  4. `~/.zshrc` export
413
- 5. Various credential file candidates under `~/.openclaw/`
471
+ 5. Credential file candidates under `~/.openclaw/`
472
+
473
+ If the provider auth profile mode is `oauth` (not `api_key`), the TUI cannot use that profile; set the provider API key environment variable explicitly.
474
+
475
+ Interactive rewrite (`w`/`W`) can be configured with:
476
+ - `LCM_TUI_SUMMARY_PROVIDER`
477
+ - `LCM_TUI_SUMMARY_MODEL`
414
478
 
415
- If the auth profile mode is `oauth` (not `api_key`), the TUI cannot use it — set `ANTHROPIC_API_KEY` explicitly for repair/rewrite commands.
479
+ It also honors `LCM_SUMMARY_PROVIDER` / `LCM_SUMMARY_MODEL` as fallbacks.
416
480
 
417
481
  ## Database
418
482
 
419
- The TUI operates directly on the SQLite database at `~/.openclaw/lcm.db`. All write operations (rewrite, dissolve, repair, transplant) use transactions. Changes take effect on the next conversation turn — the running OpenClaw instance picks up database changes automatically.
483
+ The TUI operates directly on the SQLite database at `~/.openclaw/lcm.db`. All write operations (rewrite, dissolve, repair, transplant, backfill) use transactions. Changes take effect on the next conversation turn — the running OpenClaw instance picks up database changes automatically.
420
484
 
421
- **Backup recommendation:** Before batch operations (repair `--all`, rewrite `--all`, transplant), copy the database:
485
+ **Backup recommendation:** Before batch operations (repair `--all`, rewrite `--all`, transplant, backfill), copy the database:
422
486
 
423
487
  ```bash
424
488
  cp ~/.openclaw/lcm.db ~/.openclaw/lcm.db.bak-$(date +%Y%m%d)
@@ -428,7 +492,7 @@ cp ~/.openclaw/lcm.db ~/.openclaw/lcm.db.bak-$(date +%Y%m%d)
428
492
 
429
493
  **"No LCM summaries found"** — The session may not have an associated conversation in the LCM database. Check that the `conv_id` column shows a non-zero value in the session list. Sessions without LCM tracking won't have summaries.
430
494
 
431
- **Rewrite returns empty/bad content** — Check the API key is valid and the model is accessible. The TUI uses `claude-sonnet-4-20250514` by default; override with `--model` if needed.
495
+ **Rewrite returns empty/bad content** — Check provider/model access and the API key. If normalization still yields empty text, the TUI now returns diagnostics including `provider`, `model`, and response `block_types` to help pinpoint adapter mismatches.
432
496
 
433
497
  **Dissolve fails with "not condensed"** — Only condensed summaries (depth > 0) can be dissolved. Leaf summaries have no parent summaries to restore.
434
498
 
package/index.ts CHANGED
@@ -39,8 +39,29 @@ function normalizeAgentId(agentId: string | undefined): string {
39
39
  return normalized.length > 0 ? normalized : "main";
40
40
  }
41
41
 
42
+ type PluginEnvSnapshot = {
43
+ lcmSummaryModel: string;
44
+ lcmSummaryProvider: string;
45
+ openclawProvider: string;
46
+ agentDir: string;
47
+ home: string;
48
+ };
49
+
50
+ type ReadEnvFn = (key: string) => string | undefined;
51
+
52
+ /** Capture plugin env values once during initialization. */
53
+ function snapshotPluginEnv(env: NodeJS.ProcessEnv = process.env): PluginEnvSnapshot {
54
+ return {
55
+ lcmSummaryModel: env.LCM_SUMMARY_MODEL?.trim() ?? "",
56
+ lcmSummaryProvider: env.LCM_SUMMARY_PROVIDER?.trim() ?? "",
57
+ openclawProvider: env.OPENCLAW_PROVIDER?.trim() ?? "",
58
+ agentDir: env.OPENCLAW_AGENT_DIR?.trim() || env.PI_CODING_AGENT_DIR?.trim() || "",
59
+ home: env.HOME?.trim() ?? "",
60
+ };
61
+ }
62
+
42
63
  /** Resolve common provider API keys from environment. */
43
- function resolveApiKey(provider: string): string | undefined {
64
+ function resolveApiKey(provider: string, readEnv: ReadEnvFn): string | undefined {
44
65
  const keyMap: Record<string, string[]> = {
45
66
  openai: ["OPENAI_API_KEY"],
46
67
  anthropic: ["ANTHROPIC_API_KEY"],
@@ -59,7 +80,7 @@ function resolveApiKey(provider: string): string | undefined {
59
80
  keys.push(normalizedProviderEnv);
60
81
 
61
82
  for (const key of keys) {
62
- const value = process.env[key]?.trim();
83
+ const value = readEnv(key)?.trim();
63
84
  if (value) {
64
85
  return value;
65
86
  }
@@ -255,19 +276,19 @@ function mergeAuthProfileStores(stores: AuthProfileStore[]): AuthProfileStore |
255
276
  }
256
277
 
257
278
  /** Determine candidate auth store paths ordered by precedence. */
258
- function resolveAuthStorePaths(agentDir?: string): string[] {
279
+ function resolveAuthStorePaths(params: { agentDir?: string; envSnapshot: PluginEnvSnapshot }): string[] {
259
280
  const paths: string[] = [];
260
- const directAgentDir = agentDir?.trim();
281
+ const directAgentDir = params.agentDir?.trim();
261
282
  if (directAgentDir) {
262
283
  paths.push(join(directAgentDir, "auth-profiles.json"));
263
284
  }
264
285
 
265
- const envAgentDir = process.env.OPENCLAW_AGENT_DIR?.trim() || process.env.PI_CODING_AGENT_DIR?.trim();
286
+ const envAgentDir = params.envSnapshot.agentDir;
266
287
  if (envAgentDir) {
267
288
  paths.push(join(envAgentDir, "auth-profiles.json"));
268
289
  }
269
290
 
270
- const home = process.env.HOME?.trim();
291
+ const home = params.envSnapshot.home;
271
292
  if (home) {
272
293
  paths.push(join(home, ".openclaw", "agents", "main", "agent", "auth-profiles.json"));
273
294
  }
@@ -334,8 +355,12 @@ async function resolveApiKeyFromAuthProfiles(params: {
334
355
  agentDir?: string;
335
356
  runtimeConfig?: unknown;
336
357
  piAiModule: PiAiModule;
358
+ envSnapshot: PluginEnvSnapshot;
337
359
  }): Promise<string | undefined> {
338
- const storesWithPaths = resolveAuthStorePaths(params.agentDir)
360
+ const storesWithPaths = resolveAuthStorePaths({
361
+ agentDir: params.agentDir,
362
+ envSnapshot: params.envSnapshot,
363
+ })
339
364
  .map((path) => {
340
365
  try {
341
366
  const parsed = parseAuthProfileStore(readFileSync(path, "utf8"));
@@ -526,7 +551,9 @@ function readLatestAssistantReply(messages: unknown[]): string | undefined {
526
551
 
527
552
  /** Construct LCM dependencies from plugin API/runtime surfaces. */
528
553
  function createLcmDependencies(api: OpenClawPluginApi): LcmDependencies {
529
- const config = resolveLcmConfig(process.env);
554
+ const envSnapshot = snapshotPluginEnv();
555
+ const readEnv: ReadEnvFn = (key) => process.env[key];
556
+ const config = resolveLcmConfig();
530
557
 
531
558
  return {
532
559
  config,
@@ -602,7 +629,7 @@ function createLcmDependencies(api: OpenClawPluginApi): LcmDependencies {
602
629
  maxTokens: 8_000,
603
630
  };
604
631
 
605
- let resolvedApiKey = apiKey?.trim() || resolveApiKey(providerId);
632
+ let resolvedApiKey = apiKey?.trim() || resolveApiKey(providerId, readEnv);
606
633
  if (!resolvedApiKey && typeof mod.getEnvApiKey === "function") {
607
634
  resolvedApiKey = mod.getEnvApiKey(providerId)?.trim();
608
635
  }
@@ -613,6 +640,7 @@ function createLcmDependencies(api: OpenClawPluginApi): LcmDependencies {
613
640
  agentDir,
614
641
  runtimeConfig,
615
642
  piAiModule: mod,
643
+ envSnapshot,
616
644
  });
617
645
  }
618
646
 
@@ -673,7 +701,7 @@ function createLcmDependencies(api: OpenClawPluginApi): LcmDependencies {
673
701
  }
674
702
  },
675
703
  resolveModel: (modelRef, providerHint) => {
676
- const raw = (modelRef ?? process.env.LCM_SUMMARY_MODEL ?? "").trim();
704
+ const raw = (modelRef ?? envSnapshot.lcmSummaryModel).trim();
677
705
  if (!raw) {
678
706
  throw new Error("No model configured for LCM summarization.");
679
707
  }
@@ -688,15 +716,15 @@ function createLcmDependencies(api: OpenClawPluginApi): LcmDependencies {
688
716
 
689
717
  const provider = (
690
718
  providerHint?.trim() ||
691
- process.env.LCM_SUMMARY_PROVIDER ||
692
- process.env.OPENCLAW_PROVIDER ||
719
+ envSnapshot.lcmSummaryProvider ||
720
+ envSnapshot.openclawProvider ||
693
721
  "openai"
694
722
  ).trim();
695
723
  return { provider, model: raw };
696
724
  },
697
- getApiKey: (provider) => resolveApiKey(provider),
725
+ getApiKey: (provider) => resolveApiKey(provider, readEnv),
698
726
  requireApiKey: (provider) => {
699
- const key = resolveApiKey(provider);
727
+ const key = resolveApiKey(provider, readEnv);
700
728
  if (!key) {
701
729
  throw new Error(`Missing API key for provider '${provider}'.`);
702
730
  }
@@ -756,7 +784,7 @@ const lcmPlugin = {
756
784
  ? (value as Record<string, unknown>)
757
785
  : {};
758
786
  const enabled = typeof raw.enabled === "boolean" ? raw.enabled : undefined;
759
- const config = resolveLcmConfig(process.env);
787
+ const config = resolveLcmConfig();
760
788
  if (enabled !== undefined) {
761
789
  config.enabled = enabled;
762
790
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@martian-engineering/lossless-claw",
3
- "version": "0.1.1",
3
+ "version": "0.1.4",
4
4
  "description": "Lossless Context Management plugin for OpenClaw — DAG-based conversation summarization with incremental compaction",
5
5
  "type": "module",
6
6
  "main": "index.ts",
package/src/db/config.ts CHANGED
@@ -15,6 +15,10 @@ export type LcmConfig = {
15
15
  condensedTargetTokens: number;
16
16
  maxExpandTokens: number;
17
17
  largeFileTokenThreshold: number;
18
+ /** Provider override for large-file text summarization. */
19
+ largeFileSummaryProvider: string;
20
+ /** Model override for large-file text summarization. */
21
+ largeFileSummaryModel: string;
18
22
  autocompactDisabled: boolean;
19
23
  /** IANA timezone for timestamps in summaries (from TZ env or system default) */
20
24
  timezone: string;
@@ -37,6 +41,8 @@ export function resolveLcmConfig(env: NodeJS.ProcessEnv = process.env): LcmConfi
37
41
  condensedTargetTokens: parseInt(env.LCM_CONDENSED_TARGET_TOKENS ?? "2000", 10),
38
42
  maxExpandTokens: parseInt(env.LCM_MAX_EXPAND_TOKENS ?? "4000", 10),
39
43
  largeFileTokenThreshold: parseInt(env.LCM_LARGE_FILE_TOKEN_THRESHOLD ?? "25000", 10),
44
+ largeFileSummaryProvider: env.LCM_LARGE_FILE_SUMMARY_PROVIDER?.trim() ?? "",
45
+ largeFileSummaryModel: env.LCM_LARGE_FILE_SUMMARY_MODEL?.trim() ?? "",
40
46
  autocompactDisabled: env.LCM_AUTOCOMPACT_DISABLED === "true",
41
47
  timezone: env.TZ ?? Intl.DateTimeFormat().resolvedOptions().timeZone,
42
48
  pruneHeartbeatOk: env.LCM_PRUNE_HEARTBEAT_OK === "true",
package/src/engine.ts CHANGED
@@ -681,8 +681,8 @@ export class LcmContextEngine implements ContextEngine {
681
681
  }
682
682
  this.largeFileTextSummarizerResolved = true;
683
683
 
684
- const provider = process.env.LCM_LARGE_FILE_SUMMARY_PROVIDER?.trim() ?? "";
685
- const model = process.env.LCM_LARGE_FILE_SUMMARY_MODEL?.trim() ?? "";
684
+ const provider = this.deps.config.largeFileSummaryProvider;
685
+ const model = this.deps.config.largeFileSummaryModel;
686
686
  if (!provider || !model) {
687
687
  return undefined;
688
688
  }
package/src/summarize.ts CHANGED
@@ -78,13 +78,119 @@ function estimateTokens(text: string): number {
78
78
  return Math.ceil(text.length / 4);
79
79
  }
80
80
 
81
- /** Narrows completion response blocks to plain text blocks. */
82
- function isTextBlock(block: unknown): block is { type: string; text: string } {
83
- if (!block || typeof block !== "object" || Array.isArray(block)) {
84
- return false;
81
+ /** Narrow unknown values to plain object records. */
82
+ function isRecord(value: unknown): value is Record<string, unknown> {
83
+ return !!value && typeof value === "object" && !Array.isArray(value);
84
+ }
85
+
86
+ /**
87
+ * Normalize text fragments from provider-specific block shapes.
88
+ *
89
+ * Deduplicates exact repeated fragments while preserving first-seen order so
90
+ * providers that mirror output in multiple fields don't duplicate summaries.
91
+ */
92
+ function normalizeTextFragments(chunks: string[]): string {
93
+ const normalized: string[] = [];
94
+ const seen = new Set<string>();
95
+
96
+ for (const chunk of chunks) {
97
+ const trimmed = chunk.trim();
98
+ if (!trimmed || seen.has(trimmed)) {
99
+ continue;
100
+ }
101
+ seen.add(trimmed);
102
+ normalized.push(trimmed);
103
+ }
104
+ return normalized.join("\n").trim();
105
+ }
106
+
107
+ /** Collect all nested `type` labels for diagnostics on normalization failures. */
108
+ function collectBlockTypes(value: unknown, out: Set<string>): void {
109
+ if (Array.isArray(value)) {
110
+ for (const entry of value) {
111
+ collectBlockTypes(entry, out);
112
+ }
113
+ return;
114
+ }
115
+ if (!isRecord(value)) {
116
+ return;
117
+ }
118
+
119
+ if (typeof value.type === "string" && value.type.trim()) {
120
+ out.add(value.type.trim());
121
+ }
122
+ for (const nested of Object.values(value)) {
123
+ collectBlockTypes(nested, out);
124
+ }
125
+ }
126
+
127
+ /** Collect text payloads from common provider response shapes. */
128
+ function collectTextLikeFields(value: unknown, out: string[]): void {
129
+ if (Array.isArray(value)) {
130
+ for (const entry of value) {
131
+ collectTextLikeFields(entry, out);
132
+ }
133
+ return;
134
+ }
135
+ if (!isRecord(value)) {
136
+ return;
137
+ }
138
+
139
+ for (const key of ["text", "output_text", "thinking"]) {
140
+ appendTextValue(value[key], out);
141
+ }
142
+ for (const key of ["content", "summary", "output", "message", "response"]) {
143
+ if (key in value) {
144
+ collectTextLikeFields(value[key], out);
145
+ }
146
+ }
147
+ }
148
+
149
+ /** Append raw textual values and nested text wrappers (`value`, `text`). */
150
+ function appendTextValue(value: unknown, out: string[]): void {
151
+ if (typeof value === "string") {
152
+ out.push(value);
153
+ return;
154
+ }
155
+ if (Array.isArray(value)) {
156
+ for (const entry of value) {
157
+ appendTextValue(entry, out);
158
+ }
159
+ return;
160
+ }
161
+ if (!isRecord(value)) {
162
+ return;
163
+ }
164
+
165
+ if (typeof value.value === "string") {
166
+ out.push(value.value);
167
+ }
168
+ if (typeof value.text === "string") {
169
+ out.push(value.text);
170
+ }
171
+ }
172
+
173
+ /** Normalize provider completion content into a plain-text summary payload. */
174
+ function normalizeCompletionSummary(content: unknown): { summary: string; blockTypes: string[] } {
175
+ const chunks: string[] = [];
176
+ const blockTypeSet = new Set<string>();
177
+
178
+ collectTextLikeFields(content, chunks);
179
+ collectBlockTypes(content, blockTypeSet);
180
+
181
+ const blockTypes = [...blockTypeSet].sort((a, b) => a.localeCompare(b));
182
+ return {
183
+ summary: normalizeTextFragments(chunks),
184
+ blockTypes,
185
+ };
186
+ }
187
+
188
+ /** Format normalized block types for concise diagnostics. */
189
+ function formatBlockTypes(blockTypes: string[]): string {
190
+ if (blockTypes.length === 0) {
191
+ return "(none)";
85
192
  }
86
- const record = block as { type?: unknown; text?: unknown };
87
- return record.type === "text" && typeof record.text === "string";
193
+ return blockTypes.join(",");
88
194
  }
89
195
 
90
196
  /**
@@ -426,15 +532,15 @@ export async function createLcmSummarizeFromLegacyParams(params: {
426
532
  temperature: aggressive ? 0.1 : 0.2,
427
533
  });
428
534
 
429
- const summary = result.content
430
- .filter(isTextBlock)
431
- .map((block) => block.text.trim())
432
- .filter(Boolean)
433
- .join("\n")
434
- .trim();
535
+ const normalized = normalizeCompletionSummary(result.content);
536
+ const summary = normalized.summary;
435
537
 
436
538
  if (!summary) {
437
- console.error(`[lcm] summarize got empty content from LLM (${result.content.length} blocks, types: ${result.content.map(b => b.type).join(",")}), falling back to truncation`);
539
+ console.error(
540
+ `[lcm] summarize empty normalized summary; provider=${provider} model=${model} block_types=${formatBlockTypes(
541
+ normalized.blockTypes,
542
+ )}; response_blocks=${result.content.length}; falling back to truncation`,
543
+ );
438
544
  return buildDeterministicFallbackSummary(text, targetTokens);
439
545
  }
440
546