@vectorize-io/hindsight-openclaw 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +147 -18
- package/dist/backfill-lib.d.ts +63 -0
- package/dist/backfill-lib.js +201 -0
- package/dist/backfill.d.ts +22 -0
- package/dist/backfill.js +473 -0
- package/dist/index.d.ts +49 -2
- package/dist/index.js +612 -344
- package/dist/retain-queue.d.ts +54 -0
- package/dist/retain-queue.js +105 -0
- package/dist/session-patterns.d.ts +10 -0
- package/dist/session-patterns.js +21 -0
- package/dist/setup-lib.d.ts +80 -0
- package/dist/setup-lib.js +134 -0
- package/dist/setup.d.ts +34 -0
- package/dist/setup.js +425 -0
- package/dist/types.d.ts +40 -40
- package/openclaw.plugin.json +110 -10
- package/package.json +13 -5
- package/dist/client.d.ts +0 -34
- package/dist/client.js +0 -215
- package/dist/embed-manager.d.ts +0 -27
- package/dist/embed-manager.js +0 -210
package/README.md
CHANGED
|
@@ -5,30 +5,72 @@ Biomimetic long-term memory for [OpenClaw](https://openclaw.ai) using [Hindsight
|
|
|
5
5
|
## Quick Start
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
|
-
# 1.
|
|
9
|
-
# Option A: OpenAI
|
|
10
|
-
export OPENAI_API_KEY="sk-your-key"
|
|
11
|
-
|
|
12
|
-
# Option B: Claude Code (no API key needed)
|
|
13
|
-
export HINDSIGHT_API_LLM_PROVIDER=claude-code
|
|
14
|
-
|
|
15
|
-
# Option C: OpenAI Codex (no API key needed)
|
|
16
|
-
export HINDSIGHT_API_LLM_PROVIDER=openai-codex
|
|
17
|
-
|
|
18
|
-
# 2. Install and enable the plugin
|
|
8
|
+
# 1. Install the plugin
|
|
19
9
|
openclaw plugins install @vectorize-io/hindsight-openclaw
|
|
20
10
|
|
|
11
|
+
# 2. Run the interactive setup wizard
|
|
12
|
+
npx --package @vectorize-io/hindsight-openclaw hindsight-openclaw-setup
|
|
13
|
+
|
|
21
14
|
# 3. Start OpenClaw
|
|
22
15
|
openclaw gateway
|
|
23
16
|
```
|
|
24
17
|
|
|
25
|
-
|
|
18
|
+
`hindsight-openclaw-setup` walks you through picking one of three modes:
|
|
19
|
+
|
|
20
|
+
- **Cloud** — managed Hindsight. Pick the environment variable that holds your API token and you're done.
|
|
21
|
+
- **External API** — your own running Hindsight deployment. Prompts for the URL and optional token.
|
|
22
|
+
- **Embedded daemon** — spawns a local `hindsight-embed` daemon on this machine. Prompts for the LLM provider (OpenAI / Anthropic / Gemini / Groq / Claude Code / Codex / Ollama) and the env var that holds the API key.
|
|
23
|
+
|
|
24
|
+
The wizard always writes credentials as `SecretRef` objects that reference an environment variable — the key itself never ends up in plaintext on disk. OpenClaw also supports `--ref-source file` and `--ref-source exec` for mounted-secret and Vault-style setups; to use those, configure the field directly with `openclaw config set` (see below).
|
|
25
|
+
|
|
26
|
+
### Manual configuration (without the wizard)
|
|
27
|
+
|
|
28
|
+
The wizard is a convenience wrapper — all of the same fields can be set directly with `openclaw config set`:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
# Embedded daemon with OpenAI
|
|
32
|
+
openclaw config set plugins.entries.hindsight-openclaw.config.llmProvider openai
|
|
33
|
+
openclaw config set plugins.entries.hindsight-openclaw.config.llmApiKey \
|
|
34
|
+
--ref-source env --ref-provider default --ref-id OPENAI_API_KEY
|
|
35
|
+
|
|
36
|
+
# Or: Claude Code (no API key needed)
|
|
37
|
+
openclaw config set plugins.entries.hindsight-openclaw.config.llmProvider claude-code
|
|
38
|
+
|
|
39
|
+
# Or: point at an external Hindsight API
|
|
40
|
+
openclaw config set plugins.entries.hindsight-openclaw.config.hindsightApiUrl https://mcp.hindsight.example.com
|
|
41
|
+
openclaw config set plugins.entries.hindsight-openclaw.config.hindsightApiToken \
|
|
42
|
+
--ref-source env --ref-id HINDSIGHT_API_TOKEN
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Migrating from 0.5.x
|
|
46
|
+
|
|
47
|
+
0.6.0 removes all process-environment reads from the plugin. Configuration that
|
|
48
|
+
previously came from shell env vars must now go through OpenClaw's plugin config
|
|
49
|
+
(with SecretRef for credentials). Concrete mappings:
|
|
50
|
+
|
|
51
|
+
| Old (0.5.x) | New (0.6.0) |
|
|
52
|
+
|---|---|
|
|
53
|
+
| `OPENAI_API_KEY=…` (auto-detected) | `openclaw config set plugins.entries.hindsight-openclaw.config.llmProvider openai` <br> `openclaw config set plugins.entries.hindsight-openclaw.config.llmApiKey --ref-source env --ref-id OPENAI_API_KEY` |
|
|
54
|
+
| `HINDSIGHT_API_LLM_PROVIDER=…` | `openclaw config set plugins.entries.hindsight-openclaw.config.llmProvider …` |
|
|
55
|
+
| `HINDSIGHT_API_LLM_MODEL=…` | `openclaw config set plugins.entries.hindsight-openclaw.config.llmModel …` |
|
|
56
|
+
| `HINDSIGHT_API_LLM_API_KEY=…` | `openclaw config set plugins.entries.hindsight-openclaw.config.llmApiKey --ref-source env --ref-id …` |
|
|
57
|
+
| `HINDSIGHT_API_LLM_BASE_URL=…` | `openclaw config set plugins.entries.hindsight-openclaw.config.llmBaseUrl …` |
|
|
58
|
+
| `HINDSIGHT_EMBED_API_URL=…` | `openclaw config set plugins.entries.hindsight-openclaw.config.hindsightApiUrl …` |
|
|
59
|
+
| `HINDSIGHT_EMBED_API_TOKEN=…` | `openclaw config set plugins.entries.hindsight-openclaw.config.hindsightApiToken --ref-source env --ref-id …` |
|
|
60
|
+
| `HINDSIGHT_BANK_ID=…` | `openclaw config set plugins.entries.hindsight-openclaw.config.bankId …` |
|
|
61
|
+
| `llmApiKeyEnv: "MY_KEY"` (plugin config) | `llmApiKey` configured as a SecretRef with `--ref-id MY_KEY` |
|
|
62
|
+
|
|
63
|
+
If your shell already exports `OPENAI_API_KEY`, the SecretRef config above resolves
|
|
64
|
+
to the same value at startup — no need to change your shell setup, just point the
|
|
65
|
+
plugin at the variable explicitly. Run `openclaw config validate` after migrating
|
|
66
|
+
to confirm the new shape parses cleanly.
|
|
26
67
|
|
|
27
68
|
## Features
|
|
28
69
|
|
|
29
70
|
- **Auto-capture** and **auto-recall** of memories each turn, injected into system prompt space so recalled memories stay out of the visible chat transcript
|
|
30
71
|
- **Memory isolation** — configurable per agent, channel, user, or provider via `dynamicBankGranularity`
|
|
31
|
-
- **
|
|
72
|
+
- **Historical backfill CLI** — import prior OpenClaw session history into Hindsight using the active plugin bank-routing config by default
|
|
73
|
+
- **Retention controls** — choose which message roles to retain, toggle auto-retain on/off, and stamp retained documents with consistent tags/source metadata
|
|
32
74
|
|
|
33
75
|
## Configuration
|
|
34
76
|
|
|
@@ -42,13 +84,17 @@ Optional settings in `~/.openclaw/openclaw.json` under `plugins.entries.hindsigh
|
|
|
42
84
|
| `embedVersion` | `"latest"` | hindsight-embed version |
|
|
43
85
|
| `embedPackagePath` | — | Local path to `hindsight-embed` package for development |
|
|
44
86
|
| `bankMission` | — | Agent identity/purpose stored on the memory bank. Helps the engine understand context for better fact extraction. Set once per bank — not a recall prompt. |
|
|
45
|
-
| `llmProvider` |
|
|
46
|
-
| `llmModel` | provider default | LLM model
|
|
47
|
-
| `
|
|
87
|
+
| `llmProvider` | — | LLM provider for memory extraction (`openai`, `anthropic`, `gemini`, `groq`, `ollama`, `openai-codex`, `claude-code`). Required unless `hindsightApiUrl` is set. |
|
|
88
|
+
| `llmModel` | provider default | LLM model used with `llmProvider` |
|
|
89
|
+
| `llmApiKey` | — | API key for the LLM provider. **Sensitive** — set via `openclaw config set ... --ref-source env --ref-id OPENAI_API_KEY` to reference an env var (or `--ref-source file`/`exec` for mounted-secret/Vault sources). |
|
|
90
|
+
| `llmBaseUrl` | — | Optional base URL override for OpenAI-compatible providers (e.g. `https://openrouter.ai/api/v1`) |
|
|
48
91
|
| `dynamicBankId` | `true` | Enable per-context memory banks |
|
|
92
|
+
| `bankId` | — | Static bank ID used when `dynamicBankId` is `false`. |
|
|
49
93
|
| `bankIdPrefix` | — | Prefix for bank IDs (e.g. `"prod"`) |
|
|
94
|
+
| `retainTags` | `[]` | Tags applied to every retained document, useful for cross-agent/source labeling (e.g. `source_system:openclaw`, `agent:agentname`) |
|
|
95
|
+
| `retainSource` | `"openclaw"` | `source` value written into retained document metadata |
|
|
50
96
|
| `dynamicBankGranularity` | `["agent", "channel", "user"]` | Fields used to derive bank ID. Options: `agent`, `channel`, `user`, `provider` |
|
|
51
|
-
| `excludeProviders` | `[]` | Message providers to skip for recall/retain (e.g. `slack`, `telegram`, `discord`) |
|
|
97
|
+
| `excludeProviders` | `["heartbeat"]` | Message providers to skip for recall/retain (e.g. `heartbeat`, `slack`, `telegram`, `discord`) |
|
|
52
98
|
| `autoRecall` | `true` | Auto-inject memories before each turn. Set to `false` when the agent has its own recall tool. |
|
|
53
99
|
| `autoRetain` | `true` | Auto-retain conversations after each turn |
|
|
54
100
|
| `retainRoles` | `["user", "assistant"]` | Which message roles to retain. Options: `user`, `assistant`, `system`, `tool` |
|
|
@@ -63,7 +109,45 @@ Optional settings in `~/.openclaw/openclaw.json` under `plugins.entries.hindsigh
|
|
|
63
109
|
| `recallMaxQueryChars` | `800` | Maximum character length for the composed recall query before calling recall. |
|
|
64
110
|
| `recallPromptPreamble` | built-in string | Prompt text placed above recalled memories in the injected `<hindsight_memories>` system-context block. |
|
|
65
111
|
| `hindsightApiUrl` | — | External Hindsight API URL (skips local daemon) |
|
|
66
|
-
| `hindsightApiToken` | — | Auth token for external API |
|
|
112
|
+
| `hindsightApiToken` | — | Auth token for external API. **Sensitive** — set via `openclaw config set ... --ref-source env --ref-id HINDSIGHT_API_TOKEN`. |
|
|
113
|
+
| `ignoreSessionPatterns` | `[]` | Session key glob patterns to skip entirely — no recall, no retain (e.g. `["agent:*:cron:**"]`) |
|
|
114
|
+
| `statelessSessionPatterns` | `[]` | Session key glob patterns for read-only sessions — retain is always skipped; recall is skipped when `skipStatelessSessions` is `true` (e.g. `["agent:*:subagent:**", "agent:*:heartbeat:**"]`) |
|
|
115
|
+
| `skipStatelessSessions` | `true` | When `true`, sessions matching `statelessSessionPatterns` also skip recall. Set to `false` to allow recall but still skip retain. |
|
|
116
|
+
|
|
117
|
+
### Session pattern filtering
|
|
118
|
+
|
|
119
|
+
`ignoreSessionPatterns` and `statelessSessionPatterns` accept glob patterns matched against the session key (format: `agent:<agentId>:<type>:<uuid>`).
|
|
120
|
+
|
|
121
|
+
Glob syntax:
|
|
122
|
+
- `*` — matches any characters except `:` (single segment)
|
|
123
|
+
- `**` — matches anything including `:` (multiple segments)
|
|
124
|
+
|
|
125
|
+
| Pattern | Matches |
|
|
126
|
+
|---|---|
|
|
127
|
+
| `agent:*:cron:**` | All cron sessions for any agent |
|
|
128
|
+
| `agent:*:subagent:**` | All subagent sessions for any agent |
|
|
129
|
+
| `agent:main:**` | All sessions under the `main` agent |
|
|
130
|
+
|
|
131
|
+
**Difference between the two options:**
|
|
132
|
+
|
|
133
|
+
| | `ignoreSessionPatterns` | `statelessSessionPatterns` |
|
|
134
|
+
|---|---|---|
|
|
135
|
+
| Retain | Skipped | Always skipped |
|
|
136
|
+
| Recall | Skipped | Skipped only when `skipStatelessSessions: true` |
|
|
137
|
+
|
|
138
|
+
**Example config** — exclude cron jobs from memory entirely, allow subagents to read but not write memories:
|
|
139
|
+
|
|
140
|
+
```json
|
|
141
|
+
{
|
|
142
|
+
"ignoreSessionPatterns": ["agent:*:cron:**"],
|
|
143
|
+
"statelessSessionPatterns": ["agent:*:subagent:**"],
|
|
144
|
+
"skipStatelessSessions": false
|
|
145
|
+
}
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Retention details
|
|
149
|
+
|
|
150
|
+
Retained documents use stable session-scoped IDs like `openclaw:agent:agentname:discord:channel:123:turn:000001` (or `...:window:000002` for chunked retention), and include richer metadata such as `session_key`, `agent_id`, `provider`, `channel_id`, `thread_id`, `sender_id`, `turn_index`, and `retention_scope`.
|
|
67
151
|
|
|
68
152
|
## Documentation
|
|
69
153
|
|
|
@@ -105,6 +189,51 @@ tail -f ~/.hindsight/profiles/openclaw.log
|
|
|
105
189
|
uvx hindsight-embed@latest profile list
|
|
106
190
|
```
|
|
107
191
|
|
|
192
|
+
## Backfilling Existing OpenClaw History
|
|
193
|
+
|
|
194
|
+
The package includes a config-aware backfill CLI for importing historical OpenClaw sessions into Hindsight.
|
|
195
|
+
|
|
196
|
+
By default it mirrors the active plugin settings for:
|
|
197
|
+
|
|
198
|
+
- `dynamicBankId`
|
|
199
|
+
- `dynamicBankGranularity`
|
|
200
|
+
- `bankIdPrefix`
|
|
201
|
+
- local daemon vs external `hindsightApiUrl`
|
|
202
|
+
|
|
203
|
+
Dry-run example:
|
|
204
|
+
|
|
205
|
+
```bash
|
|
206
|
+
npx --package @vectorize-io/hindsight-openclaw hindsight-openclaw-backfill \
|
|
207
|
+
--openclaw-root ~/.openclaw \
|
|
208
|
+
--dry-run
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Direct invocation from a built checkout:
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
node dist/backfill.js --openclaw-root ~/.openclaw --dry-run
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
Overrides intended for migrations must be passed explicitly:
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
node dist/backfill.js \
|
|
221
|
+
--openclaw-root ~/.openclaw \
|
|
222
|
+
--bank-strategy agent \
|
|
223
|
+
--agent proj-run \
|
|
224
|
+
--resume \
|
|
225
|
+
--max-pending-operations 10
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
Useful options:
|
|
229
|
+
|
|
230
|
+
- `--agent <id>` limit import to selected agents
|
|
231
|
+
- `--exclude-archive` ignore `sessions-archive-from-migration_backup`
|
|
232
|
+
- `--bank-strategy mirror-config|agent|fixed`
|
|
233
|
+
- `--resume` skip only entries already finalized as completed
|
|
234
|
+
- `--checkpoint <path>` store progress outside the default location
|
|
235
|
+
- `--wait-until-drained` block until the touched bank queues have finished and checkpoint state can be finalized
|
|
236
|
+
|
|
108
237
|
## Links
|
|
109
238
|
|
|
110
239
|
- [Hindsight Documentation](https://vectorize.io/hindsight)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import type { PluginConfig } from './types.js';
|
|
2
|
+
/**
 * Options that control how a backfill plan is assembled.
 */
export interface BackfillCliOptions {
    /** OpenClaw root directory; session files are discovered under `<root>/agents/<agentId>/`. */
    openclawRoot: string;
    /** When `true`, each agent's `sessions-archive-from-migration_backup` directory is scanned too. */
    includeArchive: boolean;
    /** When provided, only sessions belonging to these agent IDs are planned. */
    selectedAgents?: Set<string>;
    /** Planning stops once this many entries have been collected; a falsy value means no limit. */
    limit?: number;
    /**
     * How the target bank is chosen: `'agent'` uses the agent ID, `'fixed'` uses
     * `fixedBank`, and `'mirror-config'` derives the bank from the plugin config.
     */
    bankStrategy: 'mirror-config' | 'agent' | 'fixed';
    /** Target bank for the `'fixed'` strategy; planning throws if it is missing in that mode. */
    fixedBank?: string;
}
|
|
10
|
+
/**
 * A single conversation message read from a session file.
 */
export interface SessionMessage {
    /** Speaker of the message; records with any other role are dropped while parsing. */
    role: 'user' | 'assistant' | 'system' | 'tool';
    /** Either the raw message string or a list of content blocks (text blocks carry `type: 'text'`). */
    content: string | {
        type?: string;
        text?: string;
    }[];
}
|
|
17
|
+
/**
 * Result of parsing one session `.jsonl` file.
 */
export interface ParsedSessionFile {
    /** Path of the file that was parsed, exactly as it was passed in. */
    filePath: string;
    /** Agent the session belongs to (taken from the directory it was discovered in). */
    agentId: string;
    /** `id` of the file's `session` record, falling back to the file name without `.jsonl`. */
    sessionId: string;
    /** Session key taken from the `session` record or, failing that, from a message record. */
    sessionKey?: string;
    /** `timestamp` of the file's `session` record, when present. */
    startedAt?: string;
    /** Messages with a recognised role and non-blank text, in file order. */
    messages: SessionMessage[];
}
|
|
25
|
+
/**
 * One session that the backfill intends to import.
 */
export interface BackfillPlanEntry {
    /** Session file the entry was built from. */
    filePath: string;
    /** Agent that owns the session. */
    agentId: string;
    /** Session identifier resolved while parsing the file. */
    sessionId: string;
    /** Session start timestamp, when the file recorded one. */
    startedAt?: string;
    /** Bank the transcript is targeted at. */
    bankId: string;
    /** Stable document ID of the form `backfill::<bankId>::<agentId>::<sessionId>`. */
    documentId: string;
    /** Transcript text prepared for retention. */
    transcript: string;
    /** Number of messages reported for the prepared transcript. */
    messageCount: number;
}
|
|
35
|
+
/**
 * Progress record for a single planned entry, as stored in the checkpoint file.
 */
export interface BackfillCheckpointEntry {
    /** Progress state; a legacy `'queued'` value is rewritten to `'enqueued'` when a checkpoint is loaded. */
    status: 'enqueued' | 'completed' | 'failed';
    /** Bank the entry was targeted at. */
    bankId: string;
    /** Session file the entry came from. */
    filePath: string;
    /** Session identifier of the entry. */
    sessionId: string;
    /** NOTE(review): presumably an ISO timestamp of the last status change — confirm against the writer. */
    updatedAt: string;
    /** NOTE(review): presumably the failure message for `'failed'` entries — confirm against the writer. */
    error?: string;
}
|
|
43
|
+
/**
 * On-disk checkpoint document.
 */
export interface BackfillCheckpoint {
    /** Format version; files carrying any other value are treated as an empty checkpoint on load. */
    version: 1;
    /** Progress records keyed by `<bankId>::<documentId>`. */
    entries: Record<string, BackfillCheckpointEntry>;
}
|
|
47
|
+
/** Returns the absolute path of the `.openclaw` directory inside the current user's home directory. */
export declare function defaultOpenClawRoot(): string;
/** Returns `<openclawRoot>/data/hindsight-backfill-checkpoint.json`. */
export declare function defaultCheckpointPath(openclawRoot: string): string;
/**
 * Reads `<openclawRoot>/openclaw.json` and returns the `hindsight-openclaw` plugin
 * config merged over the built-in defaults. Throws when the file is missing or is
 * not valid JSON.
 */
export declare function loadPluginConfigFromOpenClawRoot(openclawRoot: string): PluginConfig;
/**
 * Parses a session `.jsonl` file into its session metadata and retained messages.
 * `agentId` is copied onto the result unchanged.
 */
export declare function parseSessionFile(filePath: string, agentId: string): ParsedSessionFile;
/**
 * Lists every `.jsonl` file in each agent's `sessions` directory (plus the
 * migration archive directory when `includeArchive` is `true`), sorted by agent
 * ID and then by file path.
 */
export declare function discoverSessionFiles(openclawRoot: string, includeArchive: boolean): {
    agentId: string;
    filePath: string;
}[];
/** Builds the document ID `backfill::<bankId>::<agentId>::<sessionId>`. */
export declare function stableDocumentId(session: ParsedSessionFile, bankId: string): string;
/**
 * Discovers, parses and plans sessions for backfill.
 *
 * @returns the planned entries, the number of session files considered after
 * agent filtering, and how many of those produced no retainable transcript.
 */
export declare function buildBackfillPlan(pluginConfig: PluginConfig, opts: BackfillCliOptions): {
    entries: BackfillPlanEntry[];
    discoveredSessions: number;
    skippedEmpty: number;
};
/** Loads the checkpoint file; a missing or unrecognised file yields an empty version-1 checkpoint. */
export declare function loadCheckpoint(checkpointPath: string): BackfillCheckpoint;
/** Writes the checkpoint as pretty-printed JSON, creating parent directories as needed. */
export declare function saveCheckpoint(checkpointPath: string, checkpoint: BackfillCheckpoint): void;
/** Returns the checkpoint map key `<bankId>::<documentId>` for a plan entry. */
export declare function checkpointKey(entry: Pick<BackfillPlanEntry, 'bankId' | 'documentId'>): string;
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import { homedir } from 'os';
|
|
2
|
+
import { dirname, join, resolve } from 'path';
|
|
3
|
+
import { readFileSync, existsSync, mkdirSync, writeFileSync, readdirSync } from 'fs';
|
|
4
|
+
import { deriveBankId, prepareRetentionTranscript } from './index.js';
|
|
5
|
+
/** Defaults applied beneath whatever the user's plugin config provides. */
const DEFAULT_PLUGIN_CONFIG = {
    dynamicBankId: true,
    retainRoles: ['user', 'assistant'],
};
/**
 * Returns the absolute path of the `.openclaw` directory in the current user's home.
 */
export function defaultOpenClawRoot() {
    return resolve(join(homedir(), '.openclaw'));
}
/**
 * Returns the default checkpoint location beneath the given OpenClaw root.
 *
 * @param openclawRoot OpenClaw root directory.
 */
export function defaultCheckpointPath(openclawRoot) {
    return join(openclawRoot, 'data', 'hindsight-backfill-checkpoint.json');
}
/**
 * Loads the `hindsight-openclaw` plugin config from `<openclawRoot>/openclaw.json`
 * and merges it over the built-in defaults.
 *
 * @param openclawRoot OpenClaw root directory.
 * @returns a fresh config object; mutating it never affects the shared defaults.
 * @throws if the file cannot be read or does not contain valid JSON.
 */
export function loadPluginConfigFromOpenClawRoot(openclawRoot) {
    const configPath = join(openclawRoot, 'openclaw.json');
    const raw = JSON.parse(readFileSync(configPath, 'utf8'));
    // `raw` may legitimately be `null` (a file containing just `null`), hence the leading `?.`.
    const configured = raw?.plugins?.entries?.['hindsight-openclaw']?.config;
    // Only a plain object is a usable config; spreading a string or an array would
    // otherwise inject numeric index keys into the result.
    const overrides = configured && typeof configured === 'object' && !Array.isArray(configured)
        ? configured
        : {};
    return {
        ...DEFAULT_PLUGIN_CONFIG,
        // Copy the default array so callers cannot mutate the module-level default.
        retainRoles: [...DEFAULT_PLUGIN_CONFIG.retainRoles],
        ...overrides,
    };
}
|
|
23
|
+
function extractTextContent(content) {
|
|
24
|
+
if (typeof content === 'string') {
|
|
25
|
+
return content;
|
|
26
|
+
}
|
|
27
|
+
if (Array.isArray(content)) {
|
|
28
|
+
return content
|
|
29
|
+
.filter((block) => !!block && typeof block === 'object')
|
|
30
|
+
.filter((block) => block.type === 'text' && typeof block.text === 'string')
|
|
31
|
+
.map((block) => block.text || '')
|
|
32
|
+
.join('\n');
|
|
33
|
+
}
|
|
34
|
+
return '';
|
|
35
|
+
}
|
|
36
|
+
function readJsonLines(filePath) {
|
|
37
|
+
const content = readFileSync(filePath, 'utf8');
|
|
38
|
+
return content
|
|
39
|
+
.split('\n')
|
|
40
|
+
.map((line) => line.trim())
|
|
41
|
+
.filter(Boolean)
|
|
42
|
+
.map((line) => JSON.parse(line));
|
|
43
|
+
}
|
|
44
|
+
export function parseSessionFile(filePath, agentId) {
|
|
45
|
+
const records = readJsonLines(filePath);
|
|
46
|
+
let sessionId = filePath.split('/').pop()?.replace(/\.jsonl$/, '') || 'session';
|
|
47
|
+
let sessionKey;
|
|
48
|
+
let startedAt;
|
|
49
|
+
const messages = [];
|
|
50
|
+
for (const record of records) {
|
|
51
|
+
if (record.type === 'session') {
|
|
52
|
+
sessionId = typeof record.id === 'string' ? record.id : sessionId;
|
|
53
|
+
startedAt = typeof record.timestamp === 'string' ? record.timestamp : startedAt;
|
|
54
|
+
sessionKey = typeof record.sessionKey === 'string' ? record.sessionKey : sessionKey;
|
|
55
|
+
continue;
|
|
56
|
+
}
|
|
57
|
+
if (record.type !== 'message' || !record.message || typeof record.message !== 'object') {
|
|
58
|
+
continue;
|
|
59
|
+
}
|
|
60
|
+
const message = record.message;
|
|
61
|
+
const role = message.role;
|
|
62
|
+
if (role !== 'user' && role !== 'assistant' && role !== 'system' && role !== 'tool') {
|
|
63
|
+
continue;
|
|
64
|
+
}
|
|
65
|
+
const text = extractTextContent(message.content);
|
|
66
|
+
if (!text.trim()) {
|
|
67
|
+
continue;
|
|
68
|
+
}
|
|
69
|
+
messages.push({
|
|
70
|
+
role,
|
|
71
|
+
content: typeof message.content === 'string' ? message.content : [{ type: 'text', text }],
|
|
72
|
+
});
|
|
73
|
+
if (!sessionKey && typeof record.sessionKey === 'string') {
|
|
74
|
+
sessionKey = record.sessionKey;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
return {
|
|
78
|
+
filePath,
|
|
79
|
+
agentId,
|
|
80
|
+
sessionId,
|
|
81
|
+
sessionKey,
|
|
82
|
+
startedAt,
|
|
83
|
+
messages,
|
|
84
|
+
};
|
|
85
|
+
}
|
|
86
|
+
/**
 * Lists the session directories that exist for every agent under `<openclawRoot>/agents`.
 *
 * Each agent contributes its `sessions` directory and, when `includeArchive` is
 * set, its `sessions-archive-from-migration_backup` directory. The result is
 * sorted by agent ID, then by path.
 */
function sessionDirectories(openclawRoot, includeArchive) {
    const agentsRoot = join(openclawRoot, 'agents');
    if (!existsSync(agentsRoot)) {
        return [];
    }
    const directoryNames = includeArchive
        ? ['sessions', 'sessions-archive-from-migration_backup']
        : ['sessions'];
    const found = readdirSync(agentsRoot, { withFileTypes: true })
        .filter((dirent) => dirent.isDirectory())
        .flatMap((dirent) => directoryNames
            .map((name) => ({ agentId: dirent.name, path: join(agentsRoot, dirent.name, name) }))
            .filter((candidate) => existsSync(candidate.path)));
    return found.sort((a, b) => a.agentId.localeCompare(b.agentId) || a.path.localeCompare(b.path));
}
/**
 * Finds every `.jsonl` session file for every agent.
 *
 * @param openclawRoot OpenClaw root directory.
 * @param includeArchive also scan the migration archive directories.
 * @returns `{ agentId, filePath }` pairs sorted by agent ID, then by file path.
 */
export function discoverSessionFiles(openclawRoot, includeArchive) {
    const discovered = sessionDirectories(openclawRoot, includeArchive)
        .flatMap((dir) => readdirSync(dir.path, { withFileTypes: true })
            // Only regular files count; a directory named `*.jsonl` is skipped.
            .filter((dirent) => dirent.isFile() && dirent.name.endsWith('.jsonl'))
            .map((dirent) => ({ agentId: dir.agentId, filePath: join(dir.path, dirent.name) })));
    return discovered.sort((a, b) => a.agentId.localeCompare(b.agentId) || a.filePath.localeCompare(b.filePath));
}
|
|
123
|
+
/**
 * Builds the context object handed to bank-ID derivation for a parsed session:
 * just the session's agent ID and session key.
 */
function backfillContextForSession(session) {
    const { agentId, sessionKey } = session;
    return { agentId, sessionKey };
}
|
|
129
|
+
/**
 * Chooses the bank a session is imported into.
 *
 * - `'agent'`: the session's agent ID.
 * - `'fixed'`: `fixedBank`, which must be non-empty.
 * - anything else: the bank derived from the plugin config for this session.
 *
 * @throws Error when the strategy is `'fixed'` and no `fixedBank` was supplied.
 */
function deriveTargetBank(session, pluginConfig, bankStrategy, fixedBank) {
    switch (bankStrategy) {
        case 'agent':
            return session.agentId;
        case 'fixed':
            if (!fixedBank) {
                throw new Error('fixed bank strategy requires --fixed-bank');
            }
            return fixedBank;
        default:
            return deriveBankId(backfillContextForSession(session), pluginConfig);
    }
}
|
|
141
|
+
/**
 * Builds the document ID for a backfilled session:
 * `backfill::<bankId>::<agentId>::<sessionId>`.
 *
 * @param session parsed session supplying `agentId` and `sessionId`.
 * @param bankId bank the session is imported into.
 */
export function stableDocumentId(session, bankId) {
    const { agentId, sessionId } = session;
    return `backfill::${bankId}::${agentId}::${sessionId}`;
}
|
|
144
|
+
/**
 * Builds the list of sessions to import.
 *
 * Every discovered session file that passes the optional agent filter is parsed
 * and turned into a retention transcript. Sessions without a transcript are
 * counted as skipped; the rest become plan entries. Planning stops early once
 * `opts.limit` entries have been collected.
 *
 * @param pluginConfig plugin configuration used for transcript preparation and bank derivation.
 * @param opts backfill options (root, archive flag, agent filter, limit, bank strategy).
 * @returns the plan entries plus counts of considered and skipped sessions.
 */
export function buildBackfillPlan(pluginConfig, opts) {
    const entries = [];
    let discoveredSessions = 0;
    let skippedEmpty = 0;
    const candidates = discoverSessionFiles(opts.openclawRoot, opts.includeArchive);
    for (const { agentId, filePath } of candidates) {
        const agentSelected = !opts.selectedAgents || opts.selectedAgents.has(agentId);
        if (!agentSelected) {
            continue;
        }
        // Counted before parsing, so sessions that turn out empty are included.
        discoveredSessions += 1;
        const session = parseSessionFile(filePath, agentId);
        const retention = prepareRetentionTranscript(session.messages, pluginConfig, true);
        if (!retention) {
            skippedEmpty += 1;
            continue;
        }
        const bankId = deriveTargetBank(session, pluginConfig, opts.bankStrategy, opts.fixedBank);
        const documentId = stableDocumentId(session, bankId);
        entries.push({
            filePath: session.filePath,
            agentId: session.agentId,
            sessionId: session.sessionId,
            startedAt: session.startedAt,
            bankId,
            documentId,
            transcript: retention.transcript,
            messageCount: retention.messageCount,
        });
        // A falsy limit (undefined or 0) means "no limit".
        const limitReached = Boolean(opts.limit) && entries.length >= opts.limit;
        if (limitReached) {
            break;
        }
    }
    return { entries, discoveredSessions, skippedEmpty };
}
|
|
176
|
+
/**
 * Loads a backfill checkpoint from disk.
 *
 * A missing file, or a file whose top level is not a version-1 checkpoint with
 * an object-valued `entries` map, yields an empty checkpoint. Entries that are
 * not objects are dropped. The legacy status `'queued'` is rewritten to
 * `'enqueued'`.
 *
 * @param checkpointPath path of the checkpoint JSON file.
 * @returns a version-1 checkpoint.
 * @throws if the file exists but cannot be read or is not valid JSON.
 */
export function loadCheckpoint(checkpointPath) {
    const emptyCheckpoint = () => ({ version: 1, entries: {} });
    // `typeof null` and `typeof []` are both 'object', so rule those out explicitly.
    const isRecord = (value) => !!value && typeof value === 'object' && !Array.isArray(value);
    if (!existsSync(checkpointPath)) {
        return emptyCheckpoint();
    }
    const raw = JSON.parse(readFileSync(checkpointPath, 'utf8'));
    if (!isRecord(raw) || raw.version !== 1 || !isRecord(raw.entries)) {
        return emptyCheckpoint();
    }
    const entries = Object.fromEntries(Object.entries(raw.entries)
        // Dropping a corrupt entry only causes that session to be re-planned.
        .filter(([, entry]) => isRecord(entry))
        .map(([key, entry]) => [
            key,
            {
                ...entry,
                status: entry.status === 'queued' ? 'enqueued' : entry.status,
            },
        ]));
    return { version: 1, entries };
}
|
|
195
|
+
/**
 * Persists a checkpoint as pretty-printed (2-space) JSON followed by a newline,
 * creating the parent directory first if it does not exist.
 *
 * @param checkpointPath destination file.
 * @param checkpoint checkpoint object to serialise.
 */
export function saveCheckpoint(checkpointPath, checkpoint) {
    const parentDir = dirname(checkpointPath);
    mkdirSync(parentDir, { recursive: true });
    const serialized = `${JSON.stringify(checkpoint, null, 2)}\n`;
    writeFileSync(checkpointPath, serialized, 'utf8');
}
|
|
199
|
+
/**
 * Returns the key under which a plan entry is stored in the checkpoint map:
 * `<bankId>::<documentId>`.
 *
 * @param entry object carrying `bankId` and `documentId`.
 */
export function checkpointKey(entry) {
    const { bankId, documentId } = entry;
    return `${bankId}::${documentId}`;
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import type { BankStats, PluginConfig } from './types.js';
|
|
3
|
+
import { type BackfillCheckpoint, type BackfillPlanEntry } from './backfill-lib.js';
|
|
4
|
+
/**
 * Handle for the Hindsight API endpoint that a backfill run talks to.
 *
 * NOTE(review): the implementation is not visible in this declaration file; the
 * field notes below are inferred from names and the README — confirm against backfill.js.
 */
interface BackfillRuntime {
    /** Base URL of the Hindsight API (presumably the local daemon or the external `hindsightApiUrl`). */
    apiUrl: string;
    /** Optional auth token sent to the API. */
    apiToken?: string;
    /** Releases the runtime; presumably stops a daemon that was started for the run. */
    stop(): Promise<void>;
}
|
|
9
|
+
/**
 * Filters plan entries for a resumed run.
 *
 * NOTE(review): implementation not visible here. The README describes `--resume`
 * as skipping only entries already finalized as completed — presumably that is
 * what this does when `resume` is `true`; confirm against backfill.js.
 */
export declare function filterEntriesForResume(
    entries: BackfillPlanEntry[],
    checkpoint: BackfillCheckpoint,
    resume: boolean,
): BackfillPlanEntry[];
/**
 * Splits plan entries into those that still need enqueueing and the checkpoint
 * keys of those already enqueued.
 *
 * NOTE(review): how `waitUntilDrained` influences the split is not visible here — confirm.
 */
export declare function splitResumeEntries(
    entries: BackfillPlanEntry[],
    checkpoint: BackfillCheckpoint,
    waitUntilDrained: boolean,
): {
    entriesToEnqueue: BackfillPlanEntry[];
    alreadyEnqueuedKeys: string[];
};
/**
 * Applies post-drain bank statistics to the checkpoint and reports the outcome.
 *
 * NOTE(review): presumably marks touched entries completed when their bank shows
 * no new failed operations relative to the initial count — confirm against backfill.js.
 *
 * @returns counts of completed and unresolved entries plus any warnings.
 */
export declare function applyDrainResults(
    checkpoint: BackfillCheckpoint,
    touchedEntriesByBank: Map<string, string[]>,
    finalStatsByBank: Map<string, BankStats>,
    initialFailedOperationsByBank: Map<string, number>,
): {
    completed: number;
    unresolved: number;
    warnings: string[];
};
/**
 * Creates the runtime a backfill run talks to.
 *
 * NOTE(review): presumably the explicit URL/token override the plugin config when
 * given — confirm against backfill.js.
 */
export declare function createBackfillRuntime(
    pluginConfig: PluginConfig,
    explicitApiUrl?: string,
    explicitApiToken?: string,
): Promise<BackfillRuntime>;
/** CLI entry point; `argv` presumably defaults to the process arguments — confirm. */
export declare function runCli(argv?: string[]): Promise<void>;
/**
 * Reports whether the module is being run directly rather than imported.
 *
 * NOTE(review): presumably compares `entrypoint` with `moduleUrl` — confirm.
 */
export declare function isDirectExecution(entrypoint?: string | undefined, moduleUrl?: string): boolean;
|
|
22
|
+
export {};
|