memorylake-openclaw 0.0.11 → 0.0.15-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -39,7 +39,7 @@ Get an API key from [app.memorylake.ai](https://app.memorylake.ai), then add to
39
39
 
40
40
  ## Agent tools
41
41
 
42
- The agent gets seven tools it can call during conversations:
42
+ The agent gets eight tools it can call during conversations:
43
43
 
44
44
  | Tool | Description |
45
45
  |------|-------------|
@@ -50,6 +50,7 @@ The agent gets seven tools it can call during conversations:
50
50
  | `memory_forget` | Delete a memory by ID |
51
51
  | `document_search` | Search project documents for relevant paragraphs, tables, and figures |
52
52
  | `advanced_web_search` | Optional tool for web search with plugin-level domain and locale constraints |
53
+ | `open_data_search` | Search open data sources scoped to the project's configured industry categories |
53
54
 
54
55
  ## CLI
55
56
 
package/core-bridge.ts ADDED
@@ -0,0 +1,155 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { fileURLToPath, pathToFileURL } from "node:url";
4
+
5
+ export type CoreConfig = {
6
+ session?: {
7
+ store?: string;
8
+ };
9
+ [key: string]: unknown;
10
+ };
11
+
12
+ type CoreAgentDeps = {
13
+ resolveAgentDir: (cfg: CoreConfig, agentId: string) => string;
14
+ resolveAgentWorkspaceDir: (cfg: CoreConfig, agentId: string) => string;
15
+ resolveAgentIdentity: (
16
+ cfg: CoreConfig,
17
+ agentId: string,
18
+ ) => { name?: string | null } | null | undefined;
19
+ resolveThinkingDefault: (params: {
20
+ cfg: CoreConfig;
21
+ provider?: string;
22
+ model?: string;
23
+ }) => string;
24
+ runEmbeddedPiAgent: (params: {
25
+ sessionId: string;
26
+ sessionKey?: string;
27
+ messageProvider?: string;
28
+ sessionFile: string;
29
+ workspaceDir: string;
30
+ config?: CoreConfig;
31
+ prompt: string;
32
+ provider?: string;
33
+ model?: string;
34
+ thinkLevel?: string;
35
+ verboseLevel?: string;
36
+ timeoutMs: number;
37
+ runId: string;
38
+ lane?: string;
39
+ extraSystemPrompt?: string;
40
+ agentDir?: string;
41
+ }) => Promise<{
42
+ payloads?: Array<{ text?: string; isError?: boolean }>;
43
+ meta?: { aborted?: boolean };
44
+ }>;
45
+ resolveAgentTimeoutMs: (opts: { cfg: CoreConfig }) => number;
46
+ ensureAgentWorkspace: (params?: { dir: string }) => Promise<void>;
47
+ resolveStorePath: (store?: string, opts?: { agentId?: string }) => string;
48
+ loadSessionStore: (storePath: string) => Record<string, unknown>;
49
+ saveSessionStore: (storePath: string, store: Record<string, unknown>) => Promise<void>;
50
+ resolveSessionFilePath: (
51
+ sessionId: string,
52
+ entry: unknown,
53
+ opts?: { agentId?: string },
54
+ ) => string;
55
+ DEFAULT_MODEL: string;
56
+ DEFAULT_PROVIDER: string;
57
+ };
58
+
59
+ let coreRootCache: string | null = null;
60
+ let coreDepsPromise: Promise<CoreAgentDeps> | null = null;
61
+
62
/**
 * Walk upward from `startDir` until a directory is found whose `package.json`
 * declares the given package `name`.
 *
 * @param startDir Directory where the upward search begins.
 * @param name Value that the manifest's `name` field must equal.
 * @returns The matching directory, or `null` once the filesystem root has been
 *          checked without a match.
 */
function findPackageRoot(startDir: string, name: string): string | null {
  // Reads the `name` field of <dir>/package.json. Missing, unreadable, or
  // malformed manifests are treated as "no name" so the walk keeps going.
  const manifestNameAt = (dir: string): unknown => {
    const manifestPath = path.join(dir, "package.json");
    try {
      if (!fs.existsSync(manifestPath)) {
        return undefined;
      }
      const parsed = JSON.parse(fs.readFileSync(manifestPath, "utf8")) as { name?: string };
      return parsed.name;
    } catch {
      return undefined;
    }
  };

  let current = startDir;
  while (manifestNameAt(current) !== name) {
    const parent = path.dirname(current);
    // path.dirname() returns its input unchanged at the filesystem root.
    if (parent === current) {
      return null;
    }
    current = parent;
  }
  return current;
}
84
+
85
/**
 * Locate the root directory of the `openclaw` package and memoize it in
 * `coreRootCache`.
 *
 * Resolution order:
 *   1. the cached value from a previous call;
 *   2. the `OPENCLAW_ROOT` environment variable (trimmed, used as-is);
 *   3. an upward `package.json` search starting from the entry script's
 *      directory, the current working directory, and this module's directory.
 *
 * @returns The resolved package root.
 * @throws Error when none of the strategies finds the package.
 */
function resolveOpenClawRoot(): string {
  if (coreRootCache) {
    return coreRootCache;
  }

  const envRoot = process.env.OPENCLAW_ROOT?.trim();
  if (envRoot) {
    coreRootCache = envRoot;
    return envRoot;
  }

  // A Set keeps insertion order and drops duplicate starting points.
  const searchStarts = new Set<string>();
  const entryScript = process.argv[1];
  if (entryScript) {
    searchStarts.add(path.dirname(entryScript));
  }
  searchStarts.add(process.cwd());
  try {
    searchStarts.add(path.dirname(fileURLToPath(import.meta.url)));
  } catch {
    // import.meta.url could not be converted to a file path; skip this candidate
  }

  for (const start of searchStarts) {
    const root = findPackageRoot(start, "openclaw");
    if (root) {
      coreRootCache = root;
      return root;
    }
  }

  throw new Error("Unable to resolve core root. Set OPENCLAW_ROOT to the package root.");
}
119
+
120
/**
 * Dynamically import the core's built extension API bundle
 * (`<openclaw root>/dist/extensionAPI.js`).
 *
 * @returns The imported module, typed as the set of core helpers this plugin uses.
 * @throws Error when the bundle does not exist at the expected path.
 */
async function importCoreExtensionAPI(): Promise<CoreAgentDeps> {
  // Only the extension API bundle is loaded here. The original author asked
  // explicitly that no other core module be imported from this function.
  const bundlePath = path.join(resolveOpenClawRoot(), "dist", "extensionAPI.js");
  const bundleExists = fs.existsSync(bundlePath);
  if (!bundleExists) {
    throw new Error(
      `Missing core module at ${bundlePath}. Run \`pnpm build\` or install the official package.`,
    );
  }
  // Dynamic import() needs a file:// URL rather than a bare filesystem path.
  const bundleUrl = pathToFileURL(bundlePath).href;
  return await import(bundleUrl);
}
144
+
145
/**
 * Load the core agent dependencies once and share the result between callers.
 *
 * The in-flight promise is cached in `coreDepsPromise`, so concurrent callers
 * share a single import. If the import fails, the cache is cleared so a later
 * call can retry (for example after the core package has been built) instead
 * of replaying the same rejection forever.
 *
 * @returns The core helper functions and defaults exposed by the extension API.
 * @throws Whatever `importCoreExtensionAPI` rejects with.
 */
export async function loadCoreAgentDeps(): Promise<CoreAgentDeps> {
  if (!coreDepsPromise) {
    const pending = importCoreExtensionAPI();
    coreDepsPromise = pending;
    // Drop the cached promise on failure. The identity check avoids clearing
    // a newer attempt that may already have replaced this one.
    pending.catch(() => {
      if (coreDepsPromise === pending) {
        coreDepsPromise = null;
      }
    });
  }
  return coreDepsPromise;
}
package/docs/openclaw.mdx CHANGED
@@ -14,7 +14,7 @@ The plugin provides:
14
14
  1. **Auto-Recall** — Before the agent responds, memories and relevant document excerpts matching the current message are injected into context
15
15
  2. **Auto-Capture** — After the agent responds, the exchange is sent to MemoryLake which decides what's worth keeping
16
16
  3. **Auto-Upload** — When a user sends a file, the plugin uploads it to MemoryLake as a project document asynchronously
17
- 4. **Agent Tools** — Seven tools for memory, document, and optional web search operations during conversations
17
+ 4. **Agent Tools** — Eight tools for memory, document, web search, and open data search operations during conversations
18
18
 
19
19
  Auto-Recall, Auto-Capture, and Auto-Upload run silently by default.
20
20
 
@@ -43,7 +43,7 @@ Add to your `openclaw.json`:
43
43
 
44
44
  ## Agent Tools
45
45
 
46
- The agent gets seven tools it can call during conversations:
46
+ The agent gets eight tools it can call during conversations:
47
47
 
48
48
  | Tool | Description |
49
49
  |------|-------------|
@@ -54,6 +54,9 @@ The agent gets seven tools it can call during conversations:
54
54
  | `memory_forget` | Delete a memory by ID |
55
55
  | `document_search` | Search project documents for relevant paragraphs, tables, and figures |
56
56
  | `advanced_web_search` | Optional web search tool backed by the unified search API with plugin-level domain and locale constraints |
57
+ | `open_data_search` | Optional search across open datasets — academic, clinical, drug, financial, economic, and more — routed to the appropriate proprietary data source based on the `dataset` field |
58
+
59
+ <Note>`open_data_search` requires the project to have at least one open data industry configured in MemoryLake. The `dataset` parameter is required and validated against the project's subscribed datasets at call time. The agent is automatically informed of available datasets via context injection at the start of each session. Supported datasets: `research/academic`, `clinical/trials`, `drug/database`, `financial/markets`, `company/fundamentals`, `economic/data`, `patents/ip`.</Note>
57
60
 
58
61
  ## CLI Commands
59
62
 
@@ -83,18 +86,19 @@ openclaw memorylake stats
83
86
  | `webSearchCountry` | `string` | — | Optional ISO country code for localizing `advanced_web_search` |
84
87
  | `webSearchTimezone` | `string` | — | Optional IANA timezone for localizing `advanced_web_search` |
85
88
 
86
- <Note>`advanced_web_search` is registered as an optional OpenClaw tool, so it must be explicitly allowed before an agent can call it.</Note>
89
+ <Note>`advanced_web_search` and `open_data_search` are registered as optional OpenClaw tools, so they must be explicitly allowed before an agent can call them.</Note>
87
90
 
88
91
  ## Key Features
89
92
 
90
93
  1. **Zero Configuration** — Auto-recall and auto-capture work out of the box with no prompting required
91
94
  2. **Async Processing** — Memory extraction and file uploads run asynchronously without blocking the agent
92
95
  3. **Session Tracking** — Conversations are tagged with `chat_session_id` for traceability
93
- 4. **Rich Tool Suite** — Seven agent tools for memory, document, and optional web search operations when needed
96
+ 4. **Rich Tool Suite** — Eight agent tools for memory, document, web search, and open data search operations when needed
97
+ 5. **Open Data Awareness** — At the start of each session, the agent is automatically informed of which open data categories the project has access to, so it can use `open_data_search` with the correct category without guessing
94
98
 
95
99
  ## Conclusion
96
100
 
97
- The `memorylake-openclaw` plugin gives OpenClaw agents persistent memory with minimal setup. Your agents can remember user preferences, facts, and context across sessions automatically.
101
+ The `memorylake-openclaw` plugin gives OpenClaw agents persistent memory with minimal setup. Your agents can remember user preferences, facts, and context across sessions automatically — and optionally search across a wide range of open datasets when deeper external knowledge is needed.
98
102
 
99
103
  {/*<CardGroup cols={2}>
100
104
  <Card title="MemoryLake" icon="brain" href="https://app.memorylake.ai">
package/index.ts CHANGED
@@ -4,17 +4,20 @@
4
4
  * Long-term memory via MemoryLake platform.
5
5
  *
6
6
  * Features:
7
- * - 7 tools: memory_search, memory_list, memory_store, memory_get, memory_forget, document_search, advanced_web_search
7
+ * - 8 tools: memory_search, memory_list, memory_store, memory_get, memory_forget, document_search, advanced_web_search, open_data_search
8
8
  * - Auto-recall: injects relevant memories and document excerpts before each agent turn
9
9
  * - Auto-capture: stores key facts scoped to the current session after each agent turn
10
10
  * - CLI: openclaw memorylake search, openclaw memorylake stats
11
11
  */
12
12
 
13
13
  import fs from "node:fs";
14
+ import fsPromises from "node:fs/promises";
15
+ import os from "node:os";
14
16
  import path from "node:path";
15
17
  import got from "got";
16
18
  import { Type } from "@sinclair/typebox";
17
19
  import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
20
+ import { loadCoreAgentDeps } from "./core-bridge";
18
21
 
19
22
  // ============================================================================
20
23
  // Types
@@ -65,6 +68,52 @@ interface MemoryItem {
65
68
  user_id?: string;
66
69
  created_at?: string;
67
70
  updated_at?: string;
71
+ has_unresolved_conflict?: boolean;
72
+ }
73
+
74
+ interface ConflictMemorySnapshot {
75
+ memory_id: string;
76
+ memory_history_id?: string;
77
+ memory_text: string;
78
+ }
79
+
80
+ interface ConflictFileChunk {
81
+ chunk: { type?: string; text: string; range?: string };
82
+ document_id?: string;
83
+ document_name?: string;
84
+ }
85
+
86
+ interface ConflictResolve {
87
+ id: string;
88
+ strategy: string;
89
+ keep_memory_id?: string;
90
+ forgotten_memory_ids?: string[];
91
+ resolved_by?: string;
92
+ created_at?: string;
93
+ }
94
+
95
+ interface ConflictItem {
96
+ id: string;
97
+ name: string;
98
+ description: string;
99
+ category: "m2m" | "m2d";
100
+ conflict_type: "logical" | "knowledge";
101
+ memory_ids: string[];
102
+ memory_snapshots: ConflictMemorySnapshot[];
103
+ file_chunks: ConflictFileChunk[];
104
+ resolved: boolean;
105
+ resolve?: ConflictResolve;
106
+ stale?: boolean;
107
+ event_id?: string;
108
+ created_at?: string;
109
+ updated_at?: string;
110
+ }
111
+
112
+ interface ConflictListResponse {
113
+ items: ConflictItem[];
114
+ page: number;
115
+ total: number;
116
+ page_size: number;
68
117
  }
69
118
 
70
119
  interface AddResultItem {
@@ -168,6 +217,68 @@ interface WebSearchResponse {
168
217
  total_results: number;
169
218
  }
170
219
 
220
/**
 * Dataset categories accepted by the open data search endpoint.
 * The endpoint maps each category to its own proprietary data sources.
 */
const OpenDataCategoryValues = [
  "research/academic",
  "clinical/trials",
  "drug/database",
  "financial/markets",
  "company/fundamentals",
  "economic/data",
  "patents/ip",
] as const;

/** Union of the literal category strings listed in `OpenDataCategoryValues`. */
type OpenDataCategory = (typeof OpenDataCategoryValues)[number];

/** Lookup set used for membership tests against the allowed categories. */
const OPEN_DATA_CATEGORY_SET = new Set<string>(OpenDataCategoryValues);

/**
 * Coerce an arbitrary runtime value into a known open data category.
 *
 * @param value Candidate value; strings are lower-cased and trimmed before matching.
 * @returns The canonical category, or `undefined` when `value` is not a string
 *          or does not name an allowed category.
 */
function normalizeOpenDataCategory(value: unknown): OpenDataCategory | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const candidate = value.toLowerCase().trim();
  if (!OPEN_DATA_CATEGORY_SET.has(candidate)) {
    return undefined;
  }
  return candidate as OpenDataCategory;
}
243
+
244
+ interface OpenDataIndustry {
245
+ id: string;
246
+ name: string;
247
+ description?: string;
248
+ }
249
+
250
+ interface ProjectInfo {
251
+ id: string;
252
+ name: string;
253
+ description?: string;
254
+ industries: OpenDataIndustry[];
255
+ }
256
+
257
+ interface OpenDataSearchOptions {
258
+ dataset?: OpenDataCategory | string;
259
+ max_results?: number;
260
+ start_date?: string;
261
+ end_date?: string;
262
+ }
263
+
264
+ interface OpenDataSearchResult {
265
+ title?: string;
266
+ url?: string;
267
+ summary?: string;
268
+ content?: string;
269
+ source?: string;
270
+ category?: string;
271
+ published_date?: string;
272
+ author?: string;
273
+ score?: number;
274
+ metadata?: Record<string, unknown>;
275
+ }
276
+
277
+ interface OpenDataSearchResponse {
278
+ results: OpenDataSearchResult[];
279
+ total_results: number;
280
+ }
281
+
171
282
  // ============================================================================
172
283
  // Unified Provider Interface
173
284
  // ============================================================================
@@ -183,6 +294,9 @@ interface MemoryLakeProvider {
183
294
  delete(memoryId: string): Promise<void>;
184
295
  searchDocuments(query: string, topN: number): Promise<DocumentSearchResponse>;
185
296
  searchWeb(query: string, options: WebSearchOptions): Promise<WebSearchResponse>;
297
+ searchOpenData(query: string, options: OpenDataSearchOptions): Promise<OpenDataSearchResponse>;
298
+ getProject(): Promise<ProjectInfo>;
299
+ listConflicts(memoryIds: string[], userId: string): Promise<ConflictItem[]>;
186
300
  }
187
301
 
188
302
  // ============================================================================
@@ -201,11 +315,19 @@ class PlatformProvider implements MemoryLakeProvider {
201
315
  private readonly basePath: string;
202
316
  private readonly docSearchPath: string;
203
317
  private readonly webSearchPath: string;
318
+ private readonly openDataSearchPath: string;
319
+ private readonly projectPath: string;
320
+ private readonly conflictsPath: string;
321
+ private readonly projectId: string;
204
322
 
205
323
  constructor(host: string, apiKey: string, projectId: string) {
324
+ this.projectId = projectId;
206
325
  this.basePath = `openapi/memorylake/api/v2/projects/${projectId}/memories`;
207
326
  this.docSearchPath = `openapi/memorylake/api/v1/projects/${projectId}/documents/search`;
208
327
  this.webSearchPath = "openapi/memorylake/api/v1/search";
328
+ this.openDataSearchPath = "openapi/memorylake/api/v1/search/opendata";
329
+ this.projectPath = `openapi/memorylake/api/v1/projects/${projectId}`;
330
+ this.conflictsPath = `openapi/memorylake/api/v2/projects/${projectId}/memories/conflicts`;
209
331
  this.http = got.extend({
210
332
  prefixUrl: host,
211
333
  headers: {
@@ -238,6 +360,7 @@ class PlatformProvider implements MemoryLakeProvider {
238
360
  const body: Record<string, unknown> = {
239
361
  query,
240
362
  user_id: options.user_id,
363
+ with_conflicts: true,
241
364
  };
242
365
  if (options.top_k != null) body.top_k = options.top_k;
243
366
  if (options.threshold != null) body.threshold = options.threshold;
@@ -312,6 +435,61 @@ class PlatformProvider implements MemoryLakeProvider {
312
435
  return normalizeWebSearchResponse(resp);
313
436
  }
314
437
 
438
/**
 * POST a query to the open data search endpoint and normalize the response.
 *
 * @param query Search text, sent as the `query` field.
 * @param options Optional dataset, result limit, and date bounds. Only fields
 *                that are set are included in the request body.
 * @returns The normalized search response.
 * @throws Error when `options.dataset` is set but is not a recognized category.
 */
async searchOpenData(query: string, options: OpenDataSearchOptions): Promise<OpenDataSearchResponse> {
  const { dataset, max_results: maxResults, start_date: startDate, end_date: endDate } = options;
  const payload: Record<string, unknown> = { query };

  if (dataset != null) {
    const normalized = normalizeOpenDataCategory(dataset);
    if (normalized === undefined) {
      throw new Error(`Invalid open data dataset: "${dataset}"`);
    }
    payload.dataset = normalized;
  }
  if (maxResults != null) {
    payload.max_results = maxResults;
  }
  // Empty date strings are treated the same as absent dates.
  if (startDate) {
    payload.start_date = startDate;
  }
  if (endDate) {
    payload.end_date = endDate;
  }

  const request = this.http.post(this.openDataSearchPath, { json: payload });
  const raw = await request.json<OpenDataSearchResponse>();
  return normalizeOpenDataSearchResponse(raw);
}
454
+
455
/**
 * Fetch the current project's record and coerce it into a `ProjectInfo`.
 *
 * Missing `id`/`name` values become empty strings, and a missing or non-array
 * `industries` field becomes an empty list.
 *
 * @returns The project's id, name, description, and industries.
 * @throws Error when the API envelope reports `success: false`.
 */
async getProject(): Promise<ProjectInfo> {
  type RawIndustry = { id?: string; name?: string; description?: string };
  type RawProject = { id?: string; name?: string; description?: string; industries?: Array<RawIndustry> };

  const envelope = await this.http.get(this.projectPath).json<ApiResponse<RawProject>>();
  if (!envelope.success) {
    throw new Error(envelope.message ?? "get project failed");
  }

  const project = envelope.data;
  const industries: OpenDataIndustry[] = [];
  if (Array.isArray(project?.industries)) {
    for (const industry of project.industries) {
      industries.push({
        id: industry.id ?? "",
        name: industry.name ?? "",
        description: industry.description,
      });
    }
  }

  return {
    id: project?.id ?? "",
    name: project?.name ?? "",
    description: project?.description,
    industries,
  };
}
475
+
476
/**
 * List unresolved conflicts that involve any of the given memories.
 *
 * @param memoryIds Memory ids to look up; an empty list returns immediately
 *                  without issuing a request.
 * @param userId Sent in the `X-User-ID` request header.
 * @returns The conflict items from the response, or an empty array when the
 *          response carries no `items` array.
 * @throws Error when the API envelope reports `success: false`.
 */
async listConflicts(memoryIds: string[], userId: string): Promise<ConflictItem[]> {
  if (memoryIds.length === 0) return [];

  // got only accepts primitive values in a plain-object `searchParams`; an
  // array value is rejected before the request is sent. URLSearchParams can
  // carry the list as one `memory_ids` entry per id.
  // NOTE(review): assumes the API reads repeated `memory_ids` parameters
  // (rather than a comma-separated value) — confirm against the endpoint.
  const searchParams = new URLSearchParams({ resolved: "false" });
  for (const memoryId of memoryIds) {
    searchParams.append("memory_ids", memoryId);
  }

  const resp = await this.http
    .get(this.conflictsPath, {
      searchParams,
      headers: { "X-User-ID": userId },
    })
    .json<ApiResponse<ConflictListResponse>>();
  if (!resp.success) throw new Error(resp.message ?? "list conflicts failed");

  const data = resp.data;
  return Array.isArray(data?.items) ? data.items : [];
}
492
+
315
493
  }
316
494
 
317
495
  // ============================================================================
@@ -325,6 +503,7 @@ function normalizeMemoryItem(raw: any): MemoryItem {
325
503
  user_id: raw.user_id,
326
504
  created_at: raw.created_at,
327
505
  updated_at: raw.updated_at,
506
+ has_unresolved_conflict: raw.has_unresolved_conflict ?? false,
328
507
  };
329
508
  }
330
509
 
@@ -353,6 +532,30 @@ function normalizeWebSearchResponse(raw: any): WebSearchResponse {
353
532
  };
354
533
  }
355
534
 
535
/**
 * Coerce one raw open data hit into an `OpenDataSearchResult`.
 *
 * Every field is type-checked on its own; a value of the wrong type becomes
 * `undefined` rather than being passed through.
 *
 * @param raw Untrusted result object from the API (may be null or undefined).
 * @returns A result whose fields are either well-typed or `undefined`.
 */
function normalizeOpenDataResult(raw: any): OpenDataSearchResult {
  const textOrUndefined = (candidate: unknown): string | undefined =>
    typeof candidate === "string" ? candidate : undefined;

  const rawScore: unknown = raw?.score;
  const rawMetadata: unknown = raw?.metadata;
  // Metadata must be a plain (non-array, non-null) object to be kept.
  const metadataIsRecord =
    Boolean(rawMetadata) && typeof rawMetadata === "object" && !Array.isArray(rawMetadata);

  return {
    title: textOrUndefined(raw?.title),
    url: textOrUndefined(raw?.url),
    summary: textOrUndefined(raw?.summary),
    content: textOrUndefined(raw?.content),
    source: textOrUndefined(raw?.source),
    category: textOrUndefined(raw?.category),
    published_date: textOrUndefined(raw?.published_date),
    author: textOrUndefined(raw?.author),
    score: typeof rawScore === "number" ? rawScore : undefined,
    metadata: metadataIsRecord ? (rawMetadata as Record<string, unknown>) : undefined,
  };
}
551
+
552
/**
 * Coerce a raw open data search payload into an `OpenDataSearchResponse`.
 *
 * @param raw Untrusted response body (may be null or undefined).
 * @returns Normalized results (empty when `results` is not an array) and the
 *          reported total (0 when `total_results` is not a number).
 */
function normalizeOpenDataSearchResponse(raw: any): OpenDataSearchResponse {
  const rawResults: any[] = Array.isArray(raw?.results) ? raw.results : [];
  const reportedTotal: unknown = raw?.total_results;
  return {
    results: rawResults.map(normalizeOpenDataResult),
    total_results: typeof reportedTotal === "number" ? reportedTotal : 0,
  };
}
558
+
356
559
  // ============================================================================
357
560
  // Document Context Builder
358
561
  // ============================================================================
@@ -414,6 +617,37 @@ function buildWebSearchContext(results: WebSearchResult[]): string {
414
617
  .join("\n\n");
415
618
  }
416
619
 
620
/**
 * Render unresolved conflicts as an indented plain-text list for injection
 * into the agent context.
 *
 * Each conflict produces a header line (`- [type] description`) followed by
 * one line per memory snapshot and one line per document chunk, with the
 * quoted text clipped to `maxChunkLength` characters.
 *
 * The items come straight from the API without validation, so a snapshot or
 * chunk whose text is missing is rendered with an empty excerpt rather than
 * throwing.
 *
 * @param conflicts Conflict items to render.
 * @param maxChunkLength Maximum number of characters kept from each excerpt.
 * @returns The rendered block; an empty string when `conflicts` is empty.
 */
function buildConflictContext(conflicts: ConflictItem[], maxChunkLength = 200): string {
  // Clip text defensively: anything that is not a string becomes "".
  const clip = (text: unknown): string =>
    typeof text === "string" ? text.slice(0, maxChunkLength) : "";

  return conflicts
    .map((c) => {
      const parts: string[] = [`- [${c.conflict_type}] ${c.description}`];
      for (const snap of c.memory_snapshots ?? []) {
        parts.push(` Memory(${snap.memory_id}): ${clip(snap.memory_text)}`);
      }
      for (const fc of c.file_chunks ?? []) {
        // Prefer the human-readable name, then the id, then a placeholder.
        const docLabel = fc.document_name ?? fc.document_id ?? "unknown";
        parts.push(` Document(${docLabel}): ${clip(fc.chunk?.text)}`);
      }
      return parts.join("\n");
    })
    .join("\n");
}
+
638
/**
 * Serialize open data results as pretty-printed JSON for the agent.
 *
 * Only `title`, `url`, `content`, `published_date`, and `category` are kept
 * (in that order), and only when the field is neither null nor undefined.
 *
 * @param results Normalized search results.
 * @returns A JSON array string indented with two spaces.
 */
function buildOpenDataContext(results: OpenDataSearchResult[]): string {
  const exposedFields = ["title", "url", "content", "published_date", "category"] as const;

  const trimmed = results.map((result) => {
    const entry: Record<string, unknown> = {};
    for (const field of exposedFields) {
      const fieldValue = result[field];
      if (fieldValue != null) {
        entry[field] = fieldValue;
      }
    }
    return entry;
  });

  return JSON.stringify(trimmed, null, 2);
}
+
417
651
  // ============================================================================
418
652
  // Config Parser
419
653
  // ============================================================================
@@ -574,16 +808,16 @@ const memoryPlugin = {
574
808
  }
575
809
 
576
810
  return { ...cfg, ...overrides } as MemoryLakeConfig;
577
- } catch {
811
+ } catch (err) {
578
812
  api.logger.warn(
579
- `memorylake-openclaw: failed to parse workspace config JSON; falling back to global config (path: ${localPath})`,
813
+ `memorylake-openclaw: failed to parse workspace config JSON; falling back to global config (path: ${localPath}): ${String(err)}`,
580
814
  );
581
815
  return cfg;
582
816
  }
583
817
  }
584
818
 
585
- // Track current session ID for tool-level session scoping
586
- let currentSessionId: string | undefined;
819
+ // Cache project industries per session: fetched once, then reused on subsequent prompts
820
+ const sessionIndustriesCache = new Map<string, OpenDataIndustry[]>();
587
821
 
588
822
  api.logger.info(
589
823
  `memorylake-openclaw: registered (user: ${cfg.userId}, autoRecall: ${cfg.autoRecall}, autoCapture: ${cfg.autoCapture}, autoUpload: ${cfg.autoUpload})`,
@@ -742,7 +976,7 @@ const memoryPlugin = {
742
976
  try {
743
977
  const result = await effectiveProvider.add(
744
978
  [{ role: "user", content: text }],
745
- buildAddOptions(effectiveCfg, userId, currentSessionId),
979
+ buildAddOptions(effectiveCfg, userId, (ctx as any)?.sessionId),
746
980
  );
747
981
 
748
982
  const count = result.results?.length ?? 0;
@@ -1128,6 +1362,152 @@ const memoryPlugin = {
1128
1362
  { optional: true },
1129
1363
  );
1130
1364
 
1365
+ api.registerTool(
1366
+ (ctx) => ({
1367
+ name: "open_data_search",
1368
+ label: "Open Data Search",
1369
+ description:
1370
+ "Search across open datasets routed to the appropriate proprietary data source based on the dataset:\n- research/academic: arXiv, PubMed, bioRxiv, medRxiv\n- clinical/trials: Clinical trial registries\n- drug/database: ChEMBL, DrugBank, PubChem, etc.\n- financial/markets: Stocks, crypto, forex, funds, commodities\n- company/fundamentals: SEC filings, earnings, balance sheets, etc.\n- economic/data: FRED, BLS, World Bank, etc.\n- patents/ip: USPTO patents",
1371
+ parameters: Type.Object({
1372
+ query: Type.String({
1373
+ description: "The search query to send to the open data endpoint.",
1374
+ }),
1375
+ dataset: Type.Union(
1376
+ [
1377
+ Type.Literal("research/academic"),
1378
+ Type.Literal("clinical/trials"),
1379
+ Type.Literal("drug/database"),
1380
+ Type.Literal("financial/markets"),
1381
+ Type.Literal("company/fundamentals"),
1382
+ Type.Literal("economic/data"),
1383
+ Type.Literal("patents/ip"),
1384
+ ],
1385
+ {
1386
+ description:
1387
+ "Dataset category to search. Must be one of the project's enabled categories.",
1388
+ },
1389
+ ),
1390
+ maxResults: Type.Optional(
1391
+ Type.Number({
1392
+ description: `Maximum number of results to return (default: ${cfg.topK}). The server enforces a hard cap.`,
1393
+ minimum: 1,
1394
+ }),
1395
+ ),
1396
+ startDate: Type.Optional(
1397
+ Type.String({
1398
+ description: "Only include results published on or after this date (YYYY-MM-DD).",
1399
+ }),
1400
+ ),
1401
+ endDate: Type.Optional(
1402
+ Type.String({
1403
+ description: "Only include results published on or before this date (YYYY-MM-DD).",
1404
+ }),
1405
+ ),
1406
+ }),
1407
+ async execute(_toolCallId, params) {
1408
+ const effectiveCfg = resolveConfig(ctx);
1409
+ const effectiveProvider = getProvider(effectiveCfg);
1410
+ const {
1411
+ query,
1412
+ dataset: rawDataset,
1413
+ maxResults,
1414
+ startDate,
1415
+ endDate,
1416
+ } = params as {
1417
+ query: string;
1418
+ dataset: string;
1419
+ maxResults?: number;
1420
+ startDate?: string;
1421
+ endDate?: string;
1422
+ };
1423
+
1424
+ // Normalize once; use throughout to avoid casing bugs
1425
+ const dataset = normalizeOpenDataCategory(rawDataset);
1426
+
1427
+ if (!dataset) {
1428
+ return {
1429
+ content: [
1430
+ {
1431
+ type: "text",
1432
+ text: `Unsupported dataset: "${rawDataset}". Supported values are: ${OpenDataCategoryValues.join(", ")}`,
1433
+ },
1434
+ ],
1435
+ details: { error: "unsupported_dataset", dataset: rawDataset },
1436
+ };
1437
+ }
1438
+
1439
+ try {
1440
+ // Validate dataset against project's allowed industries
1441
+ const projectInfo = await effectiveProvider.getProject();
1442
+ if (projectInfo.industries.length > 0) {
1443
+ const allowedIds = projectInfo.industries.map((ind) => ind.id);
1444
+ if (!allowedIds.includes(dataset)) {
1445
+ const allowed = projectInfo.industries
1446
+ .map((ind) => `${ind.id} (${ind.name})`)
1447
+ .join(", ");
1448
+ return {
1449
+ content: [
1450
+ {
1451
+ type: "text",
1452
+ text: `Dataset "${dataset}" is not enabled for this project. Allowed datasets: ${allowed}`,
1453
+ },
1454
+ ],
1455
+ details: {
1456
+ error: "dataset_not_allowed",
1457
+ dataset,
1458
+ allowed_datasets: allowedIds,
1459
+ },
1460
+ };
1461
+ }
1462
+ }
1463
+
1464
+ const response = await effectiveProvider.searchOpenData(query, {
1465
+ dataset,
1466
+ max_results: maxResults ?? effectiveCfg.topK,
1467
+ start_date: startDate,
1468
+ end_date: endDate,
1469
+ });
1470
+
1471
+ if (!response.results || response.results.length === 0) {
1472
+ return {
1473
+ content: [
1474
+ { type: "text", text: "No relevant open data results found." },
1475
+ ],
1476
+ details: { count: 0, total_results: response.total_results },
1477
+ };
1478
+ }
1479
+
1480
+ const context = buildOpenDataContext(response.results);
1481
+
1482
+ return {
1483
+ content: [
1484
+ {
1485
+ type: "text",
1486
+ text: `Found ${response.results.length} open data results:\n\n${context}`,
1487
+ },
1488
+ ],
1489
+ details: {
1490
+ count: response.results.length,
1491
+ total_results: response.total_results,
1492
+ results: response.results,
1493
+ },
1494
+ };
1495
+ } catch (err) {
1496
+ return {
1497
+ content: [
1498
+ {
1499
+ type: "text",
1500
+ text: `Open data search failed: ${String(err)}`,
1501
+ },
1502
+ ],
1503
+ details: { error: String(err) },
1504
+ };
1505
+ }
1506
+ },
1507
+ }),
1508
+ { optional: true },
1509
+ );
1510
+
1131
1511
  // ========================================================================
1132
1512
  // CLI Commands
1133
1513
  // ========================================================================
@@ -1247,6 +1627,10 @@ const memoryPlugin = {
1247
1627
  let uploadFn: ((opts: { host: string; apiKey: string; projectId: string; filePath: string; fileName: string }) => Promise<unknown>) | undefined;
1248
1628
 
1249
1629
  api.on("before_prompt_build", (event, ctx) => {
1630
+ if ((ctx as any)?.trigger !== "user") {
1631
+ api.logger.info(`memorylake-openclaw: auto-upload skipped, trigger=${(ctx as any)?.trigger ?? "undefined"}`);
1632
+ return;
1633
+ }
1250
1634
  const workspaceDir = (ctx as any)?.workspaceDir;
1251
1635
  if (!workspaceDir || !event.prompt) return;
1252
1636
 
@@ -1303,36 +1687,214 @@ const memoryPlugin = {
1303
1687
  });
1304
1688
  }
1305
1689
 
1306
- // Auto-recall: inject relevant memories and documents before agent starts
1690
+ // ------------------------------------------------------------------
1691
+ // LLM Query Rewrite Helpers
1692
+ // ------------------------------------------------------------------
1693
+
1694
/**
 * Summarize recent session messages into a compact text block for the rewrite prompt.
 *
 * Messages are `unknown[]` from the hook event, so nothing about their shape is
 * trusted: each entry is rendered as one `[role]: content` line (role defaults to
 * "user") and the lines are joined with "\n".
 *
 * @param messages    Session messages; anything that is not a non-empty array yields "".
 * @param maxMessages Keep only the last N messages. Values <= 0 (or NaN) yield "".
 * @returns The joined summary, or "" when there is nothing to summarize.
 */
function summarizeMessages(messages: unknown[], maxMessages = 10): string {
  // The event payload is untyped at runtime; a truthy non-array would crash on .map()
  if (!Array.isArray(messages) || messages.length === 0) return "";

  // slice(-0) is slice(0) and would return EVERY message, so reject non-positive limits
  const limit = Math.floor(maxMessages);
  if (!(limit > 0)) return "";

  const renderContent = (content: unknown): string => {
    if (typeof content === "string") return content;

    if (Array.isArray(content)) {
      // Assumes structured content uses `{ type: "text", text }` blocks (TODO confirm
      // against the host's message schema). Non-text blocks are dropped so binary or
      // tool payloads do not bloat the prompt; if no text block is found we fall
      // through to the JSON dump below.
      const textParts = content
        .map((block: any) =>
          block?.type === "text" && typeof block.text === "string" ? block.text : "",
        )
        .filter((text) => text.length > 0);
      if (textParts.length > 0) return textParts.join("\n");
    }

    try {
      // JSON.stringify returns undefined for functions/symbols and throws on
      // circular structures or BigInt values
      const serialized: string | undefined = JSON.stringify(content ?? "");
      return serialized ?? "";
    } catch {
      return "";
    }
  };

  return messages
    .slice(-limit)
    .map((m: any) => `[${m?.role ?? "user"}]: ${renderContent(m?.content)}`)
    .join("\n");
}
1712
+
1713
+ // (loadCoreAgentDeps is defined at module scope above)
1714
+
1715
/**
 * Resolve provider/model from `agents.defaults.model.primary` in the plugin API config.
 *
 * The value is split on its FIRST "/" into `provider` and `model`; a value without a
 * slash is treated as a bare model name. Returns undefined for both if nothing usable
 * is configured (openclaw will use its own defaults).
 *
 * @returns `{ provider, model }`, each `undefined` when absent or blank.
 */
function resolveProviderModel(): { provider: string | undefined; model: string | undefined } {
  const rawPrimary: unknown = (api.config as any)?.agents?.defaults?.model?.primary;

  // The config is untyped here: a non-string value would throw on .indexOf(), so
  // anything that is not a non-blank string counts as "not configured"
  const modelPrimary = typeof rawPrimary === "string" ? rawPrimary.trim() : "";
  if (!modelPrimary) {
    return { provider: undefined, model: undefined };
  }

  const slashIdx = modelPrimary.indexOf("/");
  if (slashIdx < 0) {
    return { provider: undefined, model: modelPrimary };
  }

  // Only the first slash separates the provider; the remainder (which may itself
  // contain slashes) is the model id. Empty halves ("/x", "x/") become undefined
  // instead of being passed on as empty strings.
  const provider = modelPrimary.slice(0, slashIdx).trim();
  const model = modelPrimary.slice(slashIdx + 1).trim();
  return { provider: provider || undefined, model: model || undefined };
}
1730
+
1731
/**
 * Rewrite the user's prompt into a search-optimized query using
 * openclaw's runEmbeddedPiAgent, considering conversation history.
 *
 * Priority: api.runtime.agent.runEmbeddedPiAgent → loadCoreAgentDeps()
 *
 * This is best-effort: any failure (prompt building, agent lookup, the agent run
 * itself, an empty answer) is logged and the original prompt is returned unchanged.
 *
 * @param originalPrompt The user's latest message.
 * @param messages       Session history from the hook event (shape not trusted).
 * @param ctx            Hook context; `workspaceDir` is required for the embedded run.
 * @returns The rewritten query, or `originalPrompt` when the rewrite is skipped or fails.
 */
async function rewriteQueryForSearch(
  originalPrompt: string,
  messages: unknown[],
  ctx: { workspaceDir?: string },
): Promise<string> {
  if (!ctx.workspaceDir) {
    api.logger.warn("memorylake-openclaw: no workspaceDir, skipping query rewrite");
    return originalPrompt;
  }

  // Set once the temp directory exists so `finally` knows whether to clean up
  let tempDir: string | null = null;
  try {
    // Prompt building lives inside the try so that a malformed history or config
    // degrades to "use the original prompt" instead of throwing into the hook
    const conversationHistory = summarizeMessages(messages);
    const systemPrompt =
      "You are a search query optimizer. Extract the key search intent and produce a concise, search-optimized query. Output ONLY the rewritten query, nothing else. Preserve important entities, names, dates, and technical terms.";
    const userContent = conversationHistory
      ? `Conversation history:\n${conversationHistory}\n\nUser's latest message:\n${originalPrompt}`
      : originalPrompt;
    const fullPrompt = `${systemPrompt}\n\n${userContent}`;

    const { provider, model } = resolveProviderModel();
    api.logger.info(`memorylake-openclaw: rewriting query via runEmbeddedPiAgent (provider=${provider}, model=${model})`);

    tempDir = await fsPromises.mkdtemp(path.join(os.tmpdir(), "memorylake-rewrite-"));

    // mkdtemp appends a unique suffix to the "memorylake-rewrite-" prefix, so the
    // directory name doubles as a collision-free id; a bare Date.now() would repeat
    // for two rewrites started in the same millisecond
    const runTag = path.basename(tempDir);
    const callParams = {
      sessionId: runTag,
      sessionKey: `temp:memorylake-rewrite`,
      sessionFile: path.join(tempDir, "session.jsonl"),
      workspaceDir: ctx.workspaceDir,
      config: api.config,
      prompt: fullPrompt,
      provider,
      model,
      disableTools: true,
      timeoutMs: 15_000,
      runId: runTag,
      lane: `memorylake-rewrite`,
      // NOTE(review): presumably surfaces as ctx.trigger in hooks, which this plugin
      // only acts on when it equals "user" — confirm against the host runtime
      trigger: "memory",
    };

    // Priority 1: try api.runtime.agent.runEmbeddedPiAgent
    let runEmbeddedPiAgent: ((p: typeof callParams) => Promise<any>) | undefined =
      (api.runtime as any)?.agent?.runEmbeddedPiAgent;

    // Priority 2: fall back to the core agent deps loader
    if (typeof runEmbeddedPiAgent !== "function") {
      api.logger.info("memorylake-openclaw: api.runtime.agent.runEmbeddedPiAgent not available, using loadCoreAgentDeps fallback");
      const deps = await loadCoreAgentDeps();
      runEmbeddedPiAgent = deps.runEmbeddedPiAgent;
    }

    const result = await runEmbeddedPiAgent(callParams);

    const firstText: unknown = result?.payloads?.[0]?.text;
    const rewritten = typeof firstText === "string" ? firstText.trim() : "";
    if (rewritten.length > 0) {
      api.logger.info(`memorylake-openclaw: rewritten query: "${rewritten}"`);
      return rewritten;
    }
    api.logger.warn("memorylake-openclaw: rewrite returned empty, using original");
  } catch (err) {
    api.logger.warn(`memorylake-openclaw: query rewrite failed, using original: ${String(err)}`);
  } finally {
    if (tempDir) {
      try {
        await fsPromises.rm(tempDir, { recursive: true, force: true });
      } catch (cleanupErr) {
        api.logger.warn(`memorylake-openclaw: temp session cleanup failed: ${String(cleanupErr)}`);
      }
    }
  }
  return originalPrompt;
}
1811
+
1812
+ // ------------------------------------------------------------------
1813
+ // Auto-recall: inject relevant memories and documents before prompt build
1814
+ // ------------------------------------------------------------------
1307
1815
  if (cfg.autoRecall) {
1308
- api.on("before_agent_start", async (event, ctx) => {
1309
- if (!event.prompt || event.prompt.length < 5) return;
1816
+ api.on("before_prompt_build", async (event, ctx) => {
1817
+ if ((ctx as any)?.trigger !== "user") {
1818
+ api.logger.info(`memorylake-openclaw: auto-recall skipped, trigger=${(ctx as any)?.trigger ?? "undefined"}`);
1819
+ return;
1820
+ }
1821
+ if (!event.prompt) return;
1310
1822
 
1311
- // Resolve per-workspace config override
1312
1823
  const effectiveCfg = resolveConfig(ctx);
1313
1824
  const effectiveProvider = getProvider(effectiveCfg);
1314
1825
 
1315
- // Track session ID
1316
- const sessionId = (ctx as any)?.sessionKey ?? undefined;
1317
- if (sessionId) currentSessionId = sessionId;
1826
+ const sessionId = (ctx as any)?.sessionId ?? undefined;
1827
+
1828
+ // Rewrite via the LLM FIRST: short prompts like "它呢?" ("What about it?") can become meaningful
1829
+ // search queries when the LLM has conversation history context.
1830
+ const searchQuery = await rewriteQueryForSearch(event.prompt, event.messages, ctx);
1831
+
1832
+ // Only skip if the rewritten result is still too short
1833
+ if (searchQuery.length < 5) {
1834
+ api.logger.info(
1835
+ `memorylake-openclaw: skipping auto-recall, rewritten query too short (${searchQuery.length} chars)`,
1836
+ );
1837
+ return;
1838
+ }
1839
+
1840
+ // Fetch industries once per session, then cache
1841
+ let industries: OpenDataIndustry[] | undefined;
1842
+ if (sessionId && sessionIndustriesCache.has(sessionId)) {
1843
+ industries = sessionIndustriesCache.get(sessionId);
1844
+ } else {
1845
+ try {
1846
+ const projectInfo = await effectiveProvider.getProject();
1847
+ industries = projectInfo.industries;
1848
+ if (sessionId) {
1849
+ sessionIndustriesCache.set(sessionId, industries);
1850
+ }
1851
+ } catch (err) {
1852
+ api.logger.warn(`memorylake-openclaw: project info fetch failed: ${String(err)}`);
1853
+ }
1854
+ }
1318
1855
 
1319
1856
  const [memoryResult, docResult] = await Promise.allSettled([
1320
- effectiveProvider.search(event.prompt, buildSearchOptions(effectiveCfg)),
1321
- effectiveProvider.searchDocuments(event.prompt, effectiveCfg.topK),
1857
+ effectiveProvider.search(searchQuery, buildSearchOptions(effectiveCfg)),
1858
+ effectiveProvider.searchDocuments(searchQuery, effectiveCfg.topK),
1322
1859
  ]);
1323
1860
 
1324
1861
  const contextParts: string[] = [];
1325
1862
 
1326
1863
  if (memoryResult.status === "fulfilled" && memoryResult.value.length > 0) {
1327
- const memoryContext = memoryResult.value
1864
+ const memories = memoryResult.value;
1865
+ const memoryContext = memories
1328
1866
  .map((r) => `- ${r.content}`)
1329
1867
  .join("\n");
1330
1868
  contextParts.push(
1331
1869
  `<relevant-memories>\nThe following memories may be relevant to this conversation:\n${memoryContext}\n</relevant-memories>`,
1332
1870
  );
1333
1871
  api.logger.info(
1334
- `memorylake-openclaw: injecting ${memoryResult.value.length} memories into context`,
1872
+ `memorylake-openclaw: injecting ${memories.length} memories into context`,
1335
1873
  );
1874
+
1875
+ // Fetch conflict details for memories flagged with unresolved conflicts
1876
+ const conflictedIds = memories
1877
+ .filter((m) => m.has_unresolved_conflict)
1878
+ .map((m) => m.id);
1879
+ if (conflictedIds.length > 0) {
1880
+ try {
1881
+ const conflicts = await effectiveProvider.listConflicts(conflictedIds, effectiveCfg.userId);
1882
+ if (conflicts.length > 0) {
1883
+ const conflictContext = buildConflictContext(conflicts);
1884
+ contextParts.push(
1885
+ `<memory-conflicts>\nThe following conflicts exist among the recalled memories. ` +
1886
+ `Consider these contradictions when using the above memories.\n` +
1887
+ `If you have not already informed the user about these conflicts in this conversation, briefly mention that some recalled memories contain contradictions and note which points are uncertain. Do not repeat this notice if you have already done so.\n` +
1888
+ `${conflictContext}\n</memory-conflicts>`,
1889
+ );
1890
+ api.logger.info(
1891
+ `memorylake-openclaw: injecting ${conflicts.length} memory conflicts into context`,
1892
+ );
1893
+ }
1894
+ } catch (err) {
1895
+ api.logger.warn(`memorylake-openclaw: conflict fetch failed: ${String(err)}`);
1896
+ }
1897
+ }
1336
1898
  } else if (memoryResult.status === "rejected") {
1337
1899
  api.logger.warn(`memorylake-openclaw: memory recall failed: ${String(memoryResult.reason)}`);
1338
1900
  }
@@ -1349,15 +1911,36 @@ const memoryPlugin = {
1349
1911
  api.logger.warn(`memorylake-openclaw: document search failed: ${String(docResult.reason)}`);
1350
1912
  }
1351
1913
 
1352
- if (contextParts.length === 0) return;
1914
+ const result: { prependContext?: string; appendSystemContext?: string } = {};
1915
+
1916
+ if (contextParts.length > 0) {
1917
+ result.prependContext = contextParts.join("\n\n");
1918
+ }
1919
+
1920
+ if (industries && industries.length > 0) {
1921
+ const categoryList = industries
1922
+ .map((ind) => `- ${ind.id}: ${ind.name}${ind.description ? ` — ${ind.description}` : ""}`)
1923
+ .join("\n");
1924
+ result.appendSystemContext =
1925
+ `<open-data-categories>\nThis project has access to the following open data categories via the open_data_search tool:\n${categoryList}\nWhen the user's question relates to any of these categories, use the open_data_search tool to retrieve relevant data.\n</open-data-categories>`;
1926
+ api.logger.info(
1927
+ `memorylake-openclaw: injecting ${industries.length} open data categories into system context`,
1928
+ );
1929
+ }
1930
+
1931
+ if (!result.prependContext && !result.appendSystemContext) return;
1353
1932
 
1354
- return { prependContext: contextParts.join("\n\n") };
1933
+ return result;
1355
1934
  });
1356
1935
  }
1357
1936
 
1358
1937
  // Auto-capture: store conversation context after agent ends
1359
1938
  if (cfg.autoCapture) {
1360
1939
  api.on("agent_end", async (event, ctx) => {
1940
+ if ((ctx as any)?.trigger !== "user") {
1941
+ api.logger.info(`memorylake-openclaw: auto-capture skipped, trigger=${(ctx as any)?.trigger ?? "undefined"}`);
1942
+ return;
1943
+ }
1361
1944
  if (!event.success || !event.messages || event.messages.length === 0) {
1362
1945
  return;
1363
1946
  }
@@ -1367,8 +1950,7 @@ const memoryPlugin = {
1367
1950
  const effectiveProvider = getProvider(effectiveCfg);
1368
1951
 
1369
1952
  // Track session ID
1370
- const sessionId = (ctx as any)?.sessionKey ?? undefined;
1371
- if (sessionId) currentSessionId = sessionId;
1953
+ const sessionId = (ctx as any)?.sessionId ?? undefined;
1372
1954
 
1373
1955
  try {
1374
1956
  // Extract messages, limiting to last 10
@@ -1410,9 +1992,13 @@ const memoryPlugin = {
1410
1992
  if (textContent.includes("<relevant-memories>")) {
1411
1993
  textContent = textContent.replace(/<relevant-memories>[\s\S]*?<\/relevant-memories>\s*/g, "").trim();
1412
1994
  }
1995
+ if (textContent.includes("<memory-conflicts>")) {
1996
+ textContent = textContent.replace(/<memory-conflicts>[\s\S]*?<\/memory-conflicts>\s*/g, "").trim();
1997
+ }
1413
1998
  if (textContent.includes("<relevant-documents>")) {
1414
1999
  textContent = textContent.replace(/<relevant-documents>[\s\S]*?<\/relevant-documents>\s*/g, "").trim();
1415
2000
  }
2001
+
1416
2002
  if (!textContent) continue;
1417
2003
 
1418
2004
  formattedMessages.push({
@@ -1423,7 +2009,7 @@ const memoryPlugin = {
1423
2009
 
1424
2010
  if (formattedMessages.length === 0) return;
1425
2011
 
1426
- const addOpts = buildAddOptions(effectiveCfg, undefined, currentSessionId);
2012
+ const addOpts = buildAddOptions(effectiveCfg, undefined, sessionId);
1427
2013
  const result = await effectiveProvider.add(
1428
2014
  formattedMessages,
1429
2015
  addOpts,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "memorylake-openclaw",
3
- "version": "0.0.11",
3
+ "version": "0.0.15-beta.1",
4
4
  "type": "module",
5
5
  "description": "MemoryLake memory backend for OpenClaw",
6
6
  "license": "MIT",
@@ -35,9 +35,9 @@ $QCLAW_CONFIG = "$env:USERPROFILE\.qclaw\openclaw.json"
35
35
  function Write-Message {
36
36
  param([string]$Message, [string]$Level = "info")
37
37
  $msg = switch ($Level) {
38
- "success" { "$ColorSuccess✓$ColorReset $Message" }
38
+ "success" { "$ColorSuccess[OK]$ColorReset $Message" }
39
39
  "warn" { "$ColorWarn!$ColorReset $Message" }
40
- "error" { "$ColorError✗$ColorReset $Message" }
40
+ "error" { "$ColorError[X]$ColorReset $Message" }
41
41
  default { "$ColorMuted·$ColorReset $Message" }
42
42
  }
43
43
  Write-Host $msg