@memtensor/memos-local-openclaw-plugin 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/.env.example +11 -0
  2. package/README.md +251 -0
  3. package/SKILL.md +43 -0
  4. package/dist/capture/index.d.ts +16 -0
  5. package/dist/capture/index.d.ts.map +1 -0
  6. package/dist/capture/index.js +80 -0
  7. package/dist/capture/index.js.map +1 -0
  8. package/dist/config.d.ts +4 -0
  9. package/dist/config.d.ts.map +1 -0
  10. package/dist/config.js +96 -0
  11. package/dist/config.js.map +1 -0
  12. package/dist/embedding/index.d.ts +12 -0
  13. package/dist/embedding/index.d.ts.map +1 -0
  14. package/dist/embedding/index.js +75 -0
  15. package/dist/embedding/index.js.map +1 -0
  16. package/dist/embedding/local.d.ts +3 -0
  17. package/dist/embedding/local.d.ts.map +1 -0
  18. package/dist/embedding/local.js +65 -0
  19. package/dist/embedding/local.js.map +1 -0
  20. package/dist/embedding/providers/cohere.d.ts +4 -0
  21. package/dist/embedding/providers/cohere.d.ts.map +1 -0
  22. package/dist/embedding/providers/cohere.js +57 -0
  23. package/dist/embedding/providers/cohere.js.map +1 -0
  24. package/dist/embedding/providers/gemini.d.ts +3 -0
  25. package/dist/embedding/providers/gemini.d.ts.map +1 -0
  26. package/dist/embedding/providers/gemini.js +31 -0
  27. package/dist/embedding/providers/gemini.js.map +1 -0
  28. package/dist/embedding/providers/mistral.d.ts +3 -0
  29. package/dist/embedding/providers/mistral.d.ts.map +1 -0
  30. package/dist/embedding/providers/mistral.js +25 -0
  31. package/dist/embedding/providers/mistral.js.map +1 -0
  32. package/dist/embedding/providers/openai.d.ts +3 -0
  33. package/dist/embedding/providers/openai.d.ts.map +1 -0
  34. package/dist/embedding/providers/openai.js +35 -0
  35. package/dist/embedding/providers/openai.js.map +1 -0
  36. package/dist/embedding/providers/voyage.d.ts +3 -0
  37. package/dist/embedding/providers/voyage.d.ts.map +1 -0
  38. package/dist/embedding/providers/voyage.js +25 -0
  39. package/dist/embedding/providers/voyage.js.map +1 -0
  40. package/dist/index.d.ts +44 -0
  41. package/dist/index.d.ts.map +1 -0
  42. package/dist/index.js +75 -0
  43. package/dist/index.js.map +1 -0
  44. package/dist/ingest/chunker.d.ts +15 -0
  45. package/dist/ingest/chunker.d.ts.map +1 -0
  46. package/dist/ingest/chunker.js +193 -0
  47. package/dist/ingest/chunker.js.map +1 -0
  48. package/dist/ingest/dedup.d.ts +11 -0
  49. package/dist/ingest/dedup.d.ts.map +1 -0
  50. package/dist/ingest/dedup.js +29 -0
  51. package/dist/ingest/dedup.js.map +1 -0
  52. package/dist/ingest/providers/anthropic.d.ts +3 -0
  53. package/dist/ingest/providers/anthropic.d.ts.map +1 -0
  54. package/dist/ingest/providers/anthropic.js +33 -0
  55. package/dist/ingest/providers/anthropic.js.map +1 -0
  56. package/dist/ingest/providers/bedrock.d.ts +8 -0
  57. package/dist/ingest/providers/bedrock.d.ts.map +1 -0
  58. package/dist/ingest/providers/bedrock.js +41 -0
  59. package/dist/ingest/providers/bedrock.js.map +1 -0
  60. package/dist/ingest/providers/gemini.d.ts +3 -0
  61. package/dist/ingest/providers/gemini.d.ts.map +1 -0
  62. package/dist/ingest/providers/gemini.js +31 -0
  63. package/dist/ingest/providers/gemini.js.map +1 -0
  64. package/dist/ingest/providers/index.d.ts +9 -0
  65. package/dist/ingest/providers/index.d.ts.map +1 -0
  66. package/dist/ingest/providers/index.js +68 -0
  67. package/dist/ingest/providers/index.js.map +1 -0
  68. package/dist/ingest/providers/openai.d.ts +3 -0
  69. package/dist/ingest/providers/openai.d.ts.map +1 -0
  70. package/dist/ingest/providers/openai.js +41 -0
  71. package/dist/ingest/providers/openai.js.map +1 -0
  72. package/dist/ingest/worker.d.ts +21 -0
  73. package/dist/ingest/worker.d.ts.map +1 -0
  74. package/dist/ingest/worker.js +111 -0
  75. package/dist/ingest/worker.js.map +1 -0
  76. package/dist/recall/engine.d.ts +23 -0
  77. package/dist/recall/engine.d.ts.map +1 -0
  78. package/dist/recall/engine.js +153 -0
  79. package/dist/recall/engine.js.map +1 -0
  80. package/dist/recall/mmr.d.ts +17 -0
  81. package/dist/recall/mmr.d.ts.map +1 -0
  82. package/dist/recall/mmr.js +51 -0
  83. package/dist/recall/mmr.js.map +1 -0
  84. package/dist/recall/recency.d.ts +20 -0
  85. package/dist/recall/recency.d.ts.map +1 -0
  86. package/dist/recall/recency.js +26 -0
  87. package/dist/recall/recency.js.map +1 -0
  88. package/dist/recall/rrf.d.ts +16 -0
  89. package/dist/recall/rrf.d.ts.map +1 -0
  90. package/dist/recall/rrf.js +15 -0
  91. package/dist/recall/rrf.js.map +1 -0
  92. package/dist/storage/sqlite.d.ts +34 -0
  93. package/dist/storage/sqlite.d.ts.map +1 -0
  94. package/dist/storage/sqlite.js +274 -0
  95. package/dist/storage/sqlite.js.map +1 -0
  96. package/dist/storage/vector.d.ts +13 -0
  97. package/dist/storage/vector.d.ts.map +1 -0
  98. package/dist/storage/vector.js +33 -0
  99. package/dist/storage/vector.js.map +1 -0
  100. package/dist/tools/index.d.ts +4 -0
  101. package/dist/tools/index.d.ts.map +1 -0
  102. package/dist/tools/index.js +10 -0
  103. package/dist/tools/index.js.map +1 -0
  104. package/dist/tools/memory-get.d.ts +4 -0
  105. package/dist/tools/memory-get.d.ts.map +1 -0
  106. package/dist/tools/memory-get.js +59 -0
  107. package/dist/tools/memory-get.js.map +1 -0
  108. package/dist/tools/memory-search.d.ts +4 -0
  109. package/dist/tools/memory-search.d.ts.map +1 -0
  110. package/dist/tools/memory-search.js +36 -0
  111. package/dist/tools/memory-search.js.map +1 -0
  112. package/dist/tools/memory-timeline.d.ts +4 -0
  113. package/dist/tools/memory-timeline.d.ts.map +1 -0
  114. package/dist/tools/memory-timeline.js +64 -0
  115. package/dist/tools/memory-timeline.js.map +1 -0
  116. package/dist/types.d.ts +158 -0
  117. package/dist/types.d.ts.map +1 -0
  118. package/dist/types.js +25 -0
  119. package/dist/types.js.map +1 -0
  120. package/dist/viewer/html.d.ts +2 -0
  121. package/dist/viewer/html.d.ts.map +1 -0
  122. package/dist/viewer/html.js +686 -0
  123. package/dist/viewer/html.js.map +1 -0
  124. package/dist/viewer/server.d.ts +48 -0
  125. package/dist/viewer/server.d.ts.map +1 -0
  126. package/dist/viewer/server.js +470 -0
  127. package/dist/viewer/server.js.map +1 -0
  128. package/index.ts +357 -0
  129. package/openclaw.plugin.json +57 -0
  130. package/package.json +57 -0
  131. package/src/capture/index.ts +92 -0
  132. package/src/config.ts +67 -0
  133. package/src/embedding/index.ts +76 -0
  134. package/src/embedding/local.ts +35 -0
  135. package/src/embedding/providers/cohere.ts +69 -0
  136. package/src/embedding/providers/gemini.ts +41 -0
  137. package/src/embedding/providers/mistral.ts +32 -0
  138. package/src/embedding/providers/openai.ts +42 -0
  139. package/src/embedding/providers/voyage.ts +32 -0
  140. package/src/index.ts +106 -0
  141. package/src/ingest/chunker.ts +217 -0
  142. package/src/ingest/dedup.ts +37 -0
  143. package/src/ingest/providers/anthropic.ts +41 -0
  144. package/src/ingest/providers/bedrock.ts +50 -0
  145. package/src/ingest/providers/gemini.ts +41 -0
  146. package/src/ingest/providers/index.ts +67 -0
  147. package/src/ingest/providers/openai.ts +48 -0
  148. package/src/ingest/worker.ts +130 -0
  149. package/src/recall/engine.ts +182 -0
  150. package/src/recall/mmr.ts +60 -0
  151. package/src/recall/recency.ts +27 -0
  152. package/src/recall/rrf.ts +31 -0
  153. package/src/storage/sqlite.ts +305 -0
  154. package/src/storage/vector.ts +39 -0
  155. package/src/tools/index.ts +3 -0
  156. package/src/tools/memory-get.ts +68 -0
  157. package/src/tools/memory-search.ts +36 -0
  158. package/src/tools/memory-timeline.ts +73 -0
  159. package/src/types.ts +214 -0
  160. package/src/viewer/html.ts +682 -0
  161. package/src/viewer/server.ts +464 -0
  162. package/www/index.html +606 -0
@@ -0,0 +1,76 @@
1
+ import type { EmbeddingConfig, Logger } from "../types";
2
+ import { embedOpenAI } from "./providers/openai";
3
+ import { embedGemini } from "./providers/gemini";
4
+ import { embedCohere, embedCohereQuery } from "./providers/cohere";
5
+ import { embedVoyage } from "./providers/voyage";
6
+ import { embedMistral } from "./providers/mistral";
7
+ import { embedLocal } from "./local";
8
+
9
/**
 * Facade over the configured embedding backend.
 *
 * Dispatches to the provider named in the config and uses the bundled local
 * model when no provider is configured, when the provider name is unknown, or
 * when a remote batch call fails.
 */
export class Embedder {
  constructor(
    private cfg: EmbeddingConfig | undefined,
    private log: Logger,
  ) {}

  /** Name of the active provider; `"local"` when no config was supplied. */
  get provider(): string {
    return this.cfg?.provider ?? "local";
  }

  /**
   * Vector size reported for the active provider: 384 for the local model,
   * otherwise the configured value (default 1536).
   *
   * NOTE(review): `embedBatch` falls back to the local model on remote failure,
   * so returned vectors may not always match this number — confirm callers cope.
   */
  get dimensions(): number {
    if (this.provider === "local") return 384;
    return this.cfg?.dimensions ?? 1536;
  }

  /**
   * Embed `texts` in order, issuing one provider call per batch.
   *
   * @param texts Inputs to embed; an empty array yields an empty result.
   * @returns One vector per input, in input order.
   */
  async embed(texts: string[]): Promise<number[][]> {
    const batchSize = this.resolveBatchSize();
    const results: number[][] = [];

    for (let i = 0; i < texts.length; i += batchSize) {
      const vecs = await this.embedBatch(texts.slice(i, i + batchSize));
      results.push(...vecs);
    }

    return results;
  }

  /**
   * Embed a single search query.
   *
   * Cohere uses a dedicated query call; its errors propagate to the caller
   * (there is no local fallback on that path). Every other provider goes
   * through `embedBatch`.
   *
   * @throws Error when the provider returns no vector for the query.
   */
  async embedQuery(text: string): Promise<number[]> {
    if (this.provider === "cohere" && this.cfg) {
      return embedCohereQuery(text, this.cfg, this.log);
    }
    const [vector] = await this.embedBatch([text]);
    if (vector === undefined) {
      // Previously this surfaced as `undefined` typed as number[] and failed later.
      throw new Error(`Embedding provider '${this.provider}' returned no vector for the query`);
    }
    return vector;
  }

  /**
   * Configured batch size, or the default of 32 when the configured value
   * cannot drive the batching loop. Zero or a negative value would never
   * advance the loop; NaN would end it after an empty first batch.
   */
  private resolveBatchSize(): number {
    const fallback = 32;
    const configured = this.cfg?.batchSize ?? fallback;
    return Number.isFinite(configured) && configured >= 1 ? Math.floor(configured) : fallback;
  }

  /** Embed one batch with the active provider, falling back to the local model on failure. */
  private async embedBatch(texts: string[]): Promise<number[][]> {
    const provider = this.provider;
    const cfg = this.cfg;

    try {
      // Without a config the provider is always "local".
      if (!cfg) return await embedLocal(texts, this.log);

      switch (provider) {
        case "openai":
        case "openai_compatible":
        case "azure_openai":
          return await embedOpenAI(texts, cfg, this.log);
        case "gemini":
          return await embedGemini(texts, cfg, this.log);
        case "cohere":
          return await embedCohere(texts, cfg, this.log);
        case "mistral":
          return await embedMistral(texts, cfg, this.log);
        case "voyage":
          return await embedVoyage(texts, cfg, this.log);
        case "local":
        default:
          return await embedLocal(texts, this.log);
      }
    } catch (err) {
      if (provider !== "local") {
        this.log.warn(`Embedding provider '${provider}' failed, falling back to local: ${err}`);
        return await embedLocal(texts, this.log);
      }
      throw err;
    }
  }
}
@@ -0,0 +1,35 @@
1
+ import type { Logger } from "../types";
2
+ import { DEFAULTS } from "../types";
3
+
4
+ let extractorPromise: Promise<any> | null = null;
5
+
6
/**
 * Return the shared feature-extraction pipeline, starting the load on first use.
 *
 * The in-flight promise is cached at module level so concurrent callers share
 * one load. If loading rejects, the cache is cleared so a later call retries.
 */
function getExtractor(log: Logger): Promise<any> {
  if (extractorPromise === null) {
    const loadPipeline = async (): Promise<any> => {
      log.info("Loading local embedding model (first call may download ~23MB)...");
      const { pipeline } = await import("@xenova/transformers");
      const extractor = await pipeline("feature-extraction", DEFAULTS.localEmbeddingModel, {
        quantized: true,
      });
      log.info("Local embedding model ready");
      return extractor;
    };

    // The reset lives in `.catch` (not a try/catch inside the loader) so it runs
    // after the assignment below, even when the loader fails immediately.
    extractorPromise = loadPipeline().catch((err) => {
      extractorPromise = null;
      throw err;
    });
  }

  return extractorPromise;
}
24
+
25
/**
 * Embed each text with the local feature-extraction pipeline, one call at a time.
 *
 * @param texts Inputs to embed.
 * @param log Logger passed to the pipeline loader.
 * @returns One mean-pooled, normalized vector per input, cut to
 *          `DEFAULTS.localEmbeddingDimensions` entries.
 */
export async function embedLocal(texts: string[], log: Logger): Promise<number[][]> {
  const extractor = await getExtractor(log);
  const vectors: number[][] = [];

  for (let i = 0; i < texts.length; i++) {
    const output = await extractor(texts[i], { pooling: "mean", normalize: true });
    const values = Array.from(output.data as Float32Array);
    vectors.push(values.slice(0, DEFAULTS.localEmbeddingDimensions));
  }

  return vectors;
}
@@ -0,0 +1,69 @@
1
+ import type { EmbeddingConfig, Logger } from "../../types";
2
+
3
/**
 * Shared request path for both Cohere embedding calls.
 *
 * @param texts Inputs to embed (sent as `texts`).
 * @param inputType Cohere `input_type` value for this call.
 * @param cfg Provider configuration (endpoint, model, key, headers, timeout).
 * @param failureLabel Prefix used in thrown error messages.
 * @returns One embedding per input text.
 * @throws Error on a non-2xx response or when the response does not contain
 *         exactly one embedding per input.
 */
async function requestCohereEmbeddings(
  texts: string[],
  inputType: "search_document" | "search_query",
  cfg: EmbeddingConfig,
  failureLabel: string,
): Promise<number[][]> {
  const endpoint = cfg.endpoint ?? "https://api.cohere.ai/v1/embed";
  const model = cfg.model ?? "embed-english-v3.0";
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${cfg.apiKey}`,
    ...cfg.headers,
  };

  const resp = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify({
      texts,
      model,
      input_type: inputType,
      truncate: "END",
    }),
    signal: AbortSignal.timeout(cfg.timeoutMs ?? 30_000),
  });

  if (!resp.ok) {
    const body = await resp.text();
    throw new Error(`${failureLabel} failed (${resp.status}): ${body}`);
  }

  const json = (await resp.json()) as { embeddings?: unknown };
  const embeddings = json.embeddings;
  // Callers pair vectors with inputs by position, so a short or malformed
  // response must fail loudly instead of misaligning results.
  if (!Array.isArray(embeddings) || embeddings.length !== texts.length) {
    const got = Array.isArray(embeddings) ? String(embeddings.length) : "none";
    throw new Error(`${failureLabel} failed: expected ${texts.length} embeddings, got ${got}`);
  }
  return embeddings as number[][];
}

/**
 * Embed documents with Cohere (`input_type: "search_document"`).
 *
 * @param texts Documents to embed; an empty array returns `[]` without a request.
 * @param cfg Provider configuration.
 * @param log Logger (currently unused by this provider).
 * @returns One embedding per document, in input order.
 */
export async function embedCohere(
  texts: string[],
  cfg: EmbeddingConfig,
  log: Logger,
): Promise<number[][]> {
  if (texts.length === 0) return [];
  return requestCohereEmbeddings(texts, "search_document", cfg, "Cohere embedding");
}

/**
 * Embed a single search query with Cohere (`input_type: "search_query"`).
 *
 * @param text Query text.
 * @param cfg Provider configuration.
 * @param log Logger (currently unused by this provider).
 * @returns The query embedding.
 * @throws Error when the request fails or no embedding is returned.
 */
export async function embedCohereQuery(
  text: string,
  cfg: EmbeddingConfig,
  log: Logger,
): Promise<number[]> {
  const [embedding] = await requestCohereEmbeddings(
    [text],
    "search_query",
    cfg,
    "Cohere query embedding",
  );
  return embedding;
}
@@ -0,0 +1,41 @@
1
+ import type { EmbeddingConfig, Logger } from "../../types";
2
+
3
/**
 * Embed texts with the Gemini `batchEmbedContents` endpoint.
 *
 * The API key is sent as a URL-encoded `key` query parameter. It is appended
 * with `&` when the endpoint already has a query string, and omitted when no
 * key is configured (for example when a custom header carries the credential).
 *
 * @param texts Inputs to embed; one request entry is sent per text.
 * @param cfg Provider configuration (endpoint, model, key, headers, timeout).
 * @param log Logger (currently unused by this provider).
 * @returns One embedding per input, in input order.
 * @throws Error on a non-2xx response or when the response does not contain
 *         exactly one embedding per input.
 */
export async function embedGemini(
  texts: string[],
  cfg: EmbeddingConfig,
  log: Logger,
): Promise<number[][]> {
  const model = cfg.model ?? "text-embedding-004";
  const endpoint =
    cfg.endpoint ??
    `https://generativelanguage.googleapis.com/v1beta/models/${model}:batchEmbedContents`;

  const headers: Record<string, string> = {
    "Content-Type": "application/json",
    ...cfg.headers,
  };

  const separator = endpoint.includes("?") ? "&" : "?";
  const url = cfg.apiKey
    ? `${endpoint}${separator}key=${encodeURIComponent(cfg.apiKey)}`
    : endpoint;

  const resp = await fetch(url, {
    method: "POST",
    headers,
    body: JSON.stringify({
      requests: texts.map((text) => ({
        model: `models/${model}`,
        content: { parts: [{ text }] },
      })),
    }),
    signal: AbortSignal.timeout(cfg.timeoutMs ?? 30_000),
  });

  if (!resp.ok) {
    const body = await resp.text();
    throw new Error(`Gemini embedding failed (${resp.status}): ${body}`);
  }

  const json = (await resp.json()) as {
    embeddings?: Array<{ values: number[] }>;
  };
  const embeddings = json.embeddings;
  // Results are paired with inputs by position; reject a short or malformed body.
  if (!Array.isArray(embeddings) || embeddings.length !== texts.length) {
    const got = Array.isArray(embeddings) ? String(embeddings.length) : "none";
    throw new Error(`Gemini embedding failed: expected ${texts.length} embeddings, got ${got}`);
  }
  return embeddings.map((e) => e.values);
}
@@ -0,0 +1,32 @@
1
+ import type { EmbeddingConfig, Logger } from "../../types";
2
+
3
/**
 * Embed texts with the Mistral embeddings endpoint.
 *
 * @param texts Inputs to embed (sent as `input`).
 * @param cfg Provider configuration (endpoint, model, key, headers, timeout).
 * @param log Logger (currently unused by this provider).
 * @returns The `embedding` of each entry in the response's `data` array.
 * @throws Error carrying the status and response body on a non-2xx response.
 */
export async function embedMistral(
  texts: string[],
  cfg: EmbeddingConfig,
  log: Logger,
): Promise<number[][]> {
  const requestHeaders: Record<string, string> = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${cfg.apiKey}`,
    ...cfg.headers,
  };
  const payload = JSON.stringify({
    input: texts,
    model: cfg.model ?? "mistral-embed",
    encoding_format: "float",
  });

  const response = await fetch(cfg.endpoint ?? "https://api.mistral.ai/v1/embeddings", {
    method: "POST",
    headers: requestHeaders,
    body: payload,
    signal: AbortSignal.timeout(cfg.timeoutMs ?? 30_000),
  });

  if (response.ok) {
    const parsed = (await response.json()) as {
      data: Array<{ embedding: number[] }>;
    };
    return parsed.data.map((entry) => entry.embedding);
  }

  const detail = await response.text();
  throw new Error(`Mistral embedding failed (${response.status}): ${detail}`);
}
@@ -0,0 +1,42 @@
1
+ import type { EmbeddingConfig, Logger } from "../../types";
2
+
3
/**
 * Embed texts with an OpenAI-style embeddings endpoint.
 *
 * The configured endpoint is passed through `normalizeEmbeddingEndpoint`
 * before use.
 *
 * @param texts Inputs to embed (sent as `input`).
 * @param cfg Provider configuration (endpoint, model, key, headers, timeout).
 * @param log Logger (currently unused by this provider).
 * @returns The `embedding` of each entry in the response's `data` array.
 * @throws Error carrying the status and response body on a non-2xx response.
 */
export async function embedOpenAI(
  texts: string[],
  cfg: EmbeddingConfig,
  log: Logger,
): Promise<number[][]> {
  const target = normalizeEmbeddingEndpoint(cfg.endpoint ?? "https://api.openai.com/v1/embeddings");
  const requestHeaders: Record<string, string> = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${cfg.apiKey}`,
    ...cfg.headers,
  };
  const payload = JSON.stringify({ input: texts, model: cfg.model ?? "text-embedding-3-small" });

  const response = await fetch(target, {
    method: "POST",
    headers: requestHeaders,
    body: payload,
    signal: AbortSignal.timeout(cfg.timeoutMs ?? 30_000),
  });

  if (response.ok) {
    const parsed = (await response.json()) as {
      data: Array<{ embedding: number[] }>;
    };
    return parsed.data.map((entry) => entry.embedding);
  }

  const detail = await response.text();
  throw new Error(`OpenAI embedding failed (${response.status}): ${detail}`);
}
33
+
34
/**
 * Normalize an embeddings endpoint: if the user provides a base URL
 * (e.g. https://host/v1) without the /embeddings suffix, append it.
 *
 * Only the path is inspected. A query string or fragment (for example an
 * `?api-version=...` parameter) is kept after the normalized path, so it does
 * not hide an existing `/embeddings` suffix and is not pushed into the middle
 * of the URL.
 *
 * @param url Endpoint or base URL as configured.
 * @returns The URL with trailing slashes removed from its path and the path
 *          ending in `/embeddings`.
 */
function normalizeEmbeddingEndpoint(url: string): string {
  const cut = url.search(/[?#]/);
  const path = cut === -1 ? url : url.slice(0, cut);
  const suffix = cut === -1 ? "" : url.slice(cut);

  const stripped = path.replace(/\/+$/, "");
  const normalized = stripped.endsWith("/embeddings") ? stripped : `${stripped}/embeddings`;
  return `${normalized}${suffix}`;
}
@@ -0,0 +1,32 @@
1
+ import type { EmbeddingConfig, Logger } from "../../types";
2
+
3
/**
 * Embed texts with the Voyage embeddings endpoint.
 *
 * @param texts Inputs to embed (sent as `input`).
 * @param cfg Provider configuration (endpoint, model, key, headers, timeout).
 * @param log Logger (currently unused by this provider).
 * @returns The `embedding` of each entry in the response's `data` array.
 * @throws Error carrying the status and response body on a non-2xx response.
 */
export async function embedVoyage(
  texts: string[],
  cfg: EmbeddingConfig,
  log: Logger,
): Promise<number[][]> {
  const requestHeaders: Record<string, string> = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${cfg.apiKey}`,
    ...cfg.headers,
  };
  const payload = JSON.stringify({ input: texts, model: cfg.model ?? "voyage-2" });

  const response = await fetch(cfg.endpoint ?? "https://api.voyageai.com/v1/embeddings", {
    method: "POST",
    headers: requestHeaders,
    body: payload,
    signal: AbortSignal.timeout(cfg.timeoutMs ?? 30_000),
  });

  if (response.ok) {
    const parsed = (await response.json()) as {
      data: Array<{ embedding: number[] }>;
    };
    return parsed.data.map((entry) => entry.embedding);
  }

  const detail = await response.text();
  throw new Error(`Voyage embedding failed (${response.status}): ${detail}`);
}
package/src/index.ts ADDED
@@ -0,0 +1,106 @@
1
+ import { v4 as uuid } from "uuid";
2
+ import { buildContext } from "./config";
3
+ import { SqliteStore } from "./storage/sqlite";
4
+ import { Embedder } from "./embedding";
5
+ import { IngestWorker } from "./ingest/worker";
6
+ import { RecallEngine } from "./recall/engine";
7
+ import { captureMessages } from "./capture";
8
+ import { createMemorySearchTool, createMemoryTimelineTool, createMemoryGetTool } from "./tools";
9
+ import type { MemosLocalConfig, ToolDefinition, Logger } from "./types";
10
+
11
/** Handle returned by `initPlugin`: the registered tools plus lifecycle hooks. */
export interface MemosLocalPlugin {
  /** Plugin identifier. */
  id: string;
  /** Tool definitions exposed by the plugin. */
  tools: ToolDefinition[];
  /** Feed one conversation turn to the plugin; `sessionKey` is optional. */
  onConversationTurn: (messages: { role: string; content: string }[], sessionKey?: string) => void;
  /** Wait for all pending ingest operations to complete. */
  flush: () => Promise<void>;
  /** Release the plugin's resources. */
  shutdown: () => void;
}
19
+
20
/** Options accepted by `initPlugin`; every field is optional. */
export interface PluginInitOptions {
  /** Directory for plugin state. */
  stateDir?: string;
  /** Workspace directory. */
  workspaceDir?: string;
  /** Partial plugin configuration. */
  config?: Partial<MemosLocalConfig>;
  /** Logger to use. */
  log?: Logger;
}
26
+
27
/**
 * Initialize the memos-local plugin.
 *
 * Builds the runtime context, opens the SQLite store, and wires the embedder,
 * ingest worker and recall engine into the returned plugin handle.
 *
 * Typical usage inside OpenClaw plugin lifecycle:
 *
 * ```ts
 * import { initPlugin } from "@memtensor/memos-local-openclaw-plugin";
 *
 * export default function activate(ctx) {
 *   const plugin = initPlugin({
 *     stateDir: ctx.stateDir,
 *     workspaceDir: ctx.workspaceDir,
 *     config: ctx.pluginConfig,
 *     log: ctx.log,
 *   });
 *   ctx.registerTools(plugin.tools);
 *   ctx.onConversationTurn((msgs, session) => {
 *     plugin.onConversationTurn(msgs, session);
 *   });
 *   ctx.onDeactivate(() => plugin.shutdown());
 * }
 * ```
 *
 * NOTE(review): `shutdown()` closes the store without waiting for queued
 * ingest work; callers that need every turn persisted should presumably
 * `await plugin.flush()` first — confirm against the worker implementation.
 *
 * @param opts Optional directories, config overrides and logger. `stateDir`
 *             defaults to `defaultStateDir()` and `workspaceDir` to
 *             `process.cwd()`.
 * @returns The plugin handle (tools, turn hook, `flush`, `shutdown`).
 * @throws Error when the resolved configuration has no `storage.dbPath`.
 */
export function initPlugin(opts: PluginInitOptions = {}): MemosLocalPlugin {
  const stateDir = opts.stateDir ?? defaultStateDir();
  const workspaceDir = opts.workspaceDir ?? process.cwd();
  const ctx = buildContext(stateDir, workspaceDir, opts.config, opts.log);

  ctx.log.info("Initializing memos-local plugin...");

  // Fail with a clear message instead of relying on non-null assertions.
  const dbPath = ctx.config.storage?.dbPath;
  if (!dbPath) {
    throw new Error("memos-local: resolved configuration has no storage.dbPath");
  }

  const store = new SqliteStore(dbPath, ctx.log);
  const embedder = new Embedder(ctx.config.embedding, ctx.log);
  const worker = new IngestWorker(store, embedder, ctx);
  const engine = new RecallEngine(store, embedder, ctx);

  const tools: ToolDefinition[] = [
    createMemorySearchTool(engine),
    createMemoryTimelineTool(store),
    createMemoryGetTool(store),
  ];

  ctx.log.info(`Plugin ready. DB: ${dbPath}, Embedding: ${embedder.provider}`);

  return {
    id: "memos-local",

    tools,

    onConversationTurn(
      messages: Array<{ role: string; content: string }>,
      sessionKey?: string,
    ): void {
      const session = sessionKey ?? "default";
      const turnId = uuid();
      const tag = ctx.config.capture?.evidenceWrapperTag ?? "STORED_MEMORY";

      const captured = captureMessages(messages, session, turnId, tag, ctx.log);
      // Only enqueue when the capture step kept at least one message.
      if (captured.length > 0) {
        worker.enqueue(captured);
      }
    },

    async flush(): Promise<void> {
      await worker.flush();
    },

    shutdown(): void {
      ctx.log.info("Shutting down memos-local plugin...");
      store.close();
    },
  };
}
99
+
100
/**
 * Default state directory: `<home>/.openclaw`.
 *
 * The home directory is taken from `HOME`, then `USERPROFILE`, then `/tmp`.
 * Empty values are treated as unset, so an empty `HOME` does not resolve to
 * `/.openclaw` at the filesystem root.
 */
function defaultStateDir(): string {
  const home = process.env.HOME || process.env.USERPROFILE || "/tmp";
  return `${home}/.openclaw`;
}
104
+
105
+ // Re-export types for consumers
106
+ export type { MemosLocalConfig, ToolDefinition, SearchResult, SearchHit, TimelineResult, GetResult } from "./types";
@@ -0,0 +1,217 @@
1
+ import type { ChunkKind } from "../types";
2
+
3
/** One chunk produced by `chunkText`, before any further processing. */
export interface RawChunk {
  /** Chunk text. */
  content: string;
  /** Structural category assigned by the chunker. */
  kind: ChunkKind;
}
7
+
8
/** Upper bound on chunk length; longer non-code chunks are split. */
const MAX_CHUNK_CHARS = 3000;
/** Prose fragments shorter than this are discarded. */
const MIN_CHUNK_CHARS = 40;
/** Chunks below this length are candidates for merging with a neighbour. */
const IDEAL_CHUNK_CHARS = 1500;

/** A fenced code block: an opening run of 3+ backticks through the matching closing run. */
const FENCED_CODE_RE = /^(`{3,})[^\n]*\n[\s\S]*?^\1\s*$/gm;

/**
 * A line that opens a brace-delimited function/class body and ends in `{`:
 * JS/TS `function`/`class` (optionally `export`/`default`/`async`), a
 * `const name = (...) =>` arrow header (with or without a trailing `{`),
 * Go/Rust `func`/`fn`/`pub fn`, and lines starting with
 * `public`/`private`/`protected`/`static`.
 *
 * Every keyword branch allows arbitrary text up to the trailing `{`. Before,
 * the end-of-line anchor followed the keyword directly, so lines such as
 * `function foo() {` never matched. Indentation-delimited openers (Python
 * `def`) are deliberately not matched: the caller closes blocks by brace depth.
 */
const FUNC_OPEN_RE =
  /^[ \t]*(?:(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class)\b.*\{|(?:export\s+)?const\s+\w+\s*=\s*(?:\([^)]*\)|[^=])*=>(?:\s*\{)?|(?:func |fn |pub\s+fn ).*\{|(?:public |private |protected |static )+.*\{)\s*$/;
/** A line consisting only of a closing `}` or `]`, optionally followed by `;`. */
const BLOCK_CLOSE_RE = /^[ \t]*[}\]]\s*;?\s*$/;

/** An Error/Exception/Traceback header followed by at least two stack-frame lines. */
const ERROR_STACK_RE =
  /(?:(?:Error|Exception|Traceback)[^\n]*\n(?:\s+at\s+[^\n]+\n?|.*File "[^\n]+\n?|.*line \d+[^\n]*\n?){2,})/gm;
/** Three or more consecutive bullet lines (`-`, `*` or `•`). */
const LIST_BLOCK_RE = /(?:^[\s]*[-*•]\s+.+\n?){3,}/gm;
/** A line starting with `$`, `>` or `#` followed by whitespace and text. */
const COMMAND_LINE_RE = /^(?:\$|>|#)\s+.+$/gm;
22
+
23
/**
 * Semantic-aware chunking:
 *   1. Extract fenced code blocks as whole units (never split inside)
 *   2. Detect unfenced code regions by brace-matching (functions/classes kept intact)
 *   3. Extract error stacks, list blocks, command lines
 *   4. Split remaining prose at paragraph boundaries (double newline)
 *   5. Merge short adjacent chunks of the same kind
 *
 * Structured regions are swapped for NUL-delimited placeholders while the rest
 * of the text is processed. When a later extraction absorbs an earlier
 * placeholder (for example a fenced block inside a brace block), the
 * placeholder is expanded back to its text inside the outer chunk and the inner
 * chunk is not emitted separately. This keeps placeholder tokens out of chunk
 * content and keeps the inner text in its original position.
 *
 * @param text Raw message text.
 * @returns Chunks in document order. If nothing survives the filters, a single
 *          paragraph chunk with the trimmed input is returned.
 */
export function chunkText(text: string): RawChunk[] {
  const slots = new Map<string, { chunk: RawChunk; nested: boolean }>();
  let counter = 0;

  function ph(content: string, kind: ChunkKind): string {
    // Inner slot contents were expanded when they were created, so a single
    // pass restores the full text of this region.
    const expanded = content.replace(/\x00SLOT_\d+\x00/g, (token) => {
      const inner = slots.get(token);
      if (!inner) return token;
      inner.nested = true;
      return inner.chunk.content;
    });
    const tag = `\x00SLOT_${counter++}\x00`;
    slots.set(tag, { chunk: { content: expanded.trim(), kind }, nested: false });
    return tag;
  }

  let remaining = text.replace(FENCED_CODE_RE, (m) => ph(m, "code_block"));

  remaining = extractBraceBlocks(remaining, ph);

  const structural: Array<{ re: RegExp; kind: ChunkKind }> = [
    { re: ERROR_STACK_RE, kind: "error_stack" },
    { re: LIST_BLOCK_RE, kind: "list" },
    { re: COMMAND_LINE_RE, kind: "command" },
  ];
  for (const { re, kind } of structural) {
    remaining = remaining.replace(re, (m) => ph(m, kind));
  }

  const raw: RawChunk[] = [];
  const emitted = new Set<string>();

  for (const sec of remaining.split(/\n{2,}/)) {
    // Splitting on a capture group keeps the placeholders as their own parts;
    // a section without placeholders yields itself as the only part.
    for (const part of sec.trim().split(/(\x00SLOT_\d+\x00)/)) {
      const slot = slots.get(part);
      if (slot) {
        raw.push(slot.chunk);
        emitted.add(part);
        continue;
      }
      const prose = part.trim();
      if (prose.length >= MIN_CHUNK_CHARS) {
        raw.push({ content: prose, kind: "paragraph" });
      }
    }
  }

  // Safety net: emit any top-level slot whose placeholder was not encountered.
  for (const [tag, slot] of slots) {
    if (!slot.nested && !emitted.has(tag)) {
      raw.push(slot.chunk);
    }
  }

  const merged = mergeSmallChunks(raw);
  const final = splitOversized(merged);

  return final.length > 0 ? final : [{ content: text.trim(), kind: "paragraph" }];
}
88
+
89
/**
 * Find function/class bodies outside fenced blocks and swap each for a placeholder.
 *
 * Scans line by line. A line matching `FUNC_OPEN_RE` opens a block, and the
 * block closes on the line where the running `countBraces` depth drops to zero
 * or below. A closed block of at least `MIN_CHUNK_CHARS` (trimmed) becomes a
 * "code_block" placeholder; shorter blocks are left in the text as-is. Lines
 * that already contain a placeholder never open or close a block. A block that
 * is still open at the end of the text is flushed by the same rule.
 *
 * @param text Text in which fenced blocks have already been replaced.
 * @param ph Registers a chunk and returns its placeholder token.
 * @returns The text with qualifying blocks replaced by placeholders.
 */
function extractBraceBlocks(
  text: string,
  ph: (content: string, kind: ChunkKind) => string,
): string {
  const out: string[] = [];
  let pending: string[] = [];
  let depth = 0;
  let inBlock = false;

  const flushPending = (): void => {
    const block = pending.join("\n");
    out.push(block.trim().length >= MIN_CHUNK_CHARS ? ph(block, "code_block") : block);
  };

  for (const line of text.split("\n")) {
    if (line.includes("\x00SLOT_")) {
      (inBlock ? pending : out).push(line);
      continue;
    }

    if (!inBlock) {
      if (FUNC_OPEN_RE.test(line)) {
        inBlock = true;
        pending = [line];
        // An opener whose own brackets balance (or go negative) still counts as depth 1.
        const opening = countBraces(line);
        depth = opening <= 0 ? 1 : opening;
      } else {
        out.push(line);
      }
      continue;
    }

    pending.push(line);
    depth += countBraces(line);
    if (depth <= 0) {
      flushPending();
      inBlock = false;
      pending = [];
      depth = 0;
    }
  }

  if (pending.length > 0) {
    flushPending();
  }

  return out.join("\n");
}
153
+
154
/**
 * Net bracket balance of a line: +1 for each `{` or `(`, -1 for each `}` or `)`.
 * Square brackets and all other characters are ignored.
 */
function countBraces(line: string): number {
  let balance = 0;
  for (let i = 0; i < line.length; i++) {
    switch (line[i]) {
      case "{":
      case "(":
        balance += 1;
        break;
      case "}":
      case ")":
        balance -= 1;
        break;
    }
  }
  return balance;
}
162
+
163
/**
 * Merge runs of adjacent chunks that share a kind and are both short.
 *
 * Two neighbours are joined with a blank line when they have the same kind,
 * each is under `IDEAL_CHUNK_CHARS`, and the joined length (including the
 * two-character separator) stays within `MAX_CHUNK_CHARS`. Inputs of length 0
 * or 1 are returned unchanged; otherwise the result holds copies, so the input
 * chunks are not mutated.
 */
function mergeSmallChunks(chunks: RawChunk[]): RawChunk[] {
  if (chunks.length <= 1) return chunks;

  const out: RawChunk[] = [];
  for (const next of chunks) {
    const last = out[out.length - 1];
    if (
      last !== undefined &&
      last.kind === next.kind &&
      last.content.length < IDEAL_CHUNK_CHARS &&
      next.content.length < IDEAL_CHUNK_CHARS &&
      last.content.length + next.content.length + 2 <= MAX_CHUNK_CHARS
    ) {
      last.content = `${last.content}\n\n${next.content}`;
    } else {
      out.push({ ...next });
    }
  }
  return out;
}
188
+
189
/**
 * Break up chunks longer than `MAX_CHUNK_CHARS` at sentence boundaries.
 * Code blocks are passed through whole regardless of length.
 */
function splitOversized(chunks: RawChunk[]): RawChunk[] {
  return chunks.flatMap((chunk) => {
    const keepWhole = chunk.content.length <= MAX_CHUNK_CHARS || chunk.kind === "code_block";
    return keepWhole ? [chunk] : splitAtSentenceBoundary(chunk.content, chunk.kind);
  });
}
200
+
201
/**
 * Split text into chunks of at most `MAX_CHUNK_CHARS`, preferring sentence
 * and line boundaries.
 *
 * The text is tokenised without gaps: each piece is a run of ordinary
 * characters ending in sentence punctuation, one or more newlines, or the end
 * of the text. Lines separated by a single newline and an unterminated final
 * sentence are therefore kept. A single piece longer than the limit is
 * hard-split.
 *
 * @param text Text to split.
 * @param kind Kind assigned to every produced chunk.
 * @returns Chunks in order. A final remainder shorter than `MIN_CHUNK_CHARS`
 *          (trimmed) is discarded, matching the chunker's short-fragment filter.
 */
function splitAtSentenceBoundary(text: string, kind: ChunkKind): RawChunk[] {
  // \u3002 \uFF01 \uFF1F are the CJK full stop and fullwidth '!' / '?'.
  // NOTE(review): the fullwidth forms are presumed to be the intended
  // terminators after the CJK full stop — confirm against upstream.
  const pieces =
    text.match(
      /[^.!?\u3002\uFF01\uFF1F\n]*(?:[.!?\u3002\uFF01\uFF1F]+|\n+)|[^.!?\u3002\uFF01\uFF1F\n]+$/g,
    ) ?? [text];
  const result: RawChunk[] = [];
  let buf = "";

  const flush = (): void => {
    const content = buf.trim();
    if (content.length > 0) result.push({ content, kind });
    buf = "";
  };

  for (const piece of pieces) {
    if (buf.length > 0 && buf.length + piece.length > MAX_CHUNK_CHARS) {
      flush();
    }

    // A piece that alone exceeds the limit has no usable boundary: cut it into
    // limit-sized windows (buf is empty here because of the flush above).
    let rest = piece;
    while (rest.length > MAX_CHUNK_CHARS) {
      let cut = MAX_CHUNK_CHARS;
      const lastUnit = rest.charCodeAt(cut - 1);
      // Do not cut between the two halves of a surrogate pair.
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
      buf = rest.slice(0, cut);
      flush();
      rest = rest.slice(cut);
    }

    buf += rest;
  }

  if (buf.trim().length >= MIN_CHUNK_CHARS) {
    result.push({ content: buf.trim(), kind });
  }
  return result;
}
@@ -0,0 +1,37 @@
1
+ import { cosineSimilarity } from "../storage/vector";
2
+ import type { SqliteStore } from "../storage/sqlite";
3
+ import type { Logger } from "../types";
4
+
5
/**
 * Check if a new summary embedding is a near-duplicate of any
 * existing embedding. If similarity >= threshold, return the
 * existing chunk ID to merge/update instead of creating a new entry.
 *
 * Every stored embedding is scanned and the single best match is compared
 * with the threshold. Stored vectors whose length differs from `newVec` are
 * skipped: vectors of different dimensions are not comparable.
 *
 * PRD §4.4: dedup threshold 0.92–0.95
 *
 * @param store Source of stored embeddings (`getAllEmbeddings`).
 * @param newVec Embedding of the candidate summary.
 * @param threshold Minimum cosine similarity that counts as a duplicate.
 * @param log Logger for the debug trace of a detected duplicate.
 * @returns The chunk ID of the most similar stored embedding when it reaches
 *          the threshold, otherwise `null`.
 */
export function findDuplicate(
  store: SqliteStore,
  newVec: number[],
  threshold: number,
  log: Logger,
): string | null {
  const all = store.getAllEmbeddings();

  let bestId: string | null = null;
  let bestScore = 0;

  for (const { chunkId, vector } of all) {
    if (vector.length !== newVec.length) continue;

    const sim = cosineSimilarity(newVec, vector);
    if (sim > bestScore) {
      bestScore = sim;
      bestId = chunkId;
    }
  }

  if (bestId && bestScore >= threshold) {
    log.debug(`Dedup: found duplicate chunk=${bestId} sim=${bestScore.toFixed(4)}`);
    return bestId;
  }

  return null;
}