clawmem 0.5.0 → 0.6.0

package/src/llm.ts CHANGED
@@ -290,6 +290,12 @@ export class LlamaCpp implements LLM {
   // Track disposal state to prevent double-dispose
   private disposed = false;
 
+  // Cooldown-based down-cache for remote services.
+  // Timestamps (ms since epoch) until which we skip remote and use local fallback.
+  // Resets after cooldown expires — one network hiccup doesn't permanently disable GPU.
+  private remoteEmbedDownUntil = 0;
+  private remoteLlmDownUntil = 0;
+  private static readonly REMOTE_COOLDOWN_MS = 60_000; // 60s cooldown on transport failure
 
   constructor(config: LlamaCppConfig = {}) {
     this.embedModelUri = config.embedModel || DEFAULT_EMBED_MODEL;
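The idea behind these fields: remember when a remote endpoint last proved unreachable and skip it until the timestamp passes, rather than paying a connect timeout on every call. A minimal standalone sketch of the pattern (the function names here are hypothetical stand-ins, not clawmem's API):

```ts
// Sketch of the cooldown down-cache pattern added above.
// `fetchRemote` and `computeLocal` are hypothetical stand-ins for
// clawmem's remote and in-process embedding paths.
const COOLDOWN_MS = 60_000;
let downUntil = 0;

async function embedWithFallback(
  text: string,
  fetchRemote: (t: string) => Promise<number[]>,
  computeLocal: (t: string) => Promise<number[]>,
): Promise<number[]> {
  if (Date.now() >= downUntil) {
    try {
      return await fetchRemote(text);
    } catch {
      // Transport failure: skip remote for the next 60s, then retry.
      downUntil = Date.now() + COOLDOWN_MS;
    }
  }
  return computeLocal(text);
}
```

Because the state is just a timestamp, recovery is automatic: once the cooldown lapses, the next call probes the remote again.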
@@ -563,14 +569,19 @@ export class LlamaCpp implements LLM {
 
   async embed(text: string, options: EmbedOptions = {}): Promise<EmbeddingResult | null> {
     // Remote server or cloud API — preferred path
-    if (this.remoteEmbedUrl) {
+    if (this.remoteEmbedUrl && !this.isRemoteEmbedDown()) {
       const extraParams = this.getCloudEmbedParams(!!options.isQuery);
       const result = await this.embedRemote(text, extraParams);
       if (result) return result;
       // Cloud providers don't fall back — if API key is set, the user chose cloud
       if (this.isCloudEmbedding()) return null;
-      // Local server unreachable — fall through to in-process fallback
-      console.error("[embed] Remote server unreachable, falling back to in-process embedding");
+      // Transport failure already set cooldown in embedRemote — fall through
+    }
+
+    // Remote is in cooldown or was never configured — try local fallback
+    if (this.remoteEmbedUrl && this.isRemoteEmbedDown()) {
+      if (process.env.CLAWMEM_NO_LOCAL_MODELS === "true") return null;
+      console.error("[embed] Remote embed in cooldown, using in-process fallback");
     }
 
     // In-process fallback via node-llama-cpp (auto-downloads EmbeddingGemma on first use)
@@ -586,15 +597,20 @@ export class LlamaCpp implements LLM {
     if (texts.length === 0) return [];
 
     // Remote server or cloud API
-    if (this.remoteEmbedUrl) {
+    if (this.remoteEmbedUrl && !this.isRemoteEmbedDown()) {
       const extraParams = this.getCloudEmbedParams(false);
       const results = await this.embedRemoteBatch(texts, extraParams);
       // If we got at least one result, remote is working
       if (results.some(r => r !== null)) return results;
       // Cloud providers don't fall back
       if (this.isCloudEmbedding()) return results;
-      // Local server unreachable — fall through to in-process fallback
-      console.error("[embed] Remote server unreachable, falling back to in-process embedding");
+      // Transport failure already set cooldown in embedRemoteBatch — fall through
+    }
+
+    // Remote is in cooldown or was never configured — try local fallback
+    if (this.remoteEmbedUrl && this.isRemoteEmbedDown()) {
+      if (process.env.CLAWMEM_NO_LOCAL_MODELS === "true") return texts.map(() => null);
+      console.error("[embed] Remote embed in cooldown, using in-process fallback");
     }
 
     // In-process fallback via node-llama-cpp
@@ -645,6 +661,46 @@ export class LlamaCpp implements LLM {
     return text.slice(0, this.maxRemoteEmbedChars);
   }
 
+  // ---------- Remote failure classification ----------
+
+  /**
+   * Classify whether an error is a transport failure (server unreachable)
+   * vs an HTTP error (server received request but rejected it) or abort.
+   * Only transport failures should trigger the down-cache cooldown.
+   */
+  private isTransportError(error: unknown): boolean {
+    if (error instanceof TypeError && String(error.message).includes("fetch")) return true; // fetch network error
+    const code = (error as any)?.code || (error as any)?.cause?.code;
+    if (code === "ECONNREFUSED" || code === "ETIMEDOUT" || code === "ENOTFOUND" ||
+        code === "EHOSTUNREACH" || code === "ENETUNREACH" || code === "ECONNRESET" ||
+        code === "UND_ERR_CONNECT_TIMEOUT") return true;
+    const msg = String((error as any)?.message || "").toLowerCase();
+    if (msg.includes("econnrefused") || msg.includes("etimedout") || msg.includes("enotfound") ||
+        msg.includes("ehostunreach") || msg.includes("enetunreach")) return true;
+    return false;
+  }
+
+  private isAbortError(error: unknown): boolean {
+    return (error instanceof DOMException && error.name === "AbortError") ||
+           (error as any)?.name === "AbortError";
+  }
+
+  private isRemoteLlmDown(): boolean {
+    return Date.now() < this.remoteLlmDownUntil;
+  }
+
+  private isRemoteEmbedDown(): boolean {
+    return Date.now() < this.remoteEmbedDownUntil;
+  }
+
+  private markRemoteLlmDown(): void {
+    this.remoteLlmDownUntil = Date.now() + LlamaCpp.REMOTE_COOLDOWN_MS;
+  }
+
+  private markRemoteEmbedDown(): void {
+    this.remoteEmbedDownUntil = Date.now() + LlamaCpp.REMOTE_COOLDOWN_MS;
+  }
+
   // ---------- Remote embedding (GPU server or cloud API via /v1/embeddings) ----------
 
   // Default: 6000 chars for EmbeddingGemma-300M (2048-token context).
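For context on the `cause.code` branch: on Node 18+, the built-in fetch (undici) wraps connection failures in a `TypeError` whose `cause` carries the underlying system error, which is why the classifier inspects both `error.code` and `error.cause.code`. Error shapes do vary across Node versions, so treat this probe as exploratory; it assumes nothing is listening on the target port:

```ts
// Probe what a transport failure looks like on the running Node version.
// Port 1 is assumed to have no listener; adjust if your system differs.
async function probe(): Promise<void> {
  try {
    await fetch("http://127.0.0.1:1/v1/embeddings", { method: "POST" });
  } catch (error) {
    const e = error as any;
    console.log(e.name, "-", e.message); // typically: TypeError - fetch failed
    // The code usually sits on cause; some versions nest it in cause.errors[0].
    console.log(e.cause?.code ?? e.cause?.errors?.[0]?.code); // e.g. ECONNREFUSED
  }
}
probe();
```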
@@ -712,6 +768,7 @@ export class LlamaCpp implements LLM {
   }
 
   private async embedRemote(text: string, extraParams: Record<string, unknown> = {}, retries = 5): Promise<EmbeddingResult | null> {
+    if (this.isRemoteEmbedDown()) return null;
     const input = this.truncateForEmbed(text);
     for (let attempt = 0; attempt < retries; attempt++) {
       try {
@@ -741,11 +798,16 @@ export class LlamaCpp implements LLM {
           model: data.model || this.remoteEmbedUrl!,
         };
       } catch (error) {
-        console.error("Remote embed error:", error);
+        if (this.isTransportError(error)) {
+          console.error("[embed] Remote embed server unreachable, cooldown 60s");
+          this.markRemoteEmbedDown();
+        } else {
+          console.error("[embed] Remote embed error:", error);
+        }
        return null;
      }
    }
-    console.error("Remote embed: max retries exceeded (rate limit)");
+    console.error("[embed] Remote embed: max retries exceeded (rate limit)");
    return null;
  }
 
@@ -753,6 +815,7 @@ export class LlamaCpp implements LLM {
   lastBatchTokens = 0;
 
   private async embedRemoteBatch(texts: string[], extraParams: Record<string, unknown> = {}, retries = 3): Promise<(EmbeddingResult | null)[]> {
+    if (this.isRemoteEmbedDown()) return texts.map(() => null);
     const truncated = texts.map(t => this.truncateForEmbed(t));
     for (let attempt = 0; attempt < retries; attempt++) {
       try {
@@ -787,11 +850,16 @@ export class LlamaCpp implements LLM {
        }
        return results;
      } catch (error) {
-        console.error("Remote batch embed error:", error);
+        if (this.isTransportError(error)) {
+          console.error("[embed] Remote batch embed server unreachable, cooldown 60s");
+          this.markRemoteEmbedDown();
+        } else {
+          console.error("[embed] Remote batch embed error:", error);
+        }
        return texts.map(() => null);
      }
    }
-    console.error("Remote batch embed: max retries exceeded (rate limit)");
+    console.error("[embed] Remote batch embed: max retries exceeded (rate limit)");
    return texts.map(() => null);
  }
 
@@ -800,8 +868,18 @@ export class LlamaCpp implements LLM {
     const temperature = options.temperature ?? 0;
 
     // Remote LLM server (GPU) — preferred path
-    if (this.remoteLlmUrl) {
-      return this.generateRemote(prompt, maxTokens, temperature, options.signal);
+    if (this.remoteLlmUrl && !this.isRemoteLlmDown()) {
+      const result = await this.generateRemote(prompt, maxTokens, temperature, options.signal);
+      if (result) return result;
+      // If remote failed but NOT transport error (HTTP 400/500, abort), don't fall through
+      if (!this.isRemoteLlmDown()) return null;
+      // Transport failure set cooldown — fall through to local
+    }
+
+    // Remote is in cooldown or was never configured — try local fallback
+    if (this.remoteLlmUrl && this.isRemoteLlmDown()) {
+      if (process.env.CLAWMEM_NO_LOCAL_MODELS === "true") return null;
+      console.error("[generate] Remote LLM in cooldown, falling back to in-process generation");
     }
 
     // Local fallback via node-llama-cpp (CPU)
@@ -840,6 +918,8 @@ export class LlamaCpp implements LLM {
     temperature: number,
     signal?: AbortSignal
   ): Promise<GenerateResult | null> {
+    // Re-check: concurrent call may have set cooldown while we were awaited
+    if (this.isRemoteLlmDown()) return null;
     try {
       const resp = await fetch(`${this.remoteLlmUrl}/v1/chat/completions`, {
         method: "POST",
@@ -854,7 +934,8 @@ export class LlamaCpp implements LLM {
       });
 
       if (!resp.ok) {
-        console.error(`[generate] Remote LLM error: ${resp.status} ${resp.statusText}`);
+        console.error(`[generate] Remote LLM HTTP ${resp.status}: ${resp.statusText}`);
+        // HTTP errors mean the server IS reachable — don't trigger down-cache
         return null;
       }
 
@@ -869,7 +950,16 @@ export class LlamaCpp implements LLM {
         done: true,
       };
     } catch (error) {
-      console.error("[generate] Remote LLM error:", error);
+      if (this.isAbortError(error)) {
+        // User/caller cancelled — don't cache as "down"
+        return null;
+      }
+      if (this.isTransportError(error)) {
+        console.error("[generate] Remote LLM server unreachable, cooldown 60s");
+        this.markRemoteLlmDown();
+      } else {
+        console.error("[generate] Remote LLM error:", error);
+      }
       return null;
     }
   }
@@ -939,8 +1029,22 @@ Output:`;
     const intent = options.intent;
 
     // Remote LLM path — no grammar constraint, parse output instead
-    if (this.remoteLlmUrl) {
-      return this.expandQueryRemote(query, includeLexical, context, intent);
+    if (this.remoteLlmUrl && !this.isRemoteLlmDown()) {
+      const result = await this.expandQueryRemote(query, includeLexical, context, intent);
+      // Check if transport failure set cooldown during this call
+      if (!this.isRemoteLlmDown()) return result;
+      // Transport failure — fall through to local grammar path
+    }
+
+    // Remote is in cooldown (pre-existing or just set) — fall through to local
+    if (this.remoteLlmUrl && this.isRemoteLlmDown()) {
+      if (process.env.CLAWMEM_NO_LOCAL_MODELS === "true") {
+        // Can't fall back — return passthrough
+        const fallback: Queryable[] = [{ type: 'vec', text: query }];
+        if (includeLexical) fallback.unshift({ type: 'lex', text: query });
+        return fallback;
+      }
+      console.error("[expandQuery] Remote LLM in cooldown, falling back to in-process grammar expansion");
    }
 
    const llama = await this.ensureLlama();
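One convention worth noting across `generate()` and `expandQuery()`: `generateRemote()` returns `null` on every failure, so callers cannot distinguish a transport failure from an HTTP rejection by return value alone. The transport case is instead signalled through the cooldown state itself: `markRemoteLlmDown()` flips during the call, and the caller re-reads `isRemoteLlmDown()` afterwards. A minimal sketch of that pattern, with hypothetical names rather than clawmem's actual types:

```ts
// Sketch of the "signal via cooldown state" convention used above.
type Call<T> = () => Promise<T | null>;

async function callWithFallback<T>(
  remote: Call<T>,
  local: Call<T>,
  isDown: () => boolean, // reads the shared cooldown flag
): Promise<T | null> {
  if (!isDown()) {
    const result = await remote();
    if (result) return result;
    // Remote failed. If the cooldown flag did not flip during the call, the
    // failure was an HTTP error or abort: honor it rather than silently
    // switching to a different model.
    if (!isDown()) return null;
  }
  // Cooldown active (pre-existing or just set): use the local path.
  return local();
}
```

The trade-off of this design is that the caller's behavior depends on shared mutable state, which is why `generateRemote()` re-checks the flag on entry to guard against concurrent calls.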
package/src/mcp.ts CHANGED
@@ -1918,6 +1918,61 @@ This is the recommended entry point for ALL memory queries.`,
     }
   );
 
+  // ---------------------------------------------------------------------------
+  // Tool: kg_query (SPO Knowledge Graph)
+  // ---------------------------------------------------------------------------
+
+  server.registerTool(
+    "kg_query",
+    {
+      title: "Knowledge Graph Query",
+      description: "Query the knowledge graph for an entity's relationships. Returns structured facts with temporal validity (valid_from/valid_to). Use for 'what does X relate to?', 'what was true about X on date Y?', 'who/what is connected to X?'.",
+      inputSchema: {
+        entity: z.string().describe("Entity name or ID to query"),
+        as_of: z.string().optional().describe("Date filter (YYYY-MM-DD) — only facts valid at this date"),
+        direction: z.enum(["outgoing", "incoming", "both"]).optional().default("both").describe("Relationship direction"),
+        vault: z.string().optional().describe("Named vault (omit for default vault)"),
+      },
+    },
+    async ({ entity, as_of, direction, vault }) => {
+      const store = getStore(vault);
+
+      const entityResults = store.searchEntities(entity, 1);
+      const entityId = entityResults.length > 0
+        ? entityResults[0]!.entity_id
+        : entity.toLowerCase().replace(/[^a-z0-9]+/g, "_").replace(/^_|_$/g, "");
+
+      const triples = store.queryEntityTriples(entityId, { asOf: as_of, direction });
+      const stats = store.getTripleStats();
+
+      if (triples.length === 0) {
+        return {
+          content: [{ type: "text", text: `No knowledge graph facts found for "${entity}". The KG has ${stats.totalTriples} total triples (${stats.currentFacts} current).` }],
+        };
+      }
+
+      const lines = [`Knowledge graph for "${entity}" (${triples.length} fact${triples.length === 1 ? '' : 's'}):\n`];
+
+      for (const t of triples) {
+        const validity = t.current ? "current" : `ended ${t.validTo}`;
+        const from = t.validFrom ? ` (since ${t.validFrom})` : "";
+        const conf = Math.round(t.confidence * 100);
+        lines.push(`[${t.direction}] ${t.subject} → ${t.predicate} → ${t.object}${from} [${validity}, ${conf}%]`);
+      }
+
+      return {
+        content: [{ type: "text", text: lines.join('\n') }],
+        structuredContent: {
+          entity,
+          direction,
+          as_of: as_of ?? null,
+          facts: triples,
+          stats,
+        },
+      };
+    }
+  );
+
   // ---------------------------------------------------------------------------
   // Tool: memory_evolution_status (A-MEM)
   // ---------------------------------------------------------------------------
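When `searchEntities()` returns no match, `kg_query` falls back to slugifying the raw input into a canonical entity ID. Here is that normalization extracted into a standalone function (same regexes as in the diff), with example outputs:

```ts
// Standalone version of kg_query's fallback entity-ID normalization.
function toEntityId(name: string): string {
  return name
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "_") // collapse runs of non-alphanumerics to "_"
    .replace(/^_|_$/g, "");      // trim a leading/trailing "_"
}

console.log(toEntityId("Ada Lovelace"));   // "ada_lovelace"
console.log(toEntityId("C++ (language)")); // "c_language"
```

Because the first replace collapses runs, at most one underscore can remain at each end, so the simple trim regex suffices.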
@@ -2407,6 +2462,99 @@ This is the recommended entry point for ALL memory queries.`,
     }
   );
 
+  // ---------------------------------------------------------------------------
+  // Tool: diary_write
+  // ---------------------------------------------------------------------------
+
+  server.registerTool(
+    "diary_write",
+    {
+      title: "Write Diary Entry",
+      description: "Write to the agent's diary. Use for recording important events, decisions, or observations in environments without hook support. Entries are stored as memories and are searchable.",
+      inputSchema: {
+        entry: z.string().describe("Diary entry text"),
+        topic: z.string().optional().default("general").describe("Topic tag (e.g., 'technical', 'user_facts', 'session')"),
+        agent: z.string().optional().default("agent").describe("Agent name writing the entry"),
+        vault: z.string().optional().describe("Named vault (omit for default vault)"),
+      },
+    },
+    async ({ entry, topic, agent, vault }) => {
+      const store = getStore(vault);
+      const now = new Date();
+      const dateStr = now.toISOString().slice(0, 10);
+      const timeStr = now.toISOString().slice(11, 19).replace(/:/g, "");
+      const ms = String(now.getMilliseconds()).padStart(3, "0");
+      const diaryPath = `diary/${dateStr}-${timeStr}${ms}-${topic}.md`;
+      const body = `---\ntitle: "${entry.slice(0, 80).replace(/"/g, '\\"')}"\ncontent_type: note\ntags: [diary, ${topic}]\ndomain: "${agent}"\n---\n\n${entry}`;
+
+      const result = store.saveMemory({
+        collection: "_clawmem",
+        path: diaryPath,
+        title: entry.slice(0, 80),
+        body,
+        contentType: "note",
+        confidence: 0.7,
+        semanticPayload: `${diaryPath}::${entry}`,
+      });
+
+      return {
+        content: [{ type: "text", text: `Diary entry saved (${result.action}, doc #${result.docId})` }],
+        structuredContent: { action: result.action, docId: result.docId, path: diaryPath },
+      };
+    }
+  );
+
+  // ---------------------------------------------------------------------------
+  // Tool: diary_read
+  // ---------------------------------------------------------------------------
+
+  server.registerTool(
+    "diary_read",
+    {
+      title: "Read Diary Entries",
+      description: "Read recent diary entries. Use to review past observations and events recorded by the agent.",
+      inputSchema: {
+        last_n: z.number().optional().default(10).describe("Number of recent entries to return"),
+        agent: z.string().optional().describe("Filter by agent name"),
+        vault: z.string().optional().describe("Named vault (omit for default vault)"),
+      },
+    },
+    async ({ last_n, agent, vault }) => {
+      const store = getStore(vault);
+      const params: any[] = [];
+      let agentFilter = "";
+      if (agent) {
+        agentFilter = "AND d.domain = ?";
+        params.push(agent);
+      }
+      params.push(last_n);
+
+      const rows = store.db.prepare(`
+        SELECT d.id, d.path, d.title, d.modified_at as modifiedAt, d.domain
+        FROM documents d
+        WHERE d.active = 1 AND d.collection = '_clawmem' AND d.path LIKE 'diary/%'
+        ${agentFilter}
+        ORDER BY d.modified_at DESC
+        LIMIT ?
+      `).all(...params) as any[];
+
+      if (rows.length === 0) {
+        return { content: [{ type: "text", text: "No diary entries found." }] };
+      }
+
+      const lines = [`Diary (${rows.length} entries):\n`];
+      for (const row of rows) {
+        const agentLabel = row.domain ? ` [${row.domain}]` : "";
+        lines.push(`${row.modifiedAt.slice(0, 16)}${agentLabel} ${row.title}`);
+      }
+
+      return {
+        content: [{ type: "text", text: lines.join('\n') }],
+        structuredContent: { entries: rows },
+      };
+    }
+  );
+
   // ---------------------------------------------------------------------------
   // Connect
   // ---------------------------------------------------------------------------
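`diary_write` derives a sortable, collision-resistant document path from the UTC timestamp plus topic. The naming logic extracted into a standalone sketch:

```ts
// Reproduces diary_write's path construction in isolation.
function diaryPath(now: Date, topic: string): string {
  const dateStr = now.toISOString().slice(0, 10);                    // "2024-06-01"
  const timeStr = now.toISOString().slice(11, 19).replace(/:/g, ""); // "134502"
  const ms = String(now.getMilliseconds()).padStart(3, "0");         // "007"
  return `diary/${dateStr}-${timeStr}${ms}-${topic}.md`;
}

console.log(diaryPath(new Date("2024-06-01T13:45:02.007Z"), "session"));
// diary/2024-06-01-134502007-session.md
```

Millisecond resolution makes same-second writes land on distinct paths, and the `diary/` prefix is what `diary_read` later matches with its `LIKE 'diary/%'` filter.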
package/src/memory.ts CHANGED
@@ -20,6 +20,7 @@ export const HALF_LIVES: Record<string, number> = {
   project: 120,
   preference: Infinity,
   decision: Infinity,
+  deductive: Infinity,
   hub: Infinity,
 };
 
@@ -29,6 +30,7 @@ export const HALF_LIVES: Record<string, number> = {
 
 export const TYPE_BASELINES: Record<string, number> = {
   decision: 0.85,
+  deductive: 0.85,
   preference: 0.80,
   hub: 0.80,
   problem: 0.75,
@@ -45,7 +47,7 @@ export const TYPE_BASELINES: Record<string, number> = {
 // Content Type Inference
 // =============================================================================
 
-export type ContentType = "decision" | "preference" | "hub" | "research" | "project" | "handoff" | "conversation" | "progress" | "milestone" | "problem" | "note";
+export type ContentType = "decision" | "deductive" | "preference" | "hub" | "research" | "project" | "handoff" | "conversation" | "progress" | "milestone" | "problem" | "note";
 
 export function inferContentType(path: string, explicitType?: string): ContentType {
   if (explicitType && explicitType in TYPE_BASELINES) return explicitType as ContentType;
@@ -75,7 +77,7 @@ export type MemoryType = "episodic" | "semantic" | "procedural";
  */
 export function inferMemoryType(path: string, contentType: string, body?: string): MemoryType {
   if (["handoff", "progress", "conversation"].includes(contentType)) return "episodic";
-  if (["decision", "hub", "research"].includes(contentType)) return "semantic";
+  if (["decision", "deductive", "hub", "research"].includes(contentType)) return "semantic";
   if (body && /\b(step\s+\d|workflow|recipe|how\s+to|procedure|runbook|playbook)\b/i.test(body)) return "procedural";
   if (path.includes("sop") || path.includes("runbook") || path.includes("playbook")) return "procedural";
   if (contentType === "antipattern") return "semantic";
@@ -150,7 +152,7 @@ export function confidenceScore(
   // Attention decay: reduce confidence if not accessed recently (5% per week)
   // Only apply to episodic/progress content — skip for durable types (decision, hub, research)
   // Also skip if last_accessed_at was backfilled from modified_at (no real access yet)
-  const DECAY_EXEMPT_TYPES = new Set(["decision", "hub", "research", "antipattern", "preference"]);
+  const DECAY_EXEMPT_TYPES = new Set(["decision", "deductive", "hub", "research", "antipattern", "preference"]);
   let attentionDecay = 1.0;
   if (lastAccessedAt && !DECAY_EXEMPT_TYPES.has(contentType)) {
     const lastAccess = typeof lastAccessedAt === "string" ? new Date(lastAccessedAt) : lastAccessedAt;
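Taken together, the memory.ts changes register the new `deductive` content type everywhere durability matters: infinite half-life, a 0.85 baseline (same as `decision`), semantic memory classification, and exemption from attention decay. The diff does not show the half-life decay function itself; the sketch below assumes the conventional exponential form purely to illustrate what `Infinity` buys a content type:

```ts
// Hedged sketch: how a half-life table like HALF_LIVES typically feeds a
// recency score. The exponential formula and the default half-life are
// assumptions; the diff does not show clawmem's actual decay function.
const HALF_LIVES: Record<string, number> = {
  project: 120,
  preference: Infinity,
  decision: Infinity,
  deductive: Infinity, // new in 0.6.0: deductive facts never decay
  hub: Infinity,
};

function recency(contentType: string, ageDays: number): number {
  const halfLife = HALF_LIVES[contentType] ?? 30; // hypothetical default
  if (!Number.isFinite(halfLife)) return 1;       // Infinity: no decay
  return Math.pow(0.5, ageDays / halfLife);
}

console.log(recency("project", 120));   // 0.5 — one half-life elapsed
console.log(recency("deductive", 999)); // 1 — exempt from decay
```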