@oh-my-pi/pi-mnemopi 16.0.4 → 16.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,23 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [16.0.6] - 2026-06-18
6
+
7
+ ### Fixed
8
+
9
+ - Forced the on-demand fastembed runtime install to override fastembed's archived `onnxruntime-node@1.21.0` transitive pin with Mnemopi's `onnxruntime-node@1.26.0` pin, fixing local embedding startup on macOS ARM64. ([#2920](https://github.com/can1357/oh-my-pi/issues/2920))
10
+
11
+ ### Changed
12
+
13
+ - Updated OpenRouter request headers to use standard shared headers from the pi-ai package.
14
+
15
+ ## [16.0.5] - 2026-06-17
16
+
17
+ ### Fixed
18
+
19
+ - Capped `sleep_consolidation` episodic rows at `maxEpisodeChars` (default 100KB, `MNEMOPI_MAX_EPISODE_CHARS`) so raw session transcripts cannot be stored and extracted as multi-megabyte episodes. ([#2869](https://github.com/can1357/oh-my-pi/issues/2869))
20
+ - Skipped regex-only entity and pattern fact extraction for oversized raw transcripts so progress/log noise cannot flood MEMORIA with junk facts. ([#2868](https://github.com/can1357/oh-my-pi/issues/2868))
21
+
5
22
  ## [15.13.1] - 2026-06-15
6
23
 
7
24
  ### Added
@@ -33,6 +33,7 @@ export declare function apiEmbeddingsAvailable(env?: Env): boolean;
33
33
  export declare function workingMemoryMaxItems(env?: Env): number;
34
34
  export declare function workingMemoryTtlHours(env?: Env): number;
35
35
  export declare function episodicRecallLimit(env?: Env): number;
36
+ export declare function maxEpisodeChars(env?: Env): number;
36
37
  export declare function sleepBatchSize(env?: Env): number;
37
38
  export declare function scratchpadMaxItems(env?: Env): number;
38
39
  export declare function recencyHalflifeHours(env?: Env): number;
@@ -38,6 +38,7 @@ export interface BeamConfig {
38
38
  importanceWeight: number;
39
39
  useCloud: boolean;
40
40
  localLlmEnabled: boolean;
41
+ maxEpisodeChars: number;
41
42
  }
42
43
  export interface BeamMemoryOptions {
43
44
  sessionId?: string;
@@ -1,4 +1,6 @@
1
1
  export declare const ENTITY_EXTRACTION_STOP_WORDS: ReadonlySet<string>;
2
+ /** Maximum raw text size for regex-only entity extraction. */
3
+ export declare const REGEX_EXTRACTION_MAX_INPUT_CHARS = 50000;
2
4
  export declare function levenshteinDistance(s1: string, s2: string): number;
3
5
  export declare function similarity(s1: string, s2: string): number;
4
6
  export declare function extractEntitiesRegex(text: string): string[];
@@ -1,4 +1,14 @@
1
+ import { type RuntimeInstallSpec } from "@oh-my-pi/pi-utils";
1
2
  import type * as Fastembed from "fastembed";
2
3
  type FastembedModule = typeof Fastembed;
4
+ /** Runtime install inputs for the optional fastembed embedding stack. */
5
+ export interface FastembedRuntimeInstallPlan {
6
+ /** Cache directory key; changes when runtime resolution policy changes. */
7
+ versionKey: string;
8
+ /** Dependency graph written to the runtime cache package manifest. */
9
+ install: RuntimeInstallSpec;
10
+ }
11
+ /** Build the deterministic fastembed runtime install plan used by local embeddings. */
12
+ export declare function fastembedRuntimeInstallPlan(): FastembedRuntimeInstallPlan;
3
13
  export declare function loadFastembed(): Promise<FastembedModule>;
4
14
  export {};
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-mnemopi",
4
- "version": "16.0.4",
4
+ "version": "16.0.6",
5
5
  "description": "Local SQLite memory engine for Oh My Pi agents",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -39,9 +39,9 @@
39
39
  "fmt": "biome format --write ."
40
40
  },
41
41
  "dependencies": {
42
- "@oh-my-pi/pi-ai": "16.0.4",
43
- "@oh-my-pi/pi-catalog": "16.0.4",
44
- "@oh-my-pi/pi-utils": "16.0.4",
42
+ "@oh-my-pi/pi-ai": "16.0.6",
43
+ "@oh-my-pi/pi-catalog": "16.0.6",
44
+ "@oh-my-pi/pi-utils": "16.0.6",
45
45
  "lru-cache": "11.5.1"
46
46
  },
47
47
  "peerDependencies": {
package/src/config.ts CHANGED
@@ -126,6 +126,10 @@ export function episodicRecallLimit(env: Env = process.env): number {
126
126
  return envInt("MNEMOPI_EP_LIMIT", 50000, env);
127
127
  }
128
128
 
129
+ export function maxEpisodeChars(env: Env = process.env): number {
130
+ return Math.max(1, envInt("MNEMOPI_MAX_EPISODE_CHARS", 100000, env));
131
+ }
132
+
129
133
  export function sleepBatchSize(env: Env = process.env): number {
130
134
  return envInt("MNEMOPI_SLEEP_BATCH", 5000, env);
131
135
  }
@@ -1,6 +1,7 @@
1
1
  import type { SQLQueryBindings } from "bun:sqlite";
2
2
  import { generateId, stableMemoryId } from "../../util/ids";
3
3
  import { aaakEncode } from "../aaak";
4
+ import { REGEX_EXTRACTION_MAX_INPUT_CHARS } from "../entities";
4
5
  import { EpisodicGraph } from "../episodic-graph";
5
6
  import { heuristicExtractFacts } from "../extraction";
6
7
  import { clampVeracity } from "../veracity-consolidation";
@@ -57,6 +58,70 @@ const TIER2_DAYS = envInt("MNEMOPI_TIER2_DAYS", 30);
57
58
  const TIER3_DAYS = envInt("MNEMOPI_TIER3_DAYS", 180);
58
59
  const DEGRADE_BATCH_SIZE = envInt("MNEMOPI_DEGRADE_BATCH", 100);
59
60
  const TIER3_MAX_CHARS = envInt("MNEMOPI_TIER3_MAX_CHARS", 300);
61
+ const DEFAULT_MAX_EPISODE_CHARS = 100_000;
62
+ const SLEEP_SUMMARY_SEPARATOR = " | ";
63
+ const SLEEP_TRUNCATION_MARKER = "\n[... sleep_consolidation episode truncated by maxEpisodeChars ...]";
64
+ const PATTERN_FACT_EXTRACTION_MAX_INPUT_CHARS = REGEX_EXTRACTION_MAX_INPUT_CHARS;
65
+
66
+ type SleepSummary = {
67
+ summary: string;
68
+ originalChars: number;
69
+ truncated: boolean;
70
+ maxChars: number;
71
+ };
72
+ type SleepChunk = {
73
+ items: Row[];
74
+ originalChars: number;
75
+ };
76
+
77
+ function normalizedMaxEpisodeChars(beam: BeamMemoryState): number {
78
+ const configured = Math.trunc(beam.config?.maxEpisodeChars ?? DEFAULT_MAX_EPISODE_CHARS);
79
+ return Number.isFinite(configured) && configured > 0 ? configured : DEFAULT_MAX_EPISODE_CHARS;
80
+ }
81
+
82
+ function markTruncated(content: string, maxChars: number): string {
83
+ if (maxChars <= 0) return "";
84
+ if (maxChars <= SLEEP_TRUNCATION_MARKER.length) return content.slice(0, maxChars);
85
+ const bodyChars = maxChars - SLEEP_TRUNCATION_MARKER.length;
86
+ return `${content.slice(0, bodyChars).trimEnd()}${SLEEP_TRUNCATION_MARKER}`;
87
+ }
88
+
89
+ function splitSleepItems(beam: BeamMemoryState, source: string, items: readonly Row[]): SleepChunk[] {
90
+ const maxChars = normalizedMaxEpisodeChars(beam);
91
+ const prefixChars = `[${source}] `.length;
92
+ const joinedLimit = Math.max(0, maxChars - prefixChars);
93
+ const chunks: SleepChunk[] = [];
94
+ let current: Row[] = [];
95
+ let currentChars = 0;
96
+
97
+ for (const item of items) {
98
+ const contentChars = (rowValue(item, "content") ?? "").length;
99
+ const separatorChars = current.length === 0 ? 0 : SLEEP_SUMMARY_SEPARATOR.length;
100
+ if (current.length > 0 && currentChars + separatorChars + contentChars > joinedLimit) {
101
+ chunks.push({ items: current, originalChars: currentChars });
102
+ current = [];
103
+ currentChars = 0;
104
+ }
105
+ current.push(item);
106
+ currentChars += (current.length === 1 ? 0 : SLEEP_SUMMARY_SEPARATOR.length) + contentChars;
107
+ }
108
+ if (current.length > 0) chunks.push({ items: current, originalChars: currentChars });
109
+ return chunks;
110
+ }
111
+
112
+ function buildSleepSummary(beam: BeamMemoryState, source: string, chunk: SleepChunk): SleepSummary {
113
+ const maxChars = normalizedMaxEpisodeChars(beam);
114
+ const prefix = `[${source}] `;
115
+ const joined = chunk.items.map(item => rowValue(item, "content") ?? "").join(SLEEP_SUMMARY_SEPARATOR);
116
+ const uncapped = `${prefix}${aaakEncode(joined)}`;
117
+ const truncated = uncapped.length > maxChars;
118
+ return {
119
+ summary: truncated ? markTruncated(uncapped, maxChars) : uncapped,
120
+ originalChars: chunk.originalChars,
121
+ truncated,
122
+ maxChars,
123
+ };
124
+ }
60
125
 
61
126
  function isoNow(): string {
62
127
  return new Date().toISOString();
@@ -440,6 +505,7 @@ export function extractAndStoreFacts(
440
505
  decision: 0,
441
506
  };
442
507
  const text = String(content ?? "");
508
+ if (text.length > PATTERN_FACT_EXTRACTION_MAX_INPUT_CHARS) return counts;
443
509
  for (const match of text.matchAll(
444
510
  /(\d+(?:[.,]\d+)?)\s*(ms|sec|seconds?|minutes?|hours?|days?|weeks?|months?|%|KB|MB|GB|TB|rows?|columns?|roles?|features?|bugs?|commits?|cards?|users?|items?|tests?|APIs?|endpoints?|sprints?|tickets?)\b/gi,
445
511
  )) {
@@ -878,26 +944,34 @@ export function sleep(beam: BeamMemoryState, dryRun = false): SleepResult {
878
944
  const consolidatedIds: string[] = [];
879
945
  let summariesCreated = 0;
880
946
  for (const [source, items] of grouped) {
881
- const lines = items.map(item => rowValue(item, "content") ?? "");
882
- const ids = items.map(item => rowValue(item, "id")).filter((id): id is string => id !== null);
883
- let scope = "session";
884
- let validUntil: string | null = null;
885
- for (const item of items) {
886
- if (rowValue(item, "scope") === "global") scope = "global";
887
- const itemValidUntil = rowValue(item, "valid_until");
888
- if (itemValidUntil && (validUntil === null || itemValidUntil < validUntil)) validUntil = itemValidUntil;
889
- }
890
- const summary = `[${source}] ${aaakEncode(lines.join(" | "))}`;
891
- if (!dryRun) {
892
- consolidateToEpisodic(beam, summary, ids, "sleep_consolidation", 0.6, {
893
- scope,
894
- validUntil,
895
- veracity: aggregateEpisodicVeracity(items.map(item => rowValue(item, "veracity") ?? "unknown")),
896
- metadata: { original_count: items.length, source, llm_used: false },
897
- });
947
+ for (const chunk of splitSleepItems(beam, source, items)) {
948
+ const ids = chunk.items.map(item => rowValue(item, "id")).filter((id): id is string => id !== null);
949
+ let scope = "session";
950
+ let validUntil: string | null = null;
951
+ for (const item of chunk.items) {
952
+ if (rowValue(item, "scope") === "global") scope = "global";
953
+ const itemValidUntil = rowValue(item, "valid_until");
954
+ if (itemValidUntil && (validUntil === null || itemValidUntil < validUntil)) validUntil = itemValidUntil;
955
+ }
956
+ const sleepSummary = buildSleepSummary(beam, source, chunk);
957
+ const metadata: Metadata = { original_count: chunk.items.length, source, llm_used: false };
958
+ if (sleepSummary.truncated) {
959
+ metadata.truncated = true;
960
+ metadata.original_chars = sleepSummary.originalChars;
961
+ metadata.max_chars = sleepSummary.maxChars;
962
+ }
963
+ const summary = sleepSummary.summary;
964
+ if (!dryRun) {
965
+ consolidateToEpisodic(beam, summary, ids, "sleep_consolidation", 0.6, {
966
+ scope,
967
+ validUntil,
968
+ veracity: aggregateEpisodicVeracity(chunk.items.map(item => rowValue(item, "veracity") ?? "unknown")),
969
+ metadata,
970
+ });
971
+ }
972
+ consolidatedIds.push(...ids);
973
+ summariesCreated++;
898
974
  }
899
- consolidatedIds.push(...ids);
900
- summariesCreated++;
901
975
  }
902
976
  if (!dryRun) {
903
977
  beam.db.run(
@@ -1,6 +1,6 @@
1
1
  import type { Database } from "bun:sqlite";
2
2
  import { existsSync } from "node:fs";
3
- import { ftsWeight, importanceWeight, vectorWeight } from "../../config";
3
+ import { ftsWeight, importanceWeight, maxEpisodeChars, vectorWeight } from "../../config";
4
4
  import { closeQuietly, openDatabase } from "../../db";
5
5
  import { AnnotationStore } from "../annotations";
6
6
  import { EpisodicGraph } from "../episodic-graph";
@@ -68,6 +68,7 @@ const DEFAULT_CONFIG: BeamConfig = {
68
68
  importanceWeight: 0.2,
69
69
  useCloud: false,
70
70
  localLlmEnabled: false,
71
+ maxEpisodeChars: 100_000,
71
72
  };
72
73
 
73
74
  function normalizeConfig(options: BeamMemoryOptions): BeamConfig {
@@ -82,6 +83,7 @@ function normalizeConfig(options: BeamMemoryOptions): BeamConfig {
82
83
  importanceWeight: configured.importanceWeight ?? importanceWeight(),
83
84
  useCloud,
84
85
  localLlmEnabled: configured.localLlmEnabled ?? DEFAULT_CONFIG.localLlmEnabled,
86
+ maxEpisodeChars: configured.maxEpisodeChars ?? maxEpisodeChars(),
85
87
  };
86
88
  }
87
89
  function autoMigrateAnnotations(db: Database, dbPath: string | undefined): void {
@@ -53,6 +53,7 @@ export interface BeamConfig {
53
53
  importanceWeight: number;
54
54
  useCloud: boolean;
55
55
  localLlmEnabled: boolean;
56
+ maxEpisodeChars: number;
56
57
  }
57
58
 
58
59
  export interface BeamMemoryOptions {
@@ -1,5 +1,5 @@
1
1
  import { mkdirSync } from "node:fs";
2
- import { type ApiKey, ProviderHttpError, withAuth } from "@oh-my-pi/pi-ai";
2
+ import { type ApiKey, getOpenRouterHeaders, ProviderHttpError, withAuth } from "@oh-my-pi/pi-ai";
3
3
  import { hostMatchesUrl } from "@oh-my-pi/pi-catalog/hosts";
4
4
  import {
5
5
  $env,
@@ -11,7 +11,6 @@ import {
11
11
  } from "@oh-my-pi/pi-utils";
12
12
  import type { EmbeddingModel } from "fastembed";
13
13
  import { LRUCache } from "lru-cache/raw";
14
- import packageJson from "../../package.json" with { type: "json" };
15
14
  import { loadFastembed } from "./fastembed-runtime";
16
15
  import {
17
16
  type EmbeddingOutput,
@@ -268,10 +267,7 @@ async function embedApi(texts: readonly string[]): Promise<EmbeddingMatrix | nul
268
267
  const response = await withAuth(apiKey, async key => {
269
268
  const headers: Record<string, string> = {
270
269
  "Content-Type": "application/json",
271
- "User-Agent": `Oh-My-Pi/${packageJson.version}`,
272
- "HTTP-Referer": "https://omp.sh/",
273
- "X-OpenRouter-Title": "Oh-My-Pi",
274
- "X-OpenRouter-Categories": "cli-agent",
270
+ ...getOpenRouterHeaders(),
275
271
  };
276
272
  if (key !== "") {
277
273
  headers.Authorization = `Bearer ${key}`;
@@ -118,6 +118,9 @@ const ENTITY_EXTRACTION_STOP_WORD_VALUES = [
118
118
  ] as const;
119
119
 
120
120
  export const ENTITY_EXTRACTION_STOP_WORDS: ReadonlySet<string> = new Set(ENTITY_EXTRACTION_STOP_WORD_VALUES);
121
+ /** Maximum raw text size for regex-only entity extraction. */
122
+ export const REGEX_EXTRACTION_MAX_INPUT_CHARS = 50_000;
123
+
121
124
  const ENTITY_PATTERNS: readonly RegExp[] = [
122
125
  /@(\w{2,30})/g,
123
126
  /#(\w{2,30})/g,
@@ -192,6 +195,7 @@ function isPureNumber(entity: string): boolean {
192
195
 
193
196
  export function extractEntitiesRegex(text: string): string[] {
194
197
  if (typeof text !== "string" || text.length === 0) return [];
198
+ if (text.length > REGEX_EXTRACTION_MAX_INPUT_CHARS) return [];
195
199
 
196
200
  const entities = new Set<string>();
197
201
  for (const sourcePattern of ENTITY_PATTERNS) {
@@ -5,6 +5,7 @@ import {
5
5
  getFastembedRuntimeDir,
6
6
  installRuntimeModuleResolver,
7
7
  logger,
8
+ type RuntimeInstallSpec,
8
9
  resolveRuntimeModule,
9
10
  } from "@oh-my-pi/pi-utils";
10
11
  import type * as Fastembed from "fastembed";
@@ -12,13 +13,21 @@ import packageManifest from "../../package.json" with { type: "json" };
12
13
 
13
14
  type FastembedModule = typeof Fastembed;
14
15
 
16
+ /** Runtime install inputs for the optional fastembed embedding stack. */
17
+ export interface FastembedRuntimeInstallPlan {
18
+ /** Cache directory key; changes when runtime resolution policy changes. */
19
+ versionKey: string;
20
+ /** Dependency graph written to the runtime cache package manifest. */
21
+ install: RuntimeInstallSpec;
22
+ }
23
+
15
24
  /**
16
25
  * `fastembed` and `onnxruntime-node` are optional peers (~270MB of native
17
26
  * assets across platforms), never bundled and never installed eagerly. When
18
27
  * the direct import cannot resolve — bundled `dist/cli.js`, compiled binary,
19
- * or a consumer that skipped the optional peers — the pinned pair is
20
- * `bun install`ed into a per-version runtime cache on first use and loaded
21
- * from there (#2389).
28
+ * a consumer that skipped the optional peers, or a native loader failure from
29
+ * fastembed's nested ORT — the pinned pair is `bun install`ed into a
30
+ * per-version runtime cache on first use and loaded from there (#2389, #2920).
22
31
  *
23
32
  * The pins live in `peerDependencies` as exact versions (not `catalog:`) so
24
33
  * this module reads concrete specs even when the workspace manifest is
@@ -27,6 +36,16 @@ type FastembedModule = typeof Fastembed;
27
36
  const FASTEMBED_SPEC = packageManifest.peerDependencies.fastembed;
28
37
  const ORT_SPEC = packageManifest.peerDependencies["onnxruntime-node"];
29
38
 
39
+ /** Build the deterministic fastembed runtime install plan used by local embeddings. */
40
+ export function fastembedRuntimeInstallPlan(): FastembedRuntimeInstallPlan {
41
+ return {
42
+ versionKey: `fastembed-${FASTEMBED_SPEC}_ort-${ORT_SPEC}_forced-ort`.replace(/[^A-Za-z0-9._-]/g, "_"),
43
+ install: {
44
+ dependencies: { fastembed: FASTEMBED_SPEC, "onnxruntime-node": ORT_SPEC },
45
+ overrides: { "onnxruntime-common": ORT_SPEC, "onnxruntime-node": ORT_SPEC },
46
+ },
47
+ };
48
+ }
30
49
  let fastembedLoad: Promise<FastembedModule> | null = null;
31
50
 
32
51
  export function loadFastembed(): Promise<FastembedModule> {
@@ -42,15 +61,15 @@ async function loadFastembedOnce(): Promise<FastembedModule> {
42
61
  // native addons and may be absent at runtime — a static import would load
43
62
  // the addon at module-init and crash every consumer without the peers.
44
63
  try {
45
- // Preload ORT 1.24 before fastembed's nested ORT 1.21 — only on Windows,
64
+ // Preload the pinned ORT before fastembed's nested ORT — only on Windows,
46
65
  // where loading the older binding first triggers a DLL-reuse crash.
47
66
  if (process.platform === "win32") {
48
67
  await import("onnxruntime-node");
49
68
  }
50
69
  return await import("fastembed");
51
70
  } catch (error) {
52
- if (!isModuleResolutionError(error)) throw error;
53
- logger.debug("mnemopi: fastembed not resolvable, using on-demand runtime install", {
71
+ if (!isRecoverableFastembedLoadError(error)) throw error;
72
+ logger.debug("mnemopi: fastembed not loadable, using on-demand runtime install", {
54
73
  error: String(error),
55
74
  });
56
75
  return loadFromRuntimeInstall();
@@ -58,10 +77,10 @@ async function loadFastembedOnce(): Promise<FastembedModule> {
58
77
  }
59
78
 
60
79
  async function loadFromRuntimeInstall(): Promise<FastembedModule> {
61
- const versionKey = `fastembed-${FASTEMBED_SPEC}_ort-${ORT_SPEC}`.replace(/[^A-Za-z0-9._-]/g, "_");
80
+ const plan = fastembedRuntimeInstallPlan();
62
81
  const runtimeDir = await ensureRuntimeInstalled({
63
- runtimeDir: path.join(getFastembedRuntimeDir(), versionKey),
64
- install: { dependencies: { fastembed: FASTEMBED_SPEC, "onnxruntime-node": ORT_SPEC } },
82
+ runtimeDir: path.join(getFastembedRuntimeDir(), plan.versionKey),
83
+ install: plan.install,
65
84
  probePackage: "fastembed",
66
85
  });
67
86
  const nodeModules = path.join(runtimeDir, "node_modules");
@@ -80,10 +99,10 @@ async function loadFromRuntimeInstall(): Promise<FastembedModule> {
80
99
  return requireRuntime(entry) as FastembedModule;
81
100
  }
82
101
 
83
- function isModuleResolutionError(error: unknown): boolean {
102
+ function isRecoverableFastembedLoadError(error: unknown): boolean {
84
103
  if (typeof error !== "object" || error === null) return false;
85
104
  const { name, code, message } = error as { name?: unknown; code?: unknown; message?: unknown };
86
105
  if (name === "ResolveMessage") return true;
87
- if (code === "ERR_MODULE_NOT_FOUND" || code === "MODULE_NOT_FOUND") return true;
106
+ if (code === "ERR_MODULE_NOT_FOUND" || code === "MODULE_NOT_FOUND" || code === "ERR_DLOPEN_FAILED") return true;
88
107
  return typeof message === "string" && /cannot find (module|package)/i.test(message);
89
108
  }