npm - @oh-my-pi/pi-mnemopi - Versions diffs - 16.1.6 → 16.1.8 - Mend

@oh-my-pi/pi-mnemopi 16.1.6 → 16.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/CHANGELOG.md +7 -0
package/dist/types/config.d.ts +23 -4
package/dist/types/core/beam/types.d.ts +2 -0
package/dist/types/core/memory.d.ts +1 -0
package/dist/types/core/runtime-options.d.ts +3 -0
package/package.json +4 -4
package/src/config.ts +32 -7
package/src/core/beam/index.ts +4 -1
package/src/core/beam/store.ts +6 -1
package/src/core/beam/types.ts +2 -0
package/src/core/embeddings.ts +74 -0
package/src/core/memory.ts +6 -1
package/src/core/runtime-options.ts +3 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,13 @@
 ## [Unreleased]
+## [16.1.8] - 2026-06-20
+### Fixed
+- Capped per-input length in `embed()` at `MNEMOPI_EMBEDDING_MAX_INPUT_CHARS` (default 8192 chars, override via the env var or `embeddings.maxInputChars` runtime option; `0` disables) so a long retention transcript can no longer overflow the embedding model's context window. Oversized inputs are clipped with a head/tail split so chronological transcripts keep both the opening setup and the most recent turns instead of losing the latest content under a naive prefix slice. llama.cpp's `/embeddings` server used to reject the request with `request (N tokens) exceeds the available context size`, silently dropping vector recall for that memory ([#3126](https://github.com/can1357/oh-my-pi/issues/3126)).
+- Fixed the proactive-linking write path ignoring host configuration: `proactiveLinkIfEnabled` read `MNEMOPI_PROACTIVE_LINKING` directly, so a host that enabled proactive linking through `configureRecallFeatures()` had no effect unless the environment variable was also set. `proactiveLinking` is now a `RecallFeatureFlags` option resolved through a `proactiveLinkingEnabled()` fallback, matching the existing polyphonic and enhanced recall flags, with the `MNEMOPI_PROACTIVE_LINKING` environment variable still taking precedence whenever it is set. ([#2440](https://github.com/can1357/oh-my-pi/issues/2440))
 ## [16.1.3] - 2026-06-19
 ### Added

package/dist/types/config.d.ts CHANGED Viewed

@@ -28,6 +28,23 @@ export declare function embeddingApiKey(env?: Env): string;
 export declare function embeddingApiUrl(env?: Env): string;
 export declare function embeddingsViaApi(env?: Env): boolean;
 export declare function embeddingsDisabled(env?: Env): boolean;
+/**
+ * Per-input character cap applied inside `embed()` before any provider sees the text.
+ *
+ * Long retention transcripts (full multi-turn session windows) routinely outgrow
+ * embedding model context windows: BGE/E5 defaults are 512 tokens, bge-m3 is
+ * 8192, and OpenAI's text-embedding-3-* is 8192. llama.cpp's `/embeddings`
+ * server rejects oversized requests with `request (N tokens) exceeds the
+ * available context size`; OpenAI silently right-truncates. Capping at the
+ * source gives both backends deterministic behavior and prevents the silent
+ * recall degradation we saw in issue #3126.
+ *
+ * Default `8192` chars is intentionally conservative for 8192-token embedding
+ * contexts (bge-m3, OpenAI text-embedding-3) and CJK-heavy transcripts. Raise
+ * it for larger local contexts (for example Qwen3-Embedding with 32k ctx).
+ * `0` disables the cap.
+ */
+export declare function embeddingMaxInputChars(env?: Env): number;
 export declare function isApiEmbeddingModel(model?: string, env?: Env): boolean;
 export declare function apiEmbeddingsAvailable(env?: Env): boolean;
 export declare function workingMemoryMaxItems(env?: Env): number;
@@ -57,21 +74,23 @@ export declare function ftsWeight(env?: Env): number;
 export declare function importanceWeight(env?: Env): number;
 export declare function normalizedRecallWeights(vec?: number, fts?: number, importance?: number): readonly [number, number, number];
 export declare function autoMigrateEnabled(env?: Env): boolean;
-export declare function proactiveLinkingEnabled(env?: Env): boolean;
 export interface RecallFeatureFlags {
     polyphonicRecall?: boolean;
     enhancedRecall?: boolean;
+    proactiveLinking?: boolean;
 }
 /**
  * Sets process-wide defaults for the env-gated recall features. Host configuration
- * (e.g. the coding-agent `mnemopi.polyphonicRecall` / `mnemopi.enhancedRecall`
- * settings) lands here; the `MNEMOPI_POLYPHONIC_RECALL` / `MNEMOPI_ENHANCED_RECALL`
- * environment variables still win whenever they are set.
+ * (e.g. the coding-agent `mnemopi.polyphonicRecall` / `mnemopi.enhancedRecall` /
+ * `mnemopi.proactiveLinking` settings) lands here; the `MNEMOPI_POLYPHONIC_RECALL` /
+ * `MNEMOPI_ENHANCED_RECALL` / `MNEMOPI_PROACTIVE_LINKING` environment variables still
+ * win whenever they are set.
  */
 export declare function configureRecallFeatures(flags: RecallFeatureFlags): void;
 export declare function polyphonicRecallEnabled(env?: Env): boolean;
 export declare function temporalHalflifeHours(env?: Env): number;
 export declare function enhancedRecallEnabled(env?: Env): boolean;
+export declare function proactiveLinkingEnabled(env?: Env): boolean;
 export declare function llmEnabled(env?: Env): boolean;
 export declare function llmMaxTokens(env?: Env): number;
 export declare function llmThreads(env?: Env): number;

package/dist/types/core/beam/types.d.ts CHANGED Viewed

@@ -39,6 +39,7 @@ export interface BeamConfig {
     useCloud: boolean;
     localLlmEnabled: boolean;
     maxEpisodeChars: number;
+    proactiveLinking?: boolean;
 }
 export interface BeamMemoryOptions {
     sessionId?: string;
@@ -47,6 +48,7 @@ export interface BeamMemoryOptions {
     authorType?: string | null;
     channelId?: string | null;
     useCloud?: boolean;
+    proactiveLinking?: boolean;
     eventEmitter?: (event: BeamEvent) => void;
     pluginManager?: BeamPluginManager | null;
     annotations?: AnnotationStoreLike | null;

package/dist/types/core/memory.d.ts CHANGED Viewed

@@ -27,6 +27,7 @@ export interface MnemopiOptions {
     readonly llmApiKey?: ApiKey;
     readonly llmModel?: string | Model<Api>;
     readonly llm?: false | MnemopiLlmRuntimeOptions | Model<Api> | MnemopiLlmCompletion;
+    readonly proactiveLinking?: boolean;
     /** Escalate best-effort failure logs (embedding pipeline) from debug to warn. */
     readonly debug?: boolean;
     /**

package/dist/types/core/runtime-options.d.ts CHANGED Viewed

@@ -24,6 +24,8 @@ export interface MnemopiEmbeddingRuntimeOptions {
     apiUrl?: string;
     apiKey?: ApiKey;
     provider?: MnemopiEmbeddingProvider | ((texts: readonly string[]) => EmbeddingOutput | Promise<EmbeddingOutput>);
+    /** Override `MNEMOPI_EMBEDDING_MAX_INPUT_CHARS`. `0` disables the cap. See `config.embeddingMaxInputChars`. */
+    maxInputChars?: number;
 }
 export interface MnemopiLlmRuntimeOptions {
     enabled?: boolean;
@@ -49,6 +51,7 @@ export interface ResolvedMnemopiEmbeddingRuntimeOptions {
     apiUrl?: string;
     apiKey?: ApiKey;
     provider?: MnemopiEmbeddingProvider;
+    maxInputChars?: number;
 }
 export interface ResolvedMnemopiLlmRuntimeOptions {
     enabled?: boolean;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-mnemopi",
-	"version": "16.1.6",
+	"version": "16.1.8",
 	"description": "Local SQLite memory engine for Oh My Pi agents",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -39,9 +39,9 @@
 		"fmt": "biome format --write ."
 	},
 	"dependencies": {
-		"@oh-my-pi/pi-ai": "16.1.6",
-		"@oh-my-pi/pi-catalog": "16.1.6",
-		"@oh-my-pi/pi-utils": "16.1.6",
+		"@oh-my-pi/pi-ai": "16.1.8",
+		"@oh-my-pi/pi-catalog": "16.1.8",
+		"@oh-my-pi/pi-utils": "16.1.8",
 		"lru-cache": "11.5.1"
 	},
 	"peerDependencies": {

package/src/config.ts CHANGED Viewed

@@ -99,6 +99,26 @@ export function embeddingsDisabled(env: Env = process.env): boolean {
 	return envString("MNEMOPI_NO_EMBEDDINGS", "", env) !== "";
 }
+/**
+ * Per-input character cap applied inside `embed()` before any provider sees the text.
+ *
+ * Long retention transcripts (full multi-turn session windows) routinely outgrow
+ * embedding model context windows: BGE/E5 defaults are 512 tokens, bge-m3 is
+ * 8192, and OpenAI's text-embedding-3-* is 8192. llama.cpp's `/embeddings`
+ * server rejects oversized requests with `request (N tokens) exceeds the
+ * available context size`; OpenAI silently right-truncates. Capping at the
+ * source gives both backends deterministic behavior and prevents the silent
+ * recall degradation we saw in issue #3126.
+ *
+ * Default `8192` chars is intentionally conservative for 8192-token embedding
+ * contexts (bge-m3, OpenAI text-embedding-3) and CJK-heavy transcripts. Raise
+ * it for larger local contexts (for example Qwen3-Embedding with 32k ctx).
+ * `0` disables the cap.
+ */
+export function embeddingMaxInputChars(env: Env = process.env): number {
+	return Math.max(0, envInt("MNEMOPI_EMBEDDING_MAX_INPUT_CHARS", 8192, env));
+}
 export function isApiEmbeddingModel(model = embeddingModel(), env: Env = process.env): boolean {
 	if (model.startsWith("openai/") || model.includes("text-embedding") || model.startsWith("text-embedding"))
 		return true;
@@ -248,27 +268,27 @@ export function autoMigrateEnabled(env: Env = process.env): boolean {
 	return envString("MNEMOPI_AUTO_MIGRATE", "1", env) !== "0";
 }
-export function proactiveLinkingEnabled(env: Env = process.env): boolean {
-	return envString("MNEMOPI_PROACTIVE_LINKING", "0", env) === "1";
-}
 export interface RecallFeatureFlags {
 	polyphonicRecall?: boolean;
 	enhancedRecall?: boolean;
+	proactiveLinking?: boolean;
 }
 let polyphonicRecallDefault = false;
 let enhancedRecallDefault = false;
+let proactiveLinkingDefault = false;
 /**
  * Sets process-wide defaults for the env-gated recall features. Host configuration
- * (e.g. the coding-agent `mnemopi.polyphonicRecall` / `mnemopi.enhancedRecall`
- * settings) lands here; the `MNEMOPI_POLYPHONIC_RECALL` / `MNEMOPI_ENHANCED_RECALL`
- * environment variables still win whenever they are set.
+ * (e.g. the coding-agent `mnemopi.polyphonicRecall` / `mnemopi.enhancedRecall` /
+ * `mnemopi.proactiveLinking` settings) lands here; the `MNEMOPI_POLYPHONIC_RECALL` /
+ * `MNEMOPI_ENHANCED_RECALL` / `MNEMOPI_PROACTIVE_LINKING` environment variables still
+ * win whenever they are set.
  */
 export function configureRecallFeatures(flags: RecallFeatureFlags): void {
 	if (flags.polyphonicRecall !== undefined) polyphonicRecallDefault = flags.polyphonicRecall;
 	if (flags.enhancedRecall !== undefined) enhancedRecallDefault = flags.enhancedRecall;
+	if (flags.proactiveLinking !== undefined) proactiveLinkingDefault = flags.proactiveLinking;
 }
 export function polyphonicRecallEnabled(env: Env = process.env): boolean {
@@ -285,6 +305,11 @@ export function enhancedRecallEnabled(env: Env = process.env): boolean {
 	return value === undefined ? enhancedRecallDefault : value === "1";
 }
+export function proactiveLinkingEnabled(env: Env = process.env): boolean {
+	const value = envOptionalString("MNEMOPI_PROACTIVE_LINKING", env);
+	return value === undefined ? proactiveLinkingDefault : value === "1";
+}
 export function llmEnabled(env: Env = process.env): boolean {
 	return envBool("MNEMOPI_LLM_ENABLED", true, env);
 }

package/src/core/beam/index.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import type { Database } from "bun:sqlite";
 import { existsSync } from "node:fs";
-import { ftsWeight, importanceWeight, maxEpisodeChars, vectorWeight } from "../../config";
+import { ftsWeight, importanceWeight, maxEpisodeChars, proactiveLinkingEnabled, vectorWeight } from "../../config";
 import { closeQuietly, openDatabase } from "../../db";
 import { AnnotationStore } from "../annotations";
 import { EpisodicGraph } from "../episodic-graph";
@@ -69,11 +69,13 @@ const DEFAULT_CONFIG: BeamConfig = {
 	useCloud: false,
 	localLlmEnabled: false,
 	maxEpisodeChars: 100_000,
+	proactiveLinking: false,
 };
 function normalizeConfig(options: BeamMemoryOptions): BeamConfig {
 	const configured = options.config ?? {};
 	const useCloud = options.useCloud ?? configured.useCloud ?? DEFAULT_CONFIG.useCloud;
+	const proactiveLinking = options.proactiveLinking ?? configured.proactiveLinking ?? proactiveLinkingEnabled({});
 	return {
 		workingMemoryLimit: configured.workingMemoryLimit ?? DEFAULT_CONFIG.workingMemoryLimit,
 		workingMemoryTtlHours: configured.workingMemoryTtlHours ?? DEFAULT_CONFIG.workingMemoryTtlHours,
@@ -84,6 +86,7 @@ function normalizeConfig(options: BeamMemoryOptions): BeamConfig {
 		useCloud,
 		localLlmEnabled: configured.localLlmEnabled ?? DEFAULT_CONFIG.localLlmEnabled,
 		maxEpisodeChars: configured.maxEpisodeChars ?? maxEpisodeChars(),
+		proactiveLinking,
 	};
 }
 function autoMigrateAnnotations(db: Database, dbPath: string | undefined): void {

package/src/core/beam/store.ts CHANGED Viewed

@@ -187,13 +187,18 @@ function addTemporalAnnotations(beam: BeamMemoryState, memoryId: string, timesta
 	}
 }
+function proactiveLinkingAllowed(beam: BeamMemoryState): boolean {
+	const override = process.env.MNEMOPI_PROACTIVE_LINKING;
+	return override === undefined ? beam.config.proactiveLinking === true : override === "1";
+}
 function proactiveLinkIfEnabled(
 	beam: BeamMemoryState,
 	memoryId: string,
 	content: string,
 	extractEntities: boolean,
 ): void {
-	if (process.env.MNEMOPI_PROACTIVE_LINKING !== "1") return;
+	if (!proactiveLinkingAllowed(beam)) return;
 	try {
 		const graph =
 			beam.episodicGraph instanceof EpisodicGraph

package/src/core/beam/types.ts CHANGED Viewed

@@ -54,6 +54,7 @@ export interface BeamConfig {
 	useCloud: boolean;
 	localLlmEnabled: boolean;
 	maxEpisodeChars: number;
+	proactiveLinking?: boolean;
 }
 export interface BeamMemoryOptions {
@@ -63,6 +64,7 @@ export interface BeamMemoryOptions {
 	authorType?: string | null;
 	channelId?: string | null;
 	useCloud?: boolean;
+	proactiveLinking?: boolean;
 	eventEmitter?: (event: BeamEvent) => void;
 	pluginManager?: BeamPluginManager | null;
 	annotations?: AnnotationStoreLike | null;

package/src/core/embeddings.ts CHANGED Viewed

@@ -120,6 +120,79 @@ export function embeddingsDisabled(): boolean {
 	return $flag("MNEMOPI_NO_EMBEDDINGS");
 }
+/**
+ * Resolved per-input character cap for {@link embed}.
+ *
+ * Reads (in order): the active runtime scope's `embeddings.maxInputChars`, then
+ * `MNEMOPI_EMBEDDING_MAX_INPUT_CHARS`, then the bundled `8192` default. `0`
+ * disables the cap entirely.
+ */
+function effectiveMaxInputChars(): number {
+	const override = activeEmbeddingOptions()?.maxInputChars;
+	if (override !== undefined) return Math.max(0, Math.trunc(override));
+	const envValue = Number.parseInt($env.MNEMOPI_EMBEDDING_MAX_INPUT_CHARS ?? "", 10);
+	if (Number.isFinite(envValue) && envValue >= 0) return envValue;
+	return 8192;
+}
+/** Elision marker injected between the retained head and tail of an oversized input. */
+const EMBEDDING_ELISION_MARKER = "\n\n[...]\n\n";
+/**
+ * Right-clip a single oversized input to {@link max} chars while preserving
+ * both ends. Retention transcripts are chronological (oldest → newest), so a
+ * naive `slice(0, max)` would drop the most recent — and most semantically
+ * loaded — turns once a session passed the cap, leaving every later retained
+ * episode with essentially the same prefix vector. Keeping a head/tail split
+ * lets the embedding capture the topic setup at the start AND the latest
+ * exchanges at the end. Falls back to a tail-only clip when `max` is too
+ * small to fit the elision marker plus a useful slice on either side.
+ */
+function clipToWindow(text: string, max: number): string {
+	if (text.length <= max) return text;
+	if (max <= EMBEDDING_ELISION_MARKER.length + 16) return text.slice(text.length - max);
+	const budget = max - EMBEDDING_ELISION_MARKER.length;
+	const headLen = budget >>> 1;
+	const tailLen = budget - headLen;
+	return text.slice(0, headLen) + EMBEDDING_ELISION_MARKER + text.slice(text.length - tailLen);
+}
+/**
+ * Clip every input to {@link effectiveMaxInputChars} so a runaway retention
+ * transcript can't blow past the embedding model's context window. Uses a
+ * head/tail split via {@link clipToWindow} so the embedding still sees the
+ * tail of the conversation (where the latest topic shifts live) and not just
+ * the stale prefix. Returns the original array when no input needs trimming
+ * (the common case); the new array is allocated only when at least one input
+ * is oversized so we don't churn arrays for the typical short-query path
+ * through `embedQuery`. Emits one debug-or-warn log per call summarizing how
+ * many inputs were trimmed and by how much — silent truncation was the
+ * original bug (#3126).
+ */
+function capInputs(texts: readonly string[]): readonly string[] {
+	const max = effectiveMaxInputChars();
+	if (max === 0) return texts;
+	let trimmed: string[] | null = null;
+	let trimmedCount = 0;
+	let maxOriginalLen = 0;
+	for (let i = 0; i < texts.length; i++) {
+		const text = texts[i] ?? "";
+		if (text.length <= max) continue;
+		if (trimmed === null) trimmed = texts.slice() as string[];
+		trimmed[i] = clipToWindow(text, max);
+		trimmedCount++;
+		if (text.length > maxOriginalLen) maxOriginalLen = text.length;
+	}
+	if (trimmed === null) return texts;
+	logger[mnemopiDebugEnabled() ? "warn" : "debug"]("mnemopi: embedding input truncated", {
+		inputCount: texts.length,
+		trimmedCount,
+		maxOriginalLen,
+		maxInputChars: max,
+	});
+	return trimmed;
+}
 function embeddingApiKey(): ApiKey {
 	const active = activeEmbeddingOptions();
 	if (active?.apiKey !== undefined) {
@@ -408,6 +481,7 @@ export async function embed(texts: readonly string[]): Promise<EmbeddingMatrix |
 	if (texts.length === 0 || embeddingsDisabled()) {
 		return null;
 	}
+	texts = capInputs(texts);
 	const activeProvider = resolveEmbeddingProvider(activeEmbeddingOptions()?.provider);
 	if (activeProvider !== undefined) {
 		try {

package/src/core/memory.ts CHANGED Viewed

@@ -43,6 +43,7 @@ export interface MnemopiOptions {
 	readonly llmApiKey?: ApiKey;
 	readonly llmModel?: string | Model<Api>;
 	readonly llm?: false | MnemopiLlmRuntimeOptions | Model<Api> | MnemopiLlmCompletion;
+	readonly proactiveLinking?: boolean;
 	/** Escalate best-effort failure logs (embedding pipeline) from debug to warn. */
 	readonly debug?: boolean;
 	/**
@@ -161,19 +162,22 @@ function resolveRuntimeOptions(options: MnemopiOptions): ResolvedMnemopiRuntimeO
 	const embeddingApiUrl = options.embeddingApiUrl ?? nestedEmbeddings?.apiUrl;
 	const embeddingApiKey = options.embeddingApiKey ?? nestedEmbeddings?.apiKey;
 	const embeddingProvider = resolveEmbeddingProvider(nestedEmbeddings?.provider);
+	const embeddingMaxInputChars = nestedEmbeddings?.maxInputChars;
 	const embeddings =
 		embeddingDisabled !== undefined ||
 		embeddingModel !== undefined ||
 		embeddingApiUrl !== undefined ||
 		embeddingApiKey !== undefined ||
-		embeddingProvider !== undefined
+		embeddingProvider !== undefined ||
+		embeddingMaxInputChars !== undefined
 			? {
 					disabled: embeddingDisabled,
 					model: embeddingModel,
 					apiUrl: embeddingApiUrl,
 					apiKey: embeddingApiKey,
 					provider: embeddingProvider,
+					maxInputChars: embeddingMaxInputChars,
 				}
 			: undefined;
@@ -380,6 +384,7 @@ export class Mnemopi {
 			authorId: this.authorId,
 			authorType: this.authorType,
 			channelId: this.channelId,
+			proactiveLinking: options.proactiveLinking,
 		});
 		this.#ownsDb = options.db === undefined;
 		if (options.db !== undefined) {

package/src/core/runtime-options.ts CHANGED Viewed

@@ -33,6 +33,8 @@ export interface MnemopiEmbeddingRuntimeOptions {
 	apiUrl?: string;
 	apiKey?: ApiKey;
 	provider?: MnemopiEmbeddingProvider | ((texts: readonly string[]) => EmbeddingOutput | Promise<EmbeddingOutput>);
+	/** Override `MNEMOPI_EMBEDDING_MAX_INPUT_CHARS`. `0` disables the cap. See `config.embeddingMaxInputChars`. */
+	maxInputChars?: number;
 }
 export interface MnemopiLlmRuntimeOptions {
@@ -61,6 +63,7 @@ export interface ResolvedMnemopiEmbeddingRuntimeOptions {
 	apiUrl?: string;
 	apiKey?: ApiKey;
 	provider?: MnemopiEmbeddingProvider;
+	maxInputChars?: number;
 }
 export interface ResolvedMnemopiLlmRuntimeOptions {