npm - @oh-my-pi/pi-mnemopi - Versions diffs - 16.0.4 → 16.0.5 - Mend

@oh-my-pi/pi-mnemopi 16.0.4 → 16.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/CHANGELOG.md +7 -0
package/dist/types/config.d.ts +1 -0
package/dist/types/core/beam/types.d.ts +1 -0
package/dist/types/core/entities.d.ts +2 -0
package/package.json +4 -4
package/src/config.ts +4 -0
package/src/core/beam/consolidate.ts +93 -19
package/src/core/beam/index.ts +3 -1
package/src/core/beam/types.ts +1 -0
package/src/core/entities.ts +4 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,13 @@
 ## [Unreleased]
+## [16.0.5] - 2026-06-17
+### Fixed
+- Capped `sleep_consolidation` episodic rows at `maxEpisodeChars` (default 100,000 characters, configurable via `MNEMOPI_MAX_EPISODE_CHARS`) so raw session transcripts cannot be stored and extracted as multi-megabyte episodes. ([#2869](https://github.com/can1357/oh-my-pi/issues/2869))
+- Skipped regex-only entity and pattern fact extraction for oversized raw transcripts so progress/log noise cannot flood MEMORIA with junk facts. ([#2868](https://github.com/can1357/oh-my-pi/issues/2868))
 ## [15.13.1] - 2026-06-15
 ### Added

package/dist/types/config.d.ts CHANGED Viewed

@@ -33,6 +33,7 @@ export declare function apiEmbeddingsAvailable(env?: Env): boolean;
 export declare function workingMemoryMaxItems(env?: Env): number;
 export declare function workingMemoryTtlHours(env?: Env): number;
 export declare function episodicRecallLimit(env?: Env): number;
+export declare function maxEpisodeChars(env?: Env): number;
 export declare function sleepBatchSize(env?: Env): number;
 export declare function scratchpadMaxItems(env?: Env): number;
 export declare function recencyHalflifeHours(env?: Env): number;

package/dist/types/core/beam/types.d.ts CHANGED Viewed

@@ -38,6 +38,7 @@ export interface BeamConfig {
     importanceWeight: number;
     useCloud: boolean;
     localLlmEnabled: boolean;
+    maxEpisodeChars: number;
 }
 export interface BeamMemoryOptions {
     sessionId?: string;

package/dist/types/core/entities.d.ts CHANGED Viewed

@@ -1,4 +1,6 @@
 export declare const ENTITY_EXTRACTION_STOP_WORDS: ReadonlySet<string>;
+/** Maximum raw text size for regex-only entity extraction. */
+export declare const REGEX_EXTRACTION_MAX_INPUT_CHARS = 50000;
 export declare function levenshteinDistance(s1: string, s2: string): number;
 export declare function similarity(s1: string, s2: string): number;
 export declare function extractEntitiesRegex(text: string): string[];

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-mnemopi",
-	"version": "16.0.4",
+	"version": "16.0.5",
 	"description": "Local SQLite memory engine for Oh My Pi agents",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -39,9 +39,9 @@
 		"fmt": "biome format --write ."
 	},
 	"dependencies": {
-		"@oh-my-pi/pi-ai": "16.0.4",
-		"@oh-my-pi/pi-catalog": "16.0.4",
-		"@oh-my-pi/pi-utils": "16.0.4",
+		"@oh-my-pi/pi-ai": "16.0.5",
+		"@oh-my-pi/pi-catalog": "16.0.5",
+		"@oh-my-pi/pi-utils": "16.0.5",
 		"lru-cache": "11.5.1"
 	},
 	"peerDependencies": {

package/src/config.ts CHANGED Viewed

@@ -126,6 +126,10 @@ export function episodicRecallLimit(env: Env = process.env): number {
 	return envInt("MNEMOPI_EP_LIMIT", 50000, env);
 }
+export function maxEpisodeChars(env: Env = process.env): number { // Per-episode character cap read from MNEMOPI_MAX_EPISODE_CHARS in `env` (defaults to process.env).
+	return Math.max(1, envInt("MNEMOPI_MAX_EPISODE_CHARS", 100000, env)); // 100000 is handed to envInt (presumably its fallback; confirm in envInt); values below 1 are raised to 1
+}
 export function sleepBatchSize(env: Env = process.env): number { // Sleep batch size read from MNEMOPI_SLEEP_BATCH in `env` (defaults to process.env).
 	return envInt("MNEMOPI_SLEEP_BATCH", 5000, env); // 5000 is handed to envInt (presumably its fallback; confirm in envInt)
 }

package/src/core/beam/consolidate.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import type { SQLQueryBindings } from "bun:sqlite";
 import { generateId, stableMemoryId } from "../../util/ids";
 import { aaakEncode } from "../aaak";
+import { REGEX_EXTRACTION_MAX_INPUT_CHARS } from "../entities";
 import { EpisodicGraph } from "../episodic-graph";
 import { heuristicExtractFacts } from "../extraction";
 import { clampVeracity } from "../veracity-consolidation";
@@ -57,6 +58,70 @@ const TIER2_DAYS = envInt("MNEMOPI_TIER2_DAYS", 30);
 const TIER3_DAYS = envInt("MNEMOPI_TIER3_DAYS", 180);
 const DEGRADE_BATCH_SIZE = envInt("MNEMOPI_DEGRADE_BATCH", 100);
 const TIER3_MAX_CHARS = envInt("MNEMOPI_TIER3_MAX_CHARS", 300);
+const DEFAULT_MAX_EPISODE_CHARS = 100_000; // cap used by normalizedMaxEpisodeChars when the configured value is missing or unusable
+const SLEEP_SUMMARY_SEPARATOR = " | "; // placed between item contents when a chunk is joined into one summary
+const SLEEP_TRUNCATION_MARKER = "\n[... sleep_consolidation episode truncated by maxEpisodeChars ...]"; // appended by markTruncated to summaries that were cut
+const PATTERN_FACT_EXTRACTION_MAX_INPUT_CHARS = REGEX_EXTRACTION_MAX_INPUT_CHARS; // extractAndStoreFacts returns its counts untouched for text longer than this
+type SleepSummary = { // value returned by buildSleepSummary
+	summary: string; // prefixed, encoded text; cut by markTruncated when it was over the cap
+	originalChars: number; // copied from the chunk's originalChars
+	truncated: boolean; // true when the uncapped summary was longer than maxChars
+	maxChars: number; // the cap the summary was checked against
+};
+type SleepChunk = { // one group of rows produced by splitSleepItems
+	items: Row[]; // rows that will be summarised together
+	originalChars: number; // summed content lengths plus one separator between neighbouring items
+};
+function normalizedMaxEpisodeChars(beam: BeamMemoryState): number { // Resolves the per-episode character cap for `beam`; always returns a positive finite integer.
+	const configured = Math.trunc(beam.config?.maxEpisodeChars ?? DEFAULT_MAX_EPISODE_CHARS); // a missing config or field uses the default; any fractional part is dropped
+	return Number.isFinite(configured) && configured > 0 ? configured : DEFAULT_MAX_EPISODE_CHARS; // NaN, ±Infinity, zero and negative values fall back to the default
+}
+function markTruncated(content: string, maxChars: number): string { // Cuts `content` so the result is at most `maxChars` long, ending in SLEEP_TRUNCATION_MARKER when there is room for it.
+	if (maxChars <= 0) return ""; // no budget: nothing can be kept
+	if (maxChars <= SLEEP_TRUNCATION_MARKER.length) return content.slice(0, maxChars); // the marker would leave no room for content, so hard-cut without it
+	const bodyChars = maxChars - SLEEP_TRUNCATION_MARKER.length; // characters left for content once the marker is reserved
+	return `${content.slice(0, bodyChars).trimEnd()}${SLEEP_TRUNCATION_MARKER}`; // NOTE(review): slice counts UTF-16 code units, so the cut can land inside a surrogate pair
+}
+function splitSleepItems(beam: BeamMemoryState, source: string, items: readonly Row[]): SleepChunk[] { // Partitions `items`, in their original order, into chunks sized against the episode cap.
+	const maxChars = normalizedMaxEpisodeChars(beam);
+	const prefixChars = `[${source}] `.length; // length of the "[source] " prefix that buildSleepSummary prepends
+	const joinedLimit = Math.max(0, maxChars - prefixChars); // budget left for the joined item contents; never negative
+	const chunks: SleepChunk[] = [];
+	let current: Row[] = [];
+	let currentChars = 0; // joined length of `current`, separators included
+	for (const item of items) {
+		const contentChars = (rowValue(item, "content") ?? "").length; // missing content counts as empty; NOTE(review): this is the raw length, while buildSleepSummary measures after aaakEncode
+		const separatorChars = current.length === 0 ? 0 : SLEEP_SUMMARY_SEPARATOR.length;
+		if (current.length > 0 && currentChars + separatorChars + contentChars > joinedLimit) { // adding this item would overflow, so close the open chunk first
+			chunks.push({ items: current, originalChars: currentChars });
+			current = [];
+			currentChars = 0;
+		}
+		current.push(item); // an item larger than the budget still lands in a chunk of its own
+		currentChars += (current.length === 1 ? 0 : SLEEP_SUMMARY_SEPARATOR.length) + contentChars; // separator re-derived because the chunk may have just been reset
+	}
+	if (current.length > 0) chunks.push({ items: current, originalChars: currentChars }); // flush the trailing chunk
+	return chunks;
+}
+function buildSleepSummary(beam: BeamMemoryState, source: string, chunk: SleepChunk): SleepSummary { // Builds the "[source] "-prefixed, aaakEncode'd summary for one chunk and caps it at the episode limit.
+	const maxChars = normalizedMaxEpisodeChars(beam);
+	const prefix = `[${source}] `;
+	const joined = chunk.items.map(item => rowValue(item, "content") ?? "").join(SLEEP_SUMMARY_SEPARATOR); // missing content becomes ""
+	const uncapped = `${prefix}${aaakEncode(joined)}`;
+	const truncated = uncapped.length > maxChars; // measured after encoding, prefix included
+	return {
+		summary: truncated ? markTruncated(uncapped, maxChars) : uncapped,
+		originalChars: chunk.originalChars, // taken from the chunk as given, not from uncapped.length
+		truncated,
+		maxChars,
+	};
+}
 function isoNow(): string {
 	return new Date().toISOString();
@@ -440,6 +505,7 @@ export function extractAndStoreFacts(
 		decision: 0,
 	};
 	const text = String(content ?? "");
+	if (text.length > PATTERN_FACT_EXTRACTION_MAX_INPUT_CHARS) return counts;
 	for (const match of text.matchAll(
 		/(\d+(?:[.,]\d+)?)\s*(ms|sec|seconds?|minutes?|hours?|days?|weeks?|months?|%|KB|MB|GB|TB|rows?|columns?|roles?|features?|bugs?|commits?|cards?|users?|items?|tests?|APIs?|endpoints?|sprints?|tickets?)\b/gi,
 	)) {
@@ -878,26 +944,34 @@ export function sleep(beam: BeamMemoryState, dryRun = false): SleepResult {
 	const consolidatedIds: string[] = [];
 	let summariesCreated = 0;
 	for (const [source, items] of grouped) {
-		const lines = items.map(item => rowValue(item, "content") ?? "");
-		const ids = items.map(item => rowValue(item, "id")).filter((id): id is string => id !== null);
-		let scope = "session";
-		let validUntil: string | null = null;
-		for (const item of items) {
-			if (rowValue(item, "scope") === "global") scope = "global";
-			const itemValidUntil = rowValue(item, "valid_until");
-			if (itemValidUntil && (validUntil === null || itemValidUntil < validUntil)) validUntil = itemValidUntil;
-		}
-		const summary = `[${source}] ${aaakEncode(lines.join(" | "))}`;
-		if (!dryRun) {
-			consolidateToEpisodic(beam, summary, ids, "sleep_consolidation", 0.6, {
-				scope,
-				validUntil,
-				veracity: aggregateEpisodicVeracity(items.map(item => rowValue(item, "veracity") ?? "unknown")),
-				metadata: { original_count: items.length, source, llm_used: false },
-			});
+		for (const chunk of splitSleepItems(beam, source, items)) {
+			const ids = chunk.items.map(item => rowValue(item, "id")).filter((id): id is string => id !== null);
+			let scope = "session";
+			let validUntil: string | null = null;
+			for (const item of chunk.items) {
+				if (rowValue(item, "scope") === "global") scope = "global";
+				const itemValidUntil = rowValue(item, "valid_until");
+				if (itemValidUntil && (validUntil === null || itemValidUntil < validUntil)) validUntil = itemValidUntil;
+			}
+			const sleepSummary = buildSleepSummary(beam, source, chunk);
+			const metadata: Metadata = { original_count: chunk.items.length, source, llm_used: false };
+			if (sleepSummary.truncated) {
+				metadata.truncated = true;
+				metadata.original_chars = sleepSummary.originalChars;
+				metadata.max_chars = sleepSummary.maxChars;
+			}
+			const summary = sleepSummary.summary;
+			if (!dryRun) {
+				consolidateToEpisodic(beam, summary, ids, "sleep_consolidation", 0.6, {
+					scope,
+					validUntil,
+					veracity: aggregateEpisodicVeracity(chunk.items.map(item => rowValue(item, "veracity") ?? "unknown")),
+					metadata,
+				});
+			}
+			consolidatedIds.push(...ids);
+			summariesCreated++;
 		}
-		consolidatedIds.push(...ids);
-		summariesCreated++;
 	}
 	if (!dryRun) {
 		beam.db.run(

package/src/core/beam/index.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import type { Database } from "bun:sqlite";
 import { existsSync } from "node:fs";
-import { ftsWeight, importanceWeight, vectorWeight } from "../../config";
+import { ftsWeight, importanceWeight, maxEpisodeChars, vectorWeight } from "../../config";
 import { closeQuietly, openDatabase } from "../../db";
 import { AnnotationStore } from "../annotations";
 import { EpisodicGraph } from "../episodic-graph";
@@ -68,6 +68,7 @@ const DEFAULT_CONFIG: BeamConfig = {
 	importanceWeight: 0.2,
 	useCloud: false,
 	localLlmEnabled: false,
+	maxEpisodeChars: 100_000,
 };
 function normalizeConfig(options: BeamMemoryOptions): BeamConfig {
@@ -82,6 +83,7 @@ function normalizeConfig(options: BeamMemoryOptions): BeamConfig {
 		importanceWeight: configured.importanceWeight ?? importanceWeight(),
 		useCloud,
 		localLlmEnabled: configured.localLlmEnabled ?? DEFAULT_CONFIG.localLlmEnabled,
+		maxEpisodeChars: configured.maxEpisodeChars ?? maxEpisodeChars(),
 	};
 }
 function autoMigrateAnnotations(db: Database, dbPath: string | undefined): void {

package/src/core/beam/types.ts CHANGED Viewed

@@ -53,6 +53,7 @@ export interface BeamConfig {
 	importanceWeight: number;
 	useCloud: boolean;
 	localLlmEnabled: boolean;
+	maxEpisodeChars: number;
 }
 export interface BeamMemoryOptions {

package/src/core/entities.ts CHANGED Viewed

@@ -118,6 +118,9 @@ const ENTITY_EXTRACTION_STOP_WORD_VALUES = [
 ] as const;
 export const ENTITY_EXTRACTION_STOP_WORDS: ReadonlySet<string> = new Set(ENTITY_EXTRACTION_STOP_WORD_VALUES);
+/** Maximum raw text size for regex-only entity extraction. */
+export const REGEX_EXTRACTION_MAX_INPUT_CHARS = 50_000;
 const ENTITY_PATTERNS: readonly RegExp[] = [
 	/@(\w{2,30})/g,
 	/#(\w{2,30})/g,
@@ -192,6 +195,7 @@ function isPureNumber(entity: string): boolean {
 export function extractEntitiesRegex(text: string): string[] {
 	if (typeof text !== "string" || text.length === 0) return [];
+	if (text.length > REGEX_EXTRACTION_MAX_INPUT_CHARS) return [];
 	const entities = new Set<string>();
 	for (const sourcePattern of ENTITY_PATTERNS) {