@oh-my-pi/pi-mnemopi 16.0.4 → 16.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,13 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [16.0.5] - 2026-06-17
6
+
7
+ ### Fixed
8
+
9
+ - Capped `sleep_consolidation` episodic rows at `maxEpisodeChars` (default 100,000 characters, overridable via `MNEMOPI_MAX_EPISODE_CHARS`) so raw session transcripts cannot be stored and extracted as multi-megabyte episodes. ([#2869](https://github.com/can1357/oh-my-pi/issues/2869))
10
+ - Skipped regex-only entity and pattern fact extraction for oversized raw transcripts (inputs longer than 50,000 characters, `REGEX_EXTRACTION_MAX_INPUT_CHARS`) so progress/log noise cannot flood MEMORIA with junk facts. ([#2868](https://github.com/can1357/oh-my-pi/issues/2868))
11
+
5
12
  ## [15.13.1] - 2026-06-15
6
13
 
7
14
  ### Added
@@ -33,6 +33,7 @@ export declare function apiEmbeddingsAvailable(env?: Env): boolean;
33
33
  export declare function workingMemoryMaxItems(env?: Env): number;
34
34
  export declare function workingMemoryTtlHours(env?: Env): number;
35
35
  export declare function episodicRecallLimit(env?: Env): number;
36
+ export declare function maxEpisodeChars(env?: Env): number;
36
37
  export declare function sleepBatchSize(env?: Env): number;
37
38
  export declare function scratchpadMaxItems(env?: Env): number;
38
39
  export declare function recencyHalflifeHours(env?: Env): number;
@@ -38,6 +38,7 @@ export interface BeamConfig {
38
38
  importanceWeight: number;
39
39
  useCloud: boolean;
40
40
  localLlmEnabled: boolean;
41
+ maxEpisodeChars: number;
41
42
  }
42
43
  export interface BeamMemoryOptions {
43
44
  sessionId?: string;
@@ -1,4 +1,6 @@
1
1
  export declare const ENTITY_EXTRACTION_STOP_WORDS: ReadonlySet<string>;
2
+ /** Maximum raw text size for regex-only entity extraction. */
3
+ export declare const REGEX_EXTRACTION_MAX_INPUT_CHARS = 50000;
2
4
  export declare function levenshteinDistance(s1: string, s2: string): number;
3
5
  export declare function similarity(s1: string, s2: string): number;
4
6
  export declare function extractEntitiesRegex(text: string): string[];
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-mnemopi",
4
- "version": "16.0.4",
4
+ "version": "16.0.5",
5
5
  "description": "Local SQLite memory engine for Oh My Pi agents",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -39,9 +39,9 @@
39
39
  "fmt": "biome format --write ."
40
40
  },
41
41
  "dependencies": {
42
- "@oh-my-pi/pi-ai": "16.0.4",
43
- "@oh-my-pi/pi-catalog": "16.0.4",
44
- "@oh-my-pi/pi-utils": "16.0.4",
42
+ "@oh-my-pi/pi-ai": "16.0.5",
43
+ "@oh-my-pi/pi-catalog": "16.0.5",
44
+ "@oh-my-pi/pi-utils": "16.0.5",
45
45
  "lru-cache": "11.5.1"
46
46
  },
47
47
  "peerDependencies": {
package/src/config.ts CHANGED
@@ -126,6 +126,10 @@ export function episodicRecallLimit(env: Env = process.env): number {
126
126
  return envInt("MNEMOPI_EP_LIMIT", 50000, env);
127
127
  }
128
128
 
129
+ export function maxEpisodeChars(env: Env = process.env): number {
130
+ return Math.max(1, envInt("MNEMOPI_MAX_EPISODE_CHARS", 100000, env));
131
+ }
132
+
129
133
  export function sleepBatchSize(env: Env = process.env): number {
130
134
  return envInt("MNEMOPI_SLEEP_BATCH", 5000, env);
131
135
  }
@@ -1,6 +1,7 @@
1
1
  import type { SQLQueryBindings } from "bun:sqlite";
2
2
  import { generateId, stableMemoryId } from "../../util/ids";
3
3
  import { aaakEncode } from "../aaak";
4
+ import { REGEX_EXTRACTION_MAX_INPUT_CHARS } from "../entities";
4
5
  import { EpisodicGraph } from "../episodic-graph";
5
6
  import { heuristicExtractFacts } from "../extraction";
6
7
  import { clampVeracity } from "../veracity-consolidation";
@@ -57,6 +58,70 @@ const TIER2_DAYS = envInt("MNEMOPI_TIER2_DAYS", 30);
57
58
  const TIER3_DAYS = envInt("MNEMOPI_TIER3_DAYS", 180);
58
59
  const DEGRADE_BATCH_SIZE = envInt("MNEMOPI_DEGRADE_BATCH", 100);
59
60
  const TIER3_MAX_CHARS = envInt("MNEMOPI_TIER3_MAX_CHARS", 300);
61
+ const DEFAULT_MAX_EPISODE_CHARS = 100_000;
62
+ const SLEEP_SUMMARY_SEPARATOR = " | ";
63
+ const SLEEP_TRUNCATION_MARKER = "\n[... sleep_consolidation episode truncated by maxEpisodeChars ...]";
64
+ const PATTERN_FACT_EXTRACTION_MAX_INPUT_CHARS = REGEX_EXTRACTION_MAX_INPUT_CHARS;
65
+
66
+ type SleepSummary = {
67
+ summary: string;
68
+ originalChars: number;
69
+ truncated: boolean;
70
+ maxChars: number;
71
+ };
72
+ type SleepChunk = {
73
+ items: Row[];
74
+ originalChars: number;
75
+ };
76
+
77
+ function normalizedMaxEpisodeChars(beam: BeamMemoryState): number {
78
+ const configured = Math.trunc(beam.config?.maxEpisodeChars ?? DEFAULT_MAX_EPISODE_CHARS);
79
+ return Number.isFinite(configured) && configured > 0 ? configured : DEFAULT_MAX_EPISODE_CHARS;
80
+ }
81
+
82
+ function markTruncated(content: string, maxChars: number): string {
83
+ if (maxChars <= 0) return "";
84
+ if (maxChars <= SLEEP_TRUNCATION_MARKER.length) return content.slice(0, maxChars);
85
+ const bodyChars = maxChars - SLEEP_TRUNCATION_MARKER.length;
86
+ return `${content.slice(0, bodyChars).trimEnd()}${SLEEP_TRUNCATION_MARKER}`;
87
+ }
88
+
89
+ function splitSleepItems(beam: BeamMemoryState, source: string, items: readonly Row[]): SleepChunk[] {
90
+ const maxChars = normalizedMaxEpisodeChars(beam);
91
+ const prefixChars = `[${source}] `.length;
92
+ const joinedLimit = Math.max(0, maxChars - prefixChars);
93
+ const chunks: SleepChunk[] = [];
94
+ let current: Row[] = [];
95
+ let currentChars = 0;
96
+
97
+ for (const item of items) {
98
+ const contentChars = (rowValue(item, "content") ?? "").length;
99
+ const separatorChars = current.length === 0 ? 0 : SLEEP_SUMMARY_SEPARATOR.length;
100
+ if (current.length > 0 && currentChars + separatorChars + contentChars > joinedLimit) {
101
+ chunks.push({ items: current, originalChars: currentChars });
102
+ current = [];
103
+ currentChars = 0;
104
+ }
105
+ current.push(item);
106
+ currentChars += (current.length === 1 ? 0 : SLEEP_SUMMARY_SEPARATOR.length) + contentChars;
107
+ }
108
+ if (current.length > 0) chunks.push({ items: current, originalChars: currentChars });
109
+ return chunks;
110
+ }
111
+
112
+ function buildSleepSummary(beam: BeamMemoryState, source: string, chunk: SleepChunk): SleepSummary {
113
+ const maxChars = normalizedMaxEpisodeChars(beam);
114
+ const prefix = `[${source}] `;
115
+ const joined = chunk.items.map(item => rowValue(item, "content") ?? "").join(SLEEP_SUMMARY_SEPARATOR);
116
+ const uncapped = `${prefix}${aaakEncode(joined)}`;
117
+ const truncated = uncapped.length > maxChars;
118
+ return {
119
+ summary: truncated ? markTruncated(uncapped, maxChars) : uncapped,
120
+ originalChars: chunk.originalChars,
121
+ truncated,
122
+ maxChars,
123
+ };
124
+ }
60
125
 
61
126
  function isoNow(): string {
62
127
  return new Date().toISOString();
@@ -440,6 +505,7 @@ export function extractAndStoreFacts(
440
505
  decision: 0,
441
506
  };
442
507
  const text = String(content ?? "");
508
+ if (text.length > PATTERN_FACT_EXTRACTION_MAX_INPUT_CHARS) return counts;
443
509
  for (const match of text.matchAll(
444
510
  /(\d+(?:[.,]\d+)?)\s*(ms|sec|seconds?|minutes?|hours?|days?|weeks?|months?|%|KB|MB|GB|TB|rows?|columns?|roles?|features?|bugs?|commits?|cards?|users?|items?|tests?|APIs?|endpoints?|sprints?|tickets?)\b/gi,
445
511
  )) {
@@ -878,26 +944,34 @@ export function sleep(beam: BeamMemoryState, dryRun = false): SleepResult {
878
944
  const consolidatedIds: string[] = [];
879
945
  let summariesCreated = 0;
880
946
  for (const [source, items] of grouped) {
881
- const lines = items.map(item => rowValue(item, "content") ?? "");
882
- const ids = items.map(item => rowValue(item, "id")).filter((id): id is string => id !== null);
883
- let scope = "session";
884
- let validUntil: string | null = null;
885
- for (const item of items) {
886
- if (rowValue(item, "scope") === "global") scope = "global";
887
- const itemValidUntil = rowValue(item, "valid_until");
888
- if (itemValidUntil && (validUntil === null || itemValidUntil < validUntil)) validUntil = itemValidUntil;
889
- }
890
- const summary = `[${source}] ${aaakEncode(lines.join(" | "))}`;
891
- if (!dryRun) {
892
- consolidateToEpisodic(beam, summary, ids, "sleep_consolidation", 0.6, {
893
- scope,
894
- validUntil,
895
- veracity: aggregateEpisodicVeracity(items.map(item => rowValue(item, "veracity") ?? "unknown")),
896
- metadata: { original_count: items.length, source, llm_used: false },
897
- });
947
+ for (const chunk of splitSleepItems(beam, source, items)) {
948
+ const ids = chunk.items.map(item => rowValue(item, "id")).filter((id): id is string => id !== null);
949
+ let scope = "session";
950
+ let validUntil: string | null = null;
951
+ for (const item of chunk.items) {
952
+ if (rowValue(item, "scope") === "global") scope = "global";
953
+ const itemValidUntil = rowValue(item, "valid_until");
954
+ if (itemValidUntil && (validUntil === null || itemValidUntil < validUntil)) validUntil = itemValidUntil;
955
+ }
956
+ const sleepSummary = buildSleepSummary(beam, source, chunk);
957
+ const metadata: Metadata = { original_count: chunk.items.length, source, llm_used: false };
958
+ if (sleepSummary.truncated) {
959
+ metadata.truncated = true;
960
+ metadata.original_chars = sleepSummary.originalChars;
961
+ metadata.max_chars = sleepSummary.maxChars;
962
+ }
963
+ const summary = sleepSummary.summary;
964
+ if (!dryRun) {
965
+ consolidateToEpisodic(beam, summary, ids, "sleep_consolidation", 0.6, {
966
+ scope,
967
+ validUntil,
968
+ veracity: aggregateEpisodicVeracity(chunk.items.map(item => rowValue(item, "veracity") ?? "unknown")),
969
+ metadata,
970
+ });
971
+ }
972
+ consolidatedIds.push(...ids);
973
+ summariesCreated++;
898
974
  }
899
- consolidatedIds.push(...ids);
900
- summariesCreated++;
901
975
  }
902
976
  if (!dryRun) {
903
977
  beam.db.run(
@@ -1,6 +1,6 @@
1
1
  import type { Database } from "bun:sqlite";
2
2
  import { existsSync } from "node:fs";
3
- import { ftsWeight, importanceWeight, vectorWeight } from "../../config";
3
+ import { ftsWeight, importanceWeight, maxEpisodeChars, vectorWeight } from "../../config";
4
4
  import { closeQuietly, openDatabase } from "../../db";
5
5
  import { AnnotationStore } from "../annotations";
6
6
  import { EpisodicGraph } from "../episodic-graph";
@@ -68,6 +68,7 @@ const DEFAULT_CONFIG: BeamConfig = {
68
68
  importanceWeight: 0.2,
69
69
  useCloud: false,
70
70
  localLlmEnabled: false,
71
+ maxEpisodeChars: 100_000,
71
72
  };
72
73
 
73
74
  function normalizeConfig(options: BeamMemoryOptions): BeamConfig {
@@ -82,6 +83,7 @@ function normalizeConfig(options: BeamMemoryOptions): BeamConfig {
82
83
  importanceWeight: configured.importanceWeight ?? importanceWeight(),
83
84
  useCloud,
84
85
  localLlmEnabled: configured.localLlmEnabled ?? DEFAULT_CONFIG.localLlmEnabled,
86
+ maxEpisodeChars: configured.maxEpisodeChars ?? maxEpisodeChars(),
85
87
  };
86
88
  }
87
89
  function autoMigrateAnnotations(db: Database, dbPath: string | undefined): void {
@@ -53,6 +53,7 @@ export interface BeamConfig {
53
53
  importanceWeight: number;
54
54
  useCloud: boolean;
55
55
  localLlmEnabled: boolean;
56
+ maxEpisodeChars: number;
56
57
  }
57
58
 
58
59
  export interface BeamMemoryOptions {
@@ -118,6 +118,9 @@ const ENTITY_EXTRACTION_STOP_WORD_VALUES = [
118
118
  ] as const;
119
119
 
120
120
  export const ENTITY_EXTRACTION_STOP_WORDS: ReadonlySet<string> = new Set(ENTITY_EXTRACTION_STOP_WORD_VALUES);
121
+ /** Maximum raw text size for regex-only entity extraction. */
122
+ export const REGEX_EXTRACTION_MAX_INPUT_CHARS = 50_000;
123
+
121
124
  const ENTITY_PATTERNS: readonly RegExp[] = [
122
125
  /@(\w{2,30})/g,
123
126
  /#(\w{2,30})/g,
@@ -192,6 +195,7 @@ function isPureNumber(entity: string): boolean {
192
195
 
193
196
  export function extractEntitiesRegex(text: string): string[] {
194
197
  if (typeof text !== "string" || text.length === 0) return [];
198
+ if (text.length > REGEX_EXTRACTION_MAX_INPUT_CHARS) return [];
195
199
 
196
200
  const entities = new Set<string>();
197
201
  for (const sourcePattern of ENTITY_PATTERNS) {