@oh-my-pi/pi-mnemopi 16.0.4 → 16.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/dist/types/config.d.ts +1 -0
- package/dist/types/core/beam/types.d.ts +1 -0
- package/dist/types/core/entities.d.ts +2 -0
- package/package.json +4 -4
- package/src/config.ts +4 -0
- package/src/core/beam/consolidate.ts +93 -19
- package/src/core/beam/index.ts +3 -1
- package/src/core/beam/types.ts +1 -0
- package/src/core/entities.ts +4 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [16.0.5] - 2026-06-17
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- Capped `sleep_consolidation` episodic rows at `maxEpisodeChars` (default 100KB, `MNEMOPI_MAX_EPISODE_CHARS`) so raw session transcripts cannot be stored and extracted as multi-megabyte episodes. ([#2869](https://github.com/can1357/oh-my-pi/issues/2869))
|
|
10
|
+
- Skipped regex-only entity and pattern fact extraction for oversized raw transcripts so progress/log noise cannot flood MEMORIA with junk facts. ([#2868](https://github.com/can1357/oh-my-pi/issues/2868))
|
|
11
|
+
|
|
5
12
|
## [15.13.1] - 2026-06-15
|
|
6
13
|
|
|
7
14
|
### Added
|
package/dist/types/config.d.ts
CHANGED
|
@@ -33,6 +33,7 @@ export declare function apiEmbeddingsAvailable(env?: Env): boolean;
|
|
|
33
33
|
export declare function workingMemoryMaxItems(env?: Env): number;
|
|
34
34
|
export declare function workingMemoryTtlHours(env?: Env): number;
|
|
35
35
|
export declare function episodicRecallLimit(env?: Env): number;
|
|
36
|
+
export declare function maxEpisodeChars(env?: Env): number;
|
|
36
37
|
export declare function sleepBatchSize(env?: Env): number;
|
|
37
38
|
export declare function scratchpadMaxItems(env?: Env): number;
|
|
38
39
|
export declare function recencyHalflifeHours(env?: Env): number;
|
package/dist/types/core/entities.d.ts
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
export declare const ENTITY_EXTRACTION_STOP_WORDS: ReadonlySet<string>;
|
|
2
|
+
/** Maximum raw text size for regex-only entity extraction. */
|
|
3
|
+
export declare const REGEX_EXTRACTION_MAX_INPUT_CHARS = 50000;
|
|
2
4
|
export declare function levenshteinDistance(s1: string, s2: string): number;
|
|
3
5
|
export declare function similarity(s1: string, s2: string): number;
|
|
4
6
|
export declare function extractEntitiesRegex(text: string): string[];
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-mnemopi",
|
|
4
|
-
"version": "16.0.4",
|
|
4
|
+
"version": "16.0.5",
|
|
5
5
|
"description": "Local SQLite memory engine for Oh My Pi agents",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -39,9 +39,9 @@
|
|
|
39
39
|
"fmt": "biome format --write ."
|
|
40
40
|
},
|
|
41
41
|
"dependencies": {
|
|
42
|
-
"@oh-my-pi/pi-ai": "16.0.4",
|
|
43
|
-
"@oh-my-pi/pi-catalog": "16.0.4",
|
|
44
|
-
"@oh-my-pi/pi-utils": "16.0.4",
|
|
42
|
+
"@oh-my-pi/pi-ai": "16.0.5",
|
|
43
|
+
"@oh-my-pi/pi-catalog": "16.0.5",
|
|
44
|
+
"@oh-my-pi/pi-utils": "16.0.5",
|
|
45
45
|
"lru-cache": "11.5.1"
|
|
46
46
|
},
|
|
47
47
|
"peerDependencies": {
|
package/src/config.ts
CHANGED
|
@@ -126,6 +126,10 @@ export function episodicRecallLimit(env: Env = process.env): number {
|
|
|
126
126
|
return envInt("MNEMOPI_EP_LIMIT", 50000, env);
|
|
127
127
|
}
|
|
128
128
|
|
|
129
|
+
export function maxEpisodeChars(env: Env = process.env): number {
|
|
130
|
+
return Math.max(1, envInt("MNEMOPI_MAX_EPISODE_CHARS", 100000, env));
|
|
131
|
+
}
|
|
132
|
+
|
|
129
133
|
export function sleepBatchSize(env: Env = process.env): number {
|
|
130
134
|
return envInt("MNEMOPI_SLEEP_BATCH", 5000, env);
|
|
131
135
|
}
|
package/src/core/beam/consolidate.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { SQLQueryBindings } from "bun:sqlite";
|
|
2
2
|
import { generateId, stableMemoryId } from "../../util/ids";
|
|
3
3
|
import { aaakEncode } from "../aaak";
|
|
4
|
+
import { REGEX_EXTRACTION_MAX_INPUT_CHARS } from "../entities";
|
|
4
5
|
import { EpisodicGraph } from "../episodic-graph";
|
|
5
6
|
import { heuristicExtractFacts } from "../extraction";
|
|
6
7
|
import { clampVeracity } from "../veracity-consolidation";
|
|
@@ -57,6 +58,70 @@ const TIER2_DAYS = envInt("MNEMOPI_TIER2_DAYS", 30);
|
|
|
57
58
|
const TIER3_DAYS = envInt("MNEMOPI_TIER3_DAYS", 180);
|
|
58
59
|
const DEGRADE_BATCH_SIZE = envInt("MNEMOPI_DEGRADE_BATCH", 100);
|
|
59
60
|
const TIER3_MAX_CHARS = envInt("MNEMOPI_TIER3_MAX_CHARS", 300);
|
|
61
|
+
const DEFAULT_MAX_EPISODE_CHARS = 100_000;
|
|
62
|
+
const SLEEP_SUMMARY_SEPARATOR = " | ";
|
|
63
|
+
const SLEEP_TRUNCATION_MARKER = "\n[... sleep_consolidation episode truncated by maxEpisodeChars ...]";
|
|
64
|
+
const PATTERN_FACT_EXTRACTION_MAX_INPUT_CHARS = REGEX_EXTRACTION_MAX_INPUT_CHARS;
|
|
65
|
+
|
|
66
|
+
type SleepSummary = {
|
|
67
|
+
summary: string;
|
|
68
|
+
originalChars: number;
|
|
69
|
+
truncated: boolean;
|
|
70
|
+
maxChars: number;
|
|
71
|
+
};
|
|
72
|
+
type SleepChunk = {
|
|
73
|
+
items: Row[];
|
|
74
|
+
originalChars: number;
|
|
75
|
+
};
|
|
76
|
+
|
|
77
|
+
function normalizedMaxEpisodeChars(beam: BeamMemoryState): number {
|
|
78
|
+
const configured = Math.trunc(beam.config?.maxEpisodeChars ?? DEFAULT_MAX_EPISODE_CHARS);
|
|
79
|
+
return Number.isFinite(configured) && configured > 0 ? configured : DEFAULT_MAX_EPISODE_CHARS;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
function markTruncated(content: string, maxChars: number): string {
|
|
83
|
+
if (maxChars <= 0) return "";
|
|
84
|
+
if (maxChars <= SLEEP_TRUNCATION_MARKER.length) return content.slice(0, maxChars);
|
|
85
|
+
const bodyChars = maxChars - SLEEP_TRUNCATION_MARKER.length;
|
|
86
|
+
return `${content.slice(0, bodyChars).trimEnd()}${SLEEP_TRUNCATION_MARKER}`;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
function splitSleepItems(beam: BeamMemoryState, source: string, items: readonly Row[]): SleepChunk[] {
|
|
90
|
+
const maxChars = normalizedMaxEpisodeChars(beam);
|
|
91
|
+
const prefixChars = `[${source}] `.length;
|
|
92
|
+
const joinedLimit = Math.max(0, maxChars - prefixChars);
|
|
93
|
+
const chunks: SleepChunk[] = [];
|
|
94
|
+
let current: Row[] = [];
|
|
95
|
+
let currentChars = 0;
|
|
96
|
+
|
|
97
|
+
for (const item of items) {
|
|
98
|
+
const contentChars = (rowValue(item, "content") ?? "").length;
|
|
99
|
+
const separatorChars = current.length === 0 ? 0 : SLEEP_SUMMARY_SEPARATOR.length;
|
|
100
|
+
if (current.length > 0 && currentChars + separatorChars + contentChars > joinedLimit) {
|
|
101
|
+
chunks.push({ items: current, originalChars: currentChars });
|
|
102
|
+
current = [];
|
|
103
|
+
currentChars = 0;
|
|
104
|
+
}
|
|
105
|
+
current.push(item);
|
|
106
|
+
currentChars += (current.length === 1 ? 0 : SLEEP_SUMMARY_SEPARATOR.length) + contentChars;
|
|
107
|
+
}
|
|
108
|
+
if (current.length > 0) chunks.push({ items: current, originalChars: currentChars });
|
|
109
|
+
return chunks;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
function buildSleepSummary(beam: BeamMemoryState, source: string, chunk: SleepChunk): SleepSummary {
|
|
113
|
+
const maxChars = normalizedMaxEpisodeChars(beam);
|
|
114
|
+
const prefix = `[${source}] `;
|
|
115
|
+
const joined = chunk.items.map(item => rowValue(item, "content") ?? "").join(SLEEP_SUMMARY_SEPARATOR);
|
|
116
|
+
const uncapped = `${prefix}${aaakEncode(joined)}`;
|
|
117
|
+
const truncated = uncapped.length > maxChars;
|
|
118
|
+
return {
|
|
119
|
+
summary: truncated ? markTruncated(uncapped, maxChars) : uncapped,
|
|
120
|
+
originalChars: chunk.originalChars,
|
|
121
|
+
truncated,
|
|
122
|
+
maxChars,
|
|
123
|
+
};
|
|
124
|
+
}
|
|
60
125
|
|
|
61
126
|
function isoNow(): string {
|
|
62
127
|
return new Date().toISOString();
|
|
@@ -440,6 +505,7 @@ export function extractAndStoreFacts(
|
|
|
440
505
|
decision: 0,
|
|
441
506
|
};
|
|
442
507
|
const text = String(content ?? "");
|
|
508
|
+
if (text.length > PATTERN_FACT_EXTRACTION_MAX_INPUT_CHARS) return counts;
|
|
443
509
|
for (const match of text.matchAll(
|
|
444
510
|
/(\d+(?:[.,]\d+)?)\s*(ms|sec|seconds?|minutes?|hours?|days?|weeks?|months?|%|KB|MB|GB|TB|rows?|columns?|roles?|features?|bugs?|commits?|cards?|users?|items?|tests?|APIs?|endpoints?|sprints?|tickets?)\b/gi,
|
|
445
511
|
)) {
|
|
@@ -878,26 +944,34 @@ export function sleep(beam: BeamMemoryState, dryRun = false): SleepResult {
|
|
|
878
944
|
const consolidatedIds: string[] = [];
|
|
879
945
|
let summariesCreated = 0;
|
|
880
946
|
for (const [source, items] of grouped) {
|
|
881
|
-
const
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
947
|
+
for (const chunk of splitSleepItems(beam, source, items)) {
|
|
948
|
+
const ids = chunk.items.map(item => rowValue(item, "id")).filter((id): id is string => id !== null);
|
|
949
|
+
let scope = "session";
|
|
950
|
+
let validUntil: string | null = null;
|
|
951
|
+
for (const item of chunk.items) {
|
|
952
|
+
if (rowValue(item, "scope") === "global") scope = "global";
|
|
953
|
+
const itemValidUntil = rowValue(item, "valid_until");
|
|
954
|
+
if (itemValidUntil && (validUntil === null || itemValidUntil < validUntil)) validUntil = itemValidUntil;
|
|
955
|
+
}
|
|
956
|
+
const sleepSummary = buildSleepSummary(beam, source, chunk);
|
|
957
|
+
const metadata: Metadata = { original_count: chunk.items.length, source, llm_used: false };
|
|
958
|
+
if (sleepSummary.truncated) {
|
|
959
|
+
metadata.truncated = true;
|
|
960
|
+
metadata.original_chars = sleepSummary.originalChars;
|
|
961
|
+
metadata.max_chars = sleepSummary.maxChars;
|
|
962
|
+
}
|
|
963
|
+
const summary = sleepSummary.summary;
|
|
964
|
+
if (!dryRun) {
|
|
965
|
+
consolidateToEpisodic(beam, summary, ids, "sleep_consolidation", 0.6, {
|
|
966
|
+
scope,
|
|
967
|
+
validUntil,
|
|
968
|
+
veracity: aggregateEpisodicVeracity(chunk.items.map(item => rowValue(item, "veracity") ?? "unknown")),
|
|
969
|
+
metadata,
|
|
970
|
+
});
|
|
971
|
+
}
|
|
972
|
+
consolidatedIds.push(...ids);
|
|
973
|
+
summariesCreated++;
|
|
898
974
|
}
|
|
899
|
-
consolidatedIds.push(...ids);
|
|
900
|
-
summariesCreated++;
|
|
901
975
|
}
|
|
902
976
|
if (!dryRun) {
|
|
903
977
|
beam.db.run(
|
package/src/core/beam/index.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { Database } from "bun:sqlite";
|
|
2
2
|
import { existsSync } from "node:fs";
|
|
3
|
-
import { ftsWeight, importanceWeight, vectorWeight } from "../../config";
|
|
3
|
+
import { ftsWeight, importanceWeight, maxEpisodeChars, vectorWeight } from "../../config";
|
|
4
4
|
import { closeQuietly, openDatabase } from "../../db";
|
|
5
5
|
import { AnnotationStore } from "../annotations";
|
|
6
6
|
import { EpisodicGraph } from "../episodic-graph";
|
|
@@ -68,6 +68,7 @@ const DEFAULT_CONFIG: BeamConfig = {
|
|
|
68
68
|
importanceWeight: 0.2,
|
|
69
69
|
useCloud: false,
|
|
70
70
|
localLlmEnabled: false,
|
|
71
|
+
maxEpisodeChars: 100_000,
|
|
71
72
|
};
|
|
72
73
|
|
|
73
74
|
function normalizeConfig(options: BeamMemoryOptions): BeamConfig {
|
|
@@ -82,6 +83,7 @@ function normalizeConfig(options: BeamMemoryOptions): BeamConfig {
|
|
|
82
83
|
importanceWeight: configured.importanceWeight ?? importanceWeight(),
|
|
83
84
|
useCloud,
|
|
84
85
|
localLlmEnabled: configured.localLlmEnabled ?? DEFAULT_CONFIG.localLlmEnabled,
|
|
86
|
+
maxEpisodeChars: configured.maxEpisodeChars ?? maxEpisodeChars(),
|
|
85
87
|
};
|
|
86
88
|
}
|
|
87
89
|
function autoMigrateAnnotations(db: Database, dbPath: string | undefined): void {
|
package/src/core/beam/types.ts
CHANGED
package/src/core/entities.ts
CHANGED
|
@@ -118,6 +118,9 @@ const ENTITY_EXTRACTION_STOP_WORD_VALUES = [
|
|
|
118
118
|
] as const;
|
|
119
119
|
|
|
120
120
|
export const ENTITY_EXTRACTION_STOP_WORDS: ReadonlySet<string> = new Set(ENTITY_EXTRACTION_STOP_WORD_VALUES);
|
|
121
|
+
/** Maximum raw text size for regex-only entity extraction. */
|
|
122
|
+
export const REGEX_EXTRACTION_MAX_INPUT_CHARS = 50_000;
|
|
123
|
+
|
|
121
124
|
const ENTITY_PATTERNS: readonly RegExp[] = [
|
|
122
125
|
/@(\w{2,30})/g,
|
|
123
126
|
/#(\w{2,30})/g,
|
|
@@ -192,6 +195,7 @@ function isPureNumber(entity: string): boolean {
|
|
|
192
195
|
|
|
193
196
|
export function extractEntitiesRegex(text: string): string[] {
|
|
194
197
|
if (typeof text !== "string" || text.length === 0) return [];
|
|
198
|
+
if (text.length > REGEX_EXTRACTION_MAX_INPUT_CHARS) return [];
|
|
195
199
|
|
|
196
200
|
const entities = new Set<string>();
|
|
197
201
|
for (const sourcePattern of ENTITY_PATTERNS) {
|