opencodekit 0.17.13 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +4 -6
- package/dist/template/.opencode/dcp.jsonc +81 -81
- package/dist/template/.opencode/memory/memory.db +0 -0
- package/dist/template/.opencode/memory.db +0 -0
- package/dist/template/.opencode/memory.db-shm +0 -0
- package/dist/template/.opencode/memory.db-wal +0 -0
- package/dist/template/.opencode/opencode.json +199 -23
- package/dist/template/.opencode/opencode.json.tui-migration.bak +1380 -0
- package/dist/template/.opencode/package.json +1 -1
- package/dist/template/.opencode/plugin/lib/capture.ts +177 -0
- package/dist/template/.opencode/plugin/lib/context.ts +194 -0
- package/dist/template/.opencode/plugin/lib/curator.ts +234 -0
- package/dist/template/.opencode/plugin/lib/db/maintenance.ts +312 -0
- package/dist/template/.opencode/plugin/lib/db/observations.ts +299 -0
- package/dist/template/.opencode/plugin/lib/db/pipeline.ts +520 -0
- package/dist/template/.opencode/plugin/lib/db/schema.ts +356 -0
- package/dist/template/.opencode/plugin/lib/db/types.ts +211 -0
- package/dist/template/.opencode/plugin/lib/distill.ts +376 -0
- package/dist/template/.opencode/plugin/lib/inject.ts +126 -0
- package/dist/template/.opencode/plugin/lib/memory-admin-tools.ts +188 -0
- package/dist/template/.opencode/plugin/lib/memory-db.ts +54 -936
- package/dist/template/.opencode/plugin/lib/memory-helpers.ts +202 -0
- package/dist/template/.opencode/plugin/lib/memory-hooks.ts +240 -0
- package/dist/template/.opencode/plugin/lib/memory-tools.ts +341 -0
- package/dist/template/.opencode/plugin/memory.ts +56 -60
- package/dist/template/.opencode/plugin/sessions.ts +372 -93
- package/dist/template/.opencode/tui.json +15 -0
- package/package.json +1 -1
- package/dist/template/.opencode/tool/action-queue.ts +0 -313
- package/dist/template/.opencode/tool/memory-admin.ts +0 -445
- package/dist/template/.opencode/tool/memory-get.ts +0 -143
- package/dist/template/.opencode/tool/memory-read.ts +0 -45
- package/dist/template/.opencode/tool/memory-search.ts +0 -264
- package/dist/template/.opencode/tool/memory-timeline.ts +0 -105
- package/dist/template/.opencode/tool/memory-update.ts +0 -63
- package/dist/template/.opencode/tool/observation.ts +0 -357
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Message Capture Module
|
|
3
|
+
*
|
|
4
|
+
* Handles message.part.updated events: extracts text content from parts,
|
|
5
|
+
* estimates tokens, and stores/updates temporal_messages for later distillation.
|
|
6
|
+
*
|
|
7
|
+
* Architecture note: OpenCode fires separate events for messages and parts.
|
|
8
|
+
* - message.updated → { info: Message } (metadata only, no text)
|
|
9
|
+
* - message.part.updated → { part: Part, delta?: string } (has text content)
|
|
10
|
+
*
|
|
11
|
+
* We capture from message.part.updated since that's where the text lives.
|
|
12
|
+
* Each part update upserts the message row, accumulating content.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import type { Database } from "bun:sqlite";
|
|
16
|
+
import {
|
|
17
|
+
estimateTokens,
|
|
18
|
+
getMemoryDB,
|
|
19
|
+
MEMORY_CONFIG,
|
|
20
|
+
type TemporalMessageInput,
|
|
21
|
+
} from "./memory-db.js";
|
|
22
|
+
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// Types
|
|
25
|
+
// ============================================================================
|
|
26
|
+
|
|
27
|
+
/** Shape of properties from EventMessagePartUpdated */
export interface PartEvent {
  part?: {
    /** Part identifier */
    id?: string;
    /** Session the part belongs to */
    sessionID?: string;
    /** Parent message — used as the upsert key into temporal_messages */
    messageID?: string;
    /** Part kind; only "text" and "reasoning" parts are captured */
    type?: string;
    /** Full text of the part (OpenCode sends cumulative text, not deltas) */
    text?: string;
  };
  /** Incremental text delta — not read by captureMessagePart; full text comes from part.text */
  delta?: string;
}
|
|
38
|
+
|
|
39
|
+
/** Shape of properties from EventMessageUpdated (metadata only) */
export interface MessageEvent {
  info?: {
    /** Message identifier */
    id?: string;
    /** Owning session */
    sessionID?: string;
    /** Author role; only "user" and "assistant" messages are captured */
    role?: string;
    /** Creation timestamp — treated as epoch ms (Date.now() is the fallback) */
    time?: { created?: number };
  };
}
|
|
48
|
+
|
|
49
|
+
// ============================================================================
|
|
50
|
+
// Capture Handlers
|
|
51
|
+
// ============================================================================
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Process a message.part.updated event.
|
|
55
|
+
* Upserts the temporal_messages row for this message, appending new text.
|
|
56
|
+
*
|
|
57
|
+
* Returns true if part was captured, false if skipped.
|
|
58
|
+
*/
|
|
59
|
+
export function captureMessagePart(props: PartEvent): boolean {
|
|
60
|
+
if (!MEMORY_CONFIG.capture.enabled) return false;
|
|
61
|
+
|
|
62
|
+
const part = props.part;
|
|
63
|
+
if (!part?.sessionID || !part.messageID) return false;
|
|
64
|
+
|
|
65
|
+
// Only capture text and reasoning parts
|
|
66
|
+
if (part.type !== "text" && part.type !== "reasoning") return false;
|
|
67
|
+
|
|
68
|
+
const text = part.text;
|
|
69
|
+
if (!text || text.trim().length === 0) return false;
|
|
70
|
+
|
|
71
|
+
// Cap content length
|
|
72
|
+
const cappedText = text.slice(0, MEMORY_CONFIG.capture.maxContentLength);
|
|
73
|
+
|
|
74
|
+
try {
|
|
75
|
+
const db = getMemoryDB();
|
|
76
|
+
upsertMessageContent(db, part.sessionID, part.messageID, cappedText);
|
|
77
|
+
return true;
|
|
78
|
+
} catch {
|
|
79
|
+
return false;
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* Process a message.updated event to capture role metadata.
|
|
85
|
+
* Only stores role — content comes from message.part.updated.
|
|
86
|
+
*
|
|
87
|
+
* Returns true if metadata was captured.
|
|
88
|
+
*/
|
|
89
|
+
export function captureMessageMeta(props: MessageEvent): boolean {
|
|
90
|
+
if (!MEMORY_CONFIG.capture.enabled) return false;
|
|
91
|
+
|
|
92
|
+
const info = props.info;
|
|
93
|
+
if (!info?.id || !info.sessionID) return false;
|
|
94
|
+
|
|
95
|
+
const role = info.role ?? "unknown";
|
|
96
|
+
if (role !== "user" && role !== "assistant") return false;
|
|
97
|
+
|
|
98
|
+
try {
|
|
99
|
+
const db = getMemoryDB();
|
|
100
|
+
// Upsert with role but empty content — parts will fill it in
|
|
101
|
+
const existing = db
|
|
102
|
+
.query<{ content: string }, [string]>(
|
|
103
|
+
"SELECT content FROM temporal_messages WHERE message_id = ?",
|
|
104
|
+
)
|
|
105
|
+
.get(info.id);
|
|
106
|
+
|
|
107
|
+
if (!existing) {
|
|
108
|
+
// Insert placeholder that parts will update
|
|
109
|
+
const now = info.time?.created ?? Date.now();
|
|
110
|
+
db.run(
|
|
111
|
+
`INSERT OR IGNORE INTO temporal_messages
|
|
112
|
+
(session_id, message_id, role, content, token_estimate, time_created)
|
|
113
|
+
VALUES (?, ?, ?, '', 0, ?)`,
|
|
114
|
+
info.sessionID,
|
|
115
|
+
info.id,
|
|
116
|
+
role,
|
|
117
|
+
now,
|
|
118
|
+
);
|
|
119
|
+
}
|
|
120
|
+
return true;
|
|
121
|
+
} catch {
|
|
122
|
+
return false;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// ============================================================================
|
|
127
|
+
// Internal
|
|
128
|
+
// ============================================================================
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Upsert a message row, replacing content with the latest part text.
|
|
132
|
+
* Since parts stream in incrementally, we replace (not append) with the
|
|
133
|
+
* latest full text from the part — OpenCode sends cumulative text.
|
|
134
|
+
*/
|
|
135
|
+
function upsertMessageContent(
|
|
136
|
+
db: Database,
|
|
137
|
+
sessionId: string,
|
|
138
|
+
messageId: string,
|
|
139
|
+
text: string,
|
|
140
|
+
): void {
|
|
141
|
+
const tokenEstimate = estimateTokens(text);
|
|
142
|
+
const now = Date.now();
|
|
143
|
+
|
|
144
|
+
// Try update first (most common case — message.updated fires before parts)
|
|
145
|
+
const result = db.run(
|
|
146
|
+
`UPDATE temporal_messages
|
|
147
|
+
SET content = ?, token_estimate = ?
|
|
148
|
+
WHERE message_id = ?`,
|
|
149
|
+
text,
|
|
150
|
+
tokenEstimate,
|
|
151
|
+
messageId,
|
|
152
|
+
);
|
|
153
|
+
|
|
154
|
+
if (result.changes === 0) {
|
|
155
|
+
// No existing row — insert with unknown role (message.updated hasn't fired yet)
|
|
156
|
+
const input: TemporalMessageInput = {
|
|
157
|
+
session_id: sessionId,
|
|
158
|
+
message_id: messageId,
|
|
159
|
+
role: "assistant", // Parts without prior message.updated are typically assistant
|
|
160
|
+
content: text,
|
|
161
|
+
token_estimate: tokenEstimate,
|
|
162
|
+
time_created: now,
|
|
163
|
+
};
|
|
164
|
+
|
|
165
|
+
db.run(
|
|
166
|
+
`INSERT OR IGNORE INTO temporal_messages
|
|
167
|
+
(session_id, message_id, role, content, token_estimate, time_created)
|
|
168
|
+
VALUES (?, ?, ?, ?, ?, ?)`,
|
|
169
|
+
input.session_id,
|
|
170
|
+
input.message_id,
|
|
171
|
+
input.role,
|
|
172
|
+
input.content,
|
|
173
|
+
input.token_estimate,
|
|
174
|
+
input.time_created,
|
|
175
|
+
);
|
|
176
|
+
}
|
|
177
|
+
}
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context Window Management Module
|
|
3
|
+
*
|
|
4
|
+
* Implements messages.transform: estimates token usage across conversation
|
|
5
|
+
* messages and compresses oldest messages when approaching the budget limit.
|
|
6
|
+
*
|
|
7
|
+
* Protects the most recent N messages from compression.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { estimateTokens, MEMORY_CONFIG } from "./memory-db.js";
|
|
11
|
+
|
|
12
|
+
// ============================================================================
|
|
13
|
+
// Types
|
|
14
|
+
// ============================================================================
|
|
15
|
+
|
|
16
|
+
/** Minimal message shape from messages.transform output */
interface TransformMessage {
  info: {
    /** Message identifier */
    id?: string;
    /** Author role; used to label compressed summaries */
    role?: string;
    time?: {
      /** Creation timestamp */
      created?: number;
    };
  };
  parts: Array<{
    /** Part kind, e.g. "text"; non-text parts are preserved during compression */
    type?: string;
    /** Direct text content */
    text?: string;
    /** Alternate location for text content on some part shapes */
    data?: {
      type?: string;
      text?: string;
    };
  }>;
}
|
|
34
|
+
|
|
35
|
+
// ============================================================================
|
|
36
|
+
// Token Estimation
|
|
37
|
+
// ============================================================================
|
|
38
|
+
|
|
39
|
+
/**
|
|
40
|
+
* Estimate total tokens in a message (all text parts).
|
|
41
|
+
*/
|
|
42
|
+
function estimateMessageTokens(msg: TransformMessage): number {
|
|
43
|
+
let total = 0;
|
|
44
|
+
for (const part of msg.parts) {
|
|
45
|
+
if (part.text) {
|
|
46
|
+
total += estimateTokens(part.text);
|
|
47
|
+
} else if (part.data?.text) {
|
|
48
|
+
total += estimateTokens(part.data.text);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
// Add overhead for message framing (~10 tokens per message)
|
|
52
|
+
return total + 10;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Create a compressed summary of a message.
|
|
57
|
+
* Preserves non-text parts (tool calls, tool results, images) to maintain
|
|
58
|
+
* conversation coherence. Only compresses text parts.
|
|
59
|
+
*/
|
|
60
|
+
function compressMessage(msg: TransformMessage): TransformMessage {
|
|
61
|
+
const role = msg.info.role ?? "unknown";
|
|
62
|
+
|
|
63
|
+
// Separate text parts from non-text parts (tool calls, tool results, etc.)
|
|
64
|
+
const textParts: Array<(typeof msg.parts)[number]> = [];
|
|
65
|
+
const nonTextParts: Array<(typeof msg.parts)[number]> = [];
|
|
66
|
+
|
|
67
|
+
for (const part of msg.parts) {
|
|
68
|
+
const isTextPart =
|
|
69
|
+
part.type === "text" || (!part.type && (part.text || part.data?.text));
|
|
70
|
+
if (isTextPart) {
|
|
71
|
+
textParts.push(part);
|
|
72
|
+
} else {
|
|
73
|
+
nonTextParts.push(part);
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// Extract text content for summarization
|
|
78
|
+
const texts: string[] = [];
|
|
79
|
+
for (const part of textParts) {
|
|
80
|
+
if (part.text) texts.push(part.text);
|
|
81
|
+
else if (part.data?.text) texts.push(part.data.text);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
const fullText = texts.join("\n");
|
|
85
|
+
|
|
86
|
+
// Build compressed parts: summarized text + preserved non-text parts
|
|
87
|
+
const compressedParts: typeof msg.parts = [];
|
|
88
|
+
|
|
89
|
+
if (fullText.length > 0) {
|
|
90
|
+
compressedParts.push({
|
|
91
|
+
type: "text",
|
|
92
|
+
text: createSummary(fullText, role),
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Preserve tool calls, tool results, and other structural parts
|
|
97
|
+
compressedParts.push(...nonTextParts);
|
|
98
|
+
|
|
99
|
+
return {
|
|
100
|
+
info: msg.info,
|
|
101
|
+
parts:
|
|
102
|
+
compressedParts.length > 0
|
|
103
|
+
? compressedParts
|
|
104
|
+
: [{ type: "text", text: `[compressed ${role} message]` }],
|
|
105
|
+
};
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Create a brief summary of message content.
|
|
110
|
+
* Keeps first and last sentence, truncates middle.
|
|
111
|
+
*/
|
|
112
|
+
function createSummary(text: string, role: string): string {
|
|
113
|
+
const maxChars = 200;
|
|
114
|
+
|
|
115
|
+
if (text.length <= maxChars) return text;
|
|
116
|
+
|
|
117
|
+
// Split into sentences
|
|
118
|
+
const sentences = text
|
|
119
|
+
.split(/(?<=[.!?])\s+|\n+/)
|
|
120
|
+
.map((s) => s.trim())
|
|
121
|
+
.filter((s) => s.length > 0);
|
|
122
|
+
|
|
123
|
+
if (sentences.length <= 2) {
|
|
124
|
+
return `${text.slice(0, maxChars)}...`;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
const first = sentences[0];
|
|
128
|
+
const last = sentences[sentences.length - 1];
|
|
129
|
+
|
|
130
|
+
const summary = `[compressed ${role} message] ${first} [...${sentences.length - 2} more sentences...] ${last}`;
|
|
131
|
+
|
|
132
|
+
return summary.length > maxChars * 2
|
|
133
|
+
? `${summary.slice(0, maxChars * 2)}...`
|
|
134
|
+
: summary;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// ============================================================================
|
|
138
|
+
// Context Manager
|
|
139
|
+
// ============================================================================
|
|
140
|
+
|
|
141
|
+
/**
|
|
142
|
+
* Process messages through context management.
|
|
143
|
+
*
|
|
144
|
+
* If total tokens exceed maxContextTokens, compresses oldest messages
|
|
145
|
+
* (excluding the most recent `protectedMessages` count).
|
|
146
|
+
*
|
|
147
|
+
* @param messages - Current conversation messages
|
|
148
|
+
* @returns Potentially compressed messages array
|
|
149
|
+
*/
|
|
150
|
+
export function manageContext(
|
|
151
|
+
messages: TransformMessage[],
|
|
152
|
+
): TransformMessage[] {
|
|
153
|
+
if (!MEMORY_CONFIG.context.enabled) return messages;
|
|
154
|
+
|
|
155
|
+
const maxTokens = MEMORY_CONFIG.context.maxContextTokens;
|
|
156
|
+
const protectedCount = MEMORY_CONFIG.context.protectedMessages;
|
|
157
|
+
|
|
158
|
+
// Estimate total tokens
|
|
159
|
+
let totalTokens = 0;
|
|
160
|
+
const tokenCounts: number[] = [];
|
|
161
|
+
|
|
162
|
+
for (const msg of messages) {
|
|
163
|
+
const tokens = estimateMessageTokens(msg);
|
|
164
|
+
tokenCounts.push(tokens);
|
|
165
|
+
totalTokens += tokens;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
// If under budget, return unchanged
|
|
169
|
+
if (totalTokens <= maxTokens) return messages;
|
|
170
|
+
|
|
171
|
+
// Calculate how many tokens to shed
|
|
172
|
+
const tokensToShed = totalTokens - maxTokens;
|
|
173
|
+
|
|
174
|
+
// Identify compressible messages (all except protected recent ones)
|
|
175
|
+
const compressibleEnd = Math.max(0, messages.length - protectedCount);
|
|
176
|
+
let shedSoFar = 0;
|
|
177
|
+
|
|
178
|
+
const result = [...messages];
|
|
179
|
+
|
|
180
|
+
// Compress from oldest to newest until we've shed enough
|
|
181
|
+
for (let i = 0; i < compressibleEnd && shedSoFar < tokensToShed; i++) {
|
|
182
|
+
const originalTokens = tokenCounts[i];
|
|
183
|
+
const compressed = compressMessage(messages[i]);
|
|
184
|
+
const compressedTokens = estimateMessageTokens(compressed);
|
|
185
|
+
const saved = originalTokens - compressedTokens;
|
|
186
|
+
|
|
187
|
+
if (saved > 0) {
|
|
188
|
+
result[i] = compressed;
|
|
189
|
+
shedSoFar += saved;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
return result;
|
|
194
|
+
}
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Curator Module
|
|
3
|
+
*
|
|
4
|
+
* Pattern-matches distillations to automatically create observations.
|
|
5
|
+
* Uses regex patterns to detect decisions, bugfixes, patterns, discoveries, warnings.
|
|
6
|
+
* All curated observations are created with source='curator'.
|
|
7
|
+
*
|
|
8
|
+
* No LLM dependency — pure heuristic pattern matching.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import {
|
|
12
|
+
type DistillationRow,
|
|
13
|
+
getRecentDistillations,
|
|
14
|
+
MEMORY_CONFIG,
|
|
15
|
+
type ObservationInput,
|
|
16
|
+
type ObservationType,
|
|
17
|
+
storeObservation,
|
|
18
|
+
} from "./memory-db.js";
|
|
19
|
+
|
|
20
|
+
// ============================================================================
|
|
21
|
+
// Pattern Definitions
|
|
22
|
+
// ============================================================================
|
|
23
|
+
|
|
24
|
+
/** Curator patterns — regex + type mapping.
 *
 * Each pattern requires multi-word phrases or contextual markers to reduce
 * false positives. Single common words like "using", "found", "always" are
 * NOT matched alone — they must appear in decision/discovery phrases.
 *
 * All entries currently share the same title strategy: the matched sentence
 * truncated to 80 characters.
 */
const CURATOR_PATTERNS: Array<{
  type: ObservationType;
  pattern: RegExp;
  titleExtractor: (match: RegExpMatchArray, sentence: string) => string;
}> = [
  // Technology/approach choices: "decided to X", "went with Y", "switched to Z".
  {
    type: "decision",
    pattern:
      /\b(decided to|chose to|selected\s+\w+\s+(?:over|instead)|went with|opted for|switched to|migrated to|picked\s+\w+\s+(?:over|for))\b/i,
    titleExtractor: (_match, sentence) => truncateSentence(sentence, 80),
  },
  // Defect resolutions: "fixed a/the/an ...", "bug in ...", "regression in ...".
  {
    type: "bugfix",
    pattern:
      /\b(fixed (?:a|the|an)\b|resolved (?:a|the|an)\b|patched (?:a|the|an)\b|corrected (?:a|the|an)\b|bug in\b|error in\b|crash in\b|regression in\b)\b/i,
    titleExtractor: (_match, sentence) => truncateSentence(sentence, 80),
  },
  // Recurring conventions: "pattern:", "best practice", "we always/never ...".
  {
    type: "pattern",
    pattern:
      /\b(pattern(?::|is| for)\b|convention(?::|is)\b|best practice\b|standard practice\b|workflow for\b|(?:we|I|the team) (?:always|never)\b)\b/i,
    titleExtractor: (_match, sentence) => truncateSentence(sentence, 80),
  },
  // New knowledge: "found that", "turns out", "realized that".
  {
    type: "discovery",
    pattern:
      /\b(found that|discovered that|noticed that|learned that|turns out|realized that|it (?:seems|appears) that)\b/i,
    titleExtractor: (_match, sentence) => truncateSentence(sentence, 80),
  },
  // Cautions: "warning:", "gotcha:", "avoid using ...", "never use ...".
  {
    type: "warning",
    pattern:
      /\b(warning:|caution:|careful with|gotcha:|pitfall(?:s|:)?\b|don't use\b|avoid (?:using|calling|importing)\b|beware of\b|watch out for\b|never (?:use|call|import|commit|push)\b)\b/i,
    titleExtractor: (_match, sentence) => truncateSentence(sentence, 80),
  },
];
|
|
66
|
+
|
|
67
|
+
// ============================================================================
|
|
68
|
+
// Utilities
|
|
69
|
+
// ============================================================================
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* Truncate a sentence to max length, preserving word boundaries.
|
|
73
|
+
*/
|
|
74
|
+
function truncateSentence(sentence: string, maxLen: number): string {
|
|
75
|
+
const clean = sentence.replace(/\s+/g, " ").trim();
|
|
76
|
+
if (clean.length <= maxLen) return clean;
|
|
77
|
+
|
|
78
|
+
const truncated = clean.slice(0, maxLen);
|
|
79
|
+
const lastSpace = truncated.lastIndexOf(" ");
|
|
80
|
+
return lastSpace > maxLen / 2
|
|
81
|
+
? `${truncated.slice(0, lastSpace)}...`
|
|
82
|
+
: `${truncated}...`;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Split distillation content into sentences.
|
|
87
|
+
*/
|
|
88
|
+
function splitSentences(content: string): string[] {
|
|
89
|
+
return content
|
|
90
|
+
.split(/(?<=[.!?])\s+|\n+/)
|
|
91
|
+
.map((s) => s.trim())
|
|
92
|
+
.filter((s) => s.length > 30); // Skip short fragments that lack enough context
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* Extract concept tags from a sentence (significant nouns/terms).
|
|
97
|
+
*/
|
|
98
|
+
function extractConcepts(sentence: string): string[] {
|
|
99
|
+
// Extract potential concept words (3+ chars, not common stop words)
|
|
100
|
+
const words = sentence
|
|
101
|
+
.toLowerCase()
|
|
102
|
+
.replace(/[^a-z0-9_\-/.]+/g, " ")
|
|
103
|
+
.split(/\s+/)
|
|
104
|
+
.filter((w) => w.length > 3);
|
|
105
|
+
|
|
106
|
+
// Return unique terms, limited to 5
|
|
107
|
+
return [...new Set(words)].slice(0, 5);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* Deduplicate against existing observation titles (simple guard).
|
|
112
|
+
* Returns true if a similar title already exists.
|
|
113
|
+
*/
|
|
114
|
+
function isDuplicateTitle(title: string, existingTitles: Set<string>): boolean {
|
|
115
|
+
const normalized = title.toLowerCase().trim();
|
|
116
|
+
if (existingTitles.has(normalized)) return true;
|
|
117
|
+
|
|
118
|
+
// Also check prefix match (first 40 chars) for fuzzy dedup
|
|
119
|
+
const prefix = normalized.slice(0, 40);
|
|
120
|
+
for (const existing of existingTitles) {
|
|
121
|
+
if (
|
|
122
|
+
existing.startsWith(prefix) ||
|
|
123
|
+
prefix.startsWith(existing.slice(0, 40))
|
|
124
|
+
) {
|
|
125
|
+
return true;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
return false;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
// ============================================================================
|
|
133
|
+
// Curator Pipeline
|
|
134
|
+
// ============================================================================
|
|
135
|
+
|
|
136
|
+
/** Outcome of one curator run over recent distillations. */
interface CuratorResult {
  // Observations successfully stored.
  created: number;
  // Observations that matched a pattern but failed to store.
  skipped: number;
  // Count of created observations keyed by observation type.
  patterns: Record<string, number>;
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Run curator on recent distillations to extract observations.
|
|
144
|
+
*
|
|
145
|
+
* @param sessionId - If provided, only process distillations from this session
|
|
146
|
+
* @param limit - Max distillations to process (default: MEMORY_CONFIG.curator.minDistillations)
|
|
147
|
+
*/
|
|
148
|
+
export function curateFromDistillations(
|
|
149
|
+
sessionId?: string,
|
|
150
|
+
limit?: number,
|
|
151
|
+
): CuratorResult {
|
|
152
|
+
if (!MEMORY_CONFIG.curator.enabled) {
|
|
153
|
+
return { created: 0, skipped: 0, patterns: {} };
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
const maxDistillations = limit ?? MEMORY_CONFIG.curator.minDistillations;
|
|
157
|
+
const distillations = getRecentDistillations(sessionId, maxDistillations);
|
|
158
|
+
|
|
159
|
+
if (distillations.length < MEMORY_CONFIG.curator.minDistillations) {
|
|
160
|
+
return { created: 0, skipped: 0, patterns: {} };
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
const result: CuratorResult = { created: 0, skipped: 0, patterns: {} };
|
|
164
|
+
const seenTitles = new Set<string>();
|
|
165
|
+
|
|
166
|
+
for (const distillation of distillations) {
|
|
167
|
+
const sentences = splitSentences(distillation.content);
|
|
168
|
+
|
|
169
|
+
for (const sentence of sentences) {
|
|
170
|
+
const observation = matchPatterns(sentence, distillation, seenTitles);
|
|
171
|
+
|
|
172
|
+
if (observation) {
|
|
173
|
+
try {
|
|
174
|
+
storeObservation(observation);
|
|
175
|
+
result.created++;
|
|
176
|
+
result.patterns[observation.type] =
|
|
177
|
+
(result.patterns[observation.type] ?? 0) + 1;
|
|
178
|
+
seenTitles.add(observation.title.toLowerCase().trim());
|
|
179
|
+
} catch {
|
|
180
|
+
result.skipped++;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
return result;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
/**
|
|
190
|
+
* Match a sentence against curator patterns and return an observation input if matched.
|
|
191
|
+
*/
|
|
192
|
+
function matchPatterns(
|
|
193
|
+
sentence: string,
|
|
194
|
+
distillation: DistillationRow,
|
|
195
|
+
seenTitles: Set<string>,
|
|
196
|
+
): ObservationInput | null {
|
|
197
|
+
for (const { type, pattern, titleExtractor } of CURATOR_PATTERNS) {
|
|
198
|
+
const match = sentence.match(pattern);
|
|
199
|
+
if (!match) continue;
|
|
200
|
+
|
|
201
|
+
const title = titleExtractor(match, sentence);
|
|
202
|
+
|
|
203
|
+
// Skip duplicates
|
|
204
|
+
if (isDuplicateTitle(title, seenTitles)) {
|
|
205
|
+
continue;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
const concepts = extractConcepts(sentence);
|
|
209
|
+
|
|
210
|
+
// Parse distillation terms for additional concepts
|
|
211
|
+
let distTerms: string[] = [];
|
|
212
|
+
try {
|
|
213
|
+
distTerms = JSON.parse(distillation.terms);
|
|
214
|
+
} catch {
|
|
215
|
+
// Invalid JSON, skip
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
// Merge concepts from sentence + distillation terms (max 8)
|
|
219
|
+
const allConcepts = [
|
|
220
|
+
...new Set([...concepts, ...distTerms.slice(0, 3)]),
|
|
221
|
+
].slice(0, 8);
|
|
222
|
+
|
|
223
|
+
return {
|
|
224
|
+
type,
|
|
225
|
+
title,
|
|
226
|
+
narrative: sentence,
|
|
227
|
+
concepts: allConcepts,
|
|
228
|
+
confidence: MEMORY_CONFIG.curator.defaultConfidence,
|
|
229
|
+
source: "curator",
|
|
230
|
+
};
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
return null;
|
|
234
|
+
}
|