ocuclaw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,516 @@
1
+ import { filterDisplayEmojiText } from "./message-emoji-filter.js";
2
+ import { marked } from "marked";
3
+
4
// --- Constants ---

// Prefix used for assistant entries when the entry carries no name of its own.
const DEFAULT_AGENT_NAME = "Agent";
// Matches inline reply directives: [[reply_to_current]] or [[reply_to: <value>]],
// case-insensitively and with optional whitespace inside the brackets.
const REPLY_DIRECTIVE_TAG_RE = /\[\[\s*(?:reply_to_current|reply_to\s*:\s*[^\]\n]+)\s*\]\]/gi;
// Placeholder swapped in for every directive so the cleanup passes in
// stripReplyDirectives can locate where a directive used to be.
const REPLY_DIRECTIVE_SENTINEL = "\u0000";
// A sentinel at the start of a line, together with surrounding spaces/tabs and
// the line break that immediately follows it, if there is one.
const STANDALONE_REPLY_DIRECTIVE_LINE_RE = /^[ \t]*\u0000[ \t]*(?:\r?\n)?/gm;
// A sentinel anywhere, together with the spaces/tabs on either side of it.
const INLINE_REPLY_DIRECTIVE_RE = /[ \t]*\u0000[ \t]*/g;
// The patterns below are applied to text that has already been lowercased and
// whitespace-collapsed by normalizeSessionStarterCandidate.
// Text that opens with "a new session was started" is accepted outright.
const SYNTHETIC_SESSION_START_PREFIX_RE = /^a\s+new\s+session\s+was\s+started\b/;
// Looser shape check: "new session"/"fresh session", or "session" followed
// later by "started", "reset" or "created".
const SYNTHETIC_SESSION_START_SHAPE_RE =
  /\b(?:new|fresh)\s+session\b|\bsession\b.*\b(?:started|reset|created)\b/;
// Instruction-like phrases; countSyntheticSessionInstructionSignals counts how
// many of these match. None use the g flag, so .test() keeps no state.
const SYNTHETIC_SESSION_INSTRUCTION_PATTERNS = [
  /\bgreet\b/,
  /\bconfigured\b.*\b(?:persona|style|voice)\b/,
  /\bbe yourself\b|\bmannerisms\b|\bmood\b/,
  /\b(?:1-3|1 to 3|one to three)\s+sentences?\b/,
  /\bask\b.*\bwhat\b.*\bwant\b.*\bdo\b/,
  /\bdefault(?:_| )model\b/,
  /\bdo not mention\b/,
  /\binternal\b.*\b(?:steps|files|tools|reasoning)\b/,
];
// --- State ---

// Raw messages exactly as received via hydrate/addMessage.
let messages = [];
// Name used by formatEntry for assistant entries without their own name.
let agentName = DEFAULT_AGENT_NAME;
// Display entries derived from `messages` (filtered, markdown-stripped).
let displayEntries = [];
// Last transcript string produced from `displayEntries`.
let cachedTranscript = "";
// True when `cachedTranscript` must be regenerated before it is read again.
let transcriptDirty = false;
31
+
32
/**
 * Turn one raw message into its display form.
 *
 * Yields null when the message should be hidden: the role is neither "user"
 * nor "assistant", there is no extractable text, the text is empty once
 * converted to plain text, or it is a user message in first-visible position
 * that looks like the synthetic session-start prompt.
 *
 * @param {{role: string, content: string|Array, name?: string}} msg
 * @param {{isFirstVisibleEntry?: boolean}} [options]
 * @returns {{role: "user"|"assistant", text: string, name: string|null}|null}
 */
function buildDisplayEntry(msg, options = {}) {
  if (!msg) return null;

  const { role } = msg;
  if (role !== "user" && role !== "assistant") return null;

  const rawText = extractText(msg.content);
  if (!rawText) return null;

  // Reply directives only ever appear in assistant output.
  const rendered = markdownToPlainText(rawText, {
    stripReplyTags: role === "assistant",
  });
  const displayText = rendered.text;
  if (!displayText) return null;

  const isSyntheticStarter =
    role === "user" &&
    options.isFirstVisibleEntry === true &&
    isLikelySyntheticSessionStarterPrompt(displayText);
  if (isSyntheticStarter) return null;

  const hasUsableName = typeof msg.name === "string" && msg.name;
  return {
    role,
    text: displayText,
    name: hasUsableName ? msg.name : null,
  };
}
63
+
64
/**
 * Canonicalize text before running the session-starter heuristics: drop a
 * leading ">" quote marker, collapse every whitespace run to one space, trim,
 * and lowercase. Non-string input yields "".
 *
 * @param {*} text
 * @returns {string}
 */
function normalizeSessionStarterCandidate(text) {
  if (typeof text !== "string") return "";

  const withoutQuoteMarker = text.replace(/^\s*>+\s*/, "");
  const singleSpaced = withoutQuoteMarker.replace(/\s+/g, " ");
  return singleSpaced.trim().toLowerCase();
}
72
+
73
/**
 * Count how many of the instruction-phrase patterns match the given text.
 *
 * @param {string} normalizedText - Text already passed through
 *   normalizeSessionStarterCandidate.
 * @returns {number}
 */
function countSyntheticSessionInstructionSignals(normalizedText) {
  return SYNTHETIC_SESSION_INSTRUCTION_PATTERNS.reduce(
    (matches, pattern) => (pattern.test(normalizedText) ? matches + 1 : matches),
    0
  );
}
80
+
81
/**
 * Heuristically detect OpenClaw's synthetic session-start prompt.
 *
 * The normalized text must mention both "/new" and "/reset". It is then
 * accepted either because it opens with the known prefix, or because it is at
 * least 80 characters long, has the session-start shape, and matches two or
 * more instruction patterns. No exact whole-string comparison is performed.
 *
 * @param {*} text
 * @returns {boolean}
 */
function isLikelySyntheticSessionStarterPrompt(text) {
  const candidate = normalizeSessionStarterCandidate(text);
  if (!candidate) return false;

  const mentionsBothCommands = candidate.includes("/new") && candidate.includes("/reset");
  if (!mentionsBothCommands) return false;

  if (SYNTHETIC_SESSION_START_PREFIX_RE.test(candidate)) return true;

  const hasPromptShape =
    candidate.length >= 80 && SYNTHETIC_SESSION_START_SHAPE_RE.test(candidate);
  return hasPromptShape && countSyntheticSessionInstructionSignals(candidate) >= 2;
}
97
+
98
/**
 * Render a display entry as one transcript line.
 *
 * User entries get a bullet prefix; every other entry is prefixed with its own
 * name, falling back to the current agent name.
 *
 * @param {{role: "user"|"assistant", text: string, name: string|null}} entry
 * @returns {string}
 */
function formatEntry(entry) {
  if (entry.role !== "user") {
    const speaker = entry.name || agentName;
    return `${speaker}: ${entry.text}`;
  }
  return `• ${entry.text}`;
}
109
+
110
/**
 * Recompute `displayEntries` from the raw `messages` and flag the cached
 * transcript as stale.
 */
function rebuildDisplayCache() {
  displayEntries = [];
  messages.forEach((rawMessage) => {
    // "First visible" is decided by what has been kept so far, not by index.
    const isFirstVisibleEntry = displayEntries.length === 0;
    const built = buildDisplayEntry(rawMessage, { isFirstVisibleEntry });
    if (built) displayEntries.push(built);
  });
  transcriptDirty = true;
}
123
+
124
/**
 * Return the transcript string, regenerating it from `displayEntries` only
 * when it has been marked dirty.
 *
 * @returns {string}
 */
function getTranscript() {
  if (transcriptDirty) {
    const lines = [];
    for (const entry of displayEntries) {
      lines.push(formatEntry(entry));
    }
    cachedTranscript = lines.join("\n\n");
    transcriptDirty = false;
  }
  return cachedTranscript;
}
135
+
136
+ // --- Markdown Pipeline ---
137
+
138
/**
 * Strip OpenClaw inline reply directives from assistant text before display.
 * They steer transport threading and must never reach the user.
 *
 * Text without any directive is returned untouched; falsy input yields "".
 *
 * @param {string} text
 * @returns {string}
 */
function stripReplyDirectives(text) {
  if (!text) return "";

  const tagged = text.replace(REPLY_DIRECTIVE_TAG_RE, REPLY_DIRECTIVE_SENTINEL);
  if (tagged === text) return text;

  // Ordered cleanup passes; each one runs on the output of the previous one.
  const passes = [
    [STANDALONE_REPLY_DIRECTIVE_LINE_RE, ""], // sentinel at line start, with its line break
    [INLINE_REPLY_DIRECTIVE_RE, " "], // sentinel inside a line becomes one space
    [/[ \t]+\r?\n/g, "\n"], // blanks left hanging before a line break
    [/\r?\n{3,}/g, "\n\n"], // runs of three or more newlines
    [/^(?:\r?\n)+/, ""], // leading blank lines
    [/^[ \t]+/, ""], // leading blanks on the first line
    [/[ \t]+$/gm, ""], // trailing blanks on every line
    [/\u0000/g, ""], // any sentinel still present
  ];

  let cleaned = tagged;
  for (const [pattern, replacement] of passes) {
    cleaned = cleaned.replace(pattern, replacement);
  }
  return cleaned.trimEnd();
}
159
+
160
/**
 * Extract plain text from an array of marked inline tokens.
 *
 * @param {Array<object>} tokens - Inline tokens (children of a block token).
 * @returns {string} Concatenated text with inline markup removed.
 */
function renderInlineTokens(tokens) {
  let out = "";
  for (const token of tokens) {
    switch (token.type) {
      case "text":
      case "codespan":
        out += token.text;
        break;
      case "strong":
      case "em":
      case "del":
      case "link":
        // Prefer the parsed children so nested markup is flattened as well.
        out += token.tokens ? renderInlineTokens(token.tokens) : (token.text || "");
        break;
      case "br":
        out += "\n";
        break;
      case "escape":
        out += token.text || "";
        break;
      case "html":
        // Strip inline HTML tags
        break;
      case "image":
        out += token.text || token.title || "";
        break;
      default:
        // Unknown inline token — include raw text if available
        if (token.text) out += token.text;
        break;
    }
  }
  return out;
}

/**
 * Extract plain text from an array of marked block tokens.
 * Returns an array of text blocks (each block is a paragraph-level chunk).
 *
 * Block-level "text" tokens (the form marked uses for the content of tight
 * list items) are rendered as a single inline run. Ordered lists are numbered
 * from the list token's numeric `start`, defaulting to 1 when it is absent.
 *
 * @param {Array<object>} tokens - Block tokens as produced by marked.lexer.
 * @returns {string[]}
 */
function renderBlockTokens(tokens) {
  const blocks = [];

  for (const token of tokens) {
    switch (token.type) {
      case "paragraph":
      case "heading":
        blocks.push(token.tokens ? renderInlineTokens(token.tokens) : token.text);
        break;

      case "text":
        // The children of a block-level text token are inline tokens. Sending
        // them back through the block renderer would turn every inline piece
        // into its own block and break one line of text into several.
        if (token.tokens) {
          blocks.push(renderInlineTokens(token.tokens));
        } else if (token.text) {
          blocks.push(token.text);
        }
        break;

      case "code":
        blocks.push(token.text);
        break;

      case "blockquote":
        if (token.tokens) {
          blocks.push(...renderBlockTokens(token.tokens));
        }
        break;

      case "list": {
        // Honour the source numbering ("3." stays 3) when marked reports it.
        const firstNumber = typeof token.start === "number" ? token.start : 1;
        const items = token.items.map((item, index) => {
          const itemText = item.tokens
            ? renderBlockTokens(item.tokens).join("\n")
            : item.text;
          const bullet = token.ordered ? `${firstNumber + index}. ` : "- ";
          return bullet + itemText;
        });
        blocks.push(items.join("\n"));
        break;
      }

      case "table": {
        const rows = [];
        // Header row
        rows.push(
          token.header.map((cell) => renderInlineTokens(cell.tokens)).join(" | ")
        );
        // Data rows
        for (const row of token.rows) {
          rows.push(
            row.map((cell) => renderInlineTokens(cell.tokens)).join(" | ")
          );
        }
        blocks.push(rows.join("\n"));
        break;
      }

      case "hr":
        // Skip horizontal rules
        break;

      case "space":
        // Skip whitespace tokens
        break;

      case "html":
        // Strip block-level HTML
        break;

      default:
        // Unknown block token — include text if available
        if (token.tokens) {
          blocks.push(...renderBlockTokens(token.tokens));
        } else if (token.text) {
          blocks.push(token.text);
        }
        break;
    }
  }

  return blocks;
}
280
+
281
/**
 * Tidy horizontal whitespace for display: runs of two or more spaces/tabs
 * between non-space characters become a single space, and trailing
 * spaces/tabs are removed from every line. Leading indentation is kept.
 *
 * @param {string} text
 * @returns {string}
 */
function cleanupDisplayWhitespace(text) {
  const innerRunsCollapsed = text.replace(/(\S)[ \t]{2,}(?=\S)/g, "$1 ");
  return innerRunsCollapsed.replace(/[ \t]+$/gm, "");
}
286
+
287
/**
 * Convert Markdown to plain text.
 *
 * The source is tokenized with marked.lexer, the token tree is flattened to
 * text blocks joined by blank lines, and the result is passed through the
 * emoji filter and whitespace cleanup. Reply directives are removed first
 * when `stripReplyTags` is set.
 *
 * @param {string} markdown
 * @param {{ stripReplyTags?: boolean }} [options]
 * @returns {{ text: string }}
 */
function markdownToPlainText(markdown, options = {}) {
  if (!markdown) return { text: "" };

  let source = markdown;
  if (options.stripReplyTags) {
    source = stripReplyDirectives(markdown);
  }
  // Stripping directives can leave nothing behind.
  if (!source) return { text: "" };

  const joined = renderBlockTokens(marked.lexer(source)).join("\n\n");
  return { text: cleanupDisplayWhitespace(filterDisplayEmojiText(joined)) };
}
308
+
309
+ // --- Content Extraction ---
310
+
311
/**
 * Pull the displayable text out of a message's content field.
 *
 * A string is returned as-is (empty string becomes null). For an array of
 * content blocks, the `text` of every text block is joined with blank lines;
 * if any image block is present the result is prefixed with "[Image]".
 * Anything else yields null.
 *
 * @param {string|Array} content
 * @returns {string|null} Text content, or null if no displayable text
 */
function extractText(content) {
  if (typeof content === "string") return content || null;
  if (!Array.isArray(content)) return null;

  const pieces = content
    .filter((block) => block && block.type === "text" && typeof block.text === "string")
    .map((block) => block.text);
  const sawImage = content.some((block) => block && block.type === "image");

  const joined = pieces.length > 0 ? pieces.join("\n\n") : null;
  if (!sawImage) return joined;
  return joined ? `[Image] ${joined}` : "[Image]";
}
341
+
342
+ // --- Message Filtering ---
343
+
344
/**
 * Produce one `{text, role}` object per current display entry, where `text`
 * is the entry formatted with its prefix.
 *
 * @returns {Array<{text: string, role: string}>}
 */
function filterAndFormat() {
  const formatted = [];
  for (const entry of displayEntries) {
    formatted.push({ text: formatEntry(entry), role: entry.role });
  }
  return formatted;
}
354
+
355
+ // --- Turn Grouping ---
356
+
357
/**
 * Split chronological formatted messages into turns.
 *
 * A turn opens at every user message; anything that precedes the first user
 * message is collected into a turn of its own.
 *
 * @param {Array<{text: string, role: string}>} formatted
 * @returns {Array<Array<{text: string, role: string}>>}
 */
function groupIntoTurns(formatted) {
  const turns = [];

  for (const entry of formatted) {
    // Every turn receives an entry as soon as it is opened, so an existing
    // last turn is never empty when a user message arrives.
    const opensTurn = turns.length === 0 || entry.role === "user";
    if (opensTurn) turns.push([]);
    turns[turns.length - 1].push(entry);
  }

  return turns;
}
383
+
384
+ // --- Pagination ---
385
+
386
/**
 * Build the page list for the conversation.
 *
 * The relay never splits the transcript by character count; client render
 * code owns virtual-page splitting. The result is therefore either empty (no
 * display entries) or a single page holding the whole chronological
 * transcript, newest content last.
 *
 * @returns {Array<{content: string, subPage: [number, number]|null, turn: null}>}
 */
function paginate() {
  const hasEntries = displayEntries.length > 0;
  if (!hasEntries) return [];

  // Chronological order, no reversal.
  const page = { content: getTranscript(), subPage: null, turn: null };
  return [page];
}
404
+
405
+ // --- Public API ---
406
+
407
const conversationState = {
  /**
   * Replace the stored messages with a copy of `msgs` (for example from
   * chat.history) and rebuild the display cache. A truthy `name` also becomes
   * the agent name; otherwise the current one is kept.
   *
   * @param {Array<{role: string, content: string|Array}>} msgs
   * @param {string} [name] - Agent name for display prefix
   */
  hydrate(msgs, name) {
    messages = Array.isArray(msgs) ? Array.from(msgs) : [];
    if (name) {
      agentName = name;
    }
    rebuildDisplayCache();
  },

  /**
   * Record one message and, if it is displayable, extend the display entries.
   * The cached transcript is appended to in place unless it is already
   * flagged dirty, in which case it will be regenerated on the next read.
   *
   * @param {string} role
   * @param {string|Array} content
   * @param {string} [name] - Optional display name override (used for simulate)
   */
  addMessage(role, content, name) {
    const msg = name ? { role, content, name } : { role, content };
    messages.push(msg);

    const isFirstVisibleEntry = displayEntries.length === 0;
    const entry = buildDisplayEntry(msg, { isFirstVisibleEntry });
    if (!entry) return;

    displayEntries.push(entry);
    const nextLine = formatEntry(entry);
    if (transcriptDirty) return;

    cachedTranscript = cachedTranscript
      ? `${cachedTranscript}\n\n${nextLine}`
      : nextLine;
  },

  /**
   * Change the agent name used as the prefix in formatted output. A falsy
   * name selects the default. The transcript is only invalidated when the
   * name actually changes.
   *
   * @param {string} name
   */
  setAgentName(name) {
    const resolved = name || DEFAULT_AGENT_NAME;
    if (resolved !== agentName) {
      agentName = resolved;
      transcriptDirty = true;
    }
  },

  /**
   * Return the paginated, filtered, markdown-stripped page array.
   *
   * @returns {Array<{content: string, subPage: [number, number]|null, turn: null}>}
   */
  getPages() {
    return paginate();
  },

  /**
   * Return the number of pages: 1 when anything is displayable, otherwise 0.
   *
   * @returns {number}
   */
  getPageCount() {
    return displayEntries.length === 0 ? 0 : 1;
  },

  /**
   * Return a shallow copy of the full unfiltered message list.
   *
   * @returns {Array<{role: string, content: string|Array}>}
   */
  getRawMessages() {
    return messages.slice();
  },

  /**
   * Reset all state, including the agent name, to its initial values.
   */
  clear() {
    messages = [];
    displayEntries = [];
    cachedTranscript = "";
    transcriptDirty = false;
    agentName = DEFAULT_AGENT_NAME;
  },

  // Exposed for testing
  _markdownToPlainText: markdownToPlainText,
  _extractText: extractText,
  _isLikelySyntheticSessionStarterPrompt: isLikelySyntheticSessionStarterPrompt,
};

export const {
  hydrate,
  addMessage,
  setAgentName,
  getPages,
  getPageCount,
  getRawMessages,
  clear,
  _markdownToPlainText,
  _extractText,
  _isLikelySyntheticSessionStarterPrompt,
} = conversationState;

export default conversationState;