@oh-my-pi/pi-coding-agent 13.3.7 → 13.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49):
  1. package/CHANGELOG.md +82 -0
  2. package/package.json +9 -18
  3. package/scripts/format-prompts.ts +7 -172
  4. package/src/config/prompt-templates.ts +2 -54
  5. package/src/config/settings-schema.ts +24 -0
  6. package/src/discovery/codex.ts +1 -2
  7. package/src/discovery/helpers.ts +0 -5
  8. package/src/lsp/client.ts +8 -0
  9. package/src/lsp/config.ts +2 -3
  10. package/src/lsp/index.ts +379 -99
  11. package/src/lsp/render.ts +21 -31
  12. package/src/lsp/types.ts +21 -8
  13. package/src/lsp/utils.ts +193 -1
  14. package/src/mcp/config-writer.ts +3 -0
  15. package/src/modes/components/settings-defs.ts +9 -0
  16. package/src/modes/interactive-mode.ts +8 -1
  17. package/src/modes/theme/mermaid-cache.ts +4 -4
  18. package/src/modes/theme/theme.ts +33 -0
  19. package/src/prompts/system/subagent-user-prompt.md +2 -0
  20. package/src/prompts/system/system-prompt.md +12 -1
  21. package/src/prompts/tools/ast-find.md +20 -0
  22. package/src/prompts/tools/ast-replace.md +21 -0
  23. package/src/prompts/tools/bash.md +2 -0
  24. package/src/prompts/tools/hashline.md +26 -8
  25. package/src/prompts/tools/lsp.md +22 -5
  26. package/src/sdk.ts +11 -1
  27. package/src/session/agent-session.ts +261 -82
  28. package/src/task/executor.ts +8 -5
  29. package/src/tools/ast-find.ts +316 -0
  30. package/src/tools/ast-replace.ts +294 -0
  31. package/src/tools/bash.ts +2 -1
  32. package/src/tools/browser.ts +2 -8
  33. package/src/tools/fetch.ts +55 -18
  34. package/src/tools/index.ts +8 -0
  35. package/src/tools/path-utils.ts +34 -0
  36. package/src/tools/python.ts +2 -1
  37. package/src/tools/renderers.ts +4 -0
  38. package/src/tools/ssh.ts +2 -1
  39. package/src/tools/todo-write.ts +34 -0
  40. package/src/tools/tool-timeouts.ts +29 -0
  41. package/src/utils/mime.ts +37 -14
  42. package/src/utils/prompt-format.ts +172 -0
  43. package/src/web/scrapers/arxiv.ts +12 -12
  44. package/src/web/scrapers/go-pkg.ts +2 -2
  45. package/src/web/scrapers/iacr.ts +17 -9
  46. package/src/web/scrapers/readthedocs.ts +3 -3
  47. package/src/web/scrapers/twitter.ts +11 -11
  48. package/src/web/scrapers/wikipedia.ts +4 -5
  49. package/src/utils/ignore-files.ts +0 -119
@@ -5,7 +5,7 @@ import type { Component } from "@oh-my-pi/pi-tui";
5
5
  import { Text } from "@oh-my-pi/pi-tui";
6
6
  import { ptree, truncate } from "@oh-my-pi/pi-utils";
7
7
  import { type Static, Type } from "@sinclair/typebox";
8
- import { parse as parseHtml } from "node-html-parser";
8
+ import { parseHTML } from "linkedom";
9
9
  import { renderPromptTemplate } from "../config/prompt-templates";
10
10
  import type { RenderResultOptions } from "../extensibility/custom-tools/types";
11
11
  import { type Theme, theme } from "../modes/theme/theme";
@@ -24,6 +24,7 @@ import { formatStyledArtifactReference, type OutputMeta } from "./output-meta";
24
24
  import { formatExpandHint, getDomain } from "./render-utils";
25
25
  import { ToolAbortError } from "./tool-errors";
26
26
  import { toolResult } from "./tool-result";
27
+ import { clampTimeout } from "./tool-timeouts";
27
28
 
28
29
  // =============================================================================
29
30
  // Types and Constants
@@ -248,6 +249,36 @@ async function tryContentNegotiation(
248
249
  return null;
249
250
  }
250
251
 
252
/**
 * Read a single HTML attribute from a tag string.
 *
 * The lookup is case-insensitive and accepts double-quoted, single-quoted and
 * unquoted values.
 *
 * @param tag - Raw markup of one tag, e.g. `<link rel="alternate" href="/feed">`.
 * @param attribute - Attribute name to look up; matched literally.
 * @returns The trimmed attribute value, or `null` when no `name=value` pair is found.
 */
function getHtmlAttribute(tag: string, attribute: string): string | null {
	// Escape regex metacharacters so the name can never alter the pattern.
	const name = attribute.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	// The name must not be the tail of a longer attribute name: a plain `\b`
	// would also match `href` inside `data-href` or `xlink:href`.
	const pattern = new RegExp(`(?:^|[^\\w:.-])${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'=<>]+))`, "i");
	const match = pattern.exec(tag);
	if (!match) return null;
	// Exactly one of the three capture groups is set, depending on the quoting style.
	return (match[1] ?? match[2] ?? match[3] ?? "").trim();
}
261
+
262
/**
 * Extract bounded <head> markup to avoid expensive whole-page parsing.
 *
 * @param html - Full page markup.
 * @returns The text from the opening `<head` tag through `</head>`. When no
 * opening tag exists, the first 32 KiB of the page. When the opening tag is
 * never terminated or no closing tag follows, at most 32 KiB starting at the
 * opening tag.
 */
function extractHeadHtml(html: string): string {
	const maxChars = 32 * 1024;

	// Search the original string case-insensitively instead of a lowercased copy:
	// `toLowerCase()` can change the string length, which would shift every offset.
	// The lookahead rejects longer tag names such as `<header`.
	const opening = /<head(?![\w-])/i.exec(html);
	if (!opening) {
		return html.slice(0, maxChars);
	}

	const headStart = opening.index;
	const headTagEnd = html.indexOf(">", headStart);
	if (headTagEnd === -1) {
		return html.slice(headStart, headStart + maxChars);
	}

	// Look for the closing tag only after the opening tag has ended.
	const closingPattern = /<\/head>/gi;
	closingPattern.lastIndex = headTagEnd + 1;
	const closing = closingPattern.exec(html);
	if (!closing) {
		return html.slice(headStart, Math.min(html.length, headTagEnd + 1 + maxChars));
	}

	return html.slice(headStart, closing.index + closing[0].length);
}
281
+
251
282
  /**
252
283
  * Parse alternate links from HTML head
253
284
  */
@@ -255,13 +286,17 @@ function parseAlternateLinks(html: string, pageUrl: string): string[] {
255
286
  const links: string[] = [];
256
287
 
257
288
  try {
258
- const doc = parseHtml(html.slice(0, 262144));
259
- const alternateLinks = doc.querySelectorAll('link[rel="alternate"]');
289
+ const pagePath = new URL(pageUrl).pathname;
290
+ const headHtml = extractHeadHtml(html);
291
+ const linkTags = headHtml.match(/<link\b[^>]*>/gi) ?? [];
260
292
 
261
- for (const link of alternateLinks) {
262
- const href = link.getAttribute("href");
263
- const type = link.getAttribute("type")?.toLowerCase() ?? "";
293
+ for (const tag of linkTags) {
294
+ const rel = getHtmlAttribute(tag, "rel")?.toLowerCase() ?? "";
295
+ const relTokens = rel.split(/\s+/).filter(Boolean);
296
+ if (!relTokens.includes("alternate")) continue;
264
297
 
298
+ const href = getHtmlAttribute(tag, "href");
299
+ const type = getHtmlAttribute(tag, "type")?.toLowerCase() ?? "";
265
300
  if (!href) continue;
266
301
 
267
302
  // Skip site-wide feeds
@@ -278,7 +313,7 @@ function parseAlternateLinks(html: string, pageUrl: string): string[] {
278
313
  links.push(href);
279
314
  } else if (
280
315
  (type.includes("rss") || type.includes("atom") || type.includes("feed")) &&
281
- (href.includes(new URL(pageUrl).pathname) || href.includes("comments"))
316
+ (href.includes(pagePath) || href.includes("comments"))
282
317
  ) {
283
318
  links.push(href);
284
319
  }
@@ -293,20 +328,22 @@ function parseAlternateLinks(html: string, pageUrl: string): string[] {
293
328
  */
294
329
  function extractDocumentLinks(html: string, baseUrl: string): string[] {
295
330
  const links: string[] = [];
331
+ const seen = new Set<string>();
296
332
 
297
333
  try {
298
- const doc = parseHtml(html);
299
- const anchors = doc.querySelectorAll("a[href]");
300
-
301
- for (const anchor of anchors) {
302
- const href = anchor.getAttribute("href");
334
+ const anchorTags = html.slice(0, 512 * 1024).match(/<a\b[^>]*>/gi) ?? [];
335
+ for (const tag of anchorTags) {
336
+ const href = getHtmlAttribute(tag, "href");
303
337
  if (!href) continue;
304
338
 
305
339
  const ext = path.extname(href).toLowerCase();
306
- if (CONVERTIBLE_EXTENSIONS.has(ext)) {
307
- const resolved = href.startsWith("http") ? href : new URL(href, baseUrl).href;
308
- links.push(resolved);
309
- }
340
+ if (!CONVERTIBLE_EXTENSIONS.has(ext)) continue;
341
+
342
+ const resolved = href.startsWith("http") ? href : new URL(href, baseUrl).href;
343
+ if (seen.has(resolved)) continue;
344
+ seen.add(resolved);
345
+ links.push(resolved);
346
+ if (links.length >= 20) break;
310
347
  }
311
348
  } catch {}
312
349
 
@@ -333,7 +370,7 @@ function cleanFeedText(text: string): string {
333
370
  */
334
371
  function parseFeedToMarkdown(content: string, maxItems = 10): string {
335
372
  try {
336
- const doc = parseHtml(content, { parseNoneClosedTags: true });
373
+ const doc = parseHTML(content).document;
337
374
 
338
375
  // Try RSS
339
376
  const channel = doc.querySelector("channel");
@@ -872,7 +909,7 @@ export class FetchTool implements AgentTool<typeof fetchSchema, FetchToolDetails
872
909
  const { url, timeout: rawTimeout = 20, raw = false } = params;
873
910
 
874
911
  // Clamp to valid range (seconds)
875
- const effectiveTimeout = Math.min(Math.max(rawTimeout, 1), 45);
912
+ const effectiveTimeout = clampTimeout("fetch", rawTimeout);
876
913
 
877
914
  if (signal?.aborted) {
878
915
  throw new ToolAbortError();
@@ -15,6 +15,8 @@ import type { AgentOutputManager } from "../task/output-manager";
15
15
  import type { EventBus } from "../utils/event-bus";
16
16
  import { SearchTool } from "../web/search";
17
17
  import { AskTool } from "./ask";
18
+ import { AstFindTool } from "./ast-find";
19
+ import { AstReplaceTool } from "./ast-replace";
18
20
  import { AwaitTool } from "./await-tool";
19
21
  import { BashTool } from "./bash";
20
22
  import { BrowserTool } from "./browser";
@@ -54,6 +56,8 @@ export * from "../session/streaming-output";
54
56
  export { BUNDLED_AGENTS, TaskTool } from "../task";
55
57
  export * from "../web/search";
56
58
  export { AskTool, type AskToolDetails } from "./ask";
59
+ export { AstFindTool, type AstFindToolDetails } from "./ast-find";
60
+ export { AstReplaceTool, type AstReplaceToolDetails } from "./ast-replace";
57
61
  export { AwaitTool, type AwaitToolDetails } from "./await-tool";
58
62
  export { BashTool, type BashToolDetails, type BashToolInput, type BashToolOptions } from "./bash";
59
63
  export { BrowserTool, type BrowserToolDetails } from "./browser";
@@ -155,6 +159,8 @@ export interface ToolSession {
155
159
  type ToolFactory = (session: ToolSession) => Tool | null | Promise<Tool | null>;
156
160
 
157
161
  export const BUILTIN_TOOLS: Record<string, ToolFactory> = {
162
+ ast_find: s => new AstFindTool(s),
163
+ ast_replace: s => new AstReplaceTool(s),
158
164
  ask: AskTool.createIf,
159
165
  bash: s => new BashTool(s),
160
166
  python: s => new PythonTool(s),
@@ -283,6 +289,8 @@ export async function createTools(session: ToolSession, toolNames?: string[]): P
283
289
  if (name === "todo_write") return !includeSubmitResult && session.settings.get("todo.enabled");
284
290
  if (name === "find") return session.settings.get("find.enabled");
285
291
  if (name === "grep") return session.settings.get("grep.enabled");
292
+ if (name === "ast_find") return session.settings.get("astFind.enabled");
293
+ if (name === "ast_replace") return session.settings.get("astReplace.enabled");
286
294
  if (name === "notebook") return session.settings.get("notebook.enabled");
287
295
  if (name === "fetch") return session.settings.get("fetch.enabled");
288
296
  if (name === "web_search") return session.settings.get("web_search.enabled");
@@ -88,6 +88,40 @@ export function resolveToCwd(filePath: string, cwd: string): string {
88
88
  return path.resolve(cwd, expanded);
89
89
  }
90
90
 
91
/** Characters whose presence marks a path as a glob pattern. */
const GLOB_PATH_CHARS = ["*", "?", "[", "{"] as const;

/**
 * Report whether a path contains any glob metacharacter (`*`, `?`, `[`, `{`).
 *
 * @param filePath - Path or path segment to inspect.
 * @returns `true` when at least one glob character is present.
 */
export function hasGlobPathChars(filePath: string): boolean {
	return GLOB_PATH_CHARS.some(char => filePath.includes(char));
}

/** Result of splitting a user-supplied search path. */
export interface ParsedSearchPath {
	/** Literal directory/file prefix that precedes the first glob segment. */
	basePath: string;
	/** Remaining glob pattern; absent when the input contains no glob characters. */
	glob?: string;
}

/**
 * Split a user path into a base path + glob pattern for tools that delegate to
 * APIs accepting separate `path` and `glob` arguments.
 *
 * Backslashes are treated as path separators for the purpose of splitting. A
 * path without glob characters is returned untouched as `basePath`.
 *
 * @param filePath - Path that may contain glob characters.
 * @returns The literal prefix and, when present, the glob remainder (always `/`-separated).
 */
export function parseSearchPath(filePath: string): ParsedSearchPath {
	const normalizedPath = filePath.replace(/\\/g, "/");
	if (!hasGlobPathChars(normalizedPath)) {
		return { basePath: filePath };
	}

	const segments = normalizedPath.split("/");
	const firstGlobIndex = segments.findIndex(segment => hasGlobPathChars(segment));

	// The very first segment is already a glob: search relative to the current directory.
	if (firstGlobIndex <= 0) {
		return { basePath: ".", glob: normalizedPath };
	}

	const prefix = segments.slice(0, firstGlobIndex).join("/");
	return {
		// A root-anchored glob such as "/*.ts" leaves only the empty segment before
		// the first "/"; report the filesystem root rather than an empty path.
		basePath: prefix === "" ? "/" : prefix,
		glob: segments.slice(firstGlobIndex).join("/"),
	};
}
124
+
91
125
  export function resolveReadPath(filePath: string, cwd: string): string {
92
126
  const resolved = resolveToCwd(filePath, cwd);
93
127
 
@@ -21,6 +21,7 @@ import { resolveToCwd } from "./path-utils";
21
21
  import { formatTitle, replaceTabs, shortenPath, truncateToWidth, wrapBrackets } from "./render-utils";
22
22
  import { ToolAbortError, ToolError } from "./tool-errors";
23
23
  import { toolResult } from "./tool-result";
24
+ import { clampTimeout } from "./tool-timeouts";
24
25
 
25
26
  export const PYTHON_DEFAULT_PREVIEW_LINES = 10;
26
27
 
@@ -177,7 +178,7 @@ export class PythonTool implements AgentTool<typeof pythonSchema> {
177
178
 
178
179
  const { cells, timeout: rawTimeout = 30, cwd, reset } = params;
179
180
  // Clamp to reasonable range: 1s - 600s (10 min)
180
- const timeoutSec = Math.max(1, Math.min(600, rawTimeout));
181
+ const timeoutSec = clampTimeout("python", rawTimeout);
181
182
  const timeoutMs = timeoutSec * 1000;
182
183
  const timeoutSignal = AbortSignal.timeout(timeoutMs);
183
184
  const combinedSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
@@ -11,6 +11,8 @@ import { editToolRenderer } from "../patch";
11
11
  import { taskToolRenderer } from "../task/render";
12
12
  import { webSearchToolRenderer } from "../web/search/render";
13
13
  import { askToolRenderer } from "./ask";
14
+ import { astFindToolRenderer } from "./ast-find";
15
+ import { astReplaceToolRenderer } from "./ast-replace";
14
16
  import { bashToolRenderer } from "./bash";
15
17
  import { calculatorToolRenderer } from "./calculator";
16
18
  import { fetchToolRenderer } from "./fetch";
@@ -38,6 +40,8 @@ type ToolRenderer = {
38
40
 
39
41
  export const toolRenderers: Record<string, ToolRenderer> = {
40
42
  ask: askToolRenderer as ToolRenderer,
43
+ ast_find: astFindToolRenderer as ToolRenderer,
44
+ ast_replace: astReplaceToolRenderer as ToolRenderer,
41
45
  bash: bashToolRenderer as ToolRenderer,
42
46
  python: pythonToolRenderer as ToolRenderer,
43
47
  calc: calculatorToolRenderer as ToolRenderer,
package/src/tools/ssh.ts CHANGED
@@ -19,6 +19,7 @@ import type { ToolSession } from ".";
19
19
  import { formatStyledTruncationWarning, type OutputMeta } from "./output-meta";
20
20
  import { ToolError } from "./tool-errors";
21
21
  import { toolResult } from "./tool-result";
22
+ import { clampTimeout } from "./tool-timeouts";
22
23
 
23
24
  const sshSchema = Type.Object({
24
25
  host: Type.String({ description: "Host name from managed SSH config or discovered ssh.json files" }),
@@ -155,7 +156,7 @@ export class SshTool implements AgentTool<typeof sshSchema, SSHToolDetails> {
155
156
  const remoteCommand = buildRemoteCommand(command, cwd, hostInfo);
156
157
 
157
158
  // Clamp to reasonable range: 1s - 3600s (1 hour)
158
- const timeoutSec = Math.max(1, Math.min(3600, rawTimeout));
159
+ const timeoutSec = clampTimeout("ssh", rawTimeout);
159
160
  const timeoutMs = timeoutSec * 1000;
160
161
 
161
162
  const tailBuffer = new TailBuffer(DEFAULT_MAX_BYTES);
@@ -161,6 +161,23 @@ function clonePhases(phases: TodoPhase[]): TodoPhase[] {
161
161
  return phases.map(phase => ({ ...phase, tasks: phase.tasks.map(task => ({ ...task })) }));
162
162
  }
163
163
 
164
/**
 * Enforce a single active task across all phases, mutating the tasks in place.
 *
 * Walking tasks in phase order: the first `in_progress` task is kept and every
 * later `in_progress` task is reset to `pending`. When no task is
 * `in_progress`, the first `pending` task (if any) is promoted instead.
 */
function normalizeInProgressTask(phases: TodoPhase[]): void {
	let activeFound = false;
	let earliestPending: TodoPhase["tasks"][number] | undefined;

	for (const phase of phases) {
		for (const task of phase.tasks) {
			if (task.status === "in_progress") {
				if (activeFound) {
					// Only the first active task may stay active.
					task.status = "pending";
				} else {
					activeFound = true;
				}
			} else if (task.status === "pending" && earliestPending === undefined) {
				earliestPending = task;
			}
		}
	}

	// Nothing was active: promote the earliest pending task, if there is one.
	if (!activeFound && earliestPending !== undefined) {
		earliestPending.status = "in_progress";
	}
}
180
+
164
181
  export function getLatestTodoPhasesFromEntries(entries: SessionEntry[]): TodoPhase[] {
165
182
  for (let i = entries.length - 1; i >= 0; i--) {
166
183
  const entry = entries[i];
@@ -246,6 +263,7 @@ function applyOps(file: TodoFile, ops: TodoWriteParams["ops"]): { file: TodoFile
246
263
  }
247
264
  }
248
265
 
266
+ normalizeInProgressTask(file.phases);
249
267
  return { file, errors };
250
268
  }
251
269
 
@@ -253,6 +271,14 @@ function formatSummary(phases: TodoPhase[], errors: string[]): string {
253
271
  const tasks = phases.flatMap(p => p.tasks);
254
272
  if (tasks.length === 0) return errors.length > 0 ? `Errors: ${errors.join("; ")}` : "Todo list cleared.";
255
273
 
274
+ const remainingByPhase = phases
275
+ .map(phase => ({
276
+ name: phase.name,
277
+ tasks: phase.tasks.filter(task => task.status === "pending" || task.status === "in_progress"),
278
+ }))
279
+ .filter(phase => phase.tasks.length > 0);
280
+ const remainingTasks = remainingByPhase.flatMap(phase => phase.tasks.map(task => ({ ...task, phase: phase.name })));
281
+
256
282
  // Find current phase
257
283
  let currentIdx = phases.findIndex(p => p.tasks.some(t => t.status === "pending" || t.status === "in_progress"));
258
284
  if (currentIdx === -1) currentIdx = phases.length - 1;
@@ -261,6 +287,14 @@ function formatSummary(phases: TodoPhase[], errors: string[]): string {
261
287
 
262
288
  const lines: string[] = [];
263
289
  if (errors.length > 0) lines.push(`Errors: ${errors.join("; ")}`);
290
+ if (remainingTasks.length === 0) {
291
+ lines.push("Remaining items: none.");
292
+ } else {
293
+ lines.push(`Remaining items (${remainingTasks.length}):`);
294
+ for (const task of remainingTasks) {
295
+ lines.push(` - ${task.id} ${task.content} [${task.status}] (${task.phase})`);
296
+ }
297
+ }
264
298
  lines.push(
265
299
  `Phase ${currentIdx + 1}/${phases.length} "${current.name}" — ${done}/${current.tasks.length} tasks complete`,
266
300
  );
@@ -0,0 +1,29 @@
1
export interface ToolTimeoutConfig {
	/** Default timeout in seconds when agent omits the field */
	default: number;
	/** Minimum allowed timeout in seconds */
	min: number;
	/** Maximum allowed timeout in seconds (per-tool ceiling) */
	max: number;
}

/** Per-tool timeout defaults and bounds, in seconds. */
export const TOOL_TIMEOUTS = {
	bash: { default: 300, min: 1, max: 3600 },
	python: { default: 30, min: 1, max: 600 },
	browser: { default: 30, min: 1, max: 120 },
	ssh: { default: 60, min: 1, max: 3600 },
	fetch: { default: 20, min: 1, max: 45 },
	lsp: { default: 20, min: 5, max: 60 },
} as const satisfies Record<string, ToolTimeoutConfig>;

/** Name of a tool that has an entry in {@link TOOL_TIMEOUTS}. */
export type ToolWithTimeout = keyof typeof TOOL_TIMEOUTS;

/**
 * Clamp a raw timeout to the allowed range for a tool.
 *
 * @param tool - Tool whose limits apply.
 * @param rawTimeout - Requested timeout in seconds. When omitted or `NaN`, the
 * tool's default is used.
 * @returns A timeout in seconds within `[min, max]` for the tool.
 */
export function clampTimeout(tool: ToolWithTimeout, rawTimeout?: number): number {
	const config = TOOL_TIMEOUTS[tool];
	// NaN would pass straight through Math.max/Math.min and be returned as the
	// timeout, so treat it like a missing value.
	const timeout = rawTimeout == null || Number.isNaN(rawTimeout) ? config.default : rawTimeout;
	return Math.max(config.min, Math.min(config.max, timeout));
}
package/src/utils/mime.ts CHANGED
@@ -1,9 +1,42 @@
1
1
  import * as fs from "node:fs/promises";
2
- import { fileTypeFromBuffer } from "file-type";
3
2
 
4
- const IMAGE_MIME_TYPES = new Set(["image/jpeg", "image/png", "image/gif", "image/webp"]);
3
+ const FILE_TYPE_SNIFF_BYTES = 12;
5
4
 
6
- const FILE_TYPE_SNIFF_BYTES = 4100;
5
/**
 * Identify a JPEG, PNG, GIF or WebP image from its leading magic bytes.
 *
 * @param buf - Buffer holding the start of the file.
 * @param bytesRead - Number of valid bytes in `buf`; a signature is only
 * accepted when it lies entirely within this range.
 * @returns The matching image MIME type, or `null` when no signature matches.
 */
function detectMimeFromBytes(buf: Buffer, bytesRead: number): string | null {
	// True when `signature` sits at `offset` and lies entirely inside the bytes read.
	const hasBytesAt = (offset: number, signature: readonly number[]): boolean =>
		bytesRead >= offset + signature.length && signature.every((byte, i) => buf[offset + i] === byte);

	if (hasBytesAt(0, [0xff, 0xd8, 0xff])) {
		return "image/jpeg";
	}
	if (hasBytesAt(0, [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])) {
		return "image/png";
	}
	// "GIF8"
	if (hasBytesAt(0, [0x47, 0x49, 0x46, 0x38])) {
		return "image/gif";
	}
	// "RIFF" at offset 0 and "WEBP" at offset 8.
	if (hasBytesAt(0, [0x52, 0x49, 0x46, 0x46]) && hasBytesAt(8, [0x57, 0x45, 0x42, 0x50])) {
		return "image/webp";
	}
	return null;
}
7
40
 
8
41
  export async function detectSupportedImageMimeTypeFromFile(filePath: string): Promise<string | null> {
9
42
  const fileHandle = await fs.open(filePath, "r");
@@ -13,17 +46,7 @@ export async function detectSupportedImageMimeTypeFromFile(filePath: string): Pr
13
46
  if (bytesRead === 0) {
14
47
  return null;
15
48
  }
16
-
17
- const fileType = await fileTypeFromBuffer(buffer.subarray(0, bytesRead));
18
- if (!fileType) {
19
- return null;
20
- }
21
-
22
- if (!IMAGE_MIME_TYPES.has(fileType.mime)) {
23
- return null;
24
- }
25
-
26
- return fileType.mime;
49
+ return detectMimeFromBytes(buffer, bytesRead);
27
50
  } finally {
28
51
  await fileHandle.close();
29
52
  }
@@ -0,0 +1,172 @@
1
/** Whether the content is a raw template ("pre-render") or already rendered ("post-render"). */
export type PromptRenderPhase = "pre-render" | "post-render";

export interface PromptFormatOptions {
	/** Rendering stage of the content; Handlebars-aware rules only apply to "pre-render". Defaults to "post-render". */
	renderPhase?: PromptRenderPhase;
	/** Replace ASCII sequences such as `->` and `...` with Unicode symbols outside code fences. Defaults to false. */
	replaceAsciiSymbols?: boolean;
	/** Wrap RFC 2119 keywords in `**` outside code fences. Defaults to false. */
	boldRfc2119Keywords?: boolean;
}

// Opening XML tag (not self-closing, not closing)
const OPENING_XML = /^<([a-z_-]+)(?:\s+[^>]*)?>$/;
// Closing XML tag
const CLOSING_XML = /^<\/([a-z_-]+)>$/;
// Handlebars block start: {{#if}}, {{#has}}, {{#list}}, etc.
const OPENING_HBS = /^\{\{#/;
// Handlebars block end: {{/if}}, {{/has}}, {{/list}}, etc.
const CLOSING_HBS = /^\{\{\//;
// List item (- or * or 1.)
const LIST_ITEM = /^(?:[-*]\s|\d+\.\s)/;
// Code fence
const CODE_FENCE = /^```/;
// Table row
const TABLE_ROW = /^\|.*\|$/;
// Table separator (|---|---|)
const TABLE_SEP = /^\|[-:\s|]+\|$/;

/** RFC 2119 keywords used in prompts. */
const RFC2119_KEYWORDS = /\b(?:MUST NOT|SHOULD NOT|SHALL NOT|RECOMMENDED|REQUIRED|OPTIONAL|SHOULD|SHALL|MUST|MAY)\b/g;

/** Wrap each RFC 2119 keyword in `**`, skipping keywords already enclosed in `**`. */
function boldRfc2119Keywords(line: string): string {
	return line.replace(RFC2119_KEYWORDS, (match, offset, source) => {
		const isAlreadyBold =
			source[offset - 2] === "*" &&
			source[offset - 1] === "*" &&
			source[offset + match.length] === "*" &&
			source[offset + match.length + 1] === "*";
		if (isAlreadyBold) {
			return match;
		}
		return `**${match}**`;
	});
}

/** Compact a table row by trimming cell padding */
function compactTableRow(line: string): string {
	const cells = line.split("|");
	return cells.map(c => c.trim()).join("|");
}

/** Compact a table separator row, keeping only the alignment colons of each cell */
function compactTableSep(line: string): string {
	const cells = line.split("|").filter(c => c.trim());
	const normalized = cells.map(c => {
		const trimmed = c.trim();
		const left = trimmed.startsWith(":");
		const right = trimmed.endsWith(":");
		if (left && right) return ":---:";
		if (left) return ":---";
		if (right) return "---:";
		return "---";
	});
	return `|${normalized.join("|")}|`;
}

/** Replace common ASCII sequences with their Unicode equivalents. */
function replaceCommonAsciiSymbols(line: string): string {
	// `<->` is replaced before `->` and `<-` so the longer sequence wins.
	return line
		.replace(/\.{3}/g, "…")
		.replace(/<->/g, "↔")
		.replace(/->/g, "→")
		.replace(/<-/g, "←")
		.replace(/!=/g, "≠")
		.replace(/<=/g, "≤")
		.replace(/>=/g, "≥");
}

/**
 * Normalize prompt text line by line.
 *
 * Lines inside ``` fences are passed through with only trailing whitespace
 * removed. Outside fences the formatter strips trailing whitespace, compacts
 * markdown table rows/separators, de-indents closing tags that match an
 * unindented opening tag, collapses consecutive blank lines, and drops blank
 * lines that precede a list item, follow an opening tag, or precede a closing
 * tag. Trailing blank lines are removed from the result.
 *
 * @param content - Prompt text to format.
 * @param options - Optional switches; see {@link PromptFormatOptions}.
 * @returns The formatted text joined with `\n`.
 */
export function formatPromptContent(content: string, options: PromptFormatOptions = {}): string {
	const {
		renderPhase = "post-render",
		replaceAsciiSymbols = false,
		boldRfc2119Keywords: shouldBoldRfc2119 = false,
	} = options;
	const isPreRender = renderPhase === "pre-render";
	const lines = content.split("\n");
	const result: string[] = [];
	let inCodeBlock = false;
	// Stack of unindented opening tags still awaiting their closing tag.
	const topLevelTags: string[] = [];

	for (let i = 0; i < lines.length; i++) {
		let line = lines[i].trimEnd();
		// Structural tests below run on the untouched text of the line.
		const trimmed = line.trimStart();

		if (CODE_FENCE.test(trimmed)) {
			inCodeBlock = !inCodeBlock;
			result.push(line);
			continue;
		}

		if (inCodeBlock) {
			result.push(line);
			continue;
		}

		if (replaceAsciiSymbols) {
			line = replaceCommonAsciiSymbols(line);
		}

		const isOpeningXml = OPENING_XML.test(trimmed) && !trimmed.endsWith("/>");
		// Only tags with no leading indentation count as top-level.
		if (isOpeningXml && line.length === trimmed.length) {
			const match = OPENING_XML.exec(trimmed);
			if (match) topLevelTags.push(match[1]);
		}

		const closingMatch = CLOSING_XML.exec(trimmed);
		if (closingMatch) {
			const tagName = closingMatch[1];
			if (topLevelTags.length > 0 && topLevelTags[topLevelTags.length - 1] === tagName) {
				// Closing tag of the innermost top-level tag: drop its indentation.
				line = trimmed;
				topLevelTags.pop();
			}
		} else if (isPreRender && trimmed.startsWith("{{")) {
			// Template expressions are emitted verbatim (unindented, no symbol replacement).
			line = trimmed;
		} else if (TABLE_SEP.test(trimmed)) {
			line = compactTableSep(trimmed);
		} else if (TABLE_ROW.test(trimmed)) {
			// Compact the current line rather than `trimmed` so that symbol
			// replacement applied above is kept for table cells.
			line = compactTableRow(line.trimStart());
		}

		if (shouldBoldRfc2119) {
			line = boldRfc2119Keywords(line);
		}

		const isBlank = trimmed === "";
		if (isBlank) {
			const prevLine = result[result.length - 1]?.trim() ?? "";
			const nextLine = lines[i + 1]?.trim() ?? "";

			if (LIST_ITEM.test(nextLine)) {
				continue;
			}

			if (OPENING_XML.test(prevLine) || (isPreRender && OPENING_HBS.test(prevLine))) {
				continue;
			}

			if (CLOSING_XML.test(nextLine) || (isPreRender && CLOSING_HBS.test(nextLine))) {
				continue;
			}

			// Collapses runs of blank lines and drops blank lines at the very start.
			const prevIsBlank = prevLine === "";
			if (prevIsBlank) {
				continue;
			}
		}

		// A closing tag/block must directly follow content: remove blank lines emitted before it.
		if (CLOSING_XML.test(trimmed) || (isPreRender && CLOSING_HBS.test(trimmed))) {
			while (result.length > 0 && result[result.length - 1].trim() === "") {
				result.pop();
			}
		}

		result.push(line);
	}

	while (result.length > 0 && result[result.length - 1].trim() === "") {
		result.pop();
	}

	return result.join("\n");
}
@@ -1,4 +1,4 @@
1
- import { parse as parseHtml } from "node-html-parser";
1
+ import { parseHTML } from "linkedom";
2
2
  import type { RenderResult, SpecialHandler } from "./types";
3
3
  import { buildResult, loadPage } from "./types";
4
4
  import { convertWithMarkitdown, fetchBinary } from "./utils";
@@ -31,22 +31,22 @@ export const handleArxiv: SpecialHandler = async (
31
31
  if (!result.ok) return null;
32
32
 
33
33
  // Parse the Atom feed response
34
- const doc = parseHtml(result.content, { parseNoneClosedTags: true });
34
+ const doc = parseHTML(result.content).document;
35
35
  const entry = doc.querySelector("entry");
36
36
 
37
37
  if (!entry) return null;
38
38
 
39
- const title = entry.querySelector("title")?.text?.trim()?.replace(/\s+/g, " ");
40
- const summary = entry.querySelector("summary")?.text?.trim();
41
- const authors = entry
42
- .querySelectorAll("author name")
43
- .map(n => n.text?.trim())
44
- .filter(Boolean);
45
- const published = entry.querySelector("published")?.text?.trim()?.split("T")[0];
46
- const categories = entry
47
- .querySelectorAll("category")
39
+ const title = entry.querySelector("title")?.textContent?.trim()?.replace(/\s+/g, " ");
40
+ const summary = entry.querySelector("summary")?.textContent?.trim();
41
+ const authors = Array.from(entry.querySelectorAll("author name") as Iterable<{ textContent: string | null }>)
42
+ .map(n => n.textContent?.trim())
43
+ .filter((name): name is string => Boolean(name));
44
+ const published = entry.querySelector("published")?.textContent?.trim()?.split("T")[0];
45
+ const categories = Array.from(
46
+ entry.querySelectorAll("category") as Iterable<{ getAttribute: (name: string) => string | null }>,
47
+ )
48
48
  .map(c => c.getAttribute("term"))
49
- .filter(Boolean);
49
+ .filter((term): term is string => Boolean(term));
50
50
  const pdfLink = entry.querySelector('link[title="pdf"]')?.getAttribute("href");
51
51
 
52
52
  let md = `# ${title || "arXiv Paper"}\n\n`;
@@ -1,5 +1,5 @@
1
1
  import { tryParseJson } from "@oh-my-pi/pi-utils";
2
- import { parse as parseHtml } from "node-html-parser";
2
+ import { parseHTML } from "linkedom";
3
3
  import type { RenderResult, SpecialHandler } from "./types";
4
4
  import { buildResult, htmlToBasicMarkdown, loadPage } from "./types";
5
5
 
@@ -97,7 +97,7 @@ export const handleGoPkg: SpecialHandler = async (
97
97
  });
98
98
  }
99
99
 
100
- const doc = parseHtml(pageResult.content);
100
+ const doc = parseHTML(pageResult.content).document;
101
101
 
102
102
  // Extract actual module path from breadcrumb or header
103
103
  const breadcrumb = doc.querySelector(".go-Breadcrumb");