@oh-my-pi/pi-coding-agent 13.3.6 → 13.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/CHANGELOG.md +115 -0
  2. package/package.json +9 -18
  3. package/scripts/format-prompts.ts +7 -172
  4. package/src/capability/mcp.ts +5 -0
  5. package/src/cli/args.ts +1 -0
  6. package/src/config/prompt-templates.ts +9 -55
  7. package/src/config/settings-schema.ts +24 -0
  8. package/src/discovery/builtin.ts +1 -0
  9. package/src/discovery/codex.ts +1 -2
  10. package/src/discovery/helpers.ts +0 -5
  11. package/src/discovery/mcp-json.ts +2 -0
  12. package/src/internal-urls/docs-index.generated.ts +1 -1
  13. package/src/lsp/client.ts +8 -0
  14. package/src/lsp/config.ts +2 -3
  15. package/src/lsp/index.ts +379 -99
  16. package/src/lsp/render.ts +21 -31
  17. package/src/lsp/types.ts +21 -8
  18. package/src/lsp/utils.ts +193 -1
  19. package/src/mcp/config-writer.ts +3 -0
  20. package/src/mcp/config.ts +1 -0
  21. package/src/mcp/oauth-flow.ts +3 -1
  22. package/src/mcp/types.ts +5 -0
  23. package/src/modes/components/settings-defs.ts +9 -0
  24. package/src/modes/components/status-line.ts +1 -1
  25. package/src/modes/controllers/mcp-command-controller.ts +6 -2
  26. package/src/modes/interactive-mode.ts +8 -1
  27. package/src/modes/theme/mermaid-cache.ts +4 -4
  28. package/src/modes/theme/theme.ts +33 -0
  29. package/src/prompts/system/custom-system-prompt.md +0 -10
  30. package/src/prompts/system/subagent-user-prompt.md +2 -0
  31. package/src/prompts/system/system-prompt.md +12 -9
  32. package/src/prompts/tools/ast-find.md +20 -0
  33. package/src/prompts/tools/ast-replace.md +21 -0
  34. package/src/prompts/tools/bash.md +2 -0
  35. package/src/prompts/tools/hashline.md +26 -8
  36. package/src/prompts/tools/lsp.md +22 -5
  37. package/src/prompts/tools/task.md +0 -1
  38. package/src/sdk.ts +11 -5
  39. package/src/session/agent-session.ts +293 -83
  40. package/src/system-prompt.ts +3 -34
  41. package/src/task/executor.ts +8 -7
  42. package/src/task/index.ts +8 -55
  43. package/src/task/template.ts +2 -4
  44. package/src/task/types.ts +0 -5
  45. package/src/task/worktree.ts +6 -2
  46. package/src/tools/ast-find.ts +316 -0
  47. package/src/tools/ast-replace.ts +294 -0
  48. package/src/tools/bash.ts +2 -1
  49. package/src/tools/browser.ts +2 -8
  50. package/src/tools/fetch.ts +55 -18
  51. package/src/tools/index.ts +8 -0
  52. package/src/tools/jtd-to-json-schema.ts +29 -13
  53. package/src/tools/path-utils.ts +34 -0
  54. package/src/tools/python.ts +2 -1
  55. package/src/tools/renderers.ts +4 -0
  56. package/src/tools/ssh.ts +2 -1
  57. package/src/tools/submit-result.ts +143 -44
  58. package/src/tools/todo-write.ts +34 -0
  59. package/src/tools/tool-timeouts.ts +29 -0
  60. package/src/utils/mime.ts +37 -14
  61. package/src/utils/prompt-format.ts +172 -0
  62. package/src/web/scrapers/arxiv.ts +12 -12
  63. package/src/web/scrapers/go-pkg.ts +2 -2
  64. package/src/web/scrapers/iacr.ts +17 -9
  65. package/src/web/scrapers/readthedocs.ts +3 -3
  66. package/src/web/scrapers/twitter.ts +11 -11
  67. package/src/web/scrapers/wikipedia.ts +4 -5
  68. package/src/utils/ignore-files.ts +0 -119
@@ -4,6 +4,7 @@
4
4
  * Subagents must call this tool to finish and return structured JSON output.
5
5
  */
6
6
  import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallback } from "@oh-my-pi/pi-agent-core";
7
+ import { enforceStrictSchema, sanitizeSchemaForStrictMode } from "@oh-my-pi/pi-ai/utils/typebox-helpers";
7
8
  import type { Static, TSchema } from "@sinclair/typebox";
8
9
  import { Type } from "@sinclair/typebox";
9
10
  import Ajv, { type ErrorObject, type ValidateFunction } from "ajv";
@@ -51,6 +52,53 @@ function formatAjvErrors(errors: ErrorObject[] | null | undefined): string {
51
52
  .join("; ");
52
53
  }
53
54
 
55
/**
 * Resolve local `$ref` references in a JSON Schema by inlining their definitions.
 *
 * Definitions are collected from the ROOT schema's `$defs` and `definitions`
 * containers only; when both declare the same name, `$defs` wins. A reference
 * of the form `#/$defs/<name>` or `#/definitions/<name>` is replaced by a
 * resolved copy of the matching definition, and any keywords written next to
 * that `$ref` are dropped. `<name>` is matched literally first and then with
 * JSON Pointer escapes decoded (`~1` -> `/`, `~0` -> `~`).
 *
 * `$defs` / `definitions` keywords are removed from every schema object in the
 * output. Keys inside `properties` / `patternProperties` are property names,
 * not keywords, so a property that happens to be called `definitions` or
 * `$defs` is preserved.
 *
 * A definition that refers back to itself (directly or through other
 * definitions) is cut off with an empty schema `{}` at the point of recursion.
 * References that match no collected definition are left untouched.
 *
 * @param schema - JSON Schema object to process; it is not mutated.
 * @returns The same `schema` object when the root declares no definitions,
 *   otherwise a new schema with the references inlined.
 */
function resolveSchemaRefs(schema: Record<string, unknown>): Record<string, unknown> {
	const isPlainObject = (value: unknown): value is Record<string, unknown> =>
		value !== null && typeof value === "object" && !Array.isArray(value);
	const isDefinitionContainer = (key: string): boolean => key === "$defs" || key === "definitions";
	const isNameMapKeyword = (key: string): boolean => key === "properties" || key === "patternProperties";
	const decodePointerToken = (token: string): string => token.replace(/~1/g, "/").replace(/~0/g, "~");

	// A Map rather than a plain object: names such as "constructor" or
	// "__proto__" must never be answered by Object.prototype.
	const defs = new Map<string, Record<string, unknown>>();
	// `definitions` is read first so that `$defs` overrides it on a name clash.
	for (const container of [schema.definitions, schema.$defs]) {
		if (!isPlainObject(container)) continue;
		for (const [name, def] of Object.entries(container)) {
			if (isPlainObject(def)) defs.set(name, def);
		}
	}
	if (defs.size === 0) return schema;

	// Names of the definitions currently being expanded; used to break cycles.
	const inlining = new Set<string>();

	/** Inline references inside a value that sits in schema position. */
	function inlineSchema(node: unknown): unknown {
		if (Array.isArray(node)) return node.map(item => inlineSchema(item));
		if (!isPlainObject(node)) return node;

		const ref = node.$ref;
		if (typeof ref === "string") {
			const token = /^#\/(?:\$defs|definitions)\/(.+)$/.exec(ref)?.[1];
			if (token !== undefined) {
				const name = defs.has(token) ? token : decodePointerToken(token);
				const def = defs.get(name);
				if (def !== undefined) {
					if (inlining.has(name)) return {};
					inlining.add(name);
					const resolved = inlineSchema(def);
					inlining.delete(name);
					return resolved;
				}
			}
		}

		const entries: Array<[string, unknown]> = [];
		for (const [key, value] of Object.entries(node)) {
			if (isDefinitionContainer(key)) continue;
			const inlined = isNameMapKeyword(key) && isPlainObject(value) ? inlineNameMap(value) : inlineSchema(value);
			entries.push([key, inlined]);
		}
		// Object.fromEntries defines own properties, so a "__proto__" key stays data.
		return Object.fromEntries(entries);
	}

	/** Inline references in every sub-schema of a name -> schema map, keeping all names. */
	function inlineNameMap(map: Record<string, unknown>): Record<string, unknown> {
		const entries: Array<[string, unknown]> = [];
		for (const [name, subSchema] of Object.entries(map)) {
			entries.push([name, inlineSchema(subSchema)]);
		}
		return Object.fromEntries(entries);
	}

	return inlineSchema(schema) as Record<string, unknown>;
}
101
+
54
102
  export class SubmitResultTool implements AgentTool<TSchema, SubmitResultDetails> {
55
103
  readonly name = "submit_result";
56
104
  readonly label = "Submit Result";
@@ -58,51 +106,96 @@ export class SubmitResultTool implements AgentTool<TSchema, SubmitResultDetails>
58
106
  "Finish the task with structured JSON output. Call exactly once at the end of the task.\n\n" +
59
107
  "If you cannot complete the task, call with an error message payload.";
60
108
  readonly parameters: TSchema;
61
- readonly strict = true;
109
+ strict = true;
110
+ lenientArgValidation = true;
62
111
 
63
112
  readonly #validate?: ValidateFunction;
64
- readonly #schemaError?: string;
113
+ #schemaValidationFailures = 0;
65
114
 
66
115
  constructor(session: ToolSession) {
67
- const schemaResult = normalizeSchema(session.outputSchema);
68
- // Convert JTD to JSON Schema if needed (auto-detected)
69
- const normalizedSchema =
70
- schemaResult.normalized !== undefined ? jtdToJsonSchema(schemaResult.normalized) : undefined;
71
- let schemaError = schemaResult.error;
72
-
73
- if (normalizedSchema !== undefined && !schemaError) {
74
- try {
75
- this.#validate = ajv.compile(normalizedSchema as any);
76
- } catch (err) {
77
- schemaError = err instanceof Error ? err.message : String(err);
116
+ const createParameters = (dataSchema: TSchema): TSchema =>
117
+ Type.Object(
118
+ {
119
+ result: Type.Union([
120
+ Type.Object({ data: dataSchema }, { description: "Successfully completed the task" }),
121
+ Type.Object({
122
+ error: Type.String({ description: "Error message when the task cannot be completed" }),
123
+ }),
124
+ ]),
125
+ },
126
+ {
127
+ additionalProperties: false,
128
+ description: "Submit either `data` for success or `error` for failure",
129
+ },
130
+ ) as TSchema;
131
+
132
+ let validate: ValidateFunction | undefined;
133
+ let dataSchema: TSchema;
134
+ let parameters: TSchema;
135
+ let strict = true;
136
+
137
+ try {
138
+ const schemaResult = normalizeSchema(session.outputSchema);
139
+ // Convert JTD to JSON Schema if needed (auto-detected)
140
+ const normalizedSchema =
141
+ schemaResult.normalized !== undefined ? jtdToJsonSchema(schemaResult.normalized) : undefined;
142
+ let schemaError = schemaResult.error;
143
+
144
+ if (!schemaError && normalizedSchema === false) {
145
+ schemaError = "boolean false schema rejects all outputs";
146
+ }
147
+
148
+ if (normalizedSchema !== undefined && normalizedSchema !== false && !schemaError) {
149
+ try {
150
+ validate = ajv.compile(normalizedSchema as Record<string, unknown> | boolean);
151
+ } catch (err) {
152
+ schemaError = err instanceof Error ? err.message : String(err);
153
+ }
78
154
  }
155
+
156
+ const schemaHint = formatSchema(normalizedSchema ?? session.outputSchema);
157
+ const schemaDescription = schemaError
158
+ ? `Structured JSON output (output schema invalid; accepting unconstrained object): ${schemaError}`
159
+ : `Structured output matching the schema:\n${schemaHint}`;
160
+ const sanitizedSchema =
161
+ !schemaError &&
162
+ normalizedSchema != null &&
163
+ typeof normalizedSchema === "object" &&
164
+ !Array.isArray(normalizedSchema)
165
+ ? sanitizeSchemaForStrictMode(normalizedSchema as Record<string, unknown>)
166
+ : !schemaError && normalizedSchema === true
167
+ ? {}
168
+ : undefined;
169
+
170
+ if (sanitizedSchema !== undefined) {
171
+ const resolved = resolveSchemaRefs({
172
+ ...sanitizedSchema,
173
+ description: schemaDescription,
174
+ });
175
+ dataSchema = Type.Unsafe(resolved);
176
+ } else {
177
+ dataSchema = Type.Record(Type.String(), Type.Any(), {
178
+ description: schemaError ? schemaDescription : "Structured JSON output (no schema specified)",
179
+ });
180
+ }
181
+ parameters = createParameters(dataSchema);
182
+ const strictParameters = enforceStrictSchema(parameters as unknown as Record<string, unknown>);
183
+ JSON.stringify(strictParameters);
184
+ // Verify the final parameters compile with AJV (catches unresolved $ref, etc.)
185
+ ajv.compile(parameters as Record<string, unknown>);
186
+ } catch (err) {
187
+ const errorMsg = err instanceof Error ? err.message : String(err);
188
+ dataSchema = Type.Record(Type.String(), Type.Any(), {
189
+ description: `Structured JSON output (schema processing failed: ${errorMsg})`,
190
+ });
191
+ parameters = createParameters(dataSchema);
192
+ validate = undefined;
193
+ strict = false;
79
194
  }
80
195
 
81
- this.#schemaError = schemaError;
82
-
83
- const schemaHint = formatSchema(normalizedSchema ?? session.outputSchema);
84
-
85
- // Use actual schema if provided, otherwise fall back to Type.Any
86
- // Merge description into the JSON schema for better tool documentation
87
- const dataSchema = normalizedSchema
88
- ? Type.Unsafe({
89
- ...(normalizedSchema as object),
90
- description: `Structured output matching the schema:\n${schemaHint}`,
91
- })
92
- : Type.Record(Type.String(), Type.Any(), { description: "Structured JSON output (no schema specified)" });
93
-
94
- this.parameters = Type.Object(
95
- {
96
- result: Type.Union([
97
- Type.Object({ data: dataSchema }, { description: "Successfully completed the task" }),
98
- Type.Object({ error: Type.String({ description: "Error message when the task cannot be completed" }) }),
99
- ]),
100
- },
101
- {
102
- additionalProperties: false,
103
- description: "Submit either `data` for success or `error` for failure",
104
- },
105
- );
196
+ this.#validate = validate;
197
+ this.parameters = parameters;
198
+ this.strict = strict;
106
199
  }
107
200
 
108
201
  async execute(
@@ -130,20 +223,26 @@ export class SubmitResultTool implements AgentTool<TSchema, SubmitResultDetails>
130
223
  }
131
224
 
132
225
  const status = errorMessage !== undefined ? "aborted" : "success";
226
+ let schemaValidationOverridden = false;
133
227
  if (status === "success") {
134
228
  if (data === undefined || data === null) {
135
229
  throw new Error("data is required when submit_result indicates success");
136
230
  }
137
- if (this.#schemaError) {
138
- throw new Error(`Invalid output schema: ${this.#schemaError}`);
139
- }
140
231
  if (this.#validate && !this.#validate(data)) {
141
- throw new Error(`Output does not match schema: ${formatAjvErrors(this.#validate.errors)}`);
232
+ this.#schemaValidationFailures++;
233
+ if (this.#schemaValidationFailures <= 1) {
234
+ throw new Error(`Output does not match schema: ${formatAjvErrors(this.#validate.errors)}`);
235
+ }
236
+ schemaValidationOverridden = true;
142
237
  }
143
238
  }
144
239
 
145
- const responseText = status === "aborted" ? `Task aborted: ${errorMessage}` : "Result submitted.";
146
-
240
+ const responseText =
241
+ status === "aborted"
242
+ ? `Task aborted: ${errorMessage}`
243
+ : schemaValidationOverridden
244
+ ? `Result submitted (schema validation overridden after ${this.#schemaValidationFailures} failed attempt(s)).`
245
+ : "Result submitted.";
147
246
  return {
148
247
  content: [{ type: "text", text: responseText }],
149
248
  details: { data, status, error: errorMessage },
@@ -161,6 +161,23 @@ function clonePhases(phases: TodoPhase[]): TodoPhase[] {
161
161
  return phases.map(phase => ({ ...phase, tasks: phase.tasks.map(task => ({ ...task })) }));
162
162
  }
163
163
 
164
/**
 * Enforce that at most one task across all phases is "in_progress".
 *
 * Tasks are visited in phase order, then task order. The first "in_progress"
 * task is kept and every later one is reset to "pending". When no task is
 * "in_progress", the first "pending" task (if any) is promoted instead.
 * The task objects are mutated in place.
 */
function normalizeInProgressTask(phases: TodoPhase[]): void {
	let hasActiveTask = false;
	let earliestPending: TodoPhase["tasks"][number] | undefined;

	for (const phase of phases) {
		for (const task of phase.tasks) {
			if (task.status === "in_progress") {
				if (hasActiveTask) {
					// Only the first in-progress task keeps its status.
					task.status = "pending";
				} else {
					hasActiveTask = true;
				}
			} else if (task.status === "pending" && earliestPending === undefined) {
				earliestPending = task;
			}
		}
	}

	if (!hasActiveTask && earliestPending !== undefined) {
		earliestPending.status = "in_progress";
	}
}
180
+
164
181
  export function getLatestTodoPhasesFromEntries(entries: SessionEntry[]): TodoPhase[] {
165
182
  for (let i = entries.length - 1; i >= 0; i--) {
166
183
  const entry = entries[i];
@@ -246,6 +263,7 @@ function applyOps(file: TodoFile, ops: TodoWriteParams["ops"]): { file: TodoFile
246
263
  }
247
264
  }
248
265
 
266
+ normalizeInProgressTask(file.phases);
249
267
  return { file, errors };
250
268
  }
251
269
 
@@ -253,6 +271,14 @@ function formatSummary(phases: TodoPhase[], errors: string[]): string {
253
271
  const tasks = phases.flatMap(p => p.tasks);
254
272
  if (tasks.length === 0) return errors.length > 0 ? `Errors: ${errors.join("; ")}` : "Todo list cleared.";
255
273
 
274
+ const remainingByPhase = phases
275
+ .map(phase => ({
276
+ name: phase.name,
277
+ tasks: phase.tasks.filter(task => task.status === "pending" || task.status === "in_progress"),
278
+ }))
279
+ .filter(phase => phase.tasks.length > 0);
280
+ const remainingTasks = remainingByPhase.flatMap(phase => phase.tasks.map(task => ({ ...task, phase: phase.name })));
281
+
256
282
  // Find current phase
257
283
  let currentIdx = phases.findIndex(p => p.tasks.some(t => t.status === "pending" || t.status === "in_progress"));
258
284
  if (currentIdx === -1) currentIdx = phases.length - 1;
@@ -261,6 +287,14 @@ function formatSummary(phases: TodoPhase[], errors: string[]): string {
261
287
 
262
288
  const lines: string[] = [];
263
289
  if (errors.length > 0) lines.push(`Errors: ${errors.join("; ")}`);
290
+ if (remainingTasks.length === 0) {
291
+ lines.push("Remaining items: none.");
292
+ } else {
293
+ lines.push(`Remaining items (${remainingTasks.length}):`);
294
+ for (const task of remainingTasks) {
295
+ lines.push(` - ${task.id} ${task.content} [${task.status}] (${task.phase})`);
296
+ }
297
+ }
264
298
  lines.push(
265
299
  `Phase ${currentIdx + 1}/${phases.length} "${current.name}" — ${done}/${current.tasks.length} tasks complete`,
266
300
  );
@@ -0,0 +1,29 @@
1
/** Timeout limits for a single tool. All values are in seconds. */
export interface ToolTimeoutConfig {
	/** Default timeout in seconds when agent omits the field */
	default: number;
	/** Minimum allowed timeout in seconds */
	min: number;
	/** Maximum allowed timeout in seconds (per-tool ceiling) */
	max: number;
}

/** Timeout limits per tool, keyed by tool name. */
export const TOOL_TIMEOUTS = {
	bash: { default: 300, min: 1, max: 3600 },
	python: { default: 30, min: 1, max: 600 },
	browser: { default: 30, min: 1, max: 120 },
	ssh: { default: 60, min: 1, max: 3600 },
	fetch: { default: 20, min: 1, max: 45 },
	lsp: { default: 20, min: 5, max: 60 },
} as const satisfies Record<string, ToolTimeoutConfig>;

/** Name of a tool that has an entry in `TOOL_TIMEOUTS`. */
export type ToolWithTimeout = keyof typeof TOOL_TIMEOUTS;

/**
 * Clamp a raw timeout to the allowed range for a tool.
 *
 * @param tool - Tool whose limits apply.
 * @param rawTimeout - Requested timeout in seconds. When it is omitted
 *   (`undefined`/`null`) or `NaN`, the tool's default is used instead.
 * @returns A timeout within `[min, max]` for the tool; `±Infinity` is clamped
 *   to the nearest bound.
 */
export function clampTimeout(tool: ToolWithTimeout, rawTimeout?: number): number {
	const { default: fallback, min, max } = TOOL_TIMEOUTS[tool];
	const requested = rawTimeout ?? fallback;
	// Math.min/Math.max propagate NaN, so it would otherwise escape the clamp.
	const timeout = Number.isNaN(requested) ? fallback : requested;
	return Math.max(min, Math.min(max, timeout));
}
package/src/utils/mime.ts CHANGED
@@ -1,9 +1,42 @@
1
1
  import * as fs from "node:fs/promises";
2
- import { fileTypeFromBuffer } from "file-type";
3
2
 
4
- const IMAGE_MIME_TYPES = new Set(["image/jpeg", "image/png", "image/gif", "image/webp"]);
3
+ const FILE_TYPE_SNIFF_BYTES = 12;
5
4
 
6
- const FILE_TYPE_SNIFF_BYTES = 4100;
5
/**
 * Map the leading bytes of a file to one of four image MIME types.
 *
 * @param buf - Buffer holding the first bytes of the file.
 * @param bytesRead - Number of bytes in `buf` that were actually read; a
 *   signature is only considered when at least that many bytes are available.
 * @returns "image/jpeg", "image/png", "image/gif" or "image/webp" when the
 *   corresponding byte signature matches, otherwise `null`.
 */
function detectMimeFromBytes(buf: Buffer, bytesRead: number): string | null {
	const matchesAt = (offset: number, expected: readonly number[]): boolean =>
		expected.every((byte, index) => buf[offset + index] === byte);

	// FF D8 FF
	if (bytesRead >= 3 && matchesAt(0, [0xff, 0xd8, 0xff])) {
		return "image/jpeg";
	}

	// 89 "PNG" 0D 0A 1A 0A
	if (bytesRead >= 8 && matchesAt(0, [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])) {
		return "image/png";
	}

	// "GIF8"
	if (bytesRead >= 4 && matchesAt(0, [0x47, 0x49, 0x46, 0x38])) {
		return "image/gif";
	}

	// "RIFF" at offset 0 and "WEBP" at offset 8; bytes 4-7 are not inspected.
	if (bytesRead >= 12 && matchesAt(0, [0x52, 0x49, 0x46, 0x46]) && matchesAt(8, [0x57, 0x45, 0x42, 0x50])) {
		return "image/webp";
	}

	return null;
}
7
40
 
8
41
  export async function detectSupportedImageMimeTypeFromFile(filePath: string): Promise<string | null> {
9
42
  const fileHandle = await fs.open(filePath, "r");
@@ -13,17 +46,7 @@ export async function detectSupportedImageMimeTypeFromFile(filePath: string): Pr
13
46
  if (bytesRead === 0) {
14
47
  return null;
15
48
  }
16
-
17
- const fileType = await fileTypeFromBuffer(buffer.subarray(0, bytesRead));
18
- if (!fileType) {
19
- return null;
20
- }
21
-
22
- if (!IMAGE_MIME_TYPES.has(fileType.mime)) {
23
- return null;
24
- }
25
-
26
- return fileType.mime;
49
+ return detectMimeFromBytes(buffer, bytesRead);
27
50
  } finally {
28
51
  await fileHandle.close();
29
52
  }
@@ -0,0 +1,172 @@
1
/**
 * Selects how `formatPromptContent` treats Handlebars syntax: only
 * "pre-render" recognizes lines starting with `{{` as block structure.
 */
export type PromptRenderPhase = "pre-render" | "post-render";

/** Options accepted by `formatPromptContent`; every field is optional. */
export interface PromptFormatOptions {
	/** Formatting phase; `formatPromptContent` defaults it to "post-render". */
	renderPhase?: PromptRenderPhase;
	/** Replace ASCII sequences such as `->` or `...` with Unicode symbols; defaults to false. */
	replaceAsciiSymbols?: boolean;
	/** Wrap RFC 2119 keywords (MUST, SHOULD, ...) in `**`; defaults to false. */
	boldRfc2119Keywords?: boolean;
}

// Opening XML tag (not closing). Self-closing tags also match this pattern;
// callers that care exclude them with an explicit `/>` check.
const OPENING_XML = /^<([a-z_-]+)(?:\s+[^>]*)?>$/;
// Closing XML tag
const CLOSING_XML = /^<\/([a-z_-]+)>$/;
// Handlebars block start: {{#if}}, {{#has}}, {{#list}}, etc.
const OPENING_HBS = /^\{\{#/;
// Handlebars block end: {{/if}}, {{/has}}, {{/list}}, etc.
const CLOSING_HBS = /^\{\{\//;
// List item (- or * or 1.)
const LIST_ITEM = /^(?:[-*]\s|\d+\.\s)/;
// Code fence
const CODE_FENCE = /^```/;
// Table row
const TABLE_ROW = /^\|.*\|$/;
// Table separator (|---|---|)
const TABLE_SEP = /^\|[-:\s|]+\|$/;
25
+
26
/**
 * RFC 2119 keywords used in prompts. Two-word forms are listed before their
 * one-word prefixes so that "MUST NOT" is matched as a single keyword.
 */
const RFC2119_KEYWORDS = /\b(?:MUST NOT|SHOULD NOT|SHALL NOT|RECOMMENDED|REQUIRED|OPTIONAL|SHOULD|SHALL|MUST|MAY)\b/g;

/**
 * Wrap every upper-case RFC 2119 keyword in `line` with `**`.
 * A keyword that already has `**` directly before and directly after it is
 * left untouched.
 */
function boldRfc2119Keywords(line: string): string {
	return line.replace(RFC2119_KEYWORDS, (keyword: string, start: number, whole: string) => {
		const end = start + keyword.length;
		// endsWith(x, start) inspects the text just before the keyword,
		// startsWith(x, end) the text just after it.
		const wrapped = whole.endsWith("**", start) && whole.startsWith("**", end);
		return wrapped ? keyword : `**${keyword}**`;
	});
}
42
+
43
/**
 * Strip the padding around every cell of a `|`-delimited table row.
 * The number and position of the `|` separators are preserved.
 */
function compactTableRow(line: string): string {
	const compacted: string[] = [];
	for (const cell of line.split("|")) {
		compacted.push(cell.trim());
	}
	return compacted.join("|");
}
48
+
49
/**
 * Rewrite a table separator row using the shortest marker per column:
 * `---`, `:---`, `---:` or `:---:` depending on which side carries a colon.
 * Cells that are empty after trimming are ignored.
 */
function compactTableSep(line: string): string {
	const markers: string[] = [];
	for (const rawCell of line.split("|")) {
		const cell = rawCell.trim();
		if (cell === "") continue;
		const leading = cell.startsWith(":") ? ":" : "";
		const trailing = cell.endsWith(":") ? ":" : "";
		markers.push(`${leading}---${trailing}`);
	}
	return `|${markers.join("|")}|`;
}
63
+
64
/**
 * Replace common ASCII sequences with their Unicode counterparts.
 * Substitutions run in the listed order, so `<->` is consumed before the
 * shorter `->` and `<-` patterns can split it.
 */
function replaceCommonAsciiSymbols(line: string): string {
	const substitutions: ReadonlyArray<readonly [RegExp, string]> = [
		[/\.{3}/g, "…"],
		[/<->/g, "↔"],
		[/->/g, "→"],
		[/<-/g, "←"],
		[/!=/g, "≠"],
		[/<=/g, "≤"],
		[/>=/g, "≥"],
	];
	let text = line;
	for (const [pattern, symbol] of substitutions) {
		text = text.replace(pattern, symbol);
	}
	return text;
}
74
+
75
/**
 * Normalize the layout of prompt text (markdown mixed with XML-style tags
 * and, in the "pre-render" phase, Handlebars blocks).
 *
 * Fenced code blocks (lines starting with ``` after indentation, and
 * everything between them) only lose trailing whitespace. For every other
 * line:
 * - trailing whitespace is removed;
 * - when `replaceAsciiSymbols` is set, ASCII sequences are replaced by Unicode
 *   symbols, including inside table rows;
 * - a closing XML tag that matches the most recent unindented opening tag is
 *   moved to column 0;
 * - in "pre-render", a line starting with `{{` is moved to column 0 and is
 *   emitted without symbol replacement;
 * - table separator rows and table rows are compacted;
 * - when `boldRfc2119Keywords` is set, RFC 2119 keywords are wrapped in `**`.
 *
 * A blank line is dropped when the next line is a list item, when the
 * previously emitted line opens an XML tag (or, in "pre-render", a Handlebars
 * block), when the next line closes one, or when the previously emitted line
 * is also blank. Blank lines directly before a closing tag/block and at the
 * end of the text are removed as well.
 *
 * @param content - Prompt text; lines are separated by "\n".
 * @param options - See `PromptFormatOptions`; defaults are "post-render" with
 *   both transformations disabled.
 * @returns The formatted text, joined with "\n" and without trailing blank lines.
 */
export function formatPromptContent(content: string, options: PromptFormatOptions = {}): string {
	const {
		renderPhase = "post-render",
		replaceAsciiSymbols = false,
		boldRfc2119Keywords: shouldBoldRfc2119 = false,
	} = options;
	const isPreRender = renderPhase === "pre-render";

	const opensBlock = (text: string): boolean => OPENING_XML.test(text) || (isPreRender && OPENING_HBS.test(text));
	const closesBlock = (text: string): boolean => CLOSING_XML.test(text) || (isPreRender && CLOSING_HBS.test(text));

	const lines = content.split("\n");
	const result: string[] = [];
	const topLevelTags: string[] = [];
	let inCodeBlock = false;

	const dropTrailingBlankLines = (): void => {
		while (result.length > 0 && result[result.length - 1].trim() === "") {
			result.pop();
		}
	};

	for (let i = 0; i < lines.length; i++) {
		const raw = lines[i].trimEnd();
		const trimmed = raw.trimStart();

		if (CODE_FENCE.test(trimmed)) {
			inCodeBlock = !inCodeBlock;
			result.push(raw);
			continue;
		}

		if (inCodeBlock) {
			result.push(raw);
			continue;
		}

		// Structure is always detected on the raw text. Symbol replacement can
		// change the line length, so it must not take part in the
		// indentation check below.
		const replaced = replaceAsciiSymbols ? replaceCommonAsciiSymbols(raw) : raw;
		const isIndented = raw.length !== trimmed.length;

		const openingMatch = trimmed.endsWith("/>") ? null : OPENING_XML.exec(trimmed);
		if (openingMatch && !isIndented) {
			topLevelTags.push(openingMatch[1]);
		}

		let line: string;
		const closingMatch = CLOSING_XML.exec(trimmed);
		if (closingMatch) {
			if (topLevelTags.length > 0 && topLevelTags[topLevelTags.length - 1] === closingMatch[1]) {
				// Closes the innermost top-level tag: align it with its opener.
				line = trimmed;
				topLevelTags.pop();
			} else {
				line = replaced;
			}
		} else if (isPreRender && trimmed.startsWith("{{")) {
			// Template lines are emitted verbatim (dedented, no symbol replacement).
			line = trimmed;
		} else if (TABLE_SEP.test(trimmed)) {
			line = compactTableSep(trimmed);
		} else if (TABLE_ROW.test(trimmed)) {
			// Compact the replaced text so table cells get the same symbol
			// replacement as ordinary lines.
			line = compactTableRow(replaced.trimStart());
		} else {
			line = replaced;
		}

		if (shouldBoldRfc2119) {
			line = boldRfc2119Keywords(line);
		}

		if (trimmed === "") {
			const prevLine = result[result.length - 1]?.trim() ?? "";
			const nextLine = lines[i + 1]?.trim() ?? "";

			if (LIST_ITEM.test(nextLine)) continue;
			if (opensBlock(prevLine)) continue;
			if (closesBlock(nextLine)) continue;
			// Collapse runs of blank lines; this also drops leading blank lines.
			if (prevLine === "") continue;
		}

		if (closesBlock(trimmed)) {
			dropTrailingBlankLines();
		}

		result.push(line);
	}

	dropTrailingBlankLines();

	return result.join("\n");
}
@@ -1,4 +1,4 @@
1
- import { parse as parseHtml } from "node-html-parser";
1
+ import { parseHTML } from "linkedom";
2
2
  import type { RenderResult, SpecialHandler } from "./types";
3
3
  import { buildResult, loadPage } from "./types";
4
4
  import { convertWithMarkitdown, fetchBinary } from "./utils";
@@ -31,22 +31,22 @@ export const handleArxiv: SpecialHandler = async (
31
31
  if (!result.ok) return null;
32
32
 
33
33
  // Parse the Atom feed response
34
- const doc = parseHtml(result.content, { parseNoneClosedTags: true });
34
+ const doc = parseHTML(result.content).document;
35
35
  const entry = doc.querySelector("entry");
36
36
 
37
37
  if (!entry) return null;
38
38
 
39
- const title = entry.querySelector("title")?.text?.trim()?.replace(/\s+/g, " ");
40
- const summary = entry.querySelector("summary")?.text?.trim();
41
- const authors = entry
42
- .querySelectorAll("author name")
43
- .map(n => n.text?.trim())
44
- .filter(Boolean);
45
- const published = entry.querySelector("published")?.text?.trim()?.split("T")[0];
46
- const categories = entry
47
- .querySelectorAll("category")
39
+ const title = entry.querySelector("title")?.textContent?.trim()?.replace(/\s+/g, " ");
40
+ const summary = entry.querySelector("summary")?.textContent?.trim();
41
+ const authors = Array.from(entry.querySelectorAll("author name") as Iterable<{ textContent: string | null }>)
42
+ .map(n => n.textContent?.trim())
43
+ .filter((name): name is string => Boolean(name));
44
+ const published = entry.querySelector("published")?.textContent?.trim()?.split("T")[0];
45
+ const categories = Array.from(
46
+ entry.querySelectorAll("category") as Iterable<{ getAttribute: (name: string) => string | null }>,
47
+ )
48
48
  .map(c => c.getAttribute("term"))
49
- .filter(Boolean);
49
+ .filter((term): term is string => Boolean(term));
50
50
  const pdfLink = entry.querySelector('link[title="pdf"]')?.getAttribute("href");
51
51
 
52
52
  let md = `# ${title || "arXiv Paper"}\n\n`;
@@ -1,5 +1,5 @@
1
1
  import { tryParseJson } from "@oh-my-pi/pi-utils";
2
- import { parse as parseHtml } from "node-html-parser";
2
+ import { parseHTML } from "linkedom";
3
3
  import type { RenderResult, SpecialHandler } from "./types";
4
4
  import { buildResult, htmlToBasicMarkdown, loadPage } from "./types";
5
5
 
@@ -97,7 +97,7 @@ export const handleGoPkg: SpecialHandler = async (
97
97
  });
98
98
  }
99
99
 
100
- const doc = parseHtml(pageResult.content);
100
+ const doc = parseHTML(pageResult.content).document;
101
101
 
102
102
  // Extract actual module path from breadcrumb or header
103
103
  const breadcrumb = doc.querySelector(".go-Breadcrumb");
@@ -1,4 +1,4 @@
1
- import { parse as parseHtml } from "node-html-parser";
1
+ import { parseHTML } from "linkedom";
2
2
  import type { RenderResult, SpecialHandler } from "./types";
3
3
  import { buildResult, loadPage } from "./types";
4
4
  import { convertWithMarkitdown, fetchBinary } from "./utils";
@@ -30,22 +30,30 @@ export const handleIacr: SpecialHandler = async (
30
30
 
31
31
  if (!result.ok) return null;
32
32
 
33
- const doc = parseHtml(result.content);
33
+ const doc = parseHTML(result.content).document;
34
34
 
35
35
  // Extract metadata from the page
36
36
  const title =
37
- doc.querySelector("h3.mb-3")?.text?.trim() ||
37
+ doc.querySelector("h3.mb-3")?.textContent?.trim() ||
38
38
  doc.querySelector('meta[name="citation_title"]')?.getAttribute("content");
39
- const authors = doc
40
- .querySelectorAll('meta[name="citation_author"]')
39
+ const authors = Array.from(
40
+ doc.querySelectorAll('meta[name="citation_author"]') as Iterable<{
41
+ getAttribute: (name: string) => string | null;
42
+ }>,
43
+ )
41
44
  .map(m => m.getAttribute("content"))
42
- .filter(Boolean);
45
+ .filter((author): author is string => Boolean(author));
43
46
  // Abstract is in <p> after <h5>Abstract</h5>
44
- const abstractHeading = doc.querySelectorAll("h5").find(h => h.text?.includes("Abstract"));
47
+ const abstractHeading = Array.from(
48
+ doc.querySelectorAll("h5") as Iterable<{
49
+ textContent: string | null;
50
+ parentElement?: { querySelector: (selector: string) => { textContent: string | null } | null } | null;
51
+ }>,
52
+ ).find(h => h.textContent?.includes("Abstract"));
45
53
  const abstract =
46
- abstractHeading?.parentNode?.querySelector("p")?.text?.trim() ||
54
+ abstractHeading?.parentElement?.querySelector("p")?.textContent?.trim() ||
47
55
  doc.querySelector('meta[name="description"]')?.getAttribute("content");
48
- const keywords = doc.querySelector(".keywords")?.text?.replace("Keywords:", "").trim();
56
+ const keywords = doc.querySelector(".keywords")?.textContent?.replace("Keywords:", "").trim();
49
57
  const pubDate = doc.querySelector('meta[name="citation_publication_date"]')?.getAttribute("content");
50
58
 
51
59
  let md = `# ${title || "IACR ePrint Paper"}\n\n`;