npm - @oh-my-pi/pi-coding-agent - Versions diffs - 13.3.6 → 13.3.8 - Mend

@oh-my-pi/pi-coding-agent 13.3.6 → 13.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

package/CHANGELOG.md +115 -0
package/package.json +9 -18
package/scripts/format-prompts.ts +7 -172
package/src/capability/mcp.ts +5 -0
package/src/cli/args.ts +1 -0
package/src/config/prompt-templates.ts +9 -55
package/src/config/settings-schema.ts +24 -0
package/src/discovery/builtin.ts +1 -0
package/src/discovery/codex.ts +1 -2
package/src/discovery/helpers.ts +0 -5
package/src/discovery/mcp-json.ts +2 -0
package/src/internal-urls/docs-index.generated.ts +1 -1
package/src/lsp/client.ts +8 -0
package/src/lsp/config.ts +2 -3
package/src/lsp/index.ts +379 -99
package/src/lsp/render.ts +21 -31
package/src/lsp/types.ts +21 -8
package/src/lsp/utils.ts +193 -1
package/src/mcp/config-writer.ts +3 -0
package/src/mcp/config.ts +1 -0
package/src/mcp/oauth-flow.ts +3 -1
package/src/mcp/types.ts +5 -0
package/src/modes/components/settings-defs.ts +9 -0
package/src/modes/components/status-line.ts +1 -1
package/src/modes/controllers/mcp-command-controller.ts +6 -2
package/src/modes/interactive-mode.ts +8 -1
package/src/modes/theme/mermaid-cache.ts +4 -4
package/src/modes/theme/theme.ts +33 -0
package/src/prompts/system/custom-system-prompt.md +0 -10
package/src/prompts/system/subagent-user-prompt.md +2 -0
package/src/prompts/system/system-prompt.md +12 -9
package/src/prompts/tools/ast-find.md +20 -0
package/src/prompts/tools/ast-replace.md +21 -0
package/src/prompts/tools/bash.md +2 -0
package/src/prompts/tools/hashline.md +26 -8
package/src/prompts/tools/lsp.md +22 -5
package/src/prompts/tools/task.md +0 -1
package/src/sdk.ts +11 -5
package/src/session/agent-session.ts +293 -83
package/src/system-prompt.ts +3 -34
package/src/task/executor.ts +8 -7
package/src/task/index.ts +8 -55
package/src/task/template.ts +2 -4
package/src/task/types.ts +0 -5
package/src/task/worktree.ts +6 -2
package/src/tools/ast-find.ts +316 -0
package/src/tools/ast-replace.ts +294 -0
package/src/tools/bash.ts +2 -1
package/src/tools/browser.ts +2 -8
package/src/tools/fetch.ts +55 -18
package/src/tools/index.ts +8 -0
package/src/tools/jtd-to-json-schema.ts +29 -13
package/src/tools/path-utils.ts +34 -0
package/src/tools/python.ts +2 -1
package/src/tools/renderers.ts +4 -0
package/src/tools/ssh.ts +2 -1
package/src/tools/submit-result.ts +143 -44
package/src/tools/todo-write.ts +34 -0
package/src/tools/tool-timeouts.ts +29 -0
package/src/utils/mime.ts +37 -14
package/src/utils/prompt-format.ts +172 -0
package/src/web/scrapers/arxiv.ts +12 -12
package/src/web/scrapers/go-pkg.ts +2 -2
package/src/web/scrapers/iacr.ts +17 -9
package/src/web/scrapers/readthedocs.ts +3 -3
package/src/web/scrapers/twitter.ts +11 -11
package/src/web/scrapers/wikipedia.ts +4 -5
package/src/utils/ignore-files.ts +0 -119

package/src/tools/submit-result.ts CHANGED Viewed

@@ -4,6 +4,7 @@
  * Subagents must call this tool to finish and return structured JSON output.
  */
 import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallback } from "@oh-my-pi/pi-agent-core";
+import { enforceStrictSchema, sanitizeSchemaForStrictMode } from "@oh-my-pi/pi-ai/utils/typebox-helpers";
 import type { Static, TSchema } from "@sinclair/typebox";
 import { Type } from "@sinclair/typebox";
 import Ajv, { type ErrorObject, type ValidateFunction } from "ajv";
@@ -51,6 +52,53 @@ function formatAjvErrors(errors: ErrorObject[] | null | undefined): string {
 		.join("; ");
 }
+/**
+ * Resolve local `$ref` references in a JSON Schema by inlining definitions.
+ *
+ * Definitions are collected from the ROOT `$defs` and `definitions` containers only
+ * (nested containers are not collected). A ref of the form `#/$defs/<name>` or
+ * `#/definitions/<name>` is replaced by a recursively inlined copy of its target;
+ * keywords that sit next to `$ref` in the same object are discarded. If the name is
+ * missing from the container the ref points at, the other root container is tried.
+ * Refs that cannot be resolved are left untouched.
+ *
+ * `$defs`/`definitions` keywords are removed from every schema object in the output.
+ * Keys inside `properties`, `patternProperties` and `dependentSchemas` are property
+ * names rather than keywords, so a property that happens to be called `definitions`
+ * or `$defs` is preserved.
+ *
+ * A definition that (directly or indirectly) references itself is cut at the point
+ * of recursion with an empty schema (`{}`).
+ *
+ * @param schema Root JSON Schema object.
+ * @returns The same object when the root has no usable definitions, otherwise a new
+ *   object with refs inlined.
+ */
+function resolveSchemaRefs(schema: Record<string, unknown>): Record<string, unknown> {
+	type SchemaObject = Record<string, unknown>;
+	const isSchemaObject = (value: unknown): value is SchemaObject =>
+		value !== null && typeof value === "object" && !Array.isArray(value);
+	const LOCAL_REF = /^#\/(\$defs|definitions)\/(.+)$/;
+	// Keywords whose value maps arbitrary names to subschemas.
+	const NAME_MAP_KEYWORDS = new Set(["properties", "patternProperties", "dependentSchemas"]);
+	// Keyed by "<container>/<name>". A Map (instead of a plain object) keeps names such as
+	// "toString" or "constructor" from resolving to Object.prototype members.
+	const defs = new Map<string, SchemaObject>();
+	for (const container of ["$defs", "definitions"]) {
+		const entries = schema[container];
+		if (!isSchemaObject(entries)) continue;
+		for (const [name, def] of Object.entries(entries)) {
+			if (isSchemaObject(def)) defs.set(`${container}/${name}`, def);
+		}
+	}
+	if (defs.size === 0) return schema;
+	const lookup = (container: string, name: string): { key: string; def: SchemaObject } | undefined => {
+		const fallback = container === "$defs" ? "definitions" : "$defs";
+		for (const key of [`${container}/${name}`, `${fallback}/${name}`]) {
+			const def = defs.get(key);
+			if (def) return { key, def };
+		}
+		return undefined;
+	};
+	// Definitions currently being expanded; used to break reference cycles.
+	const inlining = new Set<string>();
+	const inline = (node: unknown, keysAreNames = false): unknown => {
+		// Wrap the callback so Array.prototype.map's index is not taken for `keysAreNames`.
+		if (Array.isArray(node)) return node.map(item => inline(item));
+		if (!isSchemaObject(node)) return node;
+		if (!keysAreNames) {
+			const ref = node.$ref;
+			if (typeof ref === "string") {
+				const [, container, name] = LOCAL_REF.exec(ref) ?? [];
+				const target = container !== undefined && name !== undefined ? lookup(container, name) : undefined;
+				if (target) {
+					if (inlining.has(target.key)) return {};
+					inlining.add(target.key);
+					const resolved = inline(target.def);
+					inlining.delete(target.key);
+					return resolved;
+				}
+			}
+		}
+		const result: SchemaObject = {};
+		for (const [key, value] of Object.entries(node)) {
+			if (!keysAreNames && (key === "$defs" || key === "definitions")) continue;
+			result[key] = inline(value, !keysAreNames && NAME_MAP_KEYWORDS.has(key));
+		}
+		return result;
+	};
+	return inline(schema) as SchemaObject;
+}
 export class SubmitResultTool implements AgentTool<TSchema, SubmitResultDetails> {
 	readonly name = "submit_result";
 	readonly label = "Submit Result";
@@ -58,51 +106,96 @@ export class SubmitResultTool implements AgentTool<TSchema, SubmitResultDetails>
 		"Finish the task with structured JSON output. Call exactly once at the end of the task.\n\n" +
 		"If you cannot complete the task, call with an error message payload.";
 	readonly parameters: TSchema;
-	readonly strict = true;
+	strict = true;
+	lenientArgValidation = true;
 	readonly #validate?: ValidateFunction;
-	readonly #schemaError?: string;
+	#schemaValidationFailures = 0;
 	constructor(session: ToolSession) {
-		const schemaResult = normalizeSchema(session.outputSchema);
-		// Convert JTD to JSON Schema if needed (auto-detected)
-		const normalizedSchema =
-			schemaResult.normalized !== undefined ? jtdToJsonSchema(schemaResult.normalized) : undefined;
-		let schemaError = schemaResult.error;
-		if (normalizedSchema !== undefined && !schemaError) {
-			try {
-				this.#validate = ajv.compile(normalizedSchema as any);
-			} catch (err) {
-				schemaError = err instanceof Error ? err.message : String(err);
+		const createParameters = (dataSchema: TSchema): TSchema =>
+			Type.Object(
+				{
+					result: Type.Union([
+						Type.Object({ data: dataSchema }, { description: "Successfully completed the task" }),
+						Type.Object({
+							error: Type.String({ description: "Error message when the task cannot be completed" }),
+						}),
+					]),
+				},
+				{
+					additionalProperties: false,
+					description: "Submit either `data` for success or `error` for failure",
+				},
+			) as TSchema;
+		let validate: ValidateFunction | undefined;
+		let dataSchema: TSchema;
+		let parameters: TSchema;
+		let strict = true;
+		try {
+			const schemaResult = normalizeSchema(session.outputSchema);
+			// Convert JTD to JSON Schema if needed (auto-detected)
+			const normalizedSchema =
+				schemaResult.normalized !== undefined ? jtdToJsonSchema(schemaResult.normalized) : undefined;
+			let schemaError = schemaResult.error;
+			if (!schemaError && normalizedSchema === false) {
+				schemaError = "boolean false schema rejects all outputs";
+			}
+			if (normalizedSchema !== undefined && normalizedSchema !== false && !schemaError) {
+				try {
+					validate = ajv.compile(normalizedSchema as Record<string, unknown> | boolean);
+				} catch (err) {
+					schemaError = err instanceof Error ? err.message : String(err);
+				}
 			}
+			const schemaHint = formatSchema(normalizedSchema ?? session.outputSchema);
+			const schemaDescription = schemaError
+				? `Structured JSON output (output schema invalid; accepting unconstrained object): ${schemaError}`
+				: `Structured output matching the schema:\n${schemaHint}`;
+			const sanitizedSchema =
+				!schemaError &&
+				normalizedSchema != null &&
+				typeof normalizedSchema === "object" &&
+				!Array.isArray(normalizedSchema)
+					? sanitizeSchemaForStrictMode(normalizedSchema as Record<string, unknown>)
+					: !schemaError && normalizedSchema === true
+						? {}
+						: undefined;
+			if (sanitizedSchema !== undefined) {
+				const resolved = resolveSchemaRefs({
+					...sanitizedSchema,
+					description: schemaDescription,
+				});
+				dataSchema = Type.Unsafe(resolved);
+			} else {
+				dataSchema = Type.Record(Type.String(), Type.Any(), {
+					description: schemaError ? schemaDescription : "Structured JSON output (no schema specified)",
+				});
+			}
+			parameters = createParameters(dataSchema);
+			const strictParameters = enforceStrictSchema(parameters as unknown as Record<string, unknown>);
+			JSON.stringify(strictParameters);
+			// Verify the final parameters compile with AJV (catches unresolved $ref, etc.)
+			ajv.compile(parameters as Record<string, unknown>);
+		} catch (err) {
+			const errorMsg = err instanceof Error ? err.message : String(err);
+			dataSchema = Type.Record(Type.String(), Type.Any(), {
+				description: `Structured JSON output (schema processing failed: ${errorMsg})`,
+			});
+			parameters = createParameters(dataSchema);
+			validate = undefined;
+			strict = false;
 		}
-		this.#schemaError = schemaError;
-		const schemaHint = formatSchema(normalizedSchema ?? session.outputSchema);
-		// Use actual schema if provided, otherwise fall back to Type.Any
-		// Merge description into the JSON schema for better tool documentation
-		const dataSchema = normalizedSchema
-			? Type.Unsafe({
-					...(normalizedSchema as object),
-					description: `Structured output matching the schema:\n${schemaHint}`,
-				})
-			: Type.Record(Type.String(), Type.Any(), { description: "Structured JSON output (no schema specified)" });
-		this.parameters = Type.Object(
-			{
-				result: Type.Union([
-					Type.Object({ data: dataSchema }, { description: "Successfully completed the task" }),
-					Type.Object({ error: Type.String({ description: "Error message when the task cannot be completed" }) }),
-				]),
-			},
-			{
-				additionalProperties: false,
-				description: "Submit either `data` for success or `error` for failure",
-			},
-		);
+		this.#validate = validate;
+		this.parameters = parameters;
+		this.strict = strict;
 	}
 	async execute(
@@ -130,20 +223,26 @@ export class SubmitResultTool implements AgentTool<TSchema, SubmitResultDetails>
 		}
 		const status = errorMessage !== undefined ? "aborted" : "success";
+		let schemaValidationOverridden = false;
 		if (status === "success") {
 			if (data === undefined || data === null) {
 				throw new Error("data is required when submit_result indicates success");
 			}
-			if (this.#schemaError) {
-				throw new Error(`Invalid output schema: ${this.#schemaError}`);
-			}
 			if (this.#validate && !this.#validate(data)) {
-				throw new Error(`Output does not match schema: ${formatAjvErrors(this.#validate.errors)}`);
+				this.#schemaValidationFailures++;
+				if (this.#schemaValidationFailures <= 1) {
+					throw new Error(`Output does not match schema: ${formatAjvErrors(this.#validate.errors)}`);
+				}
+				schemaValidationOverridden = true;
 			}
 		}
-		const responseText = status === "aborted" ? `Task aborted: ${errorMessage}` : "Result submitted.";
+		const responseText =
+			status === "aborted"
+				? `Task aborted: ${errorMessage}`
+				: schemaValidationOverridden
+					? `Result submitted (schema validation overridden after ${this.#schemaValidationFailures} failed attempt(s)).`
+					: "Result submitted.";
 		return {
 			content: [{ type: "text", text: responseText }],
 			details: { data, status, error: errorMessage },

package/src/tools/todo-write.ts CHANGED Viewed

@@ -161,6 +161,23 @@ function clonePhases(phases: TodoPhase[]): TodoPhase[] {
 	return phases.map(phase => ({ ...phase, tasks: phase.tasks.map(task => ({ ...task })) }));
 }
+/**
+ * Enforce a single `in_progress` task across all phases, mutating the tasks in place.
+ *
+ * Tasks are visited in phase order. The first `in_progress` task keeps its status and
+ * every later `in_progress` task is reset to `pending`. When no task is `in_progress`,
+ * the first `pending` task (if any) is promoted to `in_progress`.
+ */
+function normalizeInProgressTask(phases: TodoPhase[]): void {
+	let activeFound = false;
+	let promotionCandidate: TodoPhase["tasks"][number] | undefined;
+	for (const phase of phases) {
+		for (const task of phase.tasks) {
+			if (task.status === "in_progress") {
+				// Only the first active task survives; later ones are demoted.
+				if (activeFound) task.status = "pending";
+				activeFound = true;
+			} else if (task.status === "pending" && !activeFound && promotionCandidate === undefined) {
+				promotionCandidate = task;
+			}
+		}
+	}
+	if (!activeFound && promotionCandidate !== undefined) {
+		promotionCandidate.status = "in_progress";
+	}
+}
 export function getLatestTodoPhasesFromEntries(entries: SessionEntry[]): TodoPhase[] {
 	for (let i = entries.length - 1; i >= 0; i--) {
 		const entry = entries[i];
@@ -246,6 +263,7 @@ function applyOps(file: TodoFile, ops: TodoWriteParams["ops"]): { file: TodoFile
 		}
 	}
+	normalizeInProgressTask(file.phases);
 	return { file, errors };
 }
@@ -253,6 +271,14 @@ function formatSummary(phases: TodoPhase[], errors: string[]): string {
 	const tasks = phases.flatMap(p => p.tasks);
 	if (tasks.length === 0) return errors.length > 0 ? `Errors: ${errors.join("; ")}` : "Todo list cleared.";
+	const remainingByPhase = phases
+		.map(phase => ({
+			name: phase.name,
+			tasks: phase.tasks.filter(task => task.status === "pending" || task.status === "in_progress"),
+		}))
+		.filter(phase => phase.tasks.length > 0);
+	const remainingTasks = remainingByPhase.flatMap(phase => phase.tasks.map(task => ({ ...task, phase: phase.name })));
 	// Find current phase
 	let currentIdx = phases.findIndex(p => p.tasks.some(t => t.status === "pending" || t.status === "in_progress"));
 	if (currentIdx === -1) currentIdx = phases.length - 1;
@@ -261,6 +287,14 @@ function formatSummary(phases: TodoPhase[], errors: string[]): string {
 	const lines: string[] = [];
 	if (errors.length > 0) lines.push(`Errors: ${errors.join("; ")}`);
+	if (remainingTasks.length === 0) {
+		lines.push("Remaining items: none.");
+	} else {
+		lines.push(`Remaining items (${remainingTasks.length}):`);
+		for (const task of remainingTasks) {
+			lines.push(`  - ${task.id} ${task.content} [${task.status}] (${task.phase})`);
+		}
+	}
 	lines.push(
 		`Phase ${currentIdx + 1}/${phases.length} "${current.name}" — ${done}/${current.tasks.length} tasks complete`,
 	);

package/src/tools/tool-timeouts.ts ADDED Viewed

@@ -0,0 +1,29 @@
+/** Timeout bounds for a single tool; every value is in seconds. */
+export interface ToolTimeoutConfig {
+	/** Default timeout in seconds when agent omits the field */
+	default: number;
+	/** Minimum allowed timeout in seconds */
+	min: number;
+	/** Maximum allowed timeout in seconds (per-tool ceiling) */
+	max: number;
+}
+/** Timeout default and limits (seconds) for each tool that accepts a timeout. */
+export const TOOL_TIMEOUTS = {
+	bash: { default: 300, min: 1, max: 3600 },
+	python: { default: 30, min: 1, max: 600 },
+	browser: { default: 30, min: 1, max: 120 },
+	ssh: { default: 60, min: 1, max: 3600 },
+	fetch: { default: 20, min: 1, max: 45 },
+	lsp: { default: 20, min: 5, max: 60 },
+} as const satisfies Record<string, ToolTimeoutConfig>;
+/** Name of a tool that has an entry in `TOOL_TIMEOUTS`. */
+export type ToolWithTimeout = keyof typeof TOOL_TIMEOUTS;
+/**
+ * Clamp a raw timeout to the allowed range for a tool.
+ *
+ * @param tool Tool whose `TOOL_TIMEOUTS` entry supplies the default and the bounds.
+ * @param rawTimeout Requested timeout in seconds. When it is undefined, null or NaN
+ *   the tool's default is used instead.
+ * @returns A timeout in seconds within `[min, max]` for the tool.
+ */
+export function clampTimeout(tool: ToolWithTimeout, rawTimeout?: number): number {
+	const config = TOOL_TIMEOUTS[tool];
+	// NaN would otherwise pass straight through Math.min/Math.max and be returned as NaN.
+	const timeout = rawTimeout == null || Number.isNaN(rawTimeout) ? config.default : rawTimeout;
+	return Math.max(config.min, Math.min(config.max, timeout));
+}

package/src/utils/mime.ts CHANGED Viewed

@@ -1,9 +1,42 @@
 import * as fs from "node:fs/promises";
-import { fileTypeFromBuffer } from "file-type";
-const IMAGE_MIME_TYPES = new Set(["image/jpeg", "image/png", "image/gif", "image/webp"]);
+const FILE_TYPE_SNIFF_BYTES = 12;
-const FILE_TYPE_SNIFF_BYTES = 4100;
+/**
+ * Identify a supported image format from the leading bytes of a file.
+ *
+ * @param buf Buffer holding the start of the file; it may be larger than what was read.
+ * @param bytesRead Number of bytes in `buf` that were actually read.
+ * @returns "image/jpeg", "image/png", "image/gif" or "image/webp", or null when no
+ *   signature matches within the bytes read.
+ */
+function detectMimeFromBytes(buf: Buffer, bytesRead: number): string | null {
+	// True when `signature` is present at `offset` and lies entirely within the bytes read.
+	const hasBytes = (signature: readonly number[], offset = 0): boolean =>
+		bytesRead >= offset + signature.length && signature.every((byte, i) => buf[offset + i] === byte);
+	// JPEG: FF D8 FF
+	if (hasBytes([0xff, 0xd8, 0xff])) return "image/jpeg";
+	// PNG: 89 "PNG" 0D 0A 1A 0A
+	if (hasBytes([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])) return "image/png";
+	// GIF: "GIF8"
+	if (hasBytes([0x47, 0x49, 0x46, 0x38])) return "image/gif";
+	// WebP: "RIFF" at offset 0 and "WEBP" at offset 8
+	if (hasBytes([0x52, 0x49, 0x46, 0x46]) && hasBytes([0x57, 0x45, 0x42, 0x50], 8)) return "image/webp";
+	return null;
+}
 export async function detectSupportedImageMimeTypeFromFile(filePath: string): Promise<string | null> {
 	const fileHandle = await fs.open(filePath, "r");
@@ -13,17 +46,7 @@ export async function detectSupportedImageMimeTypeFromFile(filePath: string): Pr
 		if (bytesRead === 0) {
 			return null;
 		}
-		const fileType = await fileTypeFromBuffer(buffer.subarray(0, bytesRead));
-		if (!fileType) {
-			return null;
-		}
-		if (!IMAGE_MIME_TYPES.has(fileType.mime)) {
-			return null;
-		}
-		return fileType.mime;
+		return detectMimeFromBytes(buffer, bytesRead);
 	} finally {
 		await fileHandle.close();
 	}

package/src/utils/prompt-format.ts ADDED Viewed

@@ -0,0 +1,172 @@
+/**
+ * Phase in which the formatter runs. In "pre-render" mode `formatPromptContent`
+ * additionally applies its Handlebars-specific rules (lines starting with `{{`).
+ */
+export type PromptRenderPhase = "pre-render" | "post-render";
+/** Switches accepted by `formatPromptContent`; every field is optional. */
+export interface PromptFormatOptions {
+	/** Defaults to "post-render" in `formatPromptContent`. */
+	renderPhase?: PromptRenderPhase;
+	/** Replace ASCII sequences such as `->` or `...` with Unicode symbols. Defaults to false. */
+	replaceAsciiSymbols?: boolean;
+	/** Wrap RFC 2119 keywords in Markdown bold. Defaults to false. */
+	boldRfc2119Keywords?: boolean;
+}
+// Opening XML tag on a line of its own. Closing tags never match. A self-closing tag
+// written with whitespace before the slash (`<tag />`) DOES match, so callers that need
+// to exclude it must also check for a trailing "/>".
+const OPENING_XML = /^<([a-z_-]+)(?:\s+[^>]*)?>$/;
+// Closing XML tag
+const CLOSING_XML = /^<\/([a-z_-]+)>$/;
+// Handlebars block start: {{#if}}, {{#has}}, {{#list}}, etc.
+const OPENING_HBS = /^\{\{#/;
+// Handlebars block end: {{/if}}, {{/has}}, {{/list}}, etc.
+const CLOSING_HBS = /^\{\{\//;
+// List item (- or * or 1.)
+const LIST_ITEM = /^(?:[-*]\s|\d+\.\s)/;
+// Code fence
+const CODE_FENCE = /^```/;
+// Table row
+const TABLE_ROW = /^\|.*\|$/;
+// Table separator (|---|---|). Every cell must contain at least one dash, optionally
+// with alignment colons and padding, so rows made only of empty cells (`| | |`) are
+// treated as ordinary table rows instead of being collapsed as separators.
+const TABLE_SEP = /^\|(?:\s*:?-+:?\s*\|)+$/;
+/** RFC 2119 keywords used in prompts. */
+const RFC2119_KEYWORDS = /\b(?:MUST NOT|SHOULD NOT|SHALL NOT|RECOMMENDED|REQUIRED|OPTIONAL|SHOULD|SHALL|MUST|MAY)\b/g;
+/**
+ * Wrap each RFC 2119 keyword found in `line` in Markdown bold markers (`**`).
+ * A keyword that already has `**` immediately before and after it is left as is.
+ */
+function boldRfc2119Keywords(line: string): string {
+	return line.replace(RFC2119_KEYWORDS, (keyword: string, start: number, text: string) => {
+		const end = start + keyword.length;
+		// Out-of-range indexes yield undefined, so keywords at the edges are never "already bold".
+		const boldBefore = text[start - 2] === "*" && text[start - 1] === "*";
+		const boldAfter = text[end] === "*" && text[end + 1] === "*";
+		return boldBefore && boldAfter ? keyword : `**${keyword}**`;
+	});
+}
+/** Strip the padding around every `|`-delimited cell of a table row. */
+function compactTableRow(line: string): string {
+	const compacted: string[] = [];
+	for (const cell of line.split("|")) {
+		compacted.push(cell.trim());
+	}
+	return compacted.join("|");
+}
+/**
+ * Rewrite a table separator row with exactly three dashes per column.
+ * A leading and/or trailing `:` on a cell is kept as the alignment marker;
+ * cells that are empty after trimming are dropped.
+ */
+function compactTableSep(line: string): string {
+	const markers: string[] = [];
+	for (const rawCell of line.split("|")) {
+		const cell = rawCell.trim();
+		if (cell === "") continue;
+		const prefix = cell.startsWith(":") ? ":" : "";
+		const suffix = cell.endsWith(":") ? ":" : "";
+		markers.push(`${prefix}---${suffix}`);
+	}
+	return `|${markers.join("|")}|`;
+}
+/** Replace common ASCII sequences (`...`, arrows, comparison operators) with Unicode symbols. */
+function replaceCommonAsciiSymbols(line: string): string {
+	// Order matters: "<->" has to be consumed before "->" and "<-" can split it apart.
+	const substitutions: ReadonlyArray<readonly [RegExp, string]> = [
+		[/\.{3}/g, "…"],
+		[/<->/g, "↔"],
+		[/->/g, "→"],
+		[/<-/g, "←"],
+		[/!=/g, "≠"],
+		[/<=/g, "≤"],
+		[/>=/g, "≥"],
+	];
+	let output = line;
+	for (const [pattern, symbol] of substitutions) {
+		output = output.replace(pattern, symbol);
+	}
+	return output;
+}
+/**
+ * Normalize prompt text line by line.
+ *
+ * Code-fence lines and everything between them are only right-trimmed. Outside fences
+ * the function optionally replaces ASCII symbol sequences, dedents closing XML tags that
+ * match an opening tag seen at column 0, strips indentation from lines starting with
+ * `{{` (pre-render only), compacts Markdown table rows and separators, optionally bolds
+ * RFC 2119 keywords, and drops redundant blank lines.
+ *
+ * @param content Prompt text; it is split on "\n".
+ * @param options Formatting switches. Defaults: "post-render", no symbol replacement,
+ *   no keyword bolding.
+ * @returns The formatted lines joined with "\n", without trailing blank lines.
+ */
+export function formatPromptContent(content: string, options: PromptFormatOptions = {}): string {
+	const {
+		renderPhase = "post-render",
+		replaceAsciiSymbols = false,
+		boldRfc2119Keywords: shouldBoldRfc2119 = false,
+	} = options;
+	const isPreRender = renderPhase === "pre-render";
+	const lines = content.split("\n");
+	const result: string[] = [];
+	let inCodeBlock = false;
+	// Stack of XML tag names that were opened on a line without leading indentation.
+	const topLevelTags: string[] = [];
+	for (let i = 0; i < lines.length; i++) {
+		// `line` is the working copy (trailing whitespace removed); `trimmed` also drops
+		// the indentation and is what all pattern tests below run against.
+		let line = lines[i].trimEnd();
+		const trimmed = line.trimStart();
+		// A fence line toggles code-block mode and is emitted as is.
+		if (CODE_FENCE.test(trimmed)) {
+			inCodeBlock = !inCodeBlock;
+			result.push(line);
+			continue;
+		}
+		// Inside a code block nothing but the right-trim above is applied.
+		if (inCodeBlock) {
+			result.push(line);
+			continue;
+		}
+		// NOTE(review): `trimmed` was captured before this replacement, so branches below
+		// that assign `line` from `trimmed` (matched closing tags, `{{` lines, table rows
+		// and separators) discard the replaced symbols. Confirm that is intended for tables.
+		if (replaceAsciiSymbols) {
+			line = replaceCommonAsciiSymbols(line);
+		}
+		// OPENING_XML also matches `<tag />`, hence the explicit "/>" exclusion here.
+		const isOpeningXml = OPENING_XML.test(trimmed) && !trimmed.endsWith("/>");
+		// Equal lengths mean the line has no leading indentation: remember the tag as top-level.
+		if (isOpeningXml && line.length === trimmed.length) {
+			const match = OPENING_XML.exec(trimmed);
+			if (match) topLevelTags.push(match[1]);
+		}
+		const closingMatch = CLOSING_XML.exec(trimmed);
+		if (closingMatch) {
+			const tagName = closingMatch[1];
+			// A closing tag that pairs with the innermost top-level opening tag is dedented
+			// to column 0; any other closing tag keeps its indentation.
+			if (topLevelTags.length > 0 && topLevelTags[topLevelTags.length - 1] === tagName) {
+				line = trimmed;
+				topLevelTags.pop();
+			} else {
+				line = line.trimEnd();
+			}
+		} else if (isPreRender && trimmed.startsWith("{{")) {
+			// Pre-render only: lines starting with `{{` lose their indentation.
+			line = trimmed;
+		} else if (TABLE_SEP.test(trimmed)) {
+			// The separator test must come first: TABLE_ROW matches separator rows too.
+			line = compactTableSep(trimmed);
+		} else if (TABLE_ROW.test(trimmed)) {
+			line = compactTableRow(trimmed);
+		} else {
+			line = line.trimEnd();
+		}
+		if (shouldBoldRfc2119) {
+			line = boldRfc2119Keywords(line);
+		}
+		const isBlank = trimmed === "";
+		if (isBlank) {
+			// `prevLine` is the last line already emitted (empty string when nothing has been
+			// emitted yet); `nextLine` is the upcoming raw input line.
+			const prevLine = result[result.length - 1]?.trim() ?? "";
+			const nextLine = lines[i + 1]?.trim() ?? "";
+			// Drop a blank line directly before a list item.
+			if (LIST_ITEM.test(nextLine)) {
+				continue;
+			}
+			// Drop a blank line directly after an opening XML tag (or an opening Handlebars
+			// block in pre-render mode).
+			// NOTE(review): unlike the check above, this one does not exclude `<tag />`.
+			if (OPENING_XML.test(prevLine) || (isPreRender && OPENING_HBS.test(prevLine))) {
+				continue;
+			}
+			// Drop a blank line directly before a closing XML tag (or a closing Handlebars
+			// block in pre-render mode).
+			if (CLOSING_XML.test(nextLine) || (isPreRender && CLOSING_HBS.test(nextLine))) {
+				continue;
+			}
+			// Collapse consecutive blank lines; this also drops blank lines at the very start.
+			const prevIsBlank = prevLine === "";
+			if (prevIsBlank) {
+				continue;
+			}
+		}
+		// Before emitting a closing tag/block, remove blank lines already emitted just above it.
+		if (CLOSING_XML.test(trimmed) || (isPreRender && CLOSING_HBS.test(trimmed))) {
+			while (result.length > 0 && result[result.length - 1].trim() === "") {
+				result.pop();
+			}
+		}
+		result.push(line);
+	}
+	// Strip trailing blank lines from the output.
+	while (result.length > 0 && result[result.length - 1].trim() === "") {
+		result.pop();
+	}
+	return result.join("\n");
+}

package/src/web/scrapers/arxiv.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { parse as parseHtml } from "node-html-parser";
+import { parseHTML } from "linkedom";
 import type { RenderResult, SpecialHandler } from "./types";
 import { buildResult, loadPage } from "./types";
 import { convertWithMarkitdown, fetchBinary } from "./utils";
@@ -31,22 +31,22 @@ export const handleArxiv: SpecialHandler = async (
 		if (!result.ok) return null;
 		// Parse the Atom feed response
-		const doc = parseHtml(result.content, { parseNoneClosedTags: true });
+		const doc = parseHTML(result.content).document;
 		const entry = doc.querySelector("entry");
 		if (!entry) return null;
-		const title = entry.querySelector("title")?.text?.trim()?.replace(/\s+/g, " ");
-		const summary = entry.querySelector("summary")?.text?.trim();
-		const authors = entry
-			.querySelectorAll("author name")
-			.map(n => n.text?.trim())
-			.filter(Boolean);
-		const published = entry.querySelector("published")?.text?.trim()?.split("T")[0];
-		const categories = entry
-			.querySelectorAll("category")
+		const title = entry.querySelector("title")?.textContent?.trim()?.replace(/\s+/g, " ");
+		const summary = entry.querySelector("summary")?.textContent?.trim();
+		const authors = Array.from(entry.querySelectorAll("author name") as Iterable<{ textContent: string | null }>)
+			.map(n => n.textContent?.trim())
+			.filter((name): name is string => Boolean(name));
+		const published = entry.querySelector("published")?.textContent?.trim()?.split("T")[0];
+		const categories = Array.from(
+			entry.querySelectorAll("category") as Iterable<{ getAttribute: (name: string) => string | null }>,
+		)
 			.map(c => c.getAttribute("term"))
-			.filter(Boolean);
+			.filter((term): term is string => Boolean(term));
 		const pdfLink = entry.querySelector('link[title="pdf"]')?.getAttribute("href");
 		let md = `# ${title || "arXiv Paper"}\n\n`;

package/src/web/scrapers/go-pkg.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { tryParseJson } from "@oh-my-pi/pi-utils";
-import { parse as parseHtml } from "node-html-parser";
+import { parseHTML } from "linkedom";
 import type { RenderResult, SpecialHandler } from "./types";
 import { buildResult, htmlToBasicMarkdown, loadPage } from "./types";
@@ -97,7 +97,7 @@ export const handleGoPkg: SpecialHandler = async (
 			});
 		}
-		const doc = parseHtml(pageResult.content);
+		const doc = parseHTML(pageResult.content).document;
 		// Extract actual module path from breadcrumb or header
 		const breadcrumb = doc.querySelector(".go-Breadcrumb");

package/src/web/scrapers/iacr.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { parse as parseHtml } from "node-html-parser";
+import { parseHTML } from "linkedom";
 import type { RenderResult, SpecialHandler } from "./types";
 import { buildResult, loadPage } from "./types";
 import { convertWithMarkitdown, fetchBinary } from "./utils";
@@ -30,22 +30,30 @@ export const handleIacr: SpecialHandler = async (
 		if (!result.ok) return null;
-		const doc = parseHtml(result.content);
+		const doc = parseHTML(result.content).document;
 		// Extract metadata from the page
 		const title =
-			doc.querySelector("h3.mb-3")?.text?.trim() ||
+			doc.querySelector("h3.mb-3")?.textContent?.trim() ||
 			doc.querySelector('meta[name="citation_title"]')?.getAttribute("content");
-		const authors = doc
-			.querySelectorAll('meta[name="citation_author"]')
+		const authors = Array.from(
+			doc.querySelectorAll('meta[name="citation_author"]') as Iterable<{
+				getAttribute: (name: string) => string | null;
+			}>,
+		)
 			.map(m => m.getAttribute("content"))
-			.filter(Boolean);
+			.filter((author): author is string => Boolean(author));
 		// Abstract is in <p> after <h5>Abstract</h5>
-		const abstractHeading = doc.querySelectorAll("h5").find(h => h.text?.includes("Abstract"));
+		const abstractHeading = Array.from(
+			doc.querySelectorAll("h5") as Iterable<{
+				textContent: string | null;
+				parentElement?: { querySelector: (selector: string) => { textContent: string | null } | null } | null;
+			}>,
+		).find(h => h.textContent?.includes("Abstract"));
 		const abstract =
-			abstractHeading?.parentNode?.querySelector("p")?.text?.trim() ||
+			abstractHeading?.parentElement?.querySelector("p")?.textContent?.trim() ||
 			doc.querySelector('meta[name="description"]')?.getAttribute("content");
-		const keywords = doc.querySelector(".keywords")?.text?.replace("Keywords:", "").trim();
+		const keywords = doc.querySelector(".keywords")?.textContent?.replace("Keywords:", "").trim();
 		const pubDate = doc.querySelector('meta[name="citation_publication_date"]')?.getAttribute("content");
 		let md = `# ${title || "IACR ePrint Paper"}\n\n`;