@oh-my-pi/pi-coding-agent 16.0.7 → 16.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +41 -0
- package/dist/cli.js +4817 -12449
- package/dist/types/cli/args.d.ts +1 -0
- package/dist/types/cli/update-cli.d.ts +11 -0
- package/dist/types/commands/launch.d.ts +3 -0
- package/dist/types/debug/remote-debugger.d.ts +45 -0
- package/dist/types/goals/runtime.d.ts +4 -1
- package/dist/types/internal-urls/docs-index.d.ts +19 -0
- package/dist/types/markit/converters/docx.d.ts +6 -0
- package/dist/types/markit/converters/epub.d.ts +15 -0
- package/dist/types/markit/converters/pdf/columns.d.ts +35 -0
- package/dist/types/markit/converters/pdf/extract.d.ts +10 -0
- package/dist/types/markit/converters/pdf/grid.d.ts +25 -0
- package/dist/types/markit/converters/pdf/headers.d.ts +24 -0
- package/dist/types/markit/converters/pdf/index.d.ts +6 -0
- package/dist/types/markit/converters/pdf/render.d.ts +24 -0
- package/dist/types/markit/converters/pdf/types.d.ts +75 -0
- package/dist/types/markit/converters/pptx.d.ts +57 -0
- package/dist/types/markit/converters/xlsx.d.ts +25 -0
- package/dist/types/markit/index.d.ts +2 -0
- package/dist/types/markit/registry.d.ts +16 -0
- package/dist/types/markit/types.d.ts +30 -0
- package/dist/types/modes/print-mode.d.ts +2 -0
- package/dist/types/session/agent-session.d.ts +7 -8
- package/dist/types/session/auth-storage.d.ts +3 -2
- package/dist/types/session/yield-queue.d.ts +3 -1
- package/dist/types/tools/browser/attach.d.ts +1 -1
- package/dist/types/utils/markit.d.ts +0 -8
- package/dist/types/utils/mupdf-wasm-embed.d.ts +1 -0
- package/dist/types/utils/turndown.d.ts +15 -0
- package/dist/types/utils/zip.d.ts +119 -0
- package/package.json +20 -18
- package/scripts/build-binary.ts +7 -3
- package/scripts/bundle-dist.ts +28 -12
- package/scripts/embed-mupdf-wasm.ts +67 -0
- package/scripts/generate-docs-index.ts +48 -32
- package/scripts/omp +1 -1
- package/src/advisor/__tests__/advisor.test.ts +83 -0
- package/src/advisor/runtime.ts +16 -1
- package/src/cli/args.ts +3 -0
- package/src/cli/auth-broker-cli.ts +1 -3
- package/src/cli/auth-gateway-cli.ts +2 -5
- package/src/cli/flag-tables.ts +1 -0
- package/src/cli/update-cli.ts +63 -3
- package/src/commands/launch.ts +3 -0
- package/src/config/model-discovery.ts +20 -8
- package/src/config/models-config-schema.ts +8 -1
- package/src/debug/index.ts +44 -0
- package/src/debug/remote-debugger.ts +151 -0
- package/src/debug/report-bundle.ts +2 -1
- package/src/goals/runtime.ts +19 -7
- package/src/internal-urls/docs-index.generated.txt +2 -0
- package/src/internal-urls/docs-index.ts +102 -0
- package/src/internal-urls/omp-protocol.ts +10 -9
- package/src/main.ts +8 -0
- package/src/markit/NOTICE +32 -0
- package/src/markit/converters/docx.ts +56 -0
- package/src/markit/converters/epub.ts +136 -0
- package/src/markit/converters/mammoth.d.ts +24 -0
- package/src/markit/converters/pdf/columns.ts +103 -0
- package/src/markit/converters/pdf/extract.ts +574 -0
- package/src/markit/converters/pdf/grid.ts +780 -0
- package/src/markit/converters/pdf/headers.ts +106 -0
- package/src/markit/converters/pdf/index.ts +146 -0
- package/src/markit/converters/pdf/render.ts +501 -0
- package/src/markit/converters/pdf/types.ts +84 -0
- package/src/markit/converters/pptx.ts +325 -0
- package/src/markit/converters/xlsx.ts +173 -0
- package/src/markit/index.ts +2 -0
- package/src/markit/registry.ts +59 -0
- package/src/markit/types.ts +35 -0
- package/src/modes/components/snapcompact-shape-preview-doc.md +14 -7
- package/src/modes/components/snapcompact-shape-preview.ts +2 -2
- package/src/modes/controllers/input-controller.ts +29 -8
- package/src/modes/interactive-mode.ts +33 -12
- package/src/modes/print-mode.ts +5 -1
- package/src/prompts/advisor/advise-tool.md +3 -1
- package/src/prompts/advisor/system.md +55 -11
- package/src/sdk.ts +5 -9
- package/src/session/agent-session.ts +72 -42
- package/src/session/auth-storage.ts +2 -11
- package/src/session/yield-queue.ts +7 -1
- package/src/tools/browser/attach.ts +2 -2
- package/src/tools/fetch.ts +25 -60
- package/src/tools/read.ts +1 -1
- package/src/tools/search.ts +1 -6
- package/src/tools/write.ts +25 -65
- package/src/utils/markit.ts +25 -9
- package/src/utils/mupdf-wasm-embed.ts +12 -0
- package/src/utils/tools-manager.ts +2 -11
- package/src/utils/turndown.ts +83 -0
- package/src/{tools/archive-reader.ts → utils/zip.ts} +453 -83
- package/src/web/scrapers/types.ts +3 -46
- package/dist/types/internal-urls/docs-index.generated.d.ts +0 -2
- package/dist/types/tools/archive-reader.d.ts +0 -49
- package/src/internal-urls/docs-index.generated.ts +0 -120
package/src/tools/fetch.ts
CHANGED
|
@@ -20,12 +20,11 @@ import { CachedOutputBlock, markFramedBlockComponent } from "../tui/output-block
|
|
|
20
20
|
import { webpExclusionForModel } from "../utils/image-loading";
|
|
21
21
|
import { formatDimensionNote, resizeImage } from "../utils/image-resize";
|
|
22
22
|
import { ensureTool } from "../utils/tools-manager";
|
|
23
|
+
import { type ArchiveFormat, listArchiveRoot, sniffArchiveFormat } from "../utils/zip";
|
|
23
24
|
import { extractWithParallel, findParallelApiKey, getParallelExtractContent } from "../web/parallel";
|
|
24
|
-
import {
|
|
25
|
-
import type { RenderResult } from "../web/scrapers/types";
|
|
25
|
+
import type { RenderResult, SpecialHandler } from "../web/scrapers/types";
|
|
26
26
|
import { finalizeOutput, loadPage, looksLikeHtml, MAX_BYTES, MAX_OUTPUT_CHARS } from "../web/scrapers/types";
|
|
27
27
|
import { convertWithMarkit, fetchBinary } from "../web/scrapers/utils";
|
|
28
|
-
import { type ArchiveFormat, listArchiveRoot, sniffArchiveFormat } from "./archive-reader";
|
|
29
28
|
import { applyListLimit } from "./list-limit";
|
|
30
29
|
import { formatStyledArtifactReference, type OutputMeta } from "./output-meta";
|
|
31
30
|
import { type LineRange, parseLineRanges } from "./path-utils";
|
|
@@ -51,34 +50,9 @@ const CONVERTIBLE_MIMES = new Set([
|
|
|
51
50
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
52
51
|
"application/rtf",
|
|
53
52
|
"application/epub+zip",
|
|
54
|
-
"image/png",
|
|
55
|
-
"image/jpeg",
|
|
56
|
-
"image/gif",
|
|
57
|
-
"image/webp",
|
|
58
|
-
"audio/mpeg",
|
|
59
|
-
"audio/wav",
|
|
60
|
-
"audio/ogg",
|
|
61
53
|
]);
|
|
62
54
|
|
|
63
|
-
const CONVERTIBLE_EXTENSIONS = new Set([
|
|
64
|
-
".pdf",
|
|
65
|
-
".doc",
|
|
66
|
-
".docx",
|
|
67
|
-
".ppt",
|
|
68
|
-
".pptx",
|
|
69
|
-
".xls",
|
|
70
|
-
".xlsx",
|
|
71
|
-
".rtf",
|
|
72
|
-
".epub",
|
|
73
|
-
".png",
|
|
74
|
-
".jpg",
|
|
75
|
-
".jpeg",
|
|
76
|
-
".gif",
|
|
77
|
-
".webp",
|
|
78
|
-
".mp3",
|
|
79
|
-
".wav",
|
|
80
|
-
".ogg",
|
|
81
|
-
]);
|
|
55
|
+
const CONVERTIBLE_EXTENSIONS = new Set([".pdf", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx", ".rtf", ".epub"]);
|
|
82
56
|
|
|
83
57
|
const NOTEBOOK_MIMES = new Set(["application/x-ipynb+json"]);
|
|
84
58
|
const NOTEBOOK_EXTENSIONS = new Set([".ipynb"]);
|
|
@@ -1044,6 +1018,18 @@ async function tryRenderBinaryPayload(
|
|
|
1044
1018
|
// Unified Special Handler Dispatch
|
|
1045
1019
|
// =============================================================================
|
|
1046
1020
|
|
|
1021
|
+
/** Memoized import of the site-specific handlers; stays undefined until the first request. */
let specialHandlersPromise: Promise<SpecialHandler[]> | undefined;

/**
 * Resolve the site-specific scraper handlers, importing them on first use.
 *
 * The scrapers barrel eagerly imports ~80 site modules, and none of them are
 * needed before the first fetch that requires a special handler, so the import
 * is deferred to keep those modules out of the cold-startup graph. Every call
 * after the first returns the same cached promise.
 *
 * @returns A promise for the `specialHandlers` export of `../web/scrapers`.
 */
function loadSpecialHandlers(): Promise<SpecialHandler[]> {
	if (specialHandlersPromise === undefined) {
		specialHandlersPromise = import("../web/scrapers").then(scrapers => scrapers.specialHandlers);
	}
	return specialHandlersPromise;
}
|
|
1032
|
+
|
|
1047
1033
|
/**
|
|
1048
1034
|
* Try all special handlers
|
|
1049
1035
|
*/
|
|
@@ -1053,6 +1039,7 @@ async function handleSpecialUrls(
|
|
|
1053
1039
|
signal: AbortSignal | undefined,
|
|
1054
1040
|
storage: AgentStorage | null,
|
|
1055
1041
|
): Promise<FetchRenderResult | null> {
|
|
1042
|
+
const specialHandlers = await loadSpecialHandlers();
|
|
1056
1043
|
for (const handler of specialHandlers) {
|
|
1057
1044
|
if (signal?.aborted) {
|
|
1058
1045
|
throw new ToolAbortError();
|
|
@@ -1144,45 +1131,25 @@ async function renderUrl(
|
|
|
1144
1131
|
notes.push(
|
|
1145
1132
|
`Image MIME type ${imageMimeType} is unsupported for inline model serialization; returning text metadata only`,
|
|
1146
1133
|
);
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
notes.push("Attempting binary conversion fallback for unsupported image MIME type");
|
|
1150
|
-
} else {
|
|
1151
|
-
notes.push("Falling back to textual rendering from initial response");
|
|
1152
|
-
}
|
|
1153
|
-
skipConvertibleBinaryRetry = !shouldTryConvertibleFallback;
|
|
1134
|
+
notes.push("Falling back to textual rendering from initial response");
|
|
1135
|
+
skipConvertibleBinaryRetry = true;
|
|
1154
1136
|
} else {
|
|
1155
1137
|
const binary = await fetchBinary(finalUrl, timeout, signal);
|
|
1156
1138
|
if (binary.ok) {
|
|
1157
1139
|
notes.push("Fetched image binary");
|
|
1158
|
-
const conversionExtension = getExtensionHint(finalUrl, binary.contentDisposition) || extHint;
|
|
1159
|
-
let convertedText: string | null = null;
|
|
1160
|
-
const converted = await convertWithMarkit(binary.buffer, conversionExtension, timeout, signal);
|
|
1161
|
-
if (converted.ok) {
|
|
1162
|
-
if (converted.content.trim().length > 50) {
|
|
1163
|
-
notes.push("Converted with markit");
|
|
1164
|
-
convertedText = converted.content;
|
|
1165
|
-
} else {
|
|
1166
|
-
notes.push("markit conversion produced no usable output");
|
|
1167
|
-
}
|
|
1168
|
-
} else if (converted.error) {
|
|
1169
|
-
notes.push(`markit conversion failed: ${converted.error}`);
|
|
1170
|
-
} else {
|
|
1171
|
-
notes.push("markit conversion failed");
|
|
1172
|
-
}
|
|
1173
1140
|
|
|
1174
1141
|
if (binary.buffer.byteLength > MAX_INLINE_IMAGE_SOURCE_BYTES) {
|
|
1175
1142
|
notes.push(
|
|
1176
1143
|
`Image exceeds inline source limit (${binary.buffer.byteLength} bytes > ${MAX_INLINE_IMAGE_SOURCE_BYTES} bytes)`,
|
|
1177
1144
|
);
|
|
1178
1145
|
const output = finalizeOutput(
|
|
1179
|
-
|
|
1146
|
+
`Fetched image content (${imageMimeType}), but it is too large to inline render.`,
|
|
1180
1147
|
);
|
|
1181
1148
|
return {
|
|
1182
1149
|
url,
|
|
1183
1150
|
finalUrl,
|
|
1184
1151
|
contentType: imageMimeType,
|
|
1185
|
-
method:
|
|
1152
|
+
method: "image-too-large",
|
|
1186
1153
|
content: output.content,
|
|
1187
1154
|
fetchedAt,
|
|
1188
1155
|
truncated: output.truncated,
|
|
@@ -1199,15 +1166,13 @@ async function renderUrl(
|
|
|
1199
1166
|
if (!isDecodedImage) {
|
|
1200
1167
|
notes.push(`Fetched payload could not be decoded as ${imageMimeType}; returning text metadata only`);
|
|
1201
1168
|
const output = finalizeOutput(
|
|
1202
|
-
|
|
1203
|
-
rawContent ??
|
|
1204
|
-
`Fetched payload was labeled ${imageMimeType}, but bytes were not a valid image.`,
|
|
1169
|
+
rawContent ?? `Fetched payload was labeled ${imageMimeType}, but bytes were not a valid image.`,
|
|
1205
1170
|
);
|
|
1206
1171
|
return {
|
|
1207
1172
|
url,
|
|
1208
1173
|
finalUrl,
|
|
1209
1174
|
contentType: imageMimeType,
|
|
1210
|
-
method:
|
|
1175
|
+
method: "image-invalid",
|
|
1211
1176
|
content: output.content,
|
|
1212
1177
|
fetchedAt,
|
|
1213
1178
|
truncated: output.truncated,
|
|
@@ -1219,13 +1184,13 @@ async function renderUrl(
|
|
|
1219
1184
|
`Image exceeds inline output limit after resize (${resized.buffer.length} bytes > ${MAX_INLINE_IMAGE_OUTPUT_BYTES} bytes)`,
|
|
1220
1185
|
);
|
|
1221
1186
|
const output = finalizeOutput(
|
|
1222
|
-
|
|
1187
|
+
`Fetched image content (${imageMimeType}), but it is too large to inline render.`,
|
|
1223
1188
|
);
|
|
1224
1189
|
return {
|
|
1225
1190
|
url,
|
|
1226
1191
|
finalUrl,
|
|
1227
1192
|
contentType: imageMimeType,
|
|
1228
|
-
method:
|
|
1193
|
+
method: "image-too-large",
|
|
1229
1194
|
content: output.content,
|
|
1230
1195
|
fetchedAt,
|
|
1231
1196
|
truncated: output.truncated,
|
|
@@ -1234,7 +1199,7 @@ async function renderUrl(
|
|
|
1234
1199
|
}
|
|
1235
1200
|
|
|
1236
1201
|
const dimensionNote = formatDimensionNote(resized);
|
|
1237
|
-
let imageSummary =
|
|
1202
|
+
let imageSummary = `Fetched image content (${resized.mimeType}).`;
|
|
1238
1203
|
if (dimensionNote) {
|
|
1239
1204
|
imageSummary += `\n${dimensionNote}`;
|
|
1240
1205
|
}
|
package/src/tools/read.ts
CHANGED
|
@@ -48,8 +48,8 @@ import {
|
|
|
48
48
|
webpExclusionForModel,
|
|
49
49
|
} from "../utils/image-loading";
|
|
50
50
|
import { convertFileWithMarkit } from "../utils/markit";
|
|
51
|
+
import { type ArchiveReader, formatArchiveEntryLines, openArchive, parseArchivePathCandidates } from "../utils/zip";
|
|
51
52
|
import { buildDirectoryTree, type DirectoryTree } from "../workspace-tree";
|
|
52
|
-
import { type ArchiveReader, formatArchiveEntryLines, openArchive, parseArchivePathCandidates } from "./archive-reader";
|
|
53
53
|
import {
|
|
54
54
|
type ConflictEntry,
|
|
55
55
|
type ConflictScope,
|
package/src/tools/search.ts
CHANGED
|
@@ -28,13 +28,8 @@ import {
|
|
|
28
28
|
uriHyperlink,
|
|
29
29
|
} from "../tui";
|
|
30
30
|
import { resolveFileDisplayMode } from "../utils/file-display-mode";
|
|
31
|
+
import { type ArchiveReader, type ExtractedArchiveFile, openArchive, parseArchivePathCandidates } from "../utils/zip";
|
|
31
32
|
import type { ToolSession } from ".";
|
|
32
|
-
import {
|
|
33
|
-
type ArchiveReader,
|
|
34
|
-
type ExtractedArchiveFile,
|
|
35
|
-
openArchive,
|
|
36
|
-
parseArchivePathCandidates,
|
|
37
|
-
} from "./archive-reader";
|
|
38
33
|
import { createFileRecorder, formatResultPath } from "./file-recorder";
|
|
39
34
|
import { classifyGroupedLines, formatGroupedFiles, groupLineIndicesByBlank } from "./grouped-file-output";
|
|
40
35
|
import { formatMatchLine } from "./match-line-format";
|
package/src/tools/write.ts
CHANGED
|
@@ -20,8 +20,14 @@ import writeDescription from "../prompts/tools/write.md" with { type: "text" };
|
|
|
20
20
|
import type { ToolSession } from "../sdk";
|
|
21
21
|
import { fileHyperlink, framedBlock, renderStatusLine } from "../tui";
|
|
22
22
|
import { resolveFileDisplayMode } from "../utils/file-display-mode";
|
|
23
|
+
import {
|
|
24
|
+
type ArchiveMemberContent,
|
|
25
|
+
archiveFormatFromPath,
|
|
26
|
+
parseArchivePathCandidates,
|
|
27
|
+
readArchiveEntries,
|
|
28
|
+
writeArchive,
|
|
29
|
+
} from "../utils/zip";
|
|
23
30
|
import { truncateForPrompt } from "./approval";
|
|
24
|
-
import { parseArchivePathCandidates } from "./archive-reader";
|
|
25
31
|
import { assertEditableFile } from "./auto-generated-guard";
|
|
26
32
|
import {
|
|
27
33
|
type ConflictEntry,
|
|
@@ -65,12 +71,6 @@ import { toolResult } from "./tool-result";
|
|
|
65
71
|
const LOOSE_HASHLINE_HEADER_RE = /^\s*\[[^#\r\n]+#[^ \t\r\n]*\]\s*$/;
|
|
66
72
|
const EXECUTABLE_NOTICE = "[Notice: Made executable via chmod +x]";
|
|
67
73
|
|
|
68
|
-
let fflateModulePromise: Promise<typeof import("fflate")> | undefined;
|
|
69
|
-
async function loadFflate(): Promise<typeof import("fflate")> {
|
|
70
|
-
if (!fflateModulePromise) fflateModulePromise = import("fflate");
|
|
71
|
-
return fflateModulePromise;
|
|
72
|
-
}
|
|
73
|
-
|
|
74
74
|
const writeSchema = type({
|
|
75
75
|
path: type("string").describe("file path"),
|
|
76
76
|
content: type("string").describe("file content"),
|
|
@@ -369,9 +369,10 @@ export class WriteTool implements AgentTool<typeof writeSchema, WriteToolDetails
|
|
|
369
369
|
const finalPath = resolvedArchivePath.exists
|
|
370
370
|
? await fs.realpath(resolvedArchivePath.absolutePath).catch(() => resolvedArchivePath.absolutePath)
|
|
371
371
|
: resolvedArchivePath.absolutePath;
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
372
|
+
// A realpath swap can land on a name without an archive extension; a
|
|
373
|
+
// whole-archive rewrite then defaults to an uncompressed tar, matching the
|
|
374
|
+
// previous `isZip`/`isGzip`/else fallthrough.
|
|
375
|
+
const format = archiveFormatFromPath(finalPath) ?? "tar";
|
|
375
376
|
// Rewrites are whole-archive: write to a temp file and rename so a
|
|
376
377
|
// crash/disk-full mid-write can't destroy the original archive.
|
|
377
378
|
const tmpPath = `${finalPath}.tmp-${process.pid}`;
|
|
@@ -381,67 +382,26 @@ export class WriteTool implements AgentTool<typeof writeSchema, WriteToolDetails
|
|
|
381
382
|
await fs.mkdir(parentDir, { recursive: true });
|
|
382
383
|
}
|
|
383
384
|
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
if (resolvedArchivePath.exists) {
|
|
388
|
-
try {
|
|
389
|
-
const bytes = await Bun.file(resolvedArchivePath.absolutePath).bytes();
|
|
390
|
-
const { unzipSync } = await loadFflate();
|
|
391
|
-
const existing = unzipSync(new Uint8Array(bytes));
|
|
392
|
-
for (const [entryPath, data] of Object.entries(existing)) {
|
|
393
|
-
zipEntries[entryPath.replace(/\\/g, "/")] = data;
|
|
394
|
-
}
|
|
395
|
-
} catch (error) {
|
|
396
|
-
throw new ToolError(error instanceof Error ? error.message : String(error));
|
|
397
|
-
}
|
|
398
|
-
}
|
|
399
|
-
|
|
400
|
-
zipEntries[resolvedArchivePath.archiveSubPath] = new TextEncoder().encode(content);
|
|
401
|
-
|
|
385
|
+
const entries = new Map<string, ArchiveMemberContent>();
|
|
386
|
+
if (resolvedArchivePath.exists) {
|
|
402
387
|
try {
|
|
403
|
-
const
|
|
404
|
-
const
|
|
405
|
-
|
|
406
|
-
await fs.rename(tmpPath, finalPath);
|
|
407
|
-
} catch (error) {
|
|
408
|
-
await fs.rm(tmpPath, { force: true }).catch(() => {});
|
|
409
|
-
throw new ToolError(error instanceof Error ? error.message : String(error));
|
|
410
|
-
}
|
|
411
|
-
} else {
|
|
412
|
-
const archiveEntries: Record<string, string | File> = {};
|
|
413
|
-
if (resolvedArchivePath.exists) {
|
|
414
|
-
let archive: Bun.Archive;
|
|
415
|
-
try {
|
|
416
|
-
archive = new Bun.Archive(await Bun.file(resolvedArchivePath.absolutePath).bytes());
|
|
417
|
-
} catch (error) {
|
|
418
|
-
throw new ToolError(error instanceof Error ? error.message : String(error));
|
|
388
|
+
const existing = await readArchiveEntries({ bytes: await Bun.file(finalPath).bytes(), format });
|
|
389
|
+
for (const [entryPath, data] of existing) {
|
|
390
|
+
entries.set(entryPath, data);
|
|
419
391
|
}
|
|
420
|
-
|
|
421
|
-
let files: Map<string, File>;
|
|
422
|
-
try {
|
|
423
|
-
files = await archive.files();
|
|
424
|
-
} catch (error) {
|
|
425
|
-
throw new ToolError(error instanceof Error ? error.message : String(error));
|
|
426
|
-
}
|
|
427
|
-
|
|
428
|
-
for (const [entryPath, file] of files) {
|
|
429
|
-
archiveEntries[entryPath.replace(/\\/g, "/")] = file;
|
|
430
|
-
}
|
|
431
|
-
}
|
|
432
|
-
|
|
433
|
-
archiveEntries[resolvedArchivePath.archiveSubPath] = content;
|
|
434
|
-
|
|
435
|
-
try {
|
|
436
|
-
// `Bun.Archive.write` never infers compression from the extension;
|
|
437
|
-
// request gzip explicitly so `.tar.gz`/`.tgz` stay compressed.
|
|
438
|
-
await Bun.Archive.write(tmpPath, archiveEntries, isGzip ? { compress: "gzip" } : undefined);
|
|
439
|
-
await fs.rename(tmpPath, finalPath);
|
|
440
392
|
} catch (error) {
|
|
441
|
-
await fs.rm(tmpPath, { force: true }).catch(() => {});
|
|
442
393
|
throw new ToolError(error instanceof Error ? error.message : String(error));
|
|
443
394
|
}
|
|
444
395
|
}
|
|
396
|
+
entries.set(resolvedArchivePath.archiveSubPath, content);
|
|
397
|
+
|
|
398
|
+
try {
|
|
399
|
+
await writeArchive(tmpPath, format, entries);
|
|
400
|
+
await fs.rename(tmpPath, finalPath);
|
|
401
|
+
} catch (error) {
|
|
402
|
+
await fs.rm(tmpPath, { force: true }).catch(() => {});
|
|
403
|
+
throw new ToolError(error instanceof Error ? error.message : String(error));
|
|
404
|
+
}
|
|
445
405
|
|
|
446
406
|
invalidateFsScanAfterWrite(resolvedArchivePath.absolutePath);
|
|
447
407
|
const outputPath = `${formatPathRelativeToCwd(resolvedArchivePath.absolutePath, this.session.cwd)}:${
|
package/src/utils/markit.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { logger, untilAborted } from "@oh-my-pi/pi-utils";
|
|
2
|
-
import type { Markit, StreamInfo } from "markit
|
|
2
|
+
import type { Markit, StreamInfo } from "../markit";
|
|
3
3
|
import { ToolAbortError } from "../tools/tool-errors";
|
|
4
|
+
import { loadEmbeddedMupdfWasm } from "./mupdf-wasm-embed";
|
|
4
5
|
|
|
5
6
|
export interface MarkitConversionResult {
|
|
6
7
|
content: string;
|
|
@@ -21,10 +22,7 @@ export interface MarkitFileConversionOptions {
|
|
|
21
22
|
interface MuPdfWasmModuleConfig {
|
|
22
23
|
print?: (...values: unknown[]) => void;
|
|
23
24
|
printErr?: (...values: unknown[]) => void;
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
declare global {
|
|
27
|
-
var $libmupdf_wasm_Module: MuPdfWasmModuleConfig | undefined;
|
|
25
|
+
wasmBinary?: Uint8Array;
|
|
28
26
|
}
|
|
29
27
|
|
|
30
28
|
function logMuPdfWasmOutput(stream: "stdout" | "stderr", values: unknown[]): void {
|
|
@@ -32,17 +30,35 @@ function logMuPdfWasmOutput(stream: "stdout" | "stderr", values: unknown[]): voi
|
|
|
32
30
|
logger.debug("mupdf wasm output", { stream, message });
|
|
33
31
|
}
|
|
34
32
|
|
|
33
|
+
// `$libmupdf_wasm_Module` is declared globally (as `any`) by the mupdf package.
|
|
34
|
+
// Install print hooks before the WASM module initializes so its stdout/stderr
|
|
35
|
+
// route to the file logger instead of corrupting the TUI.
|
|
35
36
|
function installMuPdfWasmLogger(): void {
|
|
36
|
-
const moduleConfig = globalThis.$libmupdf_wasm_Module ?? {};
|
|
37
|
-
moduleConfig.print = (...values) => logMuPdfWasmOutput("stdout", values);
|
|
38
|
-
moduleConfig.printErr = (...values) => logMuPdfWasmOutput("stderr", values);
|
|
37
|
+
const moduleConfig: MuPdfWasmModuleConfig = globalThis.$libmupdf_wasm_Module ?? {};
|
|
38
|
+
moduleConfig.print = (...values: unknown[]) => logMuPdfWasmOutput("stdout", values);
|
|
39
|
+
moduleConfig.printErr = (...values: unknown[]) => logMuPdfWasmOutput("stderr", values);
|
|
40
|
+
globalThis.$libmupdf_wasm_Module = moduleConfig;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Hand the WASM module its bytes directly when the compiled binary embedded them
|
|
44
|
+
// (scripts/embed-mupdf-wasm.ts); a single-file binary has no node_modules for
|
|
45
|
+
// mupdf to read `mupdf-wasm.wasm` from. Source/npm builds get undefined here and
|
|
46
|
+
// mupdf loads its own wasm. Must run before the mupdf module evaluates.
|
|
47
|
+
function installEmbeddedMupdfWasm(): void {
|
|
48
|
+
const wasmBinary = loadEmbeddedMupdfWasm();
|
|
49
|
+
if (!wasmBinary) return;
|
|
50
|
+
const moduleConfig: MuPdfWasmModuleConfig = globalThis.$libmupdf_wasm_Module ?? {};
|
|
51
|
+
moduleConfig.wasmBinary = wasmBinary;
|
|
39
52
|
globalThis.$libmupdf_wasm_Module = moduleConfig;
|
|
40
53
|
}
|
|
41
54
|
|
|
42
55
|
installMuPdfWasmLogger();
|
|
43
56
|
|
|
44
57
|
let markit: () => Markit | Promise<Markit> = async () => {
|
|
45
|
-
|
|
58
|
+
// Lazy: keep the document engine (mammoth/mupdf) off the startup
|
|
59
|
+
// import graph — it loads only when a document is first converted.
|
|
60
|
+
installEmbeddedMupdfWasm();
|
|
61
|
+
const promise = import("../markit").then(({ Markit }) => {
|
|
46
62
|
const instance = new Markit();
|
|
47
63
|
markit = () => instance;
|
|
48
64
|
return instance;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
// AUTOGENERATED -- managed by scripts/embed-mupdf-wasm.ts. Do not edit by hand.
|
|
2
|
+
//
|
|
3
|
+
// Compiled single-file binaries cannot let mupdf resolve its `mupdf-wasm.wasm`
|
|
4
|
+
// sibling from the read-only bunfs, so the binary build (scripts/build-binary.ts
|
|
5
|
+
// and scripts/ci-release-build-binaries.ts) regenerates this module to embed the
|
|
6
|
+
// wasm bytes via `with { type: "file" }` and copies the wasm next to it. Source
|
|
7
|
+
// checkouts, `bun test`, and the npm `dist/cli.js` bundle keep mupdf external and
|
|
8
|
+
// load the wasm from node_modules, so this placeholder returns undefined and the
|
|
9
|
+
// build resets back to it afterward.
|
|
10
|
+
/**
 * Placeholder loader for the embedded mupdf wasm bytes.
 *
 * @returns Always `undefined` in this placeholder, meaning no wasm is embedded.
 */
export function loadEmbeddedMupdfWasm(): Uint8Array | undefined {
	const embeddedWasm: Uint8Array | undefined = undefined;
	return embeddedWasm;
}
|
|
@@ -2,6 +2,7 @@ import * as fs from "node:fs";
|
|
|
2
2
|
import * as os from "node:os";
|
|
3
3
|
import * as path from "node:path";
|
|
4
4
|
import { $which, APP_NAME, getToolsDir, logger, ptree, TempDir } from "@oh-my-pi/pi-utils";
|
|
5
|
+
import { extractArchive } from "./zip";
|
|
5
6
|
|
|
6
7
|
const TOOLS_DIR = getToolsDir();
|
|
7
8
|
const TOOL_DOWNLOAD_TIMEOUT_MS = 120_000;
|
|
@@ -220,17 +221,7 @@ async function downloadTool(tool: ToolName, signal?: AbortSignal): Promise<strin
|
|
|
220
221
|
}
|
|
221
222
|
|
|
222
223
|
try {
|
|
223
|
-
|
|
224
|
-
const files = await archive.files();
|
|
225
|
-
const extractRoot = path.resolve(tmp.path());
|
|
226
|
-
|
|
227
|
-
for (const [filePath, file] of files) {
|
|
228
|
-
const outputPath = path.resolve(extractRoot, filePath);
|
|
229
|
-
if (!outputPath.startsWith(extractRoot + path.sep)) {
|
|
230
|
-
throw new Error(`Archive entry escapes extraction dir: ${filePath}`);
|
|
231
|
-
}
|
|
232
|
-
await Bun.write(outputPath, file);
|
|
233
|
-
}
|
|
224
|
+
await extractArchive(archivePath, tmp.path());
|
|
234
225
|
} catch (err) {
|
|
235
226
|
throw new Error(`Failed to extract ${assetName}: ${err instanceof Error ? err.message : String(err)}`);
|
|
236
227
|
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import TurndownService from "turndown";
|
|
2
|
+
import { gfm } from "turndown-plugin-gfm";
|
|
3
|
+
|
|
4
|
+
/** Structural view of a list item's parent element: just the members the list rule reads. */
interface TurndownListParent {
	nodeName: string;
	getAttribute(name: string): string | null;
	children: ArrayLike<unknown>;
}

/**
 * Create a Turndown converter set up for GFM, with three rule overrides that
 * omp depends on: `~~strikethrough~~`, headings without escaped periods, and
 * list markers followed by a single space.
 *
 * Used by both the web scrapers (HTML → markdown) and the markit document
 * engine (`src/markit`); the rule set must stay identical across both call sites.
 *
 * @returns A configured `TurndownService` instance.
 */
export function createTurndown(): TurndownService {
	const service = new TurndownService({
		headingStyle: "atx",
		codeBlockStyle: "fenced",
		bulletListMarker: "-",
	});
	service.use(gfm);

	// The GFM spec writes strikethrough with a double tilde, not a single one.
	service.addRule("strikethrough", {
		filter: ["del", "s", "strike"],
		replacement: content => `~~${content}~~`,
	});

	// Turndown escapes periods ("1." becomes "1\."); undo that inside headings.
	service.addRule("heading", {
		filter: ["h1", "h2", "h3", "h4", "h5", "h6"],
		replacement: (content, node) => {
			// The heading depth is the digit in the tag name (H1..H6).
			const hashes = "#".repeat(Number(node.nodeName.charAt(1)));
			const text = content.replace(/\\([.])/g, "$1").trim();
			return `\n\n${hashes} ${text}\n\n`;
		},
	});

	// Emit one space after the list marker (turndown hardcodes three).
	service.addRule("listItem", {
		filter: "li",
		replacement: (content, node, options) => {
			const body = content.replace(/^\n+/, "").replace(/\n+$/, "\n").replace(/\n/gm, "\n ");
			const trailer = node.nextSibling ? "\n" : "";
			const parent = node.parentNode as unknown as TurndownListParent | null;
			let marker = `${options.bulletListMarker} `;
			if (parent?.nodeName === "OL") {
				// Ordered items are numbered from the list's `start` attribute (default 1)
				// plus the item's position among the parent's children.
				const startAttr = parent.getAttribute("start");
				const position = Array.prototype.indexOf.call(parent.children, node);
				const first = startAttr ? Number(startAttr) : 1;
				marker = `${first + position}. `;
			}
			return marker + body + trailer;
		},
	});

	return service;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Normalize HTML tables so turndown-plugin-gfm can render them:
 * - strip `<p>` wrappers inside `<td>`/`<th>` cells (joining paragraphs with a space)
 * - wrap the first row in `<thead>`, promoting its `<td>` cells to `<th>`, for
 *   tables whose first child is a row (optionally inside `<tbody>`); tables that
 *   already start with `<thead>` are left alone
 *
 * @param html - HTML fragment that may contain tables.
 * @returns The HTML with cells and header rows normalized; other markup is unchanged.
 */
export function normalizeTablesHtml(html: string): string {
	// `\b` after the tag name stops `<thead>` from being read as `<th` with
	// attributes "ead", which would swallow the row markup up to the first `</th>`
	// and leave an unbalanced `<p>` in the first header cell.
	const withPlainCells = html.replace(
		/<(td|th)\b([^>]*)>([\s\S]*?)<\/(td|th)>/gi,
		(_match, tag: string, attrs: string, inner: string, closeTag: string) => {
			const stripped = inner
				.replace(/^\s*<p>/i, "")
				.replace(/<\/p>\s*$/i, "")
				.replace(/<\/p>\s*<p>/gi, " ");
			return `<${tag}${attrs}>${stripped}</${closeTag}>`;
		},
	);
	// Only matches when a `<tr>` directly follows `<table …>` (or `<table …><tbody>`),
	// so a table that already has a `<thead>` does not match and is returned as is.
	return withPlainCells.replace(
		/<table([^>]*)>\s*(?:<tbody>\s*)?(<tr[\s\S]*?<\/tr>)([\s\S]*?)<\/(?:tbody>\s*<\/)?table>/gi,
		(_match, attrs: string, firstRow: string, rest: string) => {
			const theadRow = firstRow.replace(/<td/gi, "<th").replace(/<\/td>/gi, "</th>");
			return `<table${attrs}><thead>${theadRow}</thead><tbody>${rest}</tbody></table>`;
		},
	);
}
|