@oh-my-pi/pi-coding-agent 16.0.7 → 16.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -0
- package/dist/cli.js +4752 -12462
- package/dist/types/cli/update-cli.d.ts +11 -0
- package/dist/types/debug/remote-debugger.d.ts +45 -0
- package/dist/types/internal-urls/docs-index.d.ts +19 -0
- package/dist/types/markit/converters/docx.d.ts +6 -0
- package/dist/types/markit/converters/epub.d.ts +15 -0
- package/dist/types/markit/converters/pdf/columns.d.ts +35 -0
- package/dist/types/markit/converters/pdf/extract.d.ts +10 -0
- package/dist/types/markit/converters/pdf/grid.d.ts +25 -0
- package/dist/types/markit/converters/pdf/headers.d.ts +24 -0
- package/dist/types/markit/converters/pdf/index.d.ts +6 -0
- package/dist/types/markit/converters/pdf/render.d.ts +24 -0
- package/dist/types/markit/converters/pdf/types.d.ts +75 -0
- package/dist/types/markit/converters/pptx.d.ts +57 -0
- package/dist/types/markit/converters/xlsx.d.ts +25 -0
- package/dist/types/markit/index.d.ts +2 -0
- package/dist/types/markit/registry.d.ts +16 -0
- package/dist/types/markit/types.d.ts +30 -0
- package/dist/types/session/agent-session.d.ts +7 -8
- package/dist/types/session/auth-storage.d.ts +3 -2
- package/dist/types/session/yield-queue.d.ts +3 -1
- package/dist/types/tools/browser/attach.d.ts +1 -1
- package/dist/types/utils/markit.d.ts +0 -8
- package/dist/types/utils/mupdf-wasm-embed.d.ts +1 -0
- package/dist/types/utils/turndown.d.ts +15 -0
- package/dist/types/utils/zip.d.ts +119 -0
- package/package.json +20 -18
- package/scripts/build-binary.ts +7 -3
- package/scripts/bundle-dist.ts +28 -12
- package/scripts/embed-mupdf-wasm.ts +67 -0
- package/scripts/generate-docs-index.ts +48 -32
- package/scripts/omp +1 -1
- package/src/advisor/__tests__/advisor.test.ts +83 -0
- package/src/advisor/runtime.ts +16 -1
- package/src/cli/auth-broker-cli.ts +1 -3
- package/src/cli/auth-gateway-cli.ts +2 -5
- package/src/cli/update-cli.ts +63 -3
- package/src/config/model-discovery.ts +20 -8
- package/src/config/models-config-schema.ts +8 -1
- package/src/debug/index.ts +44 -0
- package/src/debug/remote-debugger.ts +151 -0
- package/src/debug/report-bundle.ts +2 -1
- package/src/internal-urls/docs-index.generated.txt +2 -0
- package/src/internal-urls/docs-index.ts +102 -0
- package/src/internal-urls/omp-protocol.ts +10 -9
- package/src/markit/NOTICE +32 -0
- package/src/markit/converters/docx.ts +56 -0
- package/src/markit/converters/epub.ts +136 -0
- package/src/markit/converters/mammoth.d.ts +24 -0
- package/src/markit/converters/pdf/columns.ts +103 -0
- package/src/markit/converters/pdf/extract.ts +574 -0
- package/src/markit/converters/pdf/grid.ts +780 -0
- package/src/markit/converters/pdf/headers.ts +106 -0
- package/src/markit/converters/pdf/index.ts +146 -0
- package/src/markit/converters/pdf/render.ts +501 -0
- package/src/markit/converters/pdf/types.ts +84 -0
- package/src/markit/converters/pptx.ts +325 -0
- package/src/markit/converters/xlsx.ts +173 -0
- package/src/markit/index.ts +2 -0
- package/src/markit/registry.ts +59 -0
- package/src/markit/types.ts +35 -0
- package/src/modes/components/snapcompact-shape-preview-doc.md +14 -7
- package/src/modes/components/snapcompact-shape-preview.ts +2 -2
- package/src/modes/controllers/input-controller.ts +29 -8
- package/src/modes/interactive-mode.ts +26 -9
- package/src/prompts/advisor/system.md +1 -0
- package/src/sdk.ts +5 -9
- package/src/session/agent-session.ts +62 -40
- package/src/session/auth-storage.ts +2 -11
- package/src/session/yield-queue.ts +7 -1
- package/src/tools/browser/attach.ts +2 -2
- package/src/tools/fetch.ts +25 -60
- package/src/tools/read.ts +1 -1
- package/src/tools/search.ts +1 -6
- package/src/tools/write.ts +25 -65
- package/src/utils/markit.ts +25 -9
- package/src/utils/mupdf-wasm-embed.ts +12 -0
- package/src/utils/tools-manager.ts +2 -11
- package/src/utils/turndown.ts +83 -0
- package/src/{tools/archive-reader.ts → utils/zip.ts} +453 -83
- package/src/web/scrapers/types.ts +3 -46
- package/dist/types/internal-urls/docs-index.generated.d.ts +0 -2
- package/dist/types/tools/archive-reader.d.ts +0 -49
- package/src/internal-urls/docs-index.generated.ts +0 -120
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
// Adapted from markit-ai (MIT). See ../NOTICE.
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import { XMLParser } from "fast-xml-parser";
|
|
4
|
+
import { unzip, unzipText } from "../../utils/zip";
|
|
5
|
+
import type { ConversionResult, Converter, StreamInfo } from "../types";
|
|
6
|
+
|
|
7
|
+
/** File extensions compared verbatim against `StreamInfo.extension`. */
const EXTENSIONS = [".pptx"];
/** MIME types compared as prefixes against `StreamInfo.mimetype`. */
const MIMETYPES = ["application/vnd.openxmlformats-officedocument.presentationml.presentation"];

/** A text value: bare string/number, or a `{ "#text" }` node when the element carries attributes. */
type XmlText = string | number | { "#text"?: string };

// ── DrawingML text (`a:` namespace) ─────────────────────────────────────────
// Every repeated child is typed `X | X[]`: the parser output is a single
// object for one occurrence and an array for several.

/** `<a:r>`: one text run. */
interface TextRun {
	"a:t"?: XmlText;
}
/** `<a:p>`: one paragraph made of runs. */
interface Paragraph {
	"a:r"?: TextRun | TextRun[];
}
/** `<p:txBody>` / `<a:txBody>`: a list of paragraphs. */
interface TextBody {
	"a:p"?: Paragraph | Paragraph[];
}

// ── Non-visual shape properties ─────────────────────────────────────────────

/** `<p:cNvPr>`: carries the shape's display name. */
interface CNvPr {
	"@_name": string;
}
/** `<p:ph>`: placeholder marker; `@_type` is compared against `"sldImg"` in notes. */
interface Placeholder {
	"@_type": string;
}
interface NvPr {
	"p:ph"?: Placeholder;
}
interface NvSpPr {
	"p:cNvPr"?: CNvPr;
	"p:nvPr"?: NvPr;
}
interface NvPicPr {
	"p:cNvPr"?: CNvPr;
}

// ── Slide content ───────────────────────────────────────────────────────────

/** `<p:sp>`: a shape that may hold text. */
interface Shape {
	"p:txBody"?: TextBody;
	"p:nvSpPr"?: NvSpPr;
}
/** `<a:blip>`: `@_r:embed` is the relationship id of the image part. */
interface Blip {
	"@_r:embed": string;
}
interface BlipFill {
	"a:blip"?: Blip;
}
/** `<p:pic>`: an embedded picture. */
interface Picture {
	"p:blipFill"?: BlipFill;
	"p:nvSpPr"?: NvSpPr;
	"p:nvPicPr"?: NvPicPr;
}
interface TableCell {
	"a:txBody"?: TextBody;
}
interface TableRow {
	"a:tc"?: TableCell | TableCell[];
}
interface Table {
	"a:tr"?: TableRow | TableRow[];
}
interface GraphicData {
	"a:tbl"?: Table;
}
interface Graphic {
	"a:graphicData"?: GraphicData;
}
/** `<p:graphicFrame>`: wrapper that may contain a table. */
interface GraphicFrame {
	"a:graphic"?: Graphic;
}
/** `<p:spTree>`: the shape tree of a slide or notes page. */
interface SpTree {
	"p:sp"?: Shape | Shape[];
	"p:pic"?: Picture | Picture[];
	"p:graphicFrame"?: GraphicFrame | GraphicFrame[];
}
interface CSld {
	"p:spTree"?: SpTree;
}

// ── Package-level documents ─────────────────────────────────────────────────

/** Parsed slide part. */
interface SlideDoc {
	"p:sld"?: { "p:cSld"?: CSld };
}
/** Parsed notes part. */
interface NotesDoc {
	"p:notes"?: { "p:cSld"?: CSld };
}
/** `<p:sldId>`: `@_r:id` is the relationship id of a slide part. */
interface SldId {
	"@_r:id": string;
}
/** Parsed `ppt/presentation.xml`; the slide id list gives the slide order. */
interface PresentationDoc {
	"p:presentation"?: { "p:sldIdLst"?: { "p:sldId"?: SldId | SldId[] } };
}
/** One `<Relationship>` entry of a `.rels` part. */
interface Relationship {
	"@_Id": string;
	"@_Target": string;
}
/** Parsed `.rels` part. */
interface RelationshipsDoc {
	Relationships?: { Relationship?: Relationship | Relationship[] };
}
|
|
98
|
+
|
|
99
|
+
/**
 * Converts PowerPoint (`.pptx`) packages to markdown.
 *
 * Every slide becomes one section that starts with `<!-- Slide N -->`. The
 * first shape carrying text is emitted as a `#` heading and the remaining
 * shapes as plain text, followed by pictures, tables and speaker notes.
 */
export class PptxConverter implements Converter {
	name = "pptx";

	/** Accepts a stream by exact extension match or by MIME type prefix. */
	accepts(streamInfo: StreamInfo): boolean {
		const { extension, mimetype } = streamInfo;
		if (extension && EXTENSIONS.includes(extension)) return true;
		return Boolean(mimetype && MIMETYPES.some(m => mimetype.startsWith(m)));
	}

	/**
	 * Unzips the package, walks the slides in presentation order and renders them.
	 *
	 * @param input Raw bytes of the `.pptx` file.
	 * @param streamInfo When `imageDir` is set, embedded pictures are written
	 *   there and referenced from the markdown; otherwise (or when the write
	 *   fails) an HTML comment placeholder is emitted instead.
	 * @returns The markdown of all slides, separated by blank lines.
	 * @throws Error when `ppt/presentation.xml` is missing from the archive.
	 */
	async convert(input: Buffer, streamInfo: StreamInfo): Promise<ConversionResult> {
		const entries = unzip(input);
		const parser = new XMLParser({
			ignoreAttributes: false,
			attributeNamePrefix: "@_",
			textNodeName: "#text",
			processEntities: { maxTotalExpansions: 1_000_000 },
		});

		// Slide order comes from the slide id list in presentation.xml.
		const presXml = unzipText(entries, "ppt/presentation.xml");
		if (!presXml) throw new Error("Invalid PPTX: missing presentation.xml");
		const pres = parser.parse(presXml) as PresentationDoc;
		const sldIds = toList(pres["p:presentation"]?.["p:sldIdLst"]?.["p:sldId"]);

		// Relationship ids → part targets for the presentation part.
		const relsXml = unzipText(entries, "ppt/_rels/presentation.xml.rels");
		const relMap = PptxConverter.relationshipMap(
			relsXml ? toList((parser.parse(relsXml) as RelationshipsDoc).Relationships?.Relationship) : [],
		);

		const slidePaths: string[] = [];
		for (const sld of sldIds) {
			const target = relMap.get(sld["@_r:id"]);
			// Targets may be relative to ppt/ or package-absolute ("/ppt/slides/…").
			if (target) slidePaths.push(PptxConverter.resolvePartPath("ppt", target));
		}

		// Nothing resolved through relationships: fall back to numbered slide files.
		if (slidePaths.length === 0) {
			const slideNumber = (file: string): number => parseInt(file.match(/slide(\d+)/)?.[1] || "0", 10);
			const slideFiles = Object.keys(entries)
				.filter(f => /^ppt\/slides\/slide\d+\.xml$/.test(f))
				.sort((a, b) => slideNumber(a) - slideNumber(b));
			slidePaths.push(...slideFiles);
		}

		const imageDir = streamInfo.imageDir;
		const sections: string[] = [];
		let imageCount = 0;

		for (const [index, slidePath] of slidePaths.entries()) {
			// Numbering follows the position in the slide list, even if a slide is skipped.
			const slideNumber = index + 1;
			const slideXml = unzipText(entries, slidePath);
			if (!slideXml) continue;
			const spTree = (parser.parse(slideXml) as SlideDoc)["p:sld"]?.["p:cSld"]?.["p:spTree"];
			if (!spTree) continue;

			// Slide-level relationships: <dir>/_rels/<file>.rels next to the slide part.
			const slideDir = path.posix.dirname(slidePath);
			const slideRelsXml = unzipText(entries, `${slideDir}/_rels/${path.posix.basename(slidePath)}.rels`);
			const slideRels = slideRelsXml
				? toList((parser.parse(slideRelsXml) as RelationshipsDoc).Relationships?.Relationship)
				: [];
			const slideRelMap = PptxConverter.relationshipMap(slideRels);

			const slideLines = [`<!-- Slide ${slideNumber} -->`];

			// The first shape that yields text is treated as the slide title.
			let isTitle = true;
			for (const shape of toList(spTree["p:sp"])) {
				const text = this.extractText(shape);
				if (!text) continue;
				slideLines.push(isTitle ? `# ${text}` : text);
				isTitle = false;
			}

			// Embedded images
			for (const pic of toList(spTree["p:pic"])) {
				const rEmbed = pic["p:blipFill"]?.["a:blip"]?.["@_r:embed"];
				const target = rEmbed ? slideRelMap.get(rEmbed) : undefined;
				if (!target) continue;
				// e.g. ppt/slides + ../media/image1.png → ppt/media/image1.png
				const imagePath = PptxConverter.resolvePartPath(slideDir, target);
				const buf = entries[imagePath];
				if (!buf) continue;
				imageCount++;
				const name =
					pic["p:nvSpPr"]?.["p:cNvPr"]?.["@_name"] ||
					pic["p:nvPicPr"]?.["p:cNvPr"]?.["@_name"] ||
					`image_${imageCount}`;
				const placeholder = `<!-- image: ${name} (slide ${slideNumber}) -->`;
				if (!imageDir) {
					slideLines.push(placeholder);
					continue;
				}
				try {
					const ext = path.extname(imagePath).slice(1) || "png";
					const filepath = path.join(imageDir, `slide${slideNumber}_${imageCount}.${ext}`);
					await Bun.write(filepath, buf);
					slideLines.push(`![${name}](${filepath})`);
				} catch {
					// Best effort: a failed write degrades to the placeholder comment.
					slideLines.push(placeholder);
				}
			}

			// Tables
			for (const gf of toList(spTree["p:graphicFrame"])) {
				const table = this.extractTable(gf);
				if (table) slideLines.push(table);
			}

			// Speaker notes
			const notePath = PptxConverter.notesPartPath(slidePath, slideRels);
			const noteXml = notePath ? unzipText(entries, notePath) : undefined;
			const noteSpTree = noteXml
				? (parser.parse(noteXml) as NotesDoc)["p:notes"]?.["p:cSld"]?.["p:spTree"]
				: undefined;
			if (noteSpTree) {
				const noteTexts: string[] = [];
				for (const ns of toList(noteSpTree["p:sp"])) {
					// Skip the slide image placeholder.
					if (ns["p:nvSpPr"]?.["p:nvPr"]?.["p:ph"]?.["@_type"] === "sldImg") continue;
					const t = this.extractText(ns);
					if (t) noteTexts.push(t);
				}
				if (noteTexts.length > 0) slideLines.push("\n### Notes:", noteTexts.join("\n"));
			}

			sections.push(slideLines.join("\n"));
		}

		return { markdown: sections.join("\n\n").trim() };
	}

	/**
	 * Returns the text of a shape: runs are concatenated within a paragraph,
	 * paragraphs are separated by newlines, and the result is trimmed.
	 * Returns `""` when the shape has no text body.
	 */
	extractText(shape: Shape): string {
		const lines: string[] = [];
		for (const paragraph of toList(shape["p:txBody"]?.["a:p"])) {
			const parts = PptxConverter.runTexts(paragraph);
			if (parts.length > 0) lines.push(parts.join(""));
		}
		return lines.join("\n").trim();
	}

	/**
	 * Renders the table inside a graphic frame as a markdown table whose first
	 * row is the header. Returns `null` when the frame holds no table rows.
	 */
	extractTable(gf: GraphicFrame): string | null {
		const rowList = toList(gf?.["a:graphic"]?.["a:graphicData"]?.["a:tbl"]?.["a:tr"]);
		if (rowList.length === 0) return null;

		const rows = rowList.map(row => toList(row["a:tc"]).map(cell => PptxConverter.cellText(cell)));
		// Pad every row (header included) to the widest one so columns line up.
		const width = rows.reduce((widest, row) => Math.max(widest, row.length), 0);
		const lines = rows.map(row => {
			const cells = row.map(PptxConverter.escapeCell);
			while (cells.length < width) cells.push("");
			return `| ${cells.join(" | ")} |`;
		});
		lines.splice(1, 0, `| ${Array.from({ length: width }, () => "---").join(" | ")} |`);
		return lines.join("\n");
	}

	/** Builds a relationship-id → target lookup. */
	private static relationshipMap(rels: readonly Relationship[]): Map<string, string> {
		const map = new Map<string, string>();
		for (const rel of rels) map.set(rel["@_Id"], rel["@_Target"]);
		return map;
	}

	/**
	 * Resolves a relationship target to an archive entry name. A leading `/`
	 * marks a package-absolute target; anything else is relative to `baseDir`.
	 * `.` and `..` segments are collapsed.
	 */
	private static resolvePartPath(baseDir: string, target: string): string {
		if (target.startsWith("/")) return path.posix.normalize(target.slice(1));
		return path.posix.join(baseDir, target);
	}

	/**
	 * Locates the notes part of a slide through the slide's relationships,
	 * because notes parts are not guaranteed to share the slide's number.
	 * Only when no relationships were found does it fall back to the
	 * `notesSlides/notesSlideN.xml` naming guess.
	 */
	private static notesPartPath(slidePath: string, slideRels: readonly Relationship[]): string | undefined {
		if (slideRels.length === 0) return slidePath.replace("slides/slide", "notesSlides/notesSlide");
		const slideDir = path.posix.dirname(slidePath);
		for (const rel of slideRels) {
			const target = rel["@_Target"];
			if (!target) continue;
			const type = (rel as Relationship & { "@_Type"?: string })["@_Type"];
			if (type?.endsWith("/notesSlide") || /(^|\/)notesSlides\//.test(target)) {
				return PptxConverter.resolvePartPath(slideDir, target);
			}
		}
		return undefined;
	}

	/** Text of every run in a paragraph; runs without `a:t` are skipped. */
	private static runTexts(paragraph: Paragraph): string[] {
		const parts: string[] = [];
		for (const run of toList(paragraph["a:r"])) {
			const t = run["a:t"];
			if (t == null) continue;
			// `??` instead of `||` so a numeric 0 survives as "0".
			parts.push(typeof t === "object" ? String(t["#text"] ?? "") : String(t));
		}
		return parts;
	}

	/** Text of a table cell: runs concatenated per paragraph, paragraphs joined by a space. */
	private static cellText(cell: TableCell): string {
		return toList(cell["a:txBody"]?.["a:p"])
			.map(paragraph => PptxConverter.runTexts(paragraph))
			.filter(parts => parts.length > 0)
			.map(parts => parts.join(""))
			.join(" ");
	}

	/** Keeps cell text from breaking the table: escapes `|` and flattens newlines. */
	private static escapeCell(text: string): string {
		return String(text).replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
	}
}
|
|
321
|
+
|
|
322
|
+
/**
 * Normalizes an "one or many" parser value to an array: falsy values yield
 * an empty list, arrays are returned as-is, anything else is wrapped.
 */
function toList<T>(val: T | T[] | undefined): T[] {
	return !val ? [] : Array.isArray(val) ? val : [val];
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
// Adapted from markit-ai (MIT). See ../NOTICE.
|
|
2
|
+
import { XMLParser } from "fast-xml-parser";
|
|
3
|
+
import { unzip, unzipText } from "../../utils/zip";
|
|
4
|
+
import type { ConversionResult, Converter, StreamInfo } from "../types";
|
|
5
|
+
|
|
6
|
+
/** File extensions compared verbatim against `StreamInfo.extension`. */
const EXTENSIONS = [".xlsx"];
/** MIME types compared as prefixes against `StreamInfo.mimetype`. */
const MIMETYPES = ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"];

/** A text value: bare string/number, or a `{ "#text" }` node when the element carries attributes. */
type XmlText = string | number | { "#text"?: string };

// Every repeated child is typed `X | X[]`: the parser output is a single
// object for one occurrence and an array for several.

/** `<r>`: one rich-text run inside a string item. */
interface RichTextRun {
	t?: XmlText;
}
/** `<si>` / `<is>`: a string stored either as plain `t` or as rich-text runs `r`. */
interface StringItem {
	t?: XmlText;
	r?: RichTextRun | RichTextRun[];
}
/** `<c>`: a cell; `@_t` selects how `v` / `is` are interpreted. */
interface Cell {
	"@_t"?: string;
	v?: string | number;
	is?: StringItem;
}
/** `<row>`: a row of cells. */
interface Row {
	c?: Cell | Cell[];
}
/** Parsed worksheet part. */
interface WorksheetDoc {
	worksheet?: { sheetData?: { row?: Row | Row[] } };
}
/** `<sheet>`: display name plus the relationship id of its worksheet part. */
interface Sheet {
	"@_name": string;
	"@_r:id": string;
}
/** Parsed `xl/workbook.xml`. */
interface WorkbookDoc {
	workbook?: { sheets?: { sheet?: Sheet | Sheet[] } };
}
/** Parsed `xl/sharedStrings.xml`. */
interface SharedStringsDoc {
	sst?: { si?: StringItem | StringItem[] };
}
/** One `<Relationship>` entry of a `.rels` part. */
interface Relationship {
	"@_Id": string;
	"@_Target": string;
}
/** Parsed `.rels` part. */
interface RelationshipsDoc {
	Relationships?: { Relationship?: Relationship | Relationship[] };
}
|
|
47
|
+
|
|
48
|
+
/**
 * Converts Excel (`.xlsx`) workbooks to markdown: one `## <sheet name>`
 * heading per non-empty sheet, followed by a table whose first row is used
 * as the header.
 */
export class XlsxConverter implements Converter {
	name = "xlsx";

	/** Accepts a stream by exact extension match or by MIME type prefix. */
	accepts(streamInfo: StreamInfo): boolean {
		const { extension, mimetype } = streamInfo;
		if (extension && EXTENSIONS.includes(extension)) return true;
		return Boolean(mimetype && MIMETYPES.some(m => mimetype.startsWith(m)));
	}

	/**
	 * Unzips the workbook and renders every sheet that has rows.
	 *
	 * @param input Raw bytes of the `.xlsx` file.
	 * @param _streamInfo Unused.
	 * @returns Sheet headings and tables, separated by blank lines.
	 * @throws Error when `xl/workbook.xml` is missing from the archive.
	 */
	async convert(input: Buffer, _streamInfo: StreamInfo): Promise<ConversionResult> {
		const entries = unzip(input);
		const parser = new XMLParser({
			ignoreAttributes: false,
			attributeNamePrefix: "@_",
			textNodeName: "#text",
			processEntities: { maxTotalExpansions: 1_000_000 },
		});

		// Shared strings table (optional part).
		const ssXml = unzipText(entries, "xl/sharedStrings.xml");
		const shared = toArray(ssXml ? (parser.parse(ssXml) as SharedStringsDoc).sst?.si : undefined);

		// Workbook: sheet names and relationship ids.
		const wbXml = unzipText(entries, "xl/workbook.xml");
		if (!wbXml) throw new Error("Invalid XLSX: missing workbook.xml");
		const sheets = toArray((parser.parse(wbXml) as WorkbookDoc).workbook?.sheets?.sheet);

		// Workbook relationships: relationship id → worksheet part.
		const relsXml = unzipText(entries, "xl/_rels/workbook.xml.rels");
		const relMap = new Map<string, string>();
		const relList = toArray(
			relsXml ? (parser.parse(relsXml) as RelationshipsDoc).Relationships?.Relationship : undefined,
		);
		for (const r of relList) relMap.set(r["@_Id"], r["@_Target"]);

		const sections: string[] = [];
		for (const sheet of sheets) {
			const target = relMap.get(sheet["@_r:id"]);
			if (!target) continue;
			// A leading "/" marks a package-absolute target; otherwise relative to xl/.
			const sheetPath = target.startsWith("/") ? target.slice(1) : `xl/${target}`;
			const sheetXml = unzipText(entries, sheetPath);
			if (!sheetXml) continue;
			const rows = toArray((parser.parse(sheetXml) as WorksheetDoc).worksheet?.sheetData?.row);
			if (rows.length === 0) continue;

			const tableRows = rows.map(row => this.rowValues(row, shared));
			sections.push(`## ${sheet["@_name"]}`, XlsxConverter.renderTable(tableRows));
		}
		return { markdown: sections.join("\n\n") };
	}

	/**
	 * Returns the display text of a cell according to its `t` attribute:
	 * `s` → shared string lookup, `inlineStr` → inline string item,
	 * `b` → `TRUE`/`FALSE`; anything else → the raw `v` value, or `""`.
	 */
	getCellValue(cell: Cell, shared: StringItem[]): string {
		switch (cell["@_t"]) {
			case "s":
				return this.getSharedString(shared, Number(cell.v));
			case "inlineStr":
				return XlsxConverter.stringItemText(cell.is);
			case "b":
				return cell.v === 1 || cell.v === "1" ? "TRUE" : "FALSE";
			default:
				// Number or formula result
				return cell.v != null ? String(cell.v) : "";
		}
	}

	/** Returns the shared string at `idx`, or `""` when the index has no entry. */
	getSharedString(shared: StringItem[], idx: number): string {
		return XlsxConverter.stringItemText(shared[idx]);
	}

	/**
	 * Extracts a row's values, placing each cell at the column named by its
	 * `r` reference (e.g. `C7`). Sheets omit empty cells, so without this the
	 * remaining values would shift left. Cells with a missing, malformed or
	 * out-of-order reference are appended at the next free column.
	 */
	private rowValues(row: Row, shared: StringItem[]): string[] {
		const values: string[] = [];
		for (const cell of toArray(row.c)) {
			const column = XlsxConverter.columnIndex((cell as Cell & { "@_r"?: string })["@_r"]);
			if (column !== undefined) {
				while (values.length < column) values.push("");
			}
			values.push(this.getCellValue(cell, shared));
		}
		return values;
	}

	/**
	 * Zero-based column index of a cell reference (`A1` → 0, `AA10` → 26), or
	 * `undefined` when the reference is absent or not of the form
	 * `<1-3 letters><digits>`.
	 */
	private static columnIndex(ref: string | undefined): number | undefined {
		const letters = ref ? /^([A-Za-z]{1,3})\d+$/.exec(ref)?.[1] : undefined;
		if (!letters) return undefined;
		let index = 0;
		for (const ch of letters.toUpperCase()) index = index * 26 + (ch.charCodeAt(0) - 64);
		return index - 1;
	}

	/** Text of a string item: plain `t` if present, else the concatenated rich-text runs. */
	private static stringItemText(item: StringItem | undefined): string {
		if (!item) return "";
		if (item.t != null) return textValue(item.t);
		if (item.r) {
			return toArray(item.r)
				.map(r => textValue(r.t))
				.join("");
		}
		return "";
	}

	/**
	 * Renders rows as a markdown table with the first row as header. Rows are
	 * padded to the widest row; `|` is escaped and newlines are flattened so
	 * cell text cannot break the table.
	 */
	private static renderTable(rows: string[][]): string {
		// reduce instead of Math.max(...spread): the spread hits the engine's
		// argument limit on very large sheets.
		const width = rows.reduce((widest, row) => Math.max(widest, row.length), 0);
		const lines = rows.map(row => {
			const cells = row.map(value => String(value).replace(/\|/g, "\\|").replace(/\r?\n/g, " "));
			while (cells.length < width) cells.push("");
			return `| ${cells.join(" | ")} |`;
		});
		lines.splice(1, 0, `| ${Array.from({ length: width }, () => "---").join(" | ")} |`);
		return lines.join("\n");
	}
}
|
|
163
|
+
|
|
164
|
+
/**
 * Returns the string form of a parsed text node.
 *
 * @param t Bare string/number, a `{ "#text" }` wrapper, or nothing.
 * @returns `""` for `null`/`undefined` or a wrapper without `#text`;
 *   otherwise the value converted with `String`.
 */
function textValue(t: XmlText | undefined): string {
	if (t == null) return "";
	// `??` rather than `||`: a numeric `#text` of 0 must become "0", and the
	// String() call guarantees a string even when the parser produced a number.
	if (typeof t === "object") return String(t["#text"] ?? "");
	return String(t);
}
|
|
169
|
+
|
|
170
|
+
/**
 * Normalizes an "one or many" parser value to an array: arrays pass through
 * unchanged, other truthy values are wrapped, falsy values yield `[]`.
 */
function toArray<T>(val: T | T[] | undefined): T[] {
	if (Array.isArray(val)) return val;
	return val ? [val] : [];
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
// Adapted from markit-ai (MIT). See ./NOTICE.
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import { DocxConverter } from "./converters/docx";
|
|
4
|
+
import { EpubConverter } from "./converters/epub";
|
|
5
|
+
import { PdfConverter } from "./converters/pdf";
|
|
6
|
+
import { PptxConverter } from "./converters/pptx";
|
|
7
|
+
import { XlsxConverter } from "./converters/xlsx";
|
|
8
|
+
import type { ConversionResult, Converter, MarkitOptions, StreamInfo } from "./types";
|
|
9
|
+
|
|
10
|
+
/**
 * In-house document → markdown engine (replaces the `markit-ai` package).
 *
 * Registers only the document converters omp routes: pdf, docx, pptx, xlsx
 * and epub, tried in that order. The result comes from the first converter
 * that both accepts the stream and converts it without throwing.
 */
export class Markit {
	readonly #converters: readonly Converter[];
	readonly #options: MarkitOptions;

	/** @param options Passed through unchanged to every converter's `convert()`. */
	constructor(options: MarkitOptions = {}) {
		this.#options = options;
		this.#converters = [
			new PdfConverter(),
			new DocxConverter(),
			new PptxConverter(),
			new XlsxConverter(),
			new EpubConverter(),
		];
	}

	/**
	 * Reads a file and converts it. The stream info is derived from the path
	 * (lower-cased extension, base name); fields in `extra` are spread last.
	 */
	async convertFile(filePath: string, extra?: { imageDir?: string }): Promise<ConversionResult> {
		const bytes = await Bun.file(filePath).arrayBuffer();
		const streamInfo: StreamInfo = {
			localPath: filePath,
			extension: path.extname(filePath).toLowerCase(),
			filename: path.basename(filePath),
			...extra,
		};
		return this.convert(Buffer.from(bytes), streamInfo);
	}

	/**
	 * Runs the accepting converters in registration order.
	 *
	 * @returns The first successful conversion result.
	 * @throws Error listing every converter failure when all accepting
	 *   converters threw, or an "Unsupported format" error when none accepted.
	 */
	async convert(input: Buffer, streamInfo: StreamInfo): Promise<ConversionResult> {
		const failures: string[] = [];
		for (const converter of this.#converters) {
			if (!converter.accepts(streamInfo)) continue;
			try {
				return await converter.convert(input, streamInfo, this.#options);
			} catch (err) {
				const reason = err instanceof Error ? err.message : String(err);
				failures.push(`  ${converter.name}: ${reason}`);
			}
		}
		if (failures.length === 0) {
			throw new Error(`Unsupported format: ${streamInfo.extension || streamInfo.mimetype || "unknown"}`);
		}
		throw new Error(`Conversion failed:\n${failures.join("\n")}`);
	}
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// Adapted from markit-ai (MIT). See ./NOTICE.
|
|
2
|
+
|
|
3
|
+
/** Metadata about the input handed to converters; every field is optional. */
export interface StreamInfo {
	/** MIME type of the input. */
	mimetype?: string;
	/** File extension of the input. */
	extension?: string;
	/** Character set of the input. */
	charset?: string;
	/** Base name of the input file. */
	filename?: string;
	/** Filesystem path the input was read from. */
	localPath?: string;
	/** URL associated with the input. */
	url?: string;
	/** Directory to write extracted images/diagrams. */
	imageDir?: string;
}
|
|
13
|
+
|
|
14
|
+
/** Outcome of a successful conversion. */
export interface ConversionResult {
	/** The converted document as markdown. */
	markdown: string;
	/** Optional document title. */
	title?: string;
}
|
|
18
|
+
|
|
19
|
+
/** Optional hooks and settings forwarded to each converter's `convert()`. */
export interface MarkitOptions {
	/** Describe an image, return markdown. Receives raw bytes and mimetype. */
	describe?: (image: Buffer, mimetype: string) => Promise<string>;
	/** Transcribe audio, return text. Receives raw bytes and mimetype. */
	transcribe?: (audio: Buffer, mimetype: string) => Promise<string>;
	/** Extra instructions appended to the image description prompt. */
	prompt?: string;
}
|
|
27
|
+
|
|
28
|
+
/** Contract implemented by every document converter. */
export interface Converter {
	/** Human-readable name for error messages. */
	name: string;
	/** Quick check: can this converter handle the given stream? */
	accepts(streamInfo: StreamInfo): boolean;
	/**
	 * Convert the source to markdown.
	 *
	 * @param input Raw bytes of the document.
	 * @param streamInfo Metadata describing the input.
	 * @param options Optional hooks and settings.
	 */
	convert(input: Buffer, streamInfo: StreamInfo, options?: MarkitOptions): Promise<ConversionResult>;
}
|
|
@@ -1,11 +1,18 @@
|
|
|
1
|
-
|
|
1
|
+
# User ¶
|
|
2
|
+
Fix the settings overlay crash. Wheeling past the last row throws.
|
|
2
3
|
|
|
3
|
-
|
|
4
|
+
# Tool call ¶
|
|
5
|
+
//Reading the select-list hit test
|
|
6
|
+
read(path="src/select-list.ts:140-180")
|
|
7
|
+
<out>
|
|
8
|
+
162: const index = Math.floor(line / rowHeight); index is never checked against bounds.
|
|
9
|
+
</out>
|
|
4
10
|
|
|
5
|
-
|
|
11
|
+
# Assistant ¶
|
|
12
|
+
Found it. The hit test indexes past the filtered list; clamping to the last row fixes the crash.
|
|
6
13
|
|
|
7
|
-
|
|
14
|
+
# User ¶
|
|
15
|
+
Does the fix survive filtering?
|
|
8
16
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
[Assistant]: Yes. The clamp applies after the filter pass, so a narrowed list keeps the hit map in sync. Added a regression test that wheels past the last row with a filter active and asserts no throw.
|
|
17
|
+
# Assistant ¶
|
|
18
|
+
Yes. The clamp applies after the filter pass, so a narrowed list keeps the hit map in sync. Added a regression test that wheels past the last row with a filter active and asserts no throw.
|
|
@@ -38,10 +38,10 @@ const ZOOM_SCALE = 4;
|
|
|
38
38
|
const MAX_IMAGE_COLS = 28;
|
|
39
39
|
const MAX_IMAGE_ROWS = 14;
|
|
40
40
|
|
|
41
|
-
/** Sample transcript with
|
|
41
|
+
/** Sample transcript with `<out>…</out>` bodies wrapped in dim-ink toggles. */
|
|
42
42
|
const PREVIEW_TEXT = sampleDoc
|
|
43
43
|
.trim()
|
|
44
|
-
.replace(
|
|
44
|
+
.replace(/<out>\n([\s\S]*?)\n<\/out>/g, (_match, body: string) => `<out>\n${DIM_ON}${body}${DIM_OFF}\n</out>`);
|
|
45
45
|
|
|
46
46
|
type PreviewEntry =
|
|
47
47
|
| { state: "rendering" }
|