@oh-my-pi/pi-coding-agent 16.0.7 → 16.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/CHANGELOG.md +31 -0
  2. package/dist/cli.js +4752 -12462
  3. package/dist/types/cli/update-cli.d.ts +11 -0
  4. package/dist/types/debug/remote-debugger.d.ts +45 -0
  5. package/dist/types/internal-urls/docs-index.d.ts +19 -0
  6. package/dist/types/markit/converters/docx.d.ts +6 -0
  7. package/dist/types/markit/converters/epub.d.ts +15 -0
  8. package/dist/types/markit/converters/pdf/columns.d.ts +35 -0
  9. package/dist/types/markit/converters/pdf/extract.d.ts +10 -0
  10. package/dist/types/markit/converters/pdf/grid.d.ts +25 -0
  11. package/dist/types/markit/converters/pdf/headers.d.ts +24 -0
  12. package/dist/types/markit/converters/pdf/index.d.ts +6 -0
  13. package/dist/types/markit/converters/pdf/render.d.ts +24 -0
  14. package/dist/types/markit/converters/pdf/types.d.ts +75 -0
  15. package/dist/types/markit/converters/pptx.d.ts +57 -0
  16. package/dist/types/markit/converters/xlsx.d.ts +25 -0
  17. package/dist/types/markit/index.d.ts +2 -0
  18. package/dist/types/markit/registry.d.ts +16 -0
  19. package/dist/types/markit/types.d.ts +30 -0
  20. package/dist/types/session/agent-session.d.ts +7 -8
  21. package/dist/types/session/auth-storage.d.ts +3 -2
  22. package/dist/types/session/yield-queue.d.ts +3 -1
  23. package/dist/types/tools/browser/attach.d.ts +1 -1
  24. package/dist/types/utils/markit.d.ts +0 -8
  25. package/dist/types/utils/mupdf-wasm-embed.d.ts +1 -0
  26. package/dist/types/utils/turndown.d.ts +15 -0
  27. package/dist/types/utils/zip.d.ts +119 -0
  28. package/package.json +20 -18
  29. package/scripts/build-binary.ts +7 -3
  30. package/scripts/bundle-dist.ts +28 -12
  31. package/scripts/embed-mupdf-wasm.ts +67 -0
  32. package/scripts/generate-docs-index.ts +48 -32
  33. package/scripts/omp +1 -1
  34. package/src/advisor/__tests__/advisor.test.ts +83 -0
  35. package/src/advisor/runtime.ts +16 -1
  36. package/src/cli/auth-broker-cli.ts +1 -3
  37. package/src/cli/auth-gateway-cli.ts +2 -5
  38. package/src/cli/update-cli.ts +63 -3
  39. package/src/config/model-discovery.ts +20 -8
  40. package/src/config/models-config-schema.ts +8 -1
  41. package/src/debug/index.ts +44 -0
  42. package/src/debug/remote-debugger.ts +151 -0
  43. package/src/debug/report-bundle.ts +2 -1
  44. package/src/internal-urls/docs-index.generated.txt +2 -0
  45. package/src/internal-urls/docs-index.ts +102 -0
  46. package/src/internal-urls/omp-protocol.ts +10 -9
  47. package/src/markit/NOTICE +32 -0
  48. package/src/markit/converters/docx.ts +56 -0
  49. package/src/markit/converters/epub.ts +136 -0
  50. package/src/markit/converters/mammoth.d.ts +24 -0
  51. package/src/markit/converters/pdf/columns.ts +103 -0
  52. package/src/markit/converters/pdf/extract.ts +574 -0
  53. package/src/markit/converters/pdf/grid.ts +780 -0
  54. package/src/markit/converters/pdf/headers.ts +106 -0
  55. package/src/markit/converters/pdf/index.ts +146 -0
  56. package/src/markit/converters/pdf/render.ts +501 -0
  57. package/src/markit/converters/pdf/types.ts +84 -0
  58. package/src/markit/converters/pptx.ts +325 -0
  59. package/src/markit/converters/xlsx.ts +173 -0
  60. package/src/markit/index.ts +2 -0
  61. package/src/markit/registry.ts +59 -0
  62. package/src/markit/types.ts +35 -0
  63. package/src/modes/components/snapcompact-shape-preview-doc.md +14 -7
  64. package/src/modes/components/snapcompact-shape-preview.ts +2 -2
  65. package/src/modes/controllers/input-controller.ts +29 -8
  66. package/src/modes/interactive-mode.ts +26 -9
  67. package/src/prompts/advisor/system.md +1 -0
  68. package/src/sdk.ts +5 -9
  69. package/src/session/agent-session.ts +62 -40
  70. package/src/session/auth-storage.ts +2 -11
  71. package/src/session/yield-queue.ts +7 -1
  72. package/src/tools/browser/attach.ts +2 -2
  73. package/src/tools/fetch.ts +25 -60
  74. package/src/tools/read.ts +1 -1
  75. package/src/tools/search.ts +1 -6
  76. package/src/tools/write.ts +25 -65
  77. package/src/utils/markit.ts +25 -9
  78. package/src/utils/mupdf-wasm-embed.ts +12 -0
  79. package/src/utils/tools-manager.ts +2 -11
  80. package/src/utils/turndown.ts +83 -0
  81. package/src/{tools/archive-reader.ts → utils/zip.ts} +453 -83
  82. package/src/web/scrapers/types.ts +3 -46
  83. package/dist/types/internal-urls/docs-index.generated.d.ts +0 -2
  84. package/dist/types/tools/archive-reader.d.ts +0 -49
  85. package/src/internal-urls/docs-index.generated.ts +0 -120
@@ -0,0 +1,106 @@
1
+ // Adapted from markit-ai (MIT). See ../../NOTICE.
2
+
3
+ /**
4
+ * Running header/footer detection and removal.
5
+ *
6
+ * Many PDFs have repeated text at the top or bottom of every page:
7
+ * document titles, chapter names, page numbers, copyright notices.
8
+ * These pollute the markdown output as false headings or noise.
9
+ *
10
+ * Algorithm:
11
+ * 1. For each page, bucket text boxes by Y position (top/bottom zones)
12
+ * 2. Collect the text content at each zone across all pages
13
+ * 3. Text appearing on at least 20% of pages (minimum 3 pages) OR on 8+
14
+ * consecutive pages is a running header/footer
15
+ * 4. Remove matching text boxes before further processing
16
+ */
17
+ import type { PageContent } from "./types";
18
+
19
/** Minimum number of pages to enable header/footer detection. */
const MIN_PAGES = 5;
/** Minimum Y position for top zone (from bottom of page in PDF coords). */
const TOP_ZONE_MIN_Y = 700;
/** Maximum Y position for bottom zone. */
const BOTTOM_ZONE_MAX_Y = 80;
/**
 * Minimum consecutive pages a text must appear on to be considered a
 * running header/footer. Catches chapter-specific headers that repeat on
 * 8+ consecutive pages even when they fall below the document-wide
 * frequency threshold.
 */
const MIN_CONSECUTIVE_PAGES = 8;
/** Fraction of all pages a zone text must appear on to count as document-wide. */
const GLOBAL_PAGE_FRACTION = 0.2;
/** Lower bound (in pages) for the document-wide frequency threshold. */
const MIN_GLOBAL_PAGES = 3;

/** Occurrence statistics for one normalized zone text, built in a single pass. */
interface ZoneTextStats {
	/** Number of pages whose top/bottom zones contain the text. */
	pageCount: number;
	/** Index of the most recent page that contained the text. */
	lastPageIndex: number;
	/** Length of the run of consecutive pages ending at `lastPageIndex`. */
	currentRun: number;
	/** Longest run of consecutive pages observed so far. */
	longestRun: number;
}

/** True when the vertical midpoint of a box lies in the top or bottom zone. */
function isInMarginZone(top: number, bottom: number): boolean {
	const midY = (top + bottom) / 2;
	return midY >= TOP_ZONE_MIN_Y || midY <= BOTTOM_ZONE_MAX_Y;
}

/** Trim and collapse whitespace runs so spacing differences do not defeat matching. */
function normalizeZoneText(text: string): string {
	return text.trim().replace(/\s+/g, " ");
}

/**
 * Detect and remove running headers and footers from all pages.
 * Replaces each page's `textBoxes` array in place with a filtered copy.
 *
 * Does nothing when there are fewer than `MIN_PAGES` pages.
 *
 * A normalized text found in the top/bottom zone is treated as a running
 * header/footer when either holds:
 * 1. Global frequency: it appears on at least
 *    `max(MIN_GLOBAL_PAGES, floor(pages.length * GLOBAL_PAGE_FRACTION))` pages
 * 2. Consecutive runs: it appears on `MIN_CONSECUTIVE_PAGES`+ consecutive pages
 *
 * Only boxes inside the top/bottom zone are removed; identical text in the
 * page body is kept.
 *
 * @param pages Extracted pages, in document order.
 */
export function stripHeadersFooters(pages: PageContent[]): void {
	if (pages.length < MIN_PAGES) return;

	// Step 1: one pass over the pages, tracking page count and consecutive runs
	// per zone text. A text is counted at most once per page.
	const stats = new Map<string, ZoneTextStats>();
	pages.forEach((page, pageIndex) => {
		const seenOnPage = new Set<string>();
		for (const tb of page.textBoxes) {
			if (!isInMarginZone(tb.bounds.top, tb.bounds.bottom)) continue;
			const key = normalizeZoneText(tb.text);
			if (key.length === 0 || seenOnPage.has(key)) continue;
			seenOnPage.add(key);

			const entry = stats.get(key);
			if (entry === undefined) {
				stats.set(key, { pageCount: 1, lastPageIndex: pageIndex, currentRun: 1, longestRun: 1 });
				continue;
			}
			// The run continues only if the text was also on the immediately preceding page.
			entry.currentRun = entry.lastPageIndex === pageIndex - 1 ? entry.currentRun + 1 : 1;
			entry.lastPageIndex = pageIndex;
			entry.pageCount++;
			if (entry.currentRun > entry.longestRun) entry.longestRun = entry.currentRun;
		}
	});

	// Step 2: identify running headers/footers.
	const globalThreshold = Math.max(MIN_GLOBAL_PAGES, Math.floor(pages.length * GLOBAL_PAGE_FRACTION));
	const repeatedTexts = new Set<string>();
	for (const [text, entry] of stats) {
		if (entry.pageCount >= globalThreshold || entry.longestRun >= MIN_CONSECUTIVE_PAGES) {
			repeatedTexts.add(text);
		}
	}
	if (repeatedTexts.size === 0) return;

	// Step 3: remove matching text boxes, using the same zone test as detection.
	for (const page of pages) {
		page.textBoxes = page.textBoxes.filter(
			tb => !(isInMarginZone(tb.bounds.top, tb.bounds.bottom) && repeatedTexts.has(normalizeZoneText(tb.text))),
		);
	}
}
@@ -0,0 +1,146 @@
1
+ // Adapted from markit-ai (MIT). See ../../NOTICE.
2
+
3
+ /**
4
+ * PDF to Markdown converter.
5
+ *
6
+ * Uses mupdf (native WASM) for fast PDF parsing and a custom pipeline for
7
+ * table detection via vector line extraction + raycasting.
8
+ *
9
+ * Pipeline:
10
+ * 1. Extract text boxes + vector segments + image regions per page (mupdf)
11
+ * 2. Detect column layout (single vs multi-column)
12
+ * 3. Per column: detect table grids from segments (grid detection + raycasting)
13
+ * 4. Render diagrams as PNG files (if output directory provided)
14
+ * 5. Render tables as markdown tables, free text as paragraphs/headings
15
+ */
16
+ import * as path from "node:path";
17
+ import type { ConversionResult, Converter, StreamInfo } from "../../types";
18
+ import { detectColumns } from "./columns";
19
+ import { extractPages, renderImageRegion } from "./extract";
20
+ import { resolveTableGrids } from "./grid";
21
+ import { stripHeadersFooters } from "./headers";
22
+ import { renderPageContent } from "./render";
23
+ import type { Segment, TextBox } from "./types";
24
+
25
+ const EXTENSIONS = [".pdf"]; // File extensions (leading dot included) that accepts() matches exactly.
26
+ const MIMETYPES = ["application/pdf", "application/x-pdf"]; // MIME types that accepts() matches by prefix.
27
+
28
+ type ImageBlock = { topY: number; markdown: string }; // Markdown snippet for one page image, paired with that image's topY.
29
+
30
/**
 * Render one group of text boxes (a single column, or the whole page) to markdown.
 *
 * Table grids are resolved from the boxes and segments first; boxes claimed by a
 * grid are excluded from the free-text set handed to the renderer, while the
 * complete box list is still passed along as the renderer's final argument.
 */
function processColumn(
	pageNumber: number,
	textBoxes: TextBox[],
	segments: Segment[],
	imageBlocks: ImageBlock[],
): string {
	const tables = resolveTableGrids(pageNumber, textBoxes, segments);
	const claimedIds = new Set(tables.consumedIds);

	// Keep only the boxes that no table grid consumed.
	const unclaimedBoxes: TextBox[] = [];
	for (const box of textBoxes) {
		if (!claimedIds.has(box.id)) unclaimedBoxes.push(box);
	}

	return renderPageContent(unclaimedBoxes, tables.grids, imageBlocks, textBoxes);
}
45
+
46
/** A segment whose endpoints differ in X by at most this much is treated as vertical. */
const VERTICAL_SEGMENT_MAX_DX = 0.8;
/** A detected column narrower than this fraction of the text width is treated as a table column. */
const MIN_COLUMN_WIDTH_FRACTION = 0.3;
/** Horizontal slack applied when assigning segments to a column's X range. */
const COLUMN_SEGMENT_MARGIN = 10;

/**
 * Horizontal extent (smallest `bounds.left`, largest `bounds.right`) of a set of text boxes.
 *
 * Implemented as a loop instead of `Math.min(...values)` so that pages with a
 * very large number of boxes cannot exceed the engine's argument-count limit.
 * For an empty list the result is `{ left: Infinity, right: -Infinity }`, the
 * same values `Math.min()` / `Math.max()` return with no arguments.
 */
function horizontalExtent(boxes: readonly TextBox[]): { left: number; right: number } {
	let left = Number.POSITIVE_INFINITY;
	let right = Number.NEGATIVE_INFINITY;
	for (const tb of boxes) {
		left = Math.min(left, tb.bounds.left);
		right = Math.max(right, tb.bounds.right);
	}
	return { left, right };
}

/**
 * Converter that turns a PDF buffer into markdown.
 *
 * Each page is extracted, stripped of running headers/footers, split into
 * columns, and rendered; the per-page markdown is joined with blank lines.
 */
export class PdfConverter implements Converter {
	name = "pdf";

	/**
	 * Whether this converter handles the given stream.
	 *
	 * @returns `true` when the extension is exactly one of `EXTENSIONS` or the
	 * MIME type starts with one of `MIMETYPES`.
	 */
	accepts(streamInfo: StreamInfo): boolean {
		const { extension, mimetype } = streamInfo;
		if (extension && EXTENSIONS.includes(extension)) {
			return true;
		}
		if (mimetype && MIMETYPES.some(m => mimetype.startsWith(m))) {
			return true;
		}
		return false;
	}

	/**
	 * Convert a PDF to markdown.
	 *
	 * When `streamInfo.imageDir` is set, each page image is rendered to
	 * `<imageDir>/<image id>.png` and linked from the markdown; otherwise an HTML
	 * comment placeholder is emitted for each image.
	 *
	 * @param input Raw PDF bytes.
	 * @param streamInfo Stream metadata; only `imageDir` is read here.
	 * @returns The markdown for all non-empty pages joined by blank lines.
	 */
	async convert(input: Buffer, streamInfo: StreamInfo): Promise<ConversionResult> {
		const pdfBytes = new Uint8Array(input);
		const pages = await extractPages(pdfBytes);
		// Remove running headers/footers before processing.
		stripHeadersFooters(pages);
		const imageDir = streamInfo.imageDir;

		const pageMarkdowns: string[] = [];
		for (const page of pages) {
			// Build image blocks for this page.
			const imageBlocks: ImageBlock[] = [];
			for (const img of page.images) {
				if (!imageDir) {
					// No output directory: leave a placeholder comment instead of a file link.
					imageBlocks.push({
						topY: img.topY,
						markdown: `<!-- image: ${img.id} (page ${img.pageNumber}, ${img.bbox.w}x${img.bbox.h}pt) -->`,
					});
					continue;
				}
				const filepath = path.join(imageDir, `${img.id}.png`);
				try {
					const png = await renderImageRegion(pdfBytes, img);
					await Bun.write(filepath, png);
					imageBlocks.push({ topY: img.topY, markdown: `![${img.id}](${filepath})` });
				} catch {
					// Deliberately best-effort: a failed render or write drops this one
					// image rather than failing the whole conversion.
				}
			}

			// Detect column layout.
			// If the page has vertical segments (tables), suppress column detection
			// when one detected column is very narrow — that's a table's first column,
			// not a page layout column.
			const layout = detectColumns(page.textBoxes);
			if (
				layout.columnCount > 1 &&
				page.segments.some(s => Math.abs(s.x1 - s.x2) <= VERTICAL_SEGMENT_MAX_DX)
			) {
				const pageExtent = horizontalExtent(page.textBoxes);
				const pageWidth = pageExtent.right - pageExtent.left;
				const tooNarrow = layout.columns.some(col => {
					const colExtent = horizontalExtent(col);
					return (colExtent.right - colExtent.left) / pageWidth < MIN_COLUMN_WIDTH_FRACTION;
				});
				if (tooNarrow) {
					layout.columnCount = 1;
					layout.columns = [page.textBoxes];
					layout.boundaries = [];
				}
			}

			if (layout.columnCount === 1) {
				// Single column — process normally.
				const md = processColumn(page.pageNumber, page.textBoxes, page.segments, imageBlocks);
				if (md.length > 0) pageMarkdowns.push(md);
			} else {
				// Multi-column — process each column independently, then join.
				const columnMarkdowns: string[] = [];
				for (const colBoxes of layout.columns) {
					// Filter segments to those within this column's X range.
					const colExtent = horizontalExtent(colBoxes);
					const colSegments = page.segments.filter(seg => {
						const segXMin = Math.min(seg.x1, seg.x2);
						const segXMax = Math.max(seg.x1, seg.x2);
						return (
							segXMax >= colExtent.left - COLUMN_SEGMENT_MARGIN &&
							segXMin <= colExtent.right + COLUMN_SEGMENT_MARGIN
						);
					});
					// Images go with the first column that produces output (no X info to split by).
					const md = processColumn(
						page.pageNumber,
						colBoxes,
						colSegments,
						columnMarkdowns.length === 0 ? imageBlocks : [],
					);
					if (md.length > 0) columnMarkdowns.push(md);
				}
				const joined = columnMarkdowns.join("\n\n");
				if (joined.length > 0) pageMarkdowns.push(joined);
			}
		}

		return { markdown: pageMarkdowns.join("\n\n") };
	}
}