opencode-froggy 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,7 +7,7 @@
7
7
  <a href="https://www.npmjs.com/package/opencode-froggy"><img src="https://badge.fury.io/js/opencode-froggy.svg" alt="npm version"></a>
8
8
  </p>
9
9
 
10
- OpenCode plugin providing hooks, specialized agents (architect, doc-writer, rubber-duck, partner, code-reviewer, code-simplifier), skills (code-release), and tools (gitingest, blockchain queries, agent-promote).
10
+ OpenCode plugin providing hooks, specialized agents (architect, doc-writer, rubber-duck, partner, code-reviewer, code-simplifier), skills (code-release), and tools (gitingest, pdf-to-markdown, blockchain queries, agent-promote).
11
11
 
12
12
  ---
13
13
 
@@ -22,6 +22,7 @@ OpenCode plugin providing hooks, specialized agents (architect, doc-writer, rubb
22
22
  - [gitingest](#gitingest)
23
23
  - [prompt-session](#prompt-session)
24
24
  - [list-child-sessions](#list-child-sessions)
25
+ - [pdf-to-markdown](#pdf-to-markdown)
25
26
  - [agent-promote](#agent-promote)
26
27
  - [Blockchain](#blockchain)
27
28
  - [Configuration](#configuration)
@@ -245,6 +246,37 @@ Child sessions (2):
245
246
 
246
247
  ---
247
248
 
249
+ ### pdf-to-markdown
250
+
251
+ Convert a text-based PDF into enriched Markdown (headings, paragraphs, lists). Returns Markdown as plain text.
252
+
253
+ #### Parameters
254
+
255
+ | Parameter | Type | Required | Default | Description |
256
+ |-----------|------|----------|---------|-------------|
257
+ | `filePath` | `string` | Yes | - | Absolute path to the PDF file to convert |
258
+ | `maxPages` | `number` | No | All pages | Maximum number of pages to convert (positive integer) |
259
+
260
+ #### Usage Examples
261
+
262
+ ```typescript
263
+ // Convert an entire PDF
264
+ pdfToMarkdown({ filePath: "/path/to/file.pdf" })
265
+
266
+ // Convert the first 3 pages
267
+ pdfToMarkdown({
268
+ filePath: "/path/to/file.pdf",
269
+ maxPages: 3
270
+ })
271
+ ```
272
+
273
+ #### Notes
274
+
275
+ - The conversion extracts text content; image-only PDFs may return empty output.
276
+ - `maxPages` is capped at the document's total page count.
277
+
278
+ ---
279
+
248
280
  ### agent-promote
249
281
 
250
282
  Promote an agent to primary (default) or specify a grade.
@@ -26,7 +26,6 @@ Automatically detect documentation files in the project:
26
26
  - Look for `README.md` at project root
27
27
  - Look for `docs/` or `documentation/` directories
28
28
  - Look for other `.md` files that describe usage, API, or features
29
- - Check if `CHANGELOG.md` exists (do NOT create it if missing)
30
29
 
31
30
  ## Update Phase
32
31
 
package/dist/index.js CHANGED
@@ -5,7 +5,7 @@ import { getGlobalHookDir, getProjectHookDir } from "./config-paths";
5
5
  import { hasCodeExtension } from "./code-files";
6
6
  import { log } from "./logger";
7
7
  import { executeBashAction, DEFAULT_BASH_TIMEOUT, } from "./bash-executor";
8
- import { gitingestTool, createPromptSessionTool, createListChildSessionsTool, createAgentPromoteTool, createSkillTool, getPromotedAgents, ethTransactionTool, ethAddressTxsTool, ethAddressBalanceTool, ethTokenTransfersTool, } from "./tools";
8
+ import { gitingestTool, pdfToMarkdownTool, createPromptSessionTool, createListChildSessionsTool, createAgentPromoteTool, createSkillTool, getPromotedAgents, ethTransactionTool, ethAddressTxsTool, ethAddressBalanceTool, ethTokenTransfersTool, } from "./tools";
9
9
  export { parseFrontmatter, loadAgents, loadCommands } from "./loaders";
10
10
  export { buildSkillActivationBlock } from "./skill-activation";
11
11
  import { buildSkillActivationBlock } from "./skill-activation";
@@ -46,6 +46,7 @@ const SmartfrogPlugin = async (ctx) => {
46
46
  hooks: Array.from(hooks.keys()),
47
47
  tools: [
48
48
  "gitingest",
49
+ "pdf-to-markdown",
49
50
  "skill",
50
51
  "agent-promote",
51
52
  "eth-transaction",
@@ -204,6 +205,7 @@ const SmartfrogPlugin = async (ctx) => {
204
205
  },
205
206
  tool: {
206
207
  gitingest: gitingestTool,
208
+ "pdf-to-markdown": pdfToMarkdownTool,
207
209
  skill: skillTool,
208
210
  "prompt-session": createPromptSessionTool(ctx.client),
209
211
  "list-child-sessions": createListChildSessionsTool(ctx.client),
@@ -1,4 +1,6 @@
1
1
  export { gitingestTool, fetchGitingest, type GitingestArgs } from "./gitingest";
2
+ export { convertPdfToMarkdown, type PdfToMarkdownArgs } from "./pdf-to-markdown-core";
3
+ export { pdfToMarkdownTool } from "./pdf-to-markdown";
2
4
  export { createPromptSessionTool, type PromptSessionArgs } from "./prompt-session";
3
5
  export { createListChildSessionsTool } from "./list-child-sessions";
4
6
  export { createAgentPromoteTool, getPromotedAgents, type AgentPromoteArgs } from "./agent-promote";
@@ -1,4 +1,6 @@
1
1
  export { gitingestTool, fetchGitingest } from "./gitingest";
2
+ export { convertPdfToMarkdown } from "./pdf-to-markdown-core";
3
+ export { pdfToMarkdownTool } from "./pdf-to-markdown";
2
4
  export { createPromptSessionTool } from "./prompt-session";
3
5
  export { createListChildSessionsTool } from "./list-child-sessions";
4
6
  export { createAgentPromoteTool, getPromotedAgents } from "./agent-promote";
@@ -0,0 +1,9 @@
1
/** Arguments accepted by the `pdf-to-markdown` tool. */
export interface PdfToMarkdownArgs {
    /** Path of the PDF file to convert. */
    filePath: string;
    /** Optional upper bound on the number of pages to convert. */
    maxPages?: number;
}
/** Conversion options: only the page limit of the tool arguments. */
type MarkdownOptions = Pick<PdfToMarkdownArgs, "maxPages">;
/**
 * Reads the PDF at `filePath` and resolves with its text rendered as Markdown.
 *
 * @param filePath Path of the PDF file to convert.
 * @param options Optional page limit.
 * @returns The generated Markdown as a single string.
 */
export declare function convertPdfToMarkdown(filePath: string, options?: MarkdownOptions): Promise<string>;
9
+ export {};
@@ -0,0 +1,179 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
3
/**
 * Reports whether an entry of a page's text content carries a `str` property,
 * i.e. whether it is a text fragment rather than some other kind of entry.
 */
function isTextItem(item) {
    return Reflect.has(item, "str");
}
6
/**
 * Estimates the font size of a text fragment.
 *
 * A positive numeric `height` is used as-is. Otherwise the size is derived
 * from the length of the first two components of the fragment's transform.
 * Returns 0 when neither yields a positive number.
 */
function getFontSize(item) {
    const reportedHeight = item.height;
    const hasUsableHeight = typeof reportedHeight === "number" && reportedHeight > 0;
    if (hasUsableHeight) {
        return reportedHeight;
    }
    const matrix = item.transform;
    const scale = Math.hypot(matrix[0], matrix[1]);
    // The comparison also rejects NaN, which falls through to 0.
    if (scale > 0) {
        return scale;
    }
    return 0;
}
14
/**
 * Joins two text fragments into one string.
 *
 * - A trailing hyphen directly attached to a word is dropped and the fragments
 *   are glued together.
 * - No separator is inserted when `previous` ends with whitespace or a hyphen,
 *   or when `next` starts with closing punctuation.
 * - Otherwise the fragments are separated by a single space.
 */
function mergeLineText(previous, next) {
    const endsWithAttachedHyphen = previous.endsWith("-") && !previous.endsWith(" -");
    if (endsWithAttachedHyphen) {
        return previous.slice(0, -1).concat(next);
    }
    const needsNoSeparator = /[\s-]$/.test(previous) || /^[,.;:!?)]/.test(next);
    return needsNoSeparator ? previous.concat(next) : previous.concat(" ", next);
}
23
/**
 * Groups text fragments into visual lines, ordered from the top of the page
 * to the bottom.
 *
 * `transform[4]` and `transform[5]` of each fragment are read as its x and y
 * position. Fragments whose text is empty after trimming are ignored.
 *
 * Fragments are first clustered by baseline: a fragment joins the current
 * line when its y lies within a tolerance (half the smaller font size, at
 * least 2 units) of the y of the fragment that opened the line. Each cluster
 * is then ordered left to right before its text is merged, so fragments whose
 * baselines differ slightly (for example a font change in the middle of a
 * line) are still emitted in reading order instead of in y order.
 *
 * @param items Text fragments exposing `str`, `transform` and optionally `height`.
 * @returns One `{ text, y, x, fontSize }` record per line, where `y` is the
 *   baseline of the fragment that opened the line, `x` is the position of the
 *   leftmost fragment and `fontSize` is the largest size seen on the line.
 */
function groupTextLines(items) {
    const fragments = [];
    for (const item of items) {
        const text = item.str.trim();
        if (!text)
            continue;
        fragments.push({
            text,
            x: item.transform[4],
            y: item.transform[5],
            fontSize: getFontSize(item),
        });
    }
    // Top of the page first; x only breaks exact ties here, the real
    // left-to-right ordering happens per cluster below.
    fragments.sort((a, b) => (b.y - a.y) || (a.x - b.x));
    const clusters = [];
    for (const fragment of fragments) {
        const current = clusters[clusters.length - 1];
        if (current) {
            const tolerance = Math.max(2, Math.min(current.fontSize, fragment.fontSize) * 0.5);
            if (Math.abs(current.y - fragment.y) <= tolerance) {
                current.fragments.push(fragment);
                current.fontSize = Math.max(current.fontSize, fragment.fontSize);
                continue;
            }
        }
        clusters.push({ y: fragment.y, fontSize: fragment.fontSize, fragments: [fragment] });
    }
    return clusters.map(cluster => {
        // Array.prototype.sort is stable, so fragments sharing an x keep their y order.
        const [first, ...rest] = [...cluster.fragments].sort((a, b) => a.x - b.x);
        const text = rest.reduce((merged, fragment) => mergeLineText(merged, fragment.text), first.text);
        return { text, y: cluster.y, x: first.x, fontSize: cluster.fontSize };
    });
}
51
/**
 * Maps a line's font size to a Markdown heading level.
 *
 * A line is only considered a heading when it is at least 20% larger than the
 * body font. The level is then chosen from the size relative to the largest
 * font: >= 85% gives 1, >= 70% gives 2, >= 60% gives 3.
 *
 * @returns 1, 2 or 3, or `null` when the line is not a heading.
 */
function getHeadingLevel(fontSize, maxFontSize, bodyFontSize) {
    const isBodySized = fontSize < bodyFontSize * 1.2;
    if (maxFontSize === 0 || isBodySized) {
        return null;
    }
    const relativeSize = fontSize / maxFontSize;
    // Ordered from the most to the least prominent tier; first match wins.
    const tiers = [
        [0.85, 1],
        [0.7, 2],
        [0.6, 3],
    ];
    for (const [minimumRatio, level] of tiers) {
        if (relativeSize >= minimumRatio) {
            return level;
        }
    }
    return null;
}
65
/**
 * Determines the body font size of a page: the most frequent font size among
 * `lines`, after rounding each size to one decimal place. Ties are resolved in
 * favour of the smaller size.
 *
 * When the winning size is 0 the largest raw font size is returned instead.
 * An empty `lines` array yields 0 (rather than `-Infinity`, which is what
 * `Math.max()` produces when called without arguments).
 *
 * @param lines Records exposing a numeric `fontSize`.
 * @returns The body font size, or 0 when it cannot be determined.
 */
function getBodyFontSize(lines) {
    if (lines.length === 0)
        return 0;
    const counts = new Map();
    // Tracked in the same pass so no argument spread is needed for the fallback.
    let largest = -Infinity;
    for (const { fontSize } of lines) {
        const rounded = Math.round(fontSize * 10) / 10;
        counts.set(rounded, (counts.get(rounded) ?? 0) + 1);
        if (fontSize > largest)
            largest = fontSize;
    }
    let bestSize = 0;
    let bestCount = 0;
    for (const [size, count] of counts) {
        if (count > bestCount || (count === bestCount && size < bestSize)) {
            bestSize = size;
            bestCount = count;
        }
    }
    return bestSize || largest;
}
81
/**
 * Recognises a line that starts with a list marker and renders it as a
 * Markdown list item.
 *
 * Bullet markers (`*`, `-`, U+2022) become `- item`. Numbered markers such as
 * `3.` are kept, so ordered lists retain their numbering instead of being
 * flattened into unordered bullets.
 *
 * @param text A single line of text.
 * @returns The Markdown list item, or `null` when the line does not start
 *   with a list marker followed by whitespace.
 */
function formatListItem(text) {
    const match = /^([*\-]|\u2022|\d+\.)\s+(.*)$/.exec(text.trim());
    if (!match)
        return null;
    const [, marker, content] = match;
    const prefix = /^\d+\.$/.test(marker) ? marker : "-";
    return `${prefix} ${content.trim()}`;
}
88
/**
 * Decides whether `next` continues the paragraph that `previous` belongs to.
 *
 * The lines are merged only when `next` is not above `previous`, their font
 * sizes differ by at most 40% of the reference size, and the vertical gap is
 * at most 1.6 times the reference size (never less than 4 units). The
 * reference size is `previous.fontSize`, or `next.fontSize` when that is 0.
 */
function shouldMergeLines(previous, next) {
    const movesUpward = next.y > previous.y;
    if (movesUpward) {
        return false;
    }
    const referenceSize = previous.fontSize || next.fontSize;
    const sizeJump = Math.abs(previous.fontSize - next.fontSize) > referenceSize * 0.4;
    const maxGap = Math.max(4, referenceSize * 1.6);
    return !sizeJump && previous.y - next.y <= maxGap;
}
99
/**
 * Renders the lines of one page as Markdown.
 *
 * Each line becomes a heading, a list item, or part of a paragraph. Headings
 * and list items are emitted on their own; consecutive plain lines that
 * `shouldMergeLines` accepts are joined with a space into one paragraph.
 * Every emitted block is followed by an empty entry, and trailing empty
 * entries are removed from the result.
 *
 * @param lines Line records exposing `text`, `y` and `fontSize`.
 * @returns The Markdown output as an array of lines.
 */
function linesToMarkdown(lines) {
    if (lines.length === 0) {
        return [];
    }
    const maxFontSize = Math.max(...lines.map(line => line.fontSize));
    const bodyFontSize = getBodyFontSize(lines);
    const output = [];
    let paragraph = null;
    let previousLine = null;
    const closeParagraph = () => {
        if (!paragraph)
            return;
        output.push(paragraph, "");
        paragraph = null;
    };
    // Headings and list items always terminate the paragraph in progress.
    const emitStandalone = (block) => {
        closeParagraph();
        output.push(block, "");
    };
    for (const line of lines) {
        const text = line.text.replace(/\s+/g, " ").trim();
        if (!text)
            continue;
        const headingLevel = getHeadingLevel(line.fontSize, maxFontSize, bodyFontSize);
        const listItem = formatListItem(text);
        if (headingLevel) {
            emitStandalone(`${"#".repeat(headingLevel)} ${text}`);
        }
        else if (listItem) {
            emitStandalone(listItem);
        }
        else if (paragraph && previousLine && shouldMergeLines(previousLine, line)) {
            paragraph = `${paragraph} ${text}`;
        }
        else {
            closeParagraph();
            paragraph = text;
        }
        previousLine = line;
    }
    closeParagraph();
    let end = output.length;
    while (end > 0 && output[end - 1].trim() === "") {
        end -= 1;
    }
    return output.slice(0, end);
}
146
/**
 * Converts the text of a PDF file into Markdown.
 *
 * The file is read into memory and loaded with pdf.js. Each page's text
 * fragments are grouped into lines and rendered with `linesToMarkdown`.
 * Non-empty pages are separated by a blank line.
 *
 * The pdf.js loading task is destroyed once conversion finishes or fails, so
 * the document and its worker resources are released instead of lingering for
 * the lifetime of the process.
 *
 * @param filePath Path of the PDF file to convert.
 * @param options.maxPages Optional page limit. Values that are missing or not
 *   positive mean "all pages"; larger values are capped at the page count.
 * @returns The Markdown text, joined with newlines.
 * @throws Error when the file cannot be read; pdf.js errors propagate unchanged.
 */
export async function convertPdfToMarkdown(filePath, options = {}) {
    let data;
    try {
        const buffer = await readFile(filePath);
        data = new Uint8Array(buffer);
    }
    catch (error) {
        throw new Error(`Failed to read PDF at ${filePath}: ${String(error)}`);
    }
    const loadingTask = getDocument({ data });
    try {
        const pdf = await loadingTask.promise;
        const totalPages = pdf.numPages;
        const maxPages = options.maxPages && options.maxPages > 0
            ? Math.min(options.maxPages, totalPages)
            : totalPages;
        const markdownLines = [];
        for (let pageNumber = 1; pageNumber <= maxPages; pageNumber += 1) {
            const page = await pdf.getPage(pageNumber);
            const textContent = await page.getTextContent({ normalizeWhitespace: true });
            const items = textContent.items.filter(isTextItem);
            const lines = groupTextLines(items);
            const pageMarkdown = linesToMarkdown(lines);
            if (pageMarkdown.length > 0) {
                if (markdownLines.length > 0) {
                    // Blank line between the output of consecutive pages.
                    markdownLines.push("");
                }
                markdownLines.push(...pageMarkdown);
            }
        }
        while (markdownLines.length > 0 && markdownLines[markdownLines.length - 1].trim() === "") {
            markdownLines.pop();
        }
        return markdownLines.join("\n");
    }
    finally {
        // Releases the document whether conversion succeeded or threw.
        await loadingTask.destroy();
    }
}
@@ -0,0 +1,12 @@
1
+ import { type ToolContext } from "@opencode-ai/plugin";
2
/**
 * Tool definition for `pdf-to-markdown`: takes a required `filePath` and an
 * optional `maxPages`, and resolves with a string.
 */
export declare const pdfToMarkdownTool: {
    /** Human-readable summary of the tool. */
    description: string;
    /** Zod schemas validating the tool arguments. */
    args: {
        filePath: import("zod").ZodString;
        maxPages: import("zod").ZodOptional<import("zod").ZodNumber>;
    };
    /** Runs the tool with already-validated arguments. */
    execute(
        args: {
            filePath: string;
            maxPages?: number | undefined;
        },
        context: ToolContext,
    ): Promise<string>;
};
@@ -0,0 +1,17 @@
1
+ import { tool } from "@opencode-ai/plugin";
2
+ import { convertPdfToMarkdown } from "./pdf-to-markdown-core";
3
/**
 * `pdf-to-markdown` tool: validates `filePath` (string) and the optional
 * `maxPages` (positive integer), then delegates to `convertPdfToMarkdown`.
 */
export const pdfToMarkdownTool = tool({
    description: "Convert a text-based PDF into enriched Markdown (headings, paragraphs, lists). Returns Markdown as plain text.",
    args: {
        filePath: tool.schema.string().describe("Absolute path to the PDF file to convert"),
        maxPages: tool.schema.number().int().positive().optional().describe("Limit the number of pages to convert"),
    },
    // The tool context is not needed for the conversion.
    execute: async ({ filePath, maxPages }, _context) => convertPdfToMarkdown(filePath, { maxPages }),
});
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,84 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { mkdtemp, rm, writeFile } from "node:fs/promises";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { convertPdfToMarkdown } from "./pdf-to-markdown-core";
6
/**
 * Escapes text for use inside a PDF literal string `( ... )` by prefixing
 * every backslash and parenthesis with a backslash.
 */
function escapePdfText(text) {
    // One pass over the three special characters; `$&` re-inserts the match.
    return text.replace(/[\\()]/g, "\\$&");
}
9
/**
 * Builds a minimal single-page PDF (Helvetica, 612x792 media box) that draws
 * each entry of `lines` at its own position and font size.
 *
 * Byte offsets in the cross-reference table, the `startxref` value and the
 * stream `/Length` are measured with the same single-byte encoding that is
 * used to serialise the document. Measuring them as UTF-8 would count each
 * `\xFF` of the header comment as two bytes and shift every offset by four.
 *
 * Because the document is serialised as Latin-1, `text` should only contain
 * characters up to U+00FF.
 *
 * @param lines Entries of the form `{ text, fontSize, x, y }`.
 * @returns The PDF file contents as a Buffer.
 */
function buildPdf(lines) {
    const ENCODING = "latin1";
    const byteLength = (chunk) => Buffer.byteLength(chunk, ENCODING);
    const header = "%PDF-1.4\n%\xFF\xFF\xFF\xFF\n";
    const textCommands = lines
        .map(line => {
        const escaped = escapePdfText(line.text);
        return `/F1 ${line.fontSize} Tf 1 0 0 1 ${line.x} ${line.y} Tm (${escaped}) Tj`;
    })
        .join("\n");
    const stream = `BT\n${textCommands}\nET`;
    const objects = [
        "1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n",
        "2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n",
        "3 0 obj\n" +
            "<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] " +
            "/Resources << /Font << /F1 4 0 R >> >> /Contents 5 0 R >>\n" +
            "endobj\n",
        "4 0 obj\n<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>\nendobj\n",
        `5 0 obj\n<< /Length ${byteLength(stream)} >>\nstream\n${stream}\nendstream\nendobj\n`,
    ];
    // Offset of each object, measured from the start of the file.
    const offsets = [];
    let currentOffset = byteLength(header);
    for (const obj of objects) {
        offsets.push(currentOffset);
        currentOffset += byteLength(obj);
    }
    const xrefOffset = currentOffset;
    const xrefLines = [
        "xref",
        `0 ${objects.length + 1}`,
        "0000000000 65535 f ",
        ...offsets.map(offset => `${String(offset).padStart(10, "0")} 00000 n `),
    ];
    const xref = `${xrefLines.join("\n")}\n`;
    const trailer = "trailer\n" +
        `<< /Size ${objects.length + 1} /Root 1 0 R >>\n` +
        "startxref\n" +
        `${xrefOffset}\n` +
        "%%EOF\n";
    return Buffer.from([header, ...objects, xref, trailer].join(""), ENCODING);
}
48
/**
 * Writes a PDF built from `lines` to `sample.pdf` inside a fresh temporary
 * directory, hands its path to `run`, and removes the directory afterwards,
 * whether or not `run` (or the write) failed.
 */
async function withTempPdf(lines, run) {
    const tempDir = await mkdtemp(join(tmpdir(), "pdf-md-"));
    const pdfPath = join(tempDir, "sample.pdf");
    try {
        const pdfBytes = buildPdf(lines);
        await writeFile(pdfPath, pdfBytes);
        await run(pdfPath);
    }
    finally {
        await rm(tempDir, { recursive: true, force: true });
    }
}
59
describe("pdf-to-markdown", () => {
    // Builds a temporary PDF from `lines` and returns the converted Markdown.
    const renderMarkdown = async (lines) => {
        let markdown;
        await withTempPdf(lines, async (filePath) => {
            markdown = await convertPdfToMarkdown(filePath);
        });
        return markdown;
    };
    it("renders headings based on font size", async () => {
        const markdown = await renderMarkdown([
            { text: "Title", fontSize: 24, x: 100, y: 700 },
            { text: "Body text", fontSize: 12, x: 100, y: 660 },
        ]);
        expect(markdown).toContain("# Title");
    });
    it("merges nearby lines into a paragraph", async () => {
        const markdown = await renderMarkdown([
            { text: "First line", fontSize: 12, x: 100, y: 700 },
            { text: "Second line", fontSize: 12, x: 100, y: 684 },
        ]);
        expect(markdown).toContain("First line Second line");
    });
    it("formats bullet markers as list items", async () => {
        const markdown = await renderMarkdown([
            { text: "- Bullet item", fontSize: 12, x: 100, y: 700 },
        ]);
        expect(markdown).toContain("- Bullet item");
    });
});
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-froggy",
3
- "version": "0.7.2",
3
+ "version": "0.8.0",
4
4
  "description": "OpenCode plugin with a hook layer (tool.before.*, session.idle...), agents (code-reviewer, doc-writer), and commands (/review-pr, /commit)",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -39,12 +39,14 @@
39
39
  },
40
40
  "dependencies": {
41
41
  "js-yaml": "^4.1.0",
42
+ "pdfjs-dist": "^4.8.69",
42
43
  "viem": "^2.44.1"
43
44
  },
44
45
  "devDependencies": {
45
46
  "@opencode-ai/plugin": "latest",
46
47
  "@types/bun": "latest",
47
48
  "@types/js-yaml": "^4.0.9",
49
+ "@vitest/coverage-v8": "^4.0.17",
48
50
  "typescript": "^5.7.0",
49
51
  "vitest": "^4.0.16"
50
52
  },