@cyber-dash-tech/revela 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +239 -0
  3. package/README.zh-CN.md +270 -0
  4. package/designs/default/DESIGN.md +1100 -0
  5. package/designs/editorial-ribbon/DESIGN.md +1092 -0
  6. package/designs/minimal/DESIGN.md +1079 -0
  7. package/domains/consulting/INDUSTRY.md +230 -0
  8. package/domains/deeptech-investment/INDUSTRY.md +160 -0
  9. package/domains/general/INDUSTRY.md +6 -0
  10. package/index.ts +1 -0
  11. package/lib/agents/research-prompt.ts +129 -0
  12. package/lib/commands/designs.ts +59 -0
  13. package/lib/commands/disable.ts +14 -0
  14. package/lib/commands/domains.ts +59 -0
  15. package/lib/commands/enable.ts +48 -0
  16. package/lib/commands/help.ts +35 -0
  17. package/lib/config.ts +65 -0
  18. package/lib/ctx.ts +27 -0
  19. package/lib/design/designs.ts +389 -0
  20. package/lib/domain/domains.ts +258 -0
  21. package/lib/frontmatter.ts +63 -0
  22. package/lib/log.ts +35 -0
  23. package/lib/prompt-builder.ts +194 -0
  24. package/lib/qa/checks.ts +594 -0
  25. package/lib/qa/index.ts +38 -0
  26. package/lib/qa/measure.ts +287 -0
  27. package/lib/read-hooks/extractors/docx.ts +16 -0
  28. package/lib/read-hooks/extractors/pdf.ts +19 -0
  29. package/lib/read-hooks/extractors/pptx.ts +53 -0
  30. package/lib/read-hooks/extractors/xlsx.ts +81 -0
  31. package/lib/read-hooks/image/compress.ts +36 -0
  32. package/lib/read-hooks/index.ts +12 -0
  33. package/lib/read-hooks/post-read.ts +74 -0
  34. package/lib/read-hooks/pre-read.ts +51 -0
  35. package/package.json +65 -0
  36. package/plugin.ts +365 -0
  37. package/skill/SKILL.md +676 -0
  38. package/tools/designs.ts +126 -0
  39. package/tools/domains.ts +73 -0
  40. package/tools/qa.ts +61 -0
  41. package/tools/research-save.ts +96 -0
  42. package/tools/workspace-scan.ts +154 -0
@@ -0,0 +1,287 @@
1
+ /**
2
+ * lib/qa/measure.ts
3
+ *
4
+ * Puppeteer-based slide layout measurement.
5
+ * Opens the HTML file with a headless Chrome, navigates to each slide,
6
+ * and records the bounding boxes of all visible elements inside the
7
+ * slide canvas (1920×1080).
8
+ *
9
+ * Returns raw per-slide geometry data consumed by checks.ts.
10
+ */
11
+
12
import { existsSync } from "fs"
import puppeteer from "puppeteer-core"
import { pathToFileURL } from "url"
14
+
15
+ // ── Constants ────────────────────────────────────────────────────────────────
16
+
17
/** Width, in CSS pixels, of the canonical slide canvas used by the design system. */
export const CANVAS_W = 1920

/** Height, in CSS pixels, of the canonical slide canvas used by the design system. */
export const CANVAS_H = 1080
20
+
21
/**
 * Candidate locations of a system Chrome/Chromium binary, probed in order.
 * macOS application bundles come first, followed by common Linux paths.
 */
const CHROME_PATHS = [
  // macOS
  "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
  "/Applications/Chromium.app/Contents/MacOS/Chromium",
  // Linux
  "/usr/bin/google-chrome-stable",
  "/usr/bin/google-chrome",
  "/usr/bin/chromium-browser",
  "/usr/bin/chromium",
]
30
+
31
+ // ── Types ────────────────────────────────────────────────────────────────────
32
+
33
/**
 * Axis-aligned rectangle described by its four edges plus its extent.
 * Within this module the values are pixel coordinates measured in the page.
 */
export interface Rect {
  /** x coordinate of the left edge */
  left: number
  /** y coordinate of the top edge */
  top: number
  /** x coordinate of the right edge */
  right: number
  /** y coordinate of the bottom edge */
  bottom: number
  /** horizontal extent */
  width: number
  /** vertical extent */
  height: number
}
41
+
42
/** Geometry of one measured DOM element inside a slide canvas. */
export interface ElementInfo {
  /**
   * Human-readable location of the element, for reports only.
   * Built from the tag name plus up to two class names of the element and
   * its ancestors, keeping the last three segments joined with " > ".
   * It is not guaranteed to be unique or usable with `querySelector`.
   */
  selector: string
  /** Bounding box relative to the top-left corner of the slide canvas. */
  rect: Rect
  /**
   * true if the element is considered "visible" (non-zero size, not hidden).
   * measureSlides only records visible elements, so it always sets this to true.
   */
  visible: boolean
  /**
   * Visible, non-decorative direct children, collected recursively.
   * Collection stops after five nesting levels, below which this is empty.
   */
  children: ElementInfo[]
}
51
+
52
/** Raw layout measurements of a single slide, as produced by measureSlides. */
export interface SlideMetrics {
  /** Position of the slide among the document's `.slide` elements (0-based). */
  index: number
  /**
   * Text of the first h1/h2 found in the slide canvas, whitespace-collapsed
   * and cut to 80 characters; "Slide N" when the slide has no heading.
   */
  title: string
  /**
   * Structural role taken from the slide's `data-slide-type` attribute.
   * Valid values: "cover", "toc", "content", "closing", "divider", "summary".
   * Undefined when the attribute is absent (old/third-party HTML).
   */
  slideType?: string
  /**
   * Rendered size of the `.slide-canvas` element, expressed with its own
   * top-left corner as the origin (so `left` and `top` are 0).
   */
  canvasRect: Rect
  /** Visible top-level children of `.slide-canvas`, each with its subtree. */
  elements: ElementInfo[]
  /**
   * Smallest rectangle enclosing every visible leaf element.
   * All fields are 0 when the slide has no visible elements.
   */
  contentRect: Rect
}
70
+
71
+ // ── Helpers ──────────────────────────────────────────────────────────────────
72
+
73
/**
 * Locate a Chrome/Chromium executable for puppeteer-core to drive.
 *
 * Resolution order:
 *   1. `PUPPETEER_EXECUTABLE_PATH`, then `CHROME_PATH`, when set to an existing file
 *   2. the well-known install locations listed in `CHROME_PATHS`
 *
 * The environment overrides make non-standard installs (Windows, Nix, CI
 * images, …) usable without editing the hard-coded list.
 *
 * @returns Path of the first candidate that exists on disk.
 * @throws Error naming every candidate that was tried when none exists.
 */
function findChromePath(): string {
  const fromEnv = [process.env.PUPPETEER_EXECUTABLE_PATH, process.env.CHROME_PATH].filter(
    (p): p is string => typeof p === "string" && p.length > 0
  )
  for (const p of fromEnv) {
    if (existsSync(p)) return p
  }
  for (const p of CHROME_PATHS) {
    if (existsSync(p)) return p
  }
  throw new Error(
    "Could not find a Chrome/Chromium installation. " +
      "Tried: " + [...fromEnv, ...CHROME_PATHS].join(", ") +
      ". Set PUPPETEER_EXECUTABLE_PATH or CHROME_PATH to a browser binary."
  )
}
83
+
84
+ // ── Main export ──────────────────────────────────────────────────────────────
85
+
86
/**
 * Open `htmlFilePath` in headless Chrome with a 1920×1080 viewport and measure
 * the layout of every `.slide` element.
 *
 * For each slide the page scrolls to it, forces its `.reveal` elements
 * visible, waits for transitions to settle, and then records the bounding
 * boxes of the visible descendants of its `.slide-canvas` (up to five nesting
 * levels), relative to the canvas' top-left corner.
 *
 * Slides without a `.slide-canvas` child are skipped, so the result can hold
 * fewer entries than there are slides; `index` always refers to the slide's
 * position in the document.
 *
 * @param htmlFilePath - Path of the HTML deck on disk (converted to a file:// URL).
 * @returns One SlideMetrics per measurable slide, in document order.
 * @throws Error when no Chrome/Chromium executable can be found. Browser
 *   launch and navigation failures propagate as rejections.
 */
export async function measureSlides(htmlFilePath: string): Promise<SlideMetrics[]> {
  const executablePath = findChromePath()
  const fileUrl = pathToFileURL(htmlFilePath).href

  const browser = await puppeteer.launch({
    executablePath,
    headless: true,
    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
      "--window-size=1920,1080",
    ],
  })

  try {
    const page = await browser.newPage()

    // Set viewport to exact canvas size so scale === 1 (no CSS transform needed).
    await page.setViewport({ width: CANVAS_W, height: CANVAS_H })
    await page.goto(fileUrl, { waitUntil: "networkidle0", timeout: 30000 })

    // Wait for any entrance animations / intersection observers to fire.
    await new Promise((r) => setTimeout(r, 600))

    const slideCount: number = await page.evaluate(
      () => document.querySelectorAll(".slide").length
    )

    const metrics: SlideMetrics[] = []

    // Measure slides one-by-one: scroll each into view, wait for animations,
    // then collect geometry relative to the canvas coordinate system.
    for (let idx = 0; idx < slideCount; idx++) {
      await page.evaluate((i: number) => {
        const slides = document.querySelectorAll(".slide")
        const slide = slides[i] as HTMLElement
        if (slide) {
          slide.scrollIntoView({ behavior: "instant" })
          // Force all .reveal elements visible (in case IO didn't fire)
          slide.querySelectorAll(".reveal").forEach((el) => el.classList.add("visible"))
        }
      }, idx)

      // Wait for CSS transitions + any JS rendering (ECharts, bar animations, etc.)
      await new Promise((r) => setTimeout(r, 800))

      // Everything inside this callback runs in the page, so it must be
      // self-contained: it cannot reference helpers or constants of this module.
      const slideData = await page.evaluate(
        (slideIdx: number) => {
          // ── In-browser helpers ───────────────────────────────────────────

          /** Class-name fragments marking purely decorative layers to ignore. */
          const DECORATIVE_CLASS_PARTS = ["aurora", "stars", "progress", "nav-dot", "deco-blob"]

          /** An element counts as visible when it has area and is not hidden via CSS. */
          function isVisible(el: Element): boolean {
            const r = el.getBoundingClientRect()
            if (r.width === 0 || r.height === 0) return false
            const style = window.getComputedStyle(el)
            if (style.visibility === "hidden") return false
            if (style.display === "none") return false
            if (parseFloat(style.opacity) < 0.01) return false
            return true
          }

          /** Translate a viewport rect into canvas-relative coordinates. */
          function toRectRelative(r: DOMRect, offsetTop: number, offsetLeft: number) {
            return {
              left: r.left - offsetLeft,
              top: r.top - offsetTop,
              right: r.right - offsetLeft,
              bottom: r.bottom - offsetTop,
              width: r.width,
              height: r.height,
            }
          }

          /** Short "tag.class > tag.class" trail (last three levels) for reports. */
          function selectorOf(el: Element): string {
            const parts: string[] = []
            let cur: Element | null = el
            while (cur && cur !== document.body) {
              const tag = cur.tagName.toLowerCase()
              const cls = Array.from(cur.classList)
                .slice(0, 2)
                .map((c) => "." + c)
                .join("")
              parts.unshift(tag + cls)
              cur = cur.parentElement
            }
            return parts.slice(-3).join(" > ")
          }

          type EI = {
            selector: string
            rect: ReturnType<typeof toRectRelative>
            visible: boolean
            children: EI[]
          }

          /** Recursively collect visible, non-decorative children (depth 0..4). */
          function collectChildren(
            el: Element,
            offsetTop: number,
            offsetLeft: number,
            depth = 0
          ): EI[] {
            if (depth > 4) return []
            const result: EI[] = []
            for (const child of Array.from(el.children)) {
              if (!isVisible(child)) continue
              // Read the class attribute instead of `className`: on SVG elements
              // `className` is an SVGAnimatedString, not a string, which used to
              // let decorative SVG layers bypass this filter.
              const cls = child.getAttribute("class") ?? ""
              if (DECORATIVE_CLASS_PARTS.some((part) => cls.includes(part))) continue
              result.push({
                selector: selectorOf(child),
                rect: toRectRelative(child.getBoundingClientRect(), offsetTop, offsetLeft),
                visible: true,
                children: collectChildren(child, offsetTop, offsetLeft, depth + 1),
              })
            }
            return result
          }

          /** Union of all visible leaf rects; an all-zero rect when there are none. */
          function unionRect(els: EI[]): ReturnType<typeof toRectRelative> {
            let left = Infinity, top = Infinity, right = -Infinity, bottom = -Infinity
            function walk(list: EI[]) {
              for (const e of list) {
                if (!e.visible) continue
                if (e.children.length > 0) {
                  walk(e.children)
                } else {
                  left = Math.min(left, e.rect.left)
                  top = Math.min(top, e.rect.top)
                  right = Math.max(right, e.rect.right)
                  bottom = Math.max(bottom, e.rect.bottom)
                }
              }
            }
            walk(els)
            if (left === Infinity) return { left: 0, top: 0, right: 0, bottom: 0, width: 0, height: 0 }
            return { left, top, right, bottom, width: right - left, height: bottom - top }
          }

          // ── Per-slide measurement ────────────────────────────────────────

          const slide = document.querySelectorAll(".slide")[slideIdx]
          if (!slide) return null

          // Semantic slide type, if the author provided one (empty counts as absent).
          const slideType = slide.getAttribute("data-slide-type") || undefined

          const canvas = slide.querySelector(".slide-canvas") as HTMLElement | null
          if (!canvas) return null

          const canvasRaw = canvas.getBoundingClientRect()
          // Use canvas top-left as the coordinate origin
          const offsetTop = canvasRaw.top
          const offsetLeft = canvasRaw.left

          const canvasRect = {
            left: 0,
            top: 0,
            right: canvasRaw.width,
            bottom: canvasRaw.height,
            width: canvasRaw.width,
            height: canvasRaw.height,
          }

          const elements = collectChildren(canvas, offsetTop, offsetLeft)

          const titleEl = canvas.querySelector("h1, h2")
          const title = titleEl
            ? (titleEl.textContent || "").replace(/\s+/g, " ").trim().slice(0, 80)
            : `Slide ${slideIdx + 1}`

          return {
            index: slideIdx,
            title,
            slideType,
            canvasRect,
            elements,
            contentRect: unionRect(elements),
          }
        },
        idx
      )

      if (slideData) metrics.push(slideData as SlideMetrics)
    }

    return metrics
  } finally {
    // Always shut the browser down, even when measurement fails midway.
    await browser.close()
  }
}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * lib/read-hooks/extractors/docx.ts
3
+ *
4
+ * DOCX text extraction using mammoth.js (pure JS, 6k+ stars).
5
+ * Extracts raw text without formatting — suitable for LLM context.
6
+ */
7
+
8
+ import mammoth from "mammoth"
9
+
10
/**
 * Pull the unformatted text out of a DOCX document.
 *
 * @param buf - Raw bytes of the .docx file.
 * @returns The `value` of mammoth's raw-text extraction result.
 */
export async function extractDocx(buf: Buffer): Promise<string> {
  const { value } = await mammoth.extractRawText({ buffer: buf })
  return value
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * lib/read-hooks/extractors/pdf.ts
3
+ *
4
+ * PDF text extraction using unpdf (zero-dependency, pure JS, serverless PDF.js).
5
+ * Only extracts text — image extraction from PDFs requires native deps (@napi-rs/canvas)
6
+ * and is intentionally excluded.
7
+ */
8
+
9
+ import { getDocumentProxy, extractText } from "unpdf"
10
+
11
/**
 * Extract the text of every page of a PDF as one string.
 *
 * Pages are merged by unpdf (`mergePages: true`).
 * NOTE(review): the separator placed between pages is chosen by unpdf and may
 * differ between library versions — confirm before relying on it.
 *
 * @param buf - Raw bytes of the PDF file.
 * @returns The merged text reported by unpdf.
 */
export async function extractPdfText(buf: Buffer): Promise<string> {
  const bytes = new Uint8Array(buf)
  const document = await getDocumentProxy(bytes)
  const extracted = await extractText(document, { mergePages: true })
  return extracted.text
}
@@ -0,0 +1,53 @@
1
+ /**
2
+ * lib/read-hooks/extractors/pptx.ts
3
+ *
4
+ * PPTX text extraction using fflate (ZIP decompression) + @xmldom/xmldom (XML parsing).
5
+ * Pure JS, zero native dependencies.
6
+ *
7
+ * PPTX is a ZIP archive containing slide XML files at ppt/slides/slideN.xml.
8
+ * Text content is stored in <a:t> elements under the DrawingML namespace.
9
+ */
10
+
11
+ import { unzipSync } from "fflate"
12
+ import { DOMParser } from "@xmldom/xmldom"
13
+
14
/** XML namespace of DrawingML, which holds the `<a:p>` / `<a:t>` text elements. */
const DRAWINGML_NS = "http://schemas.openxmlformats.org/drawingml/2006/main"

/** Matches slide parts inside the archive and captures the slide number. */
const PPTX_SLIDE_PART_RE = /^ppt\/slides\/slide(\d+)\.xml$/

/**
 * Extract text from all slides in a PPTX buffer.
 *
 * Slides are emitted in slide-number order, each prefixed with
 * "--- Slide N ---" and separated by a blank line. Within a slide every
 * DrawingML paragraph (`<a:p>`) becomes one line: the text runs (`<a:t>`) of
 * a paragraph are concatenated, so a sentence with mixed formatting is not
 * split across lines and the spaces between its runs are kept. Slides
 * without any text are omitted.
 *
 * @param buf - Raw bytes of the .pptx file.
 * @returns The extracted text, or an empty string when no slide has text.
 * @throws If the buffer is not a readable ZIP archive (error from fflate).
 */
export async function extractPptx(buf: Buffer): Promise<string> {
  // Only inflate slide XML: embedded media can be large and is never read.
  const files = unzipSync(new Uint8Array(buf), {
    filter: (file) => PPTX_SLIDE_PART_RE.test(file.name),
  })
  const parser = new DOMParser()

  // Order slide parts numerically ("slide10" must follow "slide9").
  const slideParts = Object.keys(files)
    .flatMap((path) => {
      const label = PPTX_SLIDE_PART_RE.exec(path)?.[1]
      return label === undefined ? [] : [{ path, label, order: parseInt(label, 10) }]
    })
    .sort((a, b) => a.order - b.order)

  const slides: string[] = []
  for (const { path, label } of slideParts) {
    const xml = new TextDecoder().decode(files[path])
    const doc = parser.parseFromString(xml, "text/xml")

    const paragraphs = doc.getElementsByTagNameNS(DRAWINGML_NS, "p")
    const lines: string[] = []
    for (let i = 0; i < paragraphs.length; i++) {
      const runs = paragraphs[i].getElementsByTagNameNS(DRAWINGML_NS, "t")
      let line = ""
      for (let j = 0; j < runs.length; j++) {
        line += runs[j].textContent ?? ""
      }
      line = line.trim()
      if (line) lines.push(line)
    }

    if (lines.length) {
      slides.push(`--- Slide ${label} ---\n${lines.join("\n")}`)
    }
  }

  return slides.join("\n\n")
}
@@ -0,0 +1,81 @@
1
+ /**
2
+ * lib/read-hooks/extractors/xlsx.ts
3
+ *
4
+ * XLSX text extraction using fflate (ZIP decompression) + @xmldom/xmldom (XML parsing).
5
+ * Pure JS, zero native dependencies.
6
+ *
7
+ * XLSX is a ZIP archive. Text values are stored in xl/sharedStrings.xml;
8
+ * cell references index into that shared table. Sheet data lives in
9
+ * xl/worksheets/sheetN.xml.
10
+ */
11
+
12
+ import { unzipSync } from "fflate"
13
+ import { DOMParser } from "@xmldom/xmldom"
14
+
15
/** Matches worksheet parts inside the archive and captures the sheet number. */
const XLSX_SHEET_PART_RE = /^xl\/worksheets\/sheet(\d+)\.xml$/

/** Archive path of the shared string table. */
const XLSX_SHARED_STRINGS_PART = "xl/sharedStrings.xml"

/** Excel's column limit (A…XFD); references beyond it are treated as malformed. */
const XLSX_MAX_COLUMNS = 16384

/**
 * Convert a cell reference such as "C7" into a 0-based column index (2).
 * Returns -1 when the reference is missing or has no column letters.
 */
function xlsxColumnIndex(ref: string | null): number {
  const letters = /^[A-Za-z]+/.exec(ref ?? "")?.[0]
  if (!letters) return -1
  let n = 0
  for (const ch of letters.toUpperCase()) {
    n = n * 26 + (ch.charCodeAt(0) - 64) // "A" is char code 65
  }
  return n - 1
}

/**
 * Extract tabular text from all sheets in an XLSX buffer.
 *
 * Sheets are emitted in sheet-number order, each prefixed with
 * "--- Sheet N ---" and separated by a blank line. Cells are tab-separated,
 * rows are newline-separated, and rows without any value are skipped.
 *
 * Cell handling:
 *   - `t="s"`         → looked up in xl/sharedStrings.xml (raw value if the index is unknown)
 *   - `t="inlineStr"` → text taken from the cell's `<is><t>` runs
 *   - anything else   → raw `<v>` content
 * Cells are placed by their `r` reference, so empty cells that the file
 * omits still produce an empty column and rows stay aligned.
 *
 * @param buf - Raw bytes of the .xlsx file.
 * @returns The extracted text, or an empty string when no sheet has values.
 * @throws If the buffer is not a readable ZIP archive (error from fflate).
 */
export async function extractXlsx(buf: Buffer): Promise<string> {
  // Only inflate the parts that are read; media and styles are skipped.
  const files = unzipSync(new Uint8Array(buf), {
    filter: (file) =>
      file.name === XLSX_SHARED_STRINGS_PART || XLSX_SHEET_PART_RE.test(file.name),
  })
  const parser = new DOMParser()

  // 1. Parse sharedStrings.xml — all string values are stored here by index
  const sharedStrings: string[] = []
  const ssFile = files[XLSX_SHARED_STRINGS_PART]
  if (ssFile) {
    const doc = parser.parseFromString(new TextDecoder().decode(ssFile), "text/xml")
    const siNodes = doc.getElementsByTagName("si")
    for (let i = 0; i < siNodes.length; i++) {
      // A rich-text entry spreads its text over several <t> runs.
      const tNodes = siNodes[i].getElementsByTagName("t")
      let joined = ""
      for (let j = 0; j < tNodes.length; j++) {
        joined += tNodes[j].textContent ?? ""
      }
      sharedStrings.push(joined)
    }
  }

  // 2. Parse each worksheet, ordered numerically ("sheet10" after "sheet9")
  const sheetParts = Object.keys(files)
    .flatMap((path) => {
      const label = XLSX_SHEET_PART_RE.exec(path)?.[1]
      return label === undefined ? [] : [{ path, label, order: parseInt(label, 10) }]
    })
    .sort((a, b) => a.order - b.order)

  const sheets: string[] = []
  for (const { path, label } of sheetParts) {
    const xml = new TextDecoder().decode(files[path])
    const doc = parser.parseFromString(xml, "text/xml")
    const rows = doc.getElementsByTagName("row")
    const rowTexts: string[] = []

    for (let r = 0; r < rows.length; r++) {
      const cells = rows[r].getElementsByTagName("c")
      const cellValues: string[] = []

      for (let c = 0; c < cells.length; c++) {
        const cell = cells[c]
        const type = cell.getAttribute("t")

        let value: string
        if (type === "inlineStr") {
          // Inline strings keep their text in <is><t>…</t></is>, not in <v>.
          const tNodes = cell.getElementsByTagName("t")
          value = ""
          for (let j = 0; j < tNodes.length; j++) {
            value += tNodes[j].textContent ?? ""
          }
        } else {
          const raw = cell.getElementsByTagName("v")[0]?.textContent ?? ""
          // type="s" → shared string index; otherwise use raw value
          value = type === "s" ? (sharedStrings[parseInt(raw, 10)] ?? raw) : raw
        }

        // Pad up to the referenced column. A missing, out-of-order or
        // out-of-range reference just appends after the previous cell.
        const referenced = xlsxColumnIndex(cell.getAttribute("r"))
        const column =
          referenced >= cellValues.length && referenced < XLSX_MAX_COLUMNS
            ? referenced
            : cellValues.length
        while (cellValues.length < column) cellValues.push("")
        cellValues.push(value)
      }

      if (cellValues.some(Boolean)) {
        rowTexts.push(cellValues.join("\t"))
      }
    }

    if (rowTexts.length) {
      sheets.push(`--- Sheet ${label} ---\n${rowTexts.join("\n")}`)
    }
  }

  return sheets.join("\n\n")
}
@@ -0,0 +1,36 @@
1
+ /**
2
+ * lib/read-hooks/image/compress.ts
3
+ *
4
+ * Image compression using jimp (pure JS, zero native dependencies, 14k+ stars).
5
+ * Goal: reduce base64 attachment size to save LLM context tokens.
6
+ *
7
+ * Strategy:
8
+ * - Resize to max 1024px on longest side (preserving aspect ratio)
9
+ * - Convert to JPEG at 60% quality
10
+ * - This typically achieves 60-80% size reduction
11
+ */
12
+
13
+ import { Jimp } from "jimp"
14
+
15
/** Upper bound, in pixels, for the longest side of a compressed image. */
const MAX_DIMENSION = 1024

/** JPEG quality (percent) used when re-encoding. */
const JPEG_QUALITY = 60

/**
 * Shrink and re-encode an image to cut its size.
 *
 * When the longest side exceeds MAX_DIMENSION only that side is passed to
 * jimp's resize, pinned to MAX_DIMENSION; smaller images keep their size.
 * The result is always encoded as JPEG, whatever the input format was.
 *
 * @param buf - Encoded source image in a format jimp can read.
 * @returns The re-encoded JPEG bytes.
 */
export async function compressImage(buf: Buffer): Promise<Buffer> {
  const image = await Jimp.read(buf)
  const { width, height } = image.bitmap

  const longestSide = Math.max(width, height)
  if (longestSide > MAX_DIMENSION) {
    // Constrain the longer side only.
    image.resize(width >= height ? { w: MAX_DIMENSION } : { h: MAX_DIMENSION })
  }

  const jpeg = await image.getBuffer("image/jpeg", { quality: JPEG_QUALITY })
  return jpeg
}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * lib/read-hooks/index.ts
3
+ *
4
+ * Entry point for the read-hooks module.
5
+ * Exports preRead and postRead for use in the plugin.ts hook handlers.
6
+ *
7
+ * preRead → tool.execute.before: redirect binary files (DOCX/PPTX/XLSX) to temp txt
8
+ * postRead → tool.execute.after: transform PDF/image attachments before LLM sees them
9
+ */
10
+
11
+ export { preRead } from "./pre-read"
12
+ export { postRead } from "./post-read"
@@ -0,0 +1,74 @@
1
+ /**
2
+ * lib/read-hooks/post-read.ts
3
+ *
4
+ * After-hook handler for the OpenCode `read` tool.
5
+ * Called from `tool.execute.after` in plugin.ts.
6
+ *
7
+ * Handles PDF and images — formats where read tool succeeds and returns
8
+ * a base64 attachment. The after-hook fires after execution but before
9
+ * the result reaches the LLM, so we can replace the output here.
10
+ *
11
+ * PDF strategy: extract text from base64 → replace output string → remove attachment
12
+ * Image strategy: decompress base64 → jimp compress → re-encode → replace attachment
13
+ *
14
+ * Note: `output.attachments` is present at runtime despite not being in the
15
+ * TypeScript type definition for tool.execute.after. Confirmed via source inspection
16
+ * of packages/opencode/src/session/prompt.ts.
17
+ */
18
+
19
+ import { extname, basename } from "path"
20
+ import { extractPdfText } from "./extractors/pdf"
21
+ import { compressImage } from "./image/compress"
22
+
23
/** File extensions (lower-case, with dot) whose attachments are recompressed. */
const IMAGE_EXTS = new Set([".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp", ".gif"])

/** Shape of the read tool's output object as mutated by the after-hook. */
interface ReadOutput {
  title: string
  output: string
  metadata: any
  /** Attachments whose `url` is read as a data URL with a base64 payload. */
  attachments?: Array<{ url: string; mime: string; [k: string]: any }>
}

/**
 * Post-process read tool output for PDF and image files.
 *
 * Only the first attachment is inspected, and `output` is mutated in place:
 *   - PDF: the attachment is decoded and its text extracted. When text is
 *     found, `output.output` / `output.title` are replaced and all
 *     attachments are removed. When no text is found (e.g. a scanned PDF)
 *     the output is left untouched, so the document is not lost.
 *   - Image: the attachment is re-encoded as a downsized JPEG, but only
 *     swapped in when the result is actually smaller than the original.
 * Any other file type, or an attachment without a base64 payload, is left as is.
 *
 * @param args - Read tool args (input.args in after-hook)
 * @param output - Mutable read tool output (output in after-hook)
 */
export async function postRead(
  args: { filePath: string; [k: string]: any },
  output: ReadOutput,
): Promise<void> {
  const attachments = output.attachments
  if (!attachments?.length) return

  const ext = extname(args.filePath).toLowerCase()
  const isPdf = ext === ".pdf"
  if (!isPdf && !IMAGE_EXTS.has(ext)) return

  const attachment = attachments[0]
  // A data URL looks like "data:<mime>;base64,<payload>".
  const base64 = attachment.url.split(",")[1]
  if (!base64) return
  const buf = Buffer.from(base64, "base64")

  // ── PDF: extract text, drop base64 attachment ───────────────────────────
  if (isPdf) {
    const text = await extractPdfText(buf)
    // Image-only PDFs yield no text; dropping the attachment then would
    // leave nothing but a header, so keep the original output.
    if (!text.trim()) return

    const name = basename(args.filePath)
    output.output = `[Extracted from: ${name}]\n\n${text}`
    output.title = `Extracted text from ${name}`
    attachments.length = 0 // Remove base64 — saves significant tokens
    return
  }

  // ── Images: compress attachment to reduce token cost ────────────────────
  const compressed = await compressImage(buf)
  // Small or already well-compressed images can grow when re-encoded.
  if (compressed.length >= buf.length) return

  attachment.url = `data:image/jpeg;base64,${compressed.toString("base64")}`
  attachment.mime = "image/jpeg"
}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * lib/read-hooks/pre-read.ts
3
+ *
4
+ * Before-hook handler for the OpenCode `read` tool.
5
+ * Called from `tool.execute.before` in plugin.ts.
6
+ *
7
+ * Handles DOCX, PPTX, XLSX — formats that cause read tool to throw
8
+ * Effect.fail("Cannot read binary file"), so the after-hook never fires.
9
+ *
10
+ * Strategy: extract text → write temp .txt file → redirect args.filePath.
11
+ * The read tool then reads the temp file normally. LLM is unaware of the redirect.
12
+ */
13
+
14
+ import { readFileSync, writeFileSync } from "fs"
15
+ import { extname, basename, join } from "path"
16
+ import { tmpdir } from "os"
17
+ import { randomUUID } from "crypto"
18
+ import { extractDocx } from "./extractors/docx"
19
+ import { extractPptx } from "./extractors/pptx"
20
+ import { extractXlsx } from "./extractors/xlsx"
21
+
22
/** Lower-case file extension → function that turns the file's bytes into text. */
const HANDLERS: Record<string, (buf: Buffer) => Promise<string>> = {
  ".docx": extractDocx,
  ".pptx": extractPptx,
  ".xlsx": extractXlsx,
}

/**
 * Intercept read tool args before execution.
 *
 * If the file has a supported binary format (.docx, .pptx, .xlsx), its text
 * is extracted and written to a new temp .txt file, and `args.filePath` is
 * redirected to that file. Other files are left untouched.
 *
 * The temp file holds the document's content, so it is created with
 * owner-only permissions (0o600) and with the exclusive "wx" flag, which
 * refuses to write through a pre-existing file or symlink of the same name.
 * The file is not removed here; cleanup is left to the OS temp directory.
 *
 * @param args - Mutable read tool args object (from output.args in before-hook)
 * @throws If the source file cannot be read, extraction fails, or the temp
 *   file cannot be written.
 */
export async function preRead(args: { filePath: string; [k: string]: any }): Promise<void> {
  const ext = extname(args.filePath).toLowerCase()
  const handler = HANDLERS[ext]
  if (!handler) return // Not a handled format — let read tool proceed normally

  const buf = readFileSync(args.filePath)
  const text = await handler(buf)

  // Write extracted text to a temp file, prefixed with source info
  const header = `[Extracted from: ${basename(args.filePath)}]\n\n`
  const tmpPath = join(tmpdir(), `revela-${randomUUID()}.txt`)
  writeFileSync(tmpPath, header + text, { encoding: "utf-8", mode: 0o600, flag: "wx" })

  // Redirect read tool to the temp file
  args.filePath = tmpPath
}