@cyber-dash-tech/revela 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -37,8 +37,17 @@ Given a research brief specifying your topic and axis, you will:
|
|
|
37
37
|
Use the **\`revela-workspace-scan\`** tool in a single call to discover all document
|
|
38
38
|
files in the workspace (PDF, Word, Excel, PowerPoint, CSV, text).
|
|
39
39
|
|
|
40
|
-
Then
|
|
41
|
-
|
|
40
|
+
Then select the files relevant to your research axis.
|
|
41
|
+
|
|
42
|
+
For every selected file, call **\`revela-extract-document-materials\`** first.
|
|
43
|
+
- \`pptx\`, \`docx\`, and \`xlsx\` will produce a manifest plus extracted text and any available embedded materials
|
|
44
|
+
- unsupported file types will be skipped automatically
|
|
45
|
+
|
|
46
|
+
After that, use the \`read\` tool on:
|
|
47
|
+
- the original relevant file when you want the plain extracted text
|
|
48
|
+
- the generated manifest and extracted image/table files when visual or tabular evidence matters
|
|
49
|
+
|
|
50
|
+
For PDFs and Office formats, the Revela plugin extracts text transparently — just call \`read\` normally.
|
|
42
51
|
|
|
43
52
|
---
|
|
44
53
|
|
|
@@ -125,6 +134,7 @@ Gaps:
|
|
|
125
134
|
- **NEVER** ask the user for information you can find through search or workspace files
|
|
126
135
|
- **NEVER** use the raw \`write\` tool — always use \`revela-research-save\`
|
|
127
136
|
- **NEVER** fabricate image URLs — only record URLs you actually found
|
|
137
|
+
- **Always** call \`revela-extract-document-materials\` for every selected workspace file before deciding which extracted materials to read next
|
|
128
138
|
- **Always** include source attribution on every data point
|
|
129
139
|
- **Always** use tables for comparative data (more useful than bullets for presentations)
|
|
130
140
|
- **Preserve** raw data — the primary agent will select what to include in slides
|
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
import { createHash } from "crypto"
|
|
2
|
+
import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync } from "fs"
|
|
3
|
+
import { basename, dirname, extname, isAbsolute, join, relative, resolve } from "path"
|
|
4
|
+
import { DOMParser } from "@xmldom/xmldom"
|
|
5
|
+
import { unzipSync } from "fflate"
|
|
6
|
+
import { extractDocx } from "../read-hooks/extractors/docx"
|
|
7
|
+
import { extractPptx } from "../read-hooks/extractors/pptx"
|
|
8
|
+
import { extractXlsx } from "../read-hooks/extractors/xlsx"
|
|
9
|
+
|
|
10
|
+
/** One extracted artifact (image or table) exposed to the agent. */
export type DocumentMaterial = {
  // Workspace-relative path of the exported artifact file.
  path: string
  // Origin inside the source archive (zip entry path) or, for tables,
  // a logical label such as "workbook".
  source_ref: string
  // Optional location label, e.g. "slide-03" or "sheet-01".
  page_or_slide?: string
  // Free-form caveat, e.g. when no slide/sheet relationship was found.
  note?: string
}

/** Result returned to the caller for a single document extraction. */
export type DocumentMaterialsResult = {
  status: "processed" | "skipped" | "failed"
  // The source document; workspace-relative when resolution succeeded.
  source: string
  type: "pptx" | "docx" | "xlsx" | "other"
  // The following paths are workspace-relative and only present on success.
  cache_dir?: string
  manifest_path?: string
  text_path?: string
  images?: DocumentMaterial[]
  tables?: DocumentMaterial[]
  // Machine-readable code for "skipped", error message for "failed".
  reason?: string
}

// Office formats this extractor can actually process.
type SupportedType = Exclude<DocumentMaterialsResult["type"], "other">

// Shape of manifest.json persisted in the cache directory. Reloaded on
// later calls so extraction is skipped while the fingerprint matches.
type CachedManifest = {
  source: string
  type: SupportedType
  fingerprint: string
  cache_dir: string
  manifest_path: string
  text_path: string
  images: DocumentMaterial[]
  tables: DocumentMaterial[]
}

// Lowercase file extension → supported document type.
const SUPPORTED_EXTENSIONS: Record<string, SupportedType> = {
  ".pptx": "pptx",
  ".docx": "docx",
  ".xlsx": "xlsx",
}
|
|
47
|
+
|
|
48
|
+
function normalizeZipTarget(basePath: string, target: string): string {
|
|
49
|
+
const segments = join(dirname(basePath), target).split("/")
|
|
50
|
+
const normalized: string[] = []
|
|
51
|
+
|
|
52
|
+
for (const segment of segments) {
|
|
53
|
+
if (!segment || segment === ".") continue
|
|
54
|
+
if (segment === "..") {
|
|
55
|
+
normalized.pop()
|
|
56
|
+
continue
|
|
57
|
+
}
|
|
58
|
+
normalized.push(segment)
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
return normalized.join("/")
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
function ensureWorkspacePath(filePath: string, workspaceDir: string): string {
|
|
65
|
+
const resolvedWorkspace = resolve(workspaceDir)
|
|
66
|
+
const resolvedFile = isAbsolute(filePath) ? resolve(filePath) : resolve(workspaceDir, filePath)
|
|
67
|
+
|
|
68
|
+
if (resolvedFile !== resolvedWorkspace && !resolvedFile.startsWith(resolvedWorkspace + "/")) {
|
|
69
|
+
throw new Error("file must be within workspace")
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
return resolvedFile
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
function workspaceRelative(filePath: string, workspaceDir: string): string {
|
|
76
|
+
return relative(workspaceDir, filePath).replace(/\\/g, "/")
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
function buildFingerprint(filePath: string): string {
|
|
80
|
+
const stat = statSync(filePath)
|
|
81
|
+
return createHash("sha1")
|
|
82
|
+
.update(`${resolve(filePath)}:${stat.mtimeMs}:${stat.size}`)
|
|
83
|
+
.digest("hex")
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
function writeCachedBuffer(targetPath: string, buf: Uint8Array): void {
|
|
87
|
+
mkdirSync(dirname(targetPath), { recursive: true })
|
|
88
|
+
writeFileSync(targetPath, new Uint8Array(buf))
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
function materialPath(cacheDir: string, workspaceDir: string, ...segments: string[]): string {
|
|
92
|
+
return workspaceRelative(join(cacheDir, ...segments), workspaceDir)
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
function parseXml(files: Record<string, Uint8Array>, path: string): any | null {
|
|
96
|
+
const file = files[path]
|
|
97
|
+
if (!file) return null
|
|
98
|
+
return new DOMParser().parseFromString(new TextDecoder().decode(file), "text/xml")
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
/**
 * Export every embedded image from a .pptx archive into
 * <cacheDir>/images, mapping each image back to its slide where a
 * slide-level relationship exists.
 *
 * Strategy: walk each slide's relationship part
 * (ppt/slides/_rels/slideN.xml.rels), resolve Target attributes to
 * ppt/media/ entries, and export them with a slide-numbered name.
 * Media entries never referenced by any slide rels are exported in a
 * second pass with an "unmapped-" prefix so nothing is silently lost.
 */
function extractPptxImages(files: Record<string, Uint8Array>, cacheDir: string, workspaceDir: string): DocumentMaterial[] {
  // Numeric-aware sort so slide10 orders after slide2.
  const relFiles = Object.keys(files)
    .filter((file) => /^ppt\/slides\/_rels\/slide\d+\.xml\.rels$/.test(file))
    .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }))

  const images: DocumentMaterial[] = []
  // Media entries already exported via a slide relationship.
  const seenTargets = new Set<string>()

  for (const relPath of relFiles) {
    const slideMatch = relPath.match(/slide(\d+)\.xml\.rels$/)
    const slideNumber = slideMatch?.[1] ?? "0"
    // Relationship targets are relative to the slide part itself, not
    // to the _rels folder: ppt/slides/_rels/slideN.xml.rels → ppt/slides/slideN.xml
    const slidePath = relPath.replace("/_rels/", "/").replace(/\.rels$/, "")
    const doc = parseXml(files, relPath)
    if (!doc) continue
    const relationships = doc.getElementsByTagName("Relationship")
    // Per-slide counter used only for naming exported files.
    let imageIndex = 0

    for (let i = 0; i < relationships.length; i++) {
      const rel = relationships[i]
      const target = rel.getAttribute("Target")
      if (!target) continue
      const normalized = normalizeZipTarget(slidePath, target)
      // Only media relationships matter here; skip layouts, notes, etc.
      if (!normalized.startsWith("ppt/media/")) continue
      const media = files[normalized]
      if (!media) continue

      imageIndex += 1
      seenTargets.add(normalized)
      // e.g. slide-03-image-01.png
      const exportedName = `slide-${slideNumber.padStart(2, "0")}-image-${String(imageIndex).padStart(2, "0")}${extname(normalized)}`
      const outputPath = join(cacheDir, "images", exportedName)
      writeCachedBuffer(outputPath, media)

      images.push({
        path: materialPath(cacheDir, workspaceDir, "images", exportedName),
        source_ref: normalized,
        page_or_slide: `slide-${slideNumber.padStart(2, "0")}`,
      })
    }
  }

  // Second pass: media present in the archive but not referenced from
  // any slide rels (e.g. referenced only by masters/layouts).
  const remainingMedia = Object.keys(files)
    .filter((file) => file.startsWith("ppt/media/") && !seenTargets.has(file))
    .sort()

  for (const mediaPath of remainingMedia) {
    const exportedName = `unmapped-${basename(mediaPath)}`
    const outputPath = join(cacheDir, "images", exportedName)
    writeCachedBuffer(outputPath, files[mediaPath])

    images.push({
      path: materialPath(cacheDir, workspaceDir, "images", exportedName),
      source_ref: mediaPath,
      note: "No slide-level relationship found",
    })
  }

  return images
}
|
|
159
|
+
|
|
160
|
+
function extractDocxImages(files: Record<string, Uint8Array>, cacheDir: string, workspaceDir: string): DocumentMaterial[] {
|
|
161
|
+
return Object.keys(files)
|
|
162
|
+
.filter((file) => file.startsWith("word/media/"))
|
|
163
|
+
.sort()
|
|
164
|
+
.map((mediaPath, index) => {
|
|
165
|
+
const exportedName = `document-image-${String(index + 1).padStart(2, "0")}${extname(mediaPath)}`
|
|
166
|
+
const outputPath = join(cacheDir, "images", exportedName)
|
|
167
|
+
writeCachedBuffer(outputPath, files[mediaPath])
|
|
168
|
+
|
|
169
|
+
return {
|
|
170
|
+
path: materialPath(cacheDir, workspaceDir, "images", exportedName),
|
|
171
|
+
source_ref: mediaPath,
|
|
172
|
+
note: "Document-wide association",
|
|
173
|
+
}
|
|
174
|
+
})
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
/**
 * Export every embedded image from a .xlsx archive into
 * <cacheDir>/images, mapping each image back to its worksheet.
 *
 * Two-phase resolution, because xlsx images are referenced indirectly:
 *   1. drawing rels (xl/drawings/_rels/drawingN.xml.rels) map r:embed
 *      ids to xl/media/ entries; the drawing XML's <a:blip> elements
 *      say which of those ids are actually used → drawingToImages.
 *   2. worksheet rels (xl/worksheets/_rels/sheetN.xml.rels) point to
 *      drawings, joining sheets to their images.
 * Media never reached through any sheet→drawing chain is exported in a
 * final pass with an "unmapped-" prefix so nothing is silently lost.
 */
function extractXlsxImages(files: Record<string, Uint8Array>, cacheDir: string, workspaceDir: string): DocumentMaterial[] {
  // drawing part path → ordered list of xl/media/ entries it displays.
  const drawingToImages = new Map<string, string[]>()
  const drawingRelFiles = Object.keys(files)
    .filter((file) => /^xl\/drawings\/_rels\/drawing\d+\.xml\.rels$/.test(file))
    .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }))

  for (const relPath of drawingRelFiles) {
    const relDoc = parseXml(files, relPath)
    if (!relDoc) continue
    // xl/drawings/_rels/drawingN.xml.rels → xl/drawings/drawingN.xml
    const drawingPath = relPath.replace("/_rels/", "/").replace(/\.rels$/, "")
    const drawingDoc = parseXml(files, drawingPath)
    if (!drawingDoc) continue

    // Relationship Id → resolved media entry, for this drawing only.
    const targetByRid = new Map<string, string>()
    const relationships = relDoc.getElementsByTagName("Relationship")
    for (let i = 0; i < relationships.length; i++) {
      const rel = relationships[i]
      const id = rel.getAttribute("Id")
      const target = rel.getAttribute("Target")
      if (!id || !target) continue
      const normalized = normalizeZipTarget(drawingPath, target)
      if (normalized.startsWith("xl/media/")) {
        targetByRid.set(id, normalized)
      }
    }

    // <a:blip r:embed="rIdX"> marks the media actually placed in the
    // drawing; fall back to the un-prefixed attribute for lenient parsers.
    const blips = drawingDoc.getElementsByTagName("a:blip")
    const mediaPaths: string[] = []
    for (let i = 0; i < blips.length; i++) {
      const rid = blips[i].getAttribute("r:embed") || blips[i].getAttribute("embed")
      if (!rid) continue
      const mediaPath = targetByRid.get(rid)
      if (mediaPath) mediaPaths.push(mediaPath)
    }

    if (mediaPaths.length > 0) {
      drawingToImages.set(drawingPath, mediaPaths)
    }
  }

  const images: DocumentMaterial[] = []
  // Media entries exported through a sheet→drawing relationship.
  const exportedMedia = new Set<string>()
  const sheetRelFiles = Object.keys(files)
    .filter((file) => /^xl\/worksheets\/_rels\/sheet\d+\.xml\.rels$/.test(file))
    .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }))

  for (const relPath of sheetRelFiles) {
    const sheetMatch = relPath.match(/sheet(\d+)\.xml\.rels$/)
    const sheetNumber = sheetMatch?.[1] ?? "0"
    // Targets are relative to the sheet part, not the _rels folder.
    const sheetPath = relPath.replace("/_rels/", "/").replace(/\.rels$/, "")
    const relDoc = parseXml(files, relPath)
    if (!relDoc) continue
    const relationships = relDoc.getElementsByTagName("Relationship")
    // Per-sheet counter used only for naming exported files.
    let imageIndex = 0

    for (let i = 0; i < relationships.length; i++) {
      const rel = relationships[i]
      const target = rel.getAttribute("Target")
      if (!target) continue
      const normalized = normalizeZipTarget(sheetPath, target)
      // Only targets that resolve to a known drawing part matter here.
      const mediaPaths = drawingToImages.get(normalized)
      if (!mediaPaths) continue

      for (const mediaPath of mediaPaths) {
        const media = files[mediaPath]
        if (!media) continue
        imageIndex += 1
        exportedMedia.add(mediaPath)
        // e.g. sheet-02-image-01.png
        const exportedName = `sheet-${sheetNumber.padStart(2, "0")}-image-${String(imageIndex).padStart(2, "0")}${extname(mediaPath)}`
        const outputPath = join(cacheDir, "images", exportedName)
        writeCachedBuffer(outputPath, media)

        images.push({
          path: materialPath(cacheDir, workspaceDir, "images", exportedName),
          source_ref: mediaPath,
          page_or_slide: `sheet-${sheetNumber.padStart(2, "0")}`,
        })
      }
    }
  }

  // Final pass: media in the archive not reachable from any worksheet.
  const unmapped = Object.keys(files)
    .filter((file) => file.startsWith("xl/media/") && !exportedMedia.has(file))
    .sort()

  for (const mediaPath of unmapped) {
    const exportedName = `unmapped-${basename(mediaPath)}`
    const outputPath = join(cacheDir, "images", exportedName)
    writeCachedBuffer(outputPath, files[mediaPath])

    images.push({
      path: materialPath(cacheDir, workspaceDir, "images", exportedName),
      source_ref: mediaPath,
      note: "No sheet-level relationship found",
    })
  }

  return images
}
|
|
276
|
+
|
|
277
|
+
function extractTables(type: SupportedType, textPath: string): DocumentMaterial[] {
|
|
278
|
+
if (type !== "xlsx") return []
|
|
279
|
+
return [{ path: textPath, source_ref: "workbook", note: "Sheet text and tables extracted to text file" }]
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
/**
 * Extract a supported Office file into a fingerprint-keyed cache under
 * <workspace>/.opencode/revela/doc-materials/<fingerprint>/ and return
 * the paths of the produced artifacts.
 *
 * Because the fingerprint includes mtime and size, an existing
 * manifest.json means the exact same file content was already
 * processed — the cached manifest is returned without re-extracting.
 *
 * NOTE(review): the cached manifest is JSON.parsed and trusted via an
 * `as` cast without validation; a hand-edited or corrupt manifest.json
 * will surface as a "failed" result from the catch in the caller.
 */
async function processOfficeFile(filePath: string, workspaceDir: string, type: SupportedType): Promise<DocumentMaterialsResult> {
  const relativeSource = workspaceRelative(filePath, workspaceDir)
  const fingerprint = buildFingerprint(filePath)
  const cacheDir = join(workspaceDir, ".opencode", "revela", "doc-materials", fingerprint)
  const manifestPath = join(cacheDir, "manifest.json")

  // Cache hit: replay the stored manifest as a fresh result.
  if (existsSync(manifestPath)) {
    const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")) as CachedManifest
    return {
      status: "processed",
      source: manifest.source,
      type: manifest.type,
      cache_dir: manifest.cache_dir,
      manifest_path: manifest.manifest_path,
      text_path: manifest.text_path,
      images: manifest.images,
      tables: manifest.tables,
    }
  }

  // Cache miss: materialize the layout up front so extractors can write.
  mkdirSync(join(cacheDir, "images"), { recursive: true })
  mkdirSync(join(cacheDir, "tables"), { recursive: true })

  const buf = readFileSync(filePath)
  // All three supported formats are OPC zip packages.
  const files = unzipSync(new Uint8Array(buf))

  // Plain-text extraction is delegated to the per-format read-hook extractors.
  const text = type === "pptx"
    ? await extractPptx(buf)
    : type === "docx"
      ? await extractDocx(buf)
      : await extractXlsx(buf)

  const textPath = join(cacheDir, "text.txt")
  // Header line records provenance inside the cached text file itself.
  writeFileSync(textPath, `[Extracted from: ${basename(filePath)}]\n\n${text}`, "utf-8")

  const images = type === "pptx"
    ? extractPptxImages(files, cacheDir, workspaceDir)
    : type === "docx"
      ? extractDocxImages(files, cacheDir, workspaceDir)
      : extractXlsxImages(files, cacheDir, workspaceDir)

  // All paths in the result are workspace-relative for portability.
  const result: DocumentMaterialsResult = {
    status: "processed",
    source: relativeSource,
    type,
    cache_dir: workspaceRelative(cacheDir, workspaceDir),
    manifest_path: workspaceRelative(manifestPath, workspaceDir),
    text_path: workspaceRelative(textPath, workspaceDir),
    images,
    tables: extractTables(type, workspaceRelative(textPath, workspaceDir)),
  }

  // Persist the manifest last so a crash mid-extraction never leaves a
  // manifest pointing at missing artifacts. Non-null assertions are
  // safe: the fields were just assigned above.
  const manifest: CachedManifest = {
    source: result.source,
    type,
    fingerprint,
    cache_dir: result.cache_dir!,
    manifest_path: result.manifest_path!,
    text_path: result.text_path!,
    images: result.images ?? [],
    tables: result.tables ?? [],
  }

  writeFileSync(manifestPath, JSON.stringify(manifest, null, 2), "utf-8")
  return result
}
|
|
348
|
+
|
|
349
|
+
export async function extractDocumentMaterials(filePath: string, workspaceDir: string): Promise<DocumentMaterialsResult> {
|
|
350
|
+
try {
|
|
351
|
+
const resolvedFile = ensureWorkspacePath(filePath, workspaceDir)
|
|
352
|
+
const relativeSource = workspaceRelative(resolvedFile, workspaceDir)
|
|
353
|
+
const type = SUPPORTED_EXTENSIONS[extname(resolvedFile).toLowerCase()]
|
|
354
|
+
|
|
355
|
+
if (!type) {
|
|
356
|
+
return {
|
|
357
|
+
status: "skipped",
|
|
358
|
+
source: relativeSource,
|
|
359
|
+
type: "other",
|
|
360
|
+
reason: "unsupported_file_type",
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
return await processOfficeFile(resolvedFile, workspaceDir, type)
|
|
365
|
+
} catch (e) {
|
|
366
|
+
return {
|
|
367
|
+
status: "failed",
|
|
368
|
+
source: filePath,
|
|
369
|
+
type: "other",
|
|
370
|
+
reason: e instanceof Error ? e.message : String(e),
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
}
|
package/package.json
CHANGED
package/plugin.ts
CHANGED
|
@@ -50,6 +50,7 @@ import designsTool from "./tools/designs"
|
|
|
50
50
|
import domainsTool from "./tools/domains"
|
|
51
51
|
import researchSaveTool from "./tools/research-save"
|
|
52
52
|
import workspaceScanTool from "./tools/workspace-scan"
|
|
53
|
+
import extractDocumentMaterialsTool from "./tools/extract-document-materials"
|
|
53
54
|
import qaTool from "./tools/qa"
|
|
54
55
|
import { RESEARCH_PROMPT, RESEARCH_AGENT_SIGNATURE } from "./lib/agents/research-prompt"
|
|
55
56
|
import { runQA, formatReport } from "./lib/qa"
|
|
@@ -225,12 +226,13 @@ const server: Plugin = (async (pluginCtx) => {
|
|
|
225
226
|
throw new Error("__REVELA_UNKNOWN_HANDLED__")
|
|
226
227
|
},
|
|
227
228
|
|
|
228
|
-
// ── LLM tools: designs, domains, research, qa
|
|
229
|
+
// ── LLM tools: designs, domains, research, document materials, qa ─────
|
|
229
230
|
tool: {
|
|
230
231
|
"revela-designs": designsTool,
|
|
231
232
|
"revela-domains": domainsTool,
|
|
232
233
|
"revela-research-save": researchSaveTool,
|
|
233
234
|
"revela-workspace-scan": workspaceScanTool,
|
|
235
|
+
"revela-extract-document-materials": extractDocumentMaterialsTool,
|
|
234
236
|
"revela-qa": qaTool,
|
|
235
237
|
},
|
|
236
238
|
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { tool } from "@opencode-ai/plugin"
|
|
2
|
+
import { extractDocumentMaterials } from "../lib/document-materials/extract"
|
|
3
|
+
|
|
4
|
+
// LLM-facing tool wrapper: validates the single `file` argument via the
// plugin's schema helper, delegates to extractDocumentMaterials, and
// returns the structured result as pretty-printed JSON text (tools must
// return strings).
export default tool({
  description:
    "Extract research materials from a workspace document into a workspace-local cache. " +
    "Supports pptx, docx, and xlsx. Produces a manifest plus extracted text, embedded images, and available slide/sheet mappings. " +
    "Unsupported file types are skipped instead of failing.",
  args: {
    file: tool.schema
      .string()
      .describe("Document path relative to workspace root. Supports pptx, docx, and xlsx; other file types are skipped."),
  },
  async execute(args, context) {
    // Fall back to the process CWD when the plugin context supplies no
    // workspace directory.
    const workspaceDir = context.directory ?? process.cwd()
    return JSON.stringify(await extractDocumentMaterials(args.file, workspaceDir), null, 2)
  },
})
|