@cyber-dash-tech/revela 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
**English** | [中文](README.zh-CN.md)
|
|
4
4
|
|
|
5
|
-
[](https://www.npmjs.com/package/@cyber-dash-tech/revela) [](LICENSE) [](https://www.npmjs.com/package/@cyber-dash-tech/revela) [](LICENSE) [](tests/) [](https://opencode.ai) [](https://bun.sh)
|
|
6
6
|
|
|
7
7
|
<p align="center">
|
|
8
8
|
<img src="assets/img/logo.png" alt="Revela" width="800" />
|
|
@@ -19,7 +19,7 @@ Enable it for the current session, assign a presentation task, and the agent can
|
|
|
19
19
|
|
|
20
20
|
- injects a presentation-specific system prompt into your current agent with `/revela enable`
|
|
21
21
|
- builds that prompt from 3 layers: core skill, active domain, active design
|
|
22
|
-
- supports workspace document discovery
|
|
22
|
+
- supports workspace document discovery, transparent text extraction for `.pdf`, `.docx`, `.pptx`, and `.xlsx`, and cached embedded-material extraction for those formats
|
|
23
23
|
- runs automatic layout QA whenever the agent writes `decks/*.html`
|
|
24
24
|
- exports finished decks to PDF and editable PPTX
|
|
25
25
|
- switches designs and domains locally with zero LLM cost
|
package/README.zh-CN.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
[English](README.md) | **中文**
|
|
4
4
|
|
|
5
|
-
[](https://www.npmjs.com/package/@cyber-dash-tech/revela) [](LICENSE) [](https://www.npmjs.com/package/@cyber-dash-tech/revela) [](LICENSE) [](tests/) [](https://opencode.ai) [](https://bun.sh)
|
|
6
6
|
|
|
7
7
|
<p align="center">
|
|
8
8
|
<img src="assets/img/logo.png" alt="Revela" width="800" />
|
|
@@ -19,7 +19,7 @@ Revela 是一个 [OpenCode](https://opencode.ai) 插件,可以把你当前使
|
|
|
19
19
|
|
|
20
20
|
- 通过 `/revela enable` 向当前 agent 注入演示文稿专用 system prompt
|
|
21
21
|
- prompt 由 3 层组成:核心 skill、当前 domain、当前 design
|
|
22
|
-
- 支持工作区文档扫描,以及 `.pdf`、`.docx`、`.pptx`、`.xlsx`
|
|
22
|
+
- 支持工作区文档扫描,以及 `.pdf`、`.docx`、`.pptx`、`.xlsx` 的透明文本提取和嵌入素材缓存提取
|
|
23
23
|
- agent 每次写入 `decks/*.html` 时自动执行布局 QA
|
|
24
24
|
- 支持导出成 PDF 和可编辑 PPTX
|
|
25
25
|
- design 和 domain 的切换都在本地完成,不消耗 LLM token
|
|
@@ -40,7 +40,7 @@ files in the workspace (PDF, Word, Excel, PowerPoint, CSV, text).
|
|
|
40
40
|
Then select the files relevant to your research axis.
|
|
41
41
|
|
|
42
42
|
For every selected file, call **\`revela-extract-document-materials\`** first.
|
|
43
|
-
- \`pptx\`, \`docx\`, and \`xlsx\` will produce a manifest plus extracted text and any available embedded materials
|
|
43
|
+
- \`pdf\`, \`pptx\`, \`docx\`, and \`xlsx\` will produce a manifest plus extracted text and any available embedded materials
|
|
44
44
|
- unsupported file types will be skipped automatically
|
|
45
45
|
|
|
46
46
|
After that, use the \`read\` tool on:
|
|
@@ -3,7 +3,10 @@ import { existsSync, mkdirSync, readFileSync, realpathSync, statSync, writeFileS
|
|
|
3
3
|
import { basename, dirname, extname, isAbsolute, join, relative, resolve } from "path"
|
|
4
4
|
import { DOMParser } from "@xmldom/xmldom"
|
|
5
5
|
import { unzipSync } from "fflate"
|
|
6
|
+
import { Jimp } from "jimp"
|
|
7
|
+
import { extractImages, getDocumentProxy } from "unpdf"
|
|
6
8
|
import { extractDocx } from "../read-hooks/extractors/docx"
|
|
9
|
+
import { extractPdfText } from "../read-hooks/extractors/pdf"
|
|
7
10
|
import { extractPptx } from "../read-hooks/extractors/pptx"
|
|
8
11
|
import { extractXlsx } from "../read-hooks/extractors/xlsx"
|
|
9
12
|
|
|
@@ -48,7 +51,7 @@ export type PptxSlide = {
|
|
|
48
51
|
export type DocumentMaterialsResult = {
|
|
49
52
|
status: "processed" | "skipped" | "failed"
|
|
50
53
|
source: string
|
|
51
|
-
type: "pptx" | "docx" | "xlsx" | "other"
|
|
54
|
+
type: "pptx" | "docx" | "xlsx" | "pdf" | "other"
|
|
52
55
|
cache_dir?: string
|
|
53
56
|
manifest_path?: string
|
|
54
57
|
text_path?: string
|
|
@@ -83,8 +86,11 @@ const SUPPORTED_EXTENSIONS: Record<string, SupportedType> = {
|
|
|
83
86
|
".pptx": "pptx",
|
|
84
87
|
".docx": "docx",
|
|
85
88
|
".xlsx": "xlsx",
|
|
89
|
+
".pdf": "pdf",
|
|
86
90
|
}
|
|
87
91
|
|
|
92
|
+
type PdfImageData = Awaited<ReturnType<typeof extractImages>>[number]
|
|
93
|
+
|
|
88
94
|
function normalizeZipTarget(basePath: string, target: string): string {
|
|
89
95
|
const segments = join(dirname(basePath), target).split("/")
|
|
90
96
|
const normalized: string[] = []
|
|
@@ -151,6 +157,47 @@ function materialPath(cacheDir: string, workspaceDir: string, ...segments: strin
|
|
|
151
157
|
return workspaceRelative(join(cacheDir, ...segments), workspaceDir)
|
|
152
158
|
}
|
|
153
159
|
|
|
160
|
+
function toRgbaBuffer(image: PdfImageData): Buffer {
|
|
161
|
+
const pixelCount = image.width * image.height
|
|
162
|
+
|
|
163
|
+
if (image.channels === 4) {
|
|
164
|
+
return Buffer.from(image.data.buffer, image.data.byteOffset, image.data.byteLength)
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
const rgba = Buffer.alloc(pixelCount * 4)
|
|
168
|
+
|
|
169
|
+
for (let i = 0; i < pixelCount; i++) {
|
|
170
|
+
const dest = i * 4
|
|
171
|
+
if (image.channels === 3) {
|
|
172
|
+
const src = i * 3
|
|
173
|
+
rgba[dest] = image.data[src]!
|
|
174
|
+
rgba[dest + 1] = image.data[src + 1]!
|
|
175
|
+
rgba[dest + 2] = image.data[src + 2]!
|
|
176
|
+
rgba[dest + 3] = 255
|
|
177
|
+
continue
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
const value = image.data[i]!
|
|
181
|
+
rgba[dest] = value
|
|
182
|
+
rgba[dest + 1] = value
|
|
183
|
+
rgba[dest + 2] = value
|
|
184
|
+
rgba[dest + 3] = 255
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
return rgba
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
async function encodePdfImageAsPng(image: PdfImageData): Promise<Buffer> {
|
|
191
|
+
const bitmap = {
|
|
192
|
+
data: toRgbaBuffer(image),
|
|
193
|
+
width: image.width,
|
|
194
|
+
height: image.height,
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
const png = Jimp.fromBitmap(bitmap)
|
|
198
|
+
return await png.getBuffer("image/png")
|
|
199
|
+
}
|
|
200
|
+
|
|
154
201
|
function parseXml(files: Record<string, Uint8Array>, path: string): any | null {
|
|
155
202
|
const file = files[path]
|
|
156
203
|
if (!file) return null
|
|
@@ -589,6 +636,94 @@ function extractTables(type: SupportedType, textPath: string): DocumentMaterial[
|
|
|
589
636
|
return [{ path: textPath, source_ref: "workbook", note: "Sheet text and tables extracted to text file" }]
|
|
590
637
|
}
|
|
591
638
|
|
|
639
|
+
async function extractPdfImages(buf: Buffer, cacheDir: string, workspaceDir: string): Promise<DocumentMaterial[]> {
|
|
640
|
+
const pdf = await getDocumentProxy(new Uint8Array(buf))
|
|
641
|
+
const images: DocumentMaterial[] = []
|
|
642
|
+
|
|
643
|
+
for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
|
|
644
|
+
const extracted = await extractImages(pdf, pageNumber)
|
|
645
|
+
|
|
646
|
+
for (let index = 0; index < extracted.length; index++) {
|
|
647
|
+
const image = extracted[index]!
|
|
648
|
+
const exportedName = `page-${String(pageNumber).padStart(2, "0")}-image-${String(index + 1).padStart(2, "0")}.png`
|
|
649
|
+
const outputPath = join(cacheDir, "images", exportedName)
|
|
650
|
+
const png = await encodePdfImageAsPng(image)
|
|
651
|
+
writeFileSync(outputPath, new Uint8Array(png))
|
|
652
|
+
|
|
653
|
+
images.push({
|
|
654
|
+
path: materialPath(cacheDir, workspaceDir, "images", exportedName),
|
|
655
|
+
source_ref: `pdf/page-${String(pageNumber).padStart(2, "0")}/${image.key}`,
|
|
656
|
+
page_or_slide: `page-${String(pageNumber).padStart(2, "0")}`,
|
|
657
|
+
note: `Embedded PDF image (${image.width}x${image.height}, ${image.channels} channel${image.channels === 1 ? "" : "s"})`,
|
|
658
|
+
})
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
return images
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
/**
 * Extract text and embedded images from a PDF into the workspace-local
 * materials cache, returning a DocumentMaterialsResult describing them.
 *
 * The cache directory is keyed by a fingerprint of the file; when a
 * manifest.json already exists there, the cached manifest is replayed
 * verbatim instead of re-extracting.
 */
async function processPdfFile(filePath: string, workspaceDir: string): Promise<DocumentMaterialsResult> {
  const relativeSource = workspaceRelative(filePath, workspaceDir)
  const fingerprint = buildFingerprint(filePath)
  const cacheDir = join(workspaceDir, ".opencode", "revela", "doc-materials", fingerprint)
  const manifestPath = join(cacheDir, "manifest.json")

  // Cache hit: rebuild the result straight from the stored manifest.
  // NOTE(review): the manifest file is trusted as-is (cast, not validated) —
  // a hand-edited or truncated manifest.json would surface here. Confirm
  // whether that risk is acceptable for a workspace-local cache.
  if (existsSync(manifestPath)) {
    const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")) as CachedManifest
    return {
      status: "processed",
      source: manifest.source,
      type: manifest.type,
      cache_dir: manifest.cache_dir,
      manifest_path: manifest.manifest_path,
      text_path: manifest.text_path,
      images: manifest.images,
      skipped_assets: manifest.skipped_assets,
      slides: manifest.slides,
      tables: manifest.tables,
    }
  }

  // Cache miss: prepare the layout. "tables" is created for parity with the
  // office-file pipeline even though the PDF path never fills it.
  mkdirSync(join(cacheDir, "images"), { recursive: true })
  mkdirSync(join(cacheDir, "tables"), { recursive: true })

  // Full text, prefixed with a provenance header naming the source file.
  const buf = readFileSync(filePath)
  const text = await extractPdfText(buf)
  const textPath = join(cacheDir, "text.txt")
  writeFileSync(textPath, `[Extracted from: ${basename(filePath)}]\n\n${text}`, "utf-8")

  // Embedded images are written as PNGs under <cacheDir>/images.
  const images = await extractPdfImages(buf, cacheDir, workspaceDir)

  // All paths in the result are workspace-relative; slides/tables stay empty
  // because PDFs carry neither slide nor sheet structure.
  const result: DocumentMaterialsResult = {
    status: "processed",
    source: relativeSource,
    type: "pdf",
    cache_dir: workspaceRelative(cacheDir, workspaceDir),
    manifest_path: workspaceRelative(manifestPath, workspaceDir),
    text_path: workspaceRelative(textPath, workspaceDir),
    images,
    skipped_assets: [],
    slides: [],
    tables: [],
  }

  // Persist the manifest so subsequent calls take the cache-hit branch above.
  const manifest: CachedManifest = {
    source: result.source,
    type: "pdf",
    fingerprint,
    cache_dir: result.cache_dir!,
    manifest_path: result.manifest_path!,
    text_path: result.text_path!,
    images: result.images ?? [],
    skipped_assets: [],
    slides: [],
    tables: [],
  }

  writeFileSync(manifestPath, JSON.stringify(manifest, null, 2), "utf-8")
  return result
}
|
|
726
|
+
|
|
592
727
|
async function processOfficeFile(filePath: string, workspaceDir: string, type: SupportedType): Promise<DocumentMaterialsResult> {
|
|
593
728
|
const relativeSource = workspaceRelative(filePath, workspaceDir)
|
|
594
729
|
const fingerprint = buildFingerprint(filePath)
|
|
@@ -683,7 +818,9 @@ export async function extractDocumentMaterials(filePath: string, workspaceDir: s
|
|
|
683
818
|
}
|
|
684
819
|
}
|
|
685
820
|
|
|
686
|
-
return
|
|
821
|
+
return type === "pdf"
|
|
822
|
+
? await processPdfFile(resolvedFile, workspaceDir)
|
|
823
|
+
: await processOfficeFile(resolvedFile, workspaceDir, type)
|
|
687
824
|
} catch (e) {
|
|
688
825
|
return {
|
|
689
826
|
status: "failed",
|
package/package.json
CHANGED
|
@@ -4,12 +4,12 @@ import { extractDocumentMaterials } from "../lib/document-materials/extract"
|
|
|
4
4
|
export default tool({
|
|
5
5
|
description:
|
|
6
6
|
"Extract reusable materials from a workspace document into a workspace-local cache. " +
|
|
7
|
-
"Supports pptx, docx, and xlsx. Produces a manifest plus extracted text, embedded images, and available slide/sheet mappings. " +
|
|
7
|
+
"Supports pdf, pptx, docx, and xlsx. Produces a manifest plus extracted text, embedded images, and available page/slide/sheet mappings. " +
|
|
8
8
|
"Unsupported file types are skipped instead of failing.",
|
|
9
9
|
args: {
|
|
10
10
|
file: tool.schema
|
|
11
11
|
.string()
|
|
12
|
-
.describe("Document path relative to workspace root. Supports pptx, docx, and xlsx; other file types are skipped."),
|
|
12
|
+
.describe("Document path relative to workspace root. Supports pdf, pptx, docx, and xlsx; other file types are skipped."),
|
|
13
13
|
},
|
|
14
14
|
async execute(args, context) {
|
|
15
15
|
const workspaceDir = context.directory ?? process.cwd()
|