@cyber-dash-tech/revela 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49,6 +49,12 @@ by the brief. Use \`workspace.sourceMaterials\`, \`workspace.deckMemory\`, and
49
49
  \`workspace.openQuestions\` as workspace context. Treat sourceMaterials as a
50
50
  candidate index, not as proof by itself.
51
51
 
52
+ Before extracting or deeply reading a workspace document, check whether its
53
+ \`workspace.sourceMaterials\` record has the same fingerprint and valid
54
+ \`extraction.manifestPath\`, \`extraction.textPath\`, and \`extraction.cacheDir\`.
55
+ When those paths are present, reuse them instead of re-extracting or rereading
56
+ the original document.
57
+
52
58
  Do not write or patch \`DECKS.json\`. You only write research findings through
53
59
  \`revela-research-save\`; the primary agent decides which stable deck state to preserve.
54
60
 
@@ -63,7 +69,8 @@ Use **\`revela-workspace-scan\`** as a lightweight freshness check when needed:
63
69
 
64
70
  Do not deep-read the whole workspace. Select only files relevant to your axis.
65
71
 
66
- For every selected file, call **\`revela-extract-document-materials\`** first.
72
+ For every selected PDF/PPTX/DOCX/XLSX without valid reusable extraction paths,
73
+ call **\`revela-extract-document-materials\`** first.
67
74
  - \`pdf\`, \`pptx\`, \`docx\`, and \`xlsx\` will produce a manifest plus extracted text and any available embedded materials
68
75
  - unsupported file types will be skipped automatically
69
76
 
@@ -42,13 +42,14 @@ Workflow:
42
42
  - \`decks/**/*.pdf\`
43
43
  - \`slides/**/*.pdf\`
44
44
  Run these searches only inside the current workspace root. These are generated/output decks, not necessarily source materials. If \`decks/\` contains exactly one HTML file, treat it as the current deck artifact. If \`decks/\` contains multiple HTML files, stop and ask the user to move extra decks to separate workspaces before adopting one.
45
- 3. Select the files that look most relevant for future slide decks. Prioritize source decks, PDFs, Word docs, spreadsheets, CSVs, Markdown, text notes, and relevant existing generated decks.
46
- 4. For selected PDF/PPTX/DOCX/XLSX files, call \`revela-extract-document-materials\` before deciding what to summarize.
47
- 5. Read only the materials needed to form a conservative workspace memory. Do not exhaustively read every file if the workspace is large.
48
- 6. Call \`revela-decks\` with action \`init\` to create ${DECKS_STATE_FILE} if needed.
49
- 7. If this conversation or the workspace contains a concrete deck task or an existing deck artifact, call \`revela-decks\` with action \`upsertDeck\` and later \`upsertSlides\` for explicit deck information. Do not pass or ask for a deck key; the tool uses the workspace folder name internally. Do not mark readiness ready during init.
50
- 8. When adopting an existing HTML deck, analyze the artifact and create one conservative \`SlideSpec\` per identifiable slide/page. The \`SlideSpec[]\` itself is the worklist; do not create a separate target slide count.
51
- 9. Report what was initialized or updated and list any open questions.
45
+ 3. Register or refresh source material records by passing the scan result's \`sourceMaterial\` objects to \`revela-decks\` action \`init\`. Preserve unchanged existing records; the tool will upsert by path and fingerprint.
46
+ 4. Select the files that look most relevant for future slide decks. Prioritize source decks, PDFs, Word docs, spreadsheets, CSVs, Markdown, text notes, and relevant existing generated decks.
47
+ 5. Do not automatically extract every PDF/PPTX/DOCX/XLSX during init. Call \`revela-extract-document-materials\` only for selected files that are clearly needed to form conservative workspace memory, or when the user explicitly asked to analyze the material now.
48
+ 6. Before extracting or deeply reading a selected document, check \`DECKS.json.workspace.sourceMaterials\`. If the same path has the same fingerprint and valid extraction paths, reuse those paths instead of repeating extraction.
49
+ 7. Read only the materials needed to form a conservative workspace memory. Do not exhaustively read every file if the workspace is large.
50
+ 8. If this conversation or the workspace contains a concrete deck task or an existing deck artifact, call \`revela-decks\` with action \`upsertDeck\` and later \`upsertSlides\` for explicit deck information. Do not pass or ask for a deck key; the tool uses the workspace folder name internally. Do not mark readiness ready during init.
51
+ 9. When adopting an existing HTML deck, analyze the artifact and create one conservative \`SlideSpec\` per identifiable slide/page. The \`SlideSpec[]\` itself is the worklist; do not create a separate target slide count.
52
+ 10. Report what was initialized or updated and list any open questions.
52
53
 
53
54
  Memory rules:
54
55
  - Only write facts supported by workspace files into ${DECKS_STATE_FILE} workspace state, source materials, deck memory, and open questions.
@@ -26,9 +26,20 @@ export interface DecksState {
26
26
/** Reusable extraction output paths recorded for a source material, if any. */
interface SourceMaterialExtraction {
  manifestPath?: string
  textPath?: string
  cacheDir?: string
}

/** One record in the workspace source material index. */
export interface SourceMaterial {
  /** Workspace-relative source material path. */
  path: string
  /** File type such as pdf, pptx, docx, xlsx, csv, md, or txt. */
  type?: string
  /** File size in bytes. */
  size?: number
  /** File fingerprint for the current version of the file. */
  fingerprint?: string
  /** How far this source has been processed. */
  status?: "discovered" | "extracted" | "summarized" | "researched"
  /** Where earlier extraction output can be found for reuse. */
  extraction?: SourceMaterialExtraction
  /** Conservative summary of the source, when one is known. */
  summary?: string
  /** Short note on what this material is best used for. */
  bestUsedFor?: string
  /** ISO timestamp when the material was first seen. */
  firstSeen?: string
  /** ISO timestamp when the material was last checked. */
  lastChecked?: string
  /** ISO timestamp when the material was last extracted. */
  lastExtracted?: string
  /** ISO timestamp when the material was last summarized. */
  lastSummarized?: string
}
33
44
 
34
45
  export interface DeckMemoryEntry {
@@ -1,5 +1,4 @@
1
- import { createHash } from "crypto"
2
- import { existsSync, mkdirSync, readFileSync, realpathSync, statSync, writeFileSync } from "fs"
1
+ import { existsSync, mkdirSync, readFileSync, realpathSync, writeFileSync } from "fs"
3
2
  import { basename, dirname, extname, isAbsolute, join, relative, resolve } from "path"
4
3
  import { DOMParser } from "@xmldom/xmldom"
5
4
  import { unzipSync } from "fflate"
@@ -9,6 +8,8 @@ import { extractDocx } from "../read-hooks/extractors/docx"
9
8
  import { extractPdfText } from "../read-hooks/extractors/pdf"
10
9
  import { extractPptx } from "../read-hooks/extractors/pptx"
11
10
  import { extractXlsx } from "../read-hooks/extractors/xlsx"
11
+ import { hasDecksState, readDecksState, writeDecksState } from "../decks-state"
12
+ import { computeSourceFingerprint, sourceMaterialMetadata, upsertSourceMaterial } from "../source-materials"
12
13
 
13
14
  export type DocumentMaterial = {
14
15
  path: string
@@ -50,6 +51,7 @@ export type PptxSlide = {
50
51
 
51
52
  export type DocumentMaterialsResult = {
52
53
  status: "processed" | "skipped" | "failed"
54
+ cache_status?: "hit" | "miss"
53
55
  source: string
54
56
  type: "pptx" | "docx" | "xlsx" | "pdf" | "other"
55
57
  cache_dir?: string
@@ -142,10 +144,7 @@ function workspaceRelative(filePath: string, workspaceDir: string): string {
142
144
  }
143
145
 
144
146
/**
 * Returns the fingerprint of `filePath` by delegating to the shared
 * `computeSourceFingerprint` helper.
 */
function buildFingerprint(filePath: string): string {
  const fingerprint = computeSourceFingerprint(filePath)
  return fingerprint
}
150
149
 
151
150
  function writeCachedBuffer(targetPath: string, buf: Uint8Array): void {
@@ -157,6 +156,33 @@ function materialPath(cacheDir: string, workspaceDir: string, ...segments: strin
157
156
  return workspaceRelative(join(cacheDir, ...segments), workspaceDir)
158
157
  }
159
158
 
159
/**
 * Mirrors the outcome of one extraction call into the workspace decks state.
 *
 * Does nothing when the workspace has no decks state. Otherwise the file's
 * metadata is upserted into the source material list and the state is written
 * back. A result only counts as "extracted" when it was processed and carries
 * a manifest path, a text path and a cache directory; anything else is
 * recorded as "discovered".
 *
 * @param workspaceDir Workspace root that owns the decks state.
 * @param filePath File the extraction was run for.
 * @param result Extraction result to record.
 */
function updateDecksSourceMaterialIndex(
  workspaceDir: string,
  filePath: string,
  result: DocumentMaterialsResult,
): void {
  if (!hasDecksState(workspaceDir)) return

  const base = sourceMaterialMetadata(filePath, workspaceDir)
  const state = readDecksState(workspaceDir)
  // The list may be absent in state written before source materials were
  // indexed; upsertSourceMaterial tolerates that, so the lookup must as well.
  const existing = (state.workspace.sourceMaterials ?? []).find((entry) => entry.path === base.path)

  const { manifest_path: manifestPath, text_path: textPath, cache_dir: cacheDir } = result
  const extracted = result.status === "processed" && Boolean(manifestPath && textPath && cacheDir)

  if (!extracted) {
    // Deliberately omit `extraction` / `lastExtracted` instead of passing them
    // as undefined: explicit undefined keys would be spread over the stored
    // record and erase values that are still valid for an unchanged file.
    upsertSourceMaterial(state, { ...base, status: "discovered" }, "discovered")
    writeDecksState(workspaceDir, state)
    return
  }

  const now = new Date().toISOString()
  // A cache hit means no new extraction ran, so keep the recorded time when
  // there is one.
  const lastExtracted = result.cache_status === "hit" ? existing?.lastExtracted ?? now : now
  upsertSourceMaterial(state, {
    ...base,
    status: "extracted",
    extraction: { manifestPath, textPath, cacheDir },
    lastExtracted,
  }, "extracted")
  writeDecksState(workspaceDir, state)
}
185
+
160
186
  function toRgbaBuffer(image: PdfImageData): Buffer {
161
187
  const pixelCount = image.width * image.height
162
188
 
@@ -672,6 +698,7 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
672
698
  const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")) as CachedManifest
673
699
  return {
674
700
  status: "processed",
701
+ cache_status: "hit",
675
702
  source: manifest.source,
676
703
  type: manifest.type,
677
704
  cache_dir: manifest.cache_dir,
@@ -696,6 +723,7 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
696
723
 
697
724
  const result: DocumentMaterialsResult = {
698
725
  status: "processed",
726
+ cache_status: "miss",
699
727
  source: relativeSource,
700
728
  type: "pdf",
701
729
  cache_dir: workspaceRelative(cacheDir, workspaceDir),
@@ -734,6 +762,7 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
734
762
  const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")) as CachedManifest
735
763
  return {
736
764
  status: "processed",
765
+ cache_status: "hit",
737
766
  source: manifest.source,
738
767
  type: manifest.type,
739
768
  cache_dir: manifest.cache_dir,
@@ -775,6 +804,7 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
775
804
 
776
805
  const result: DocumentMaterialsResult = {
777
806
  status: "processed",
807
+ cache_status: "miss",
778
808
  source: relativeSource,
779
809
  type,
780
810
  cache_dir: workspaceRelative(cacheDir, workspaceDir),
@@ -810,17 +840,21 @@ export async function extractDocumentMaterials(filePath: string, workspaceDir: s
810
840
  const type = SUPPORTED_EXTENSIONS[extname(resolvedFile).toLowerCase()]
811
841
 
812
842
  if (!type) {
813
- return {
843
+ const result: DocumentMaterialsResult = {
814
844
  status: "skipped",
815
845
  source: relativeSource,
816
846
  type: "other",
817
847
  reason: "unsupported_file_type",
818
848
  }
849
+ updateDecksSourceMaterialIndex(workspaceDir, resolvedFile, result)
850
+ return result
819
851
  }
820
852
 
821
- return type === "pdf"
853
+ const result = type === "pdf"
822
854
  ? await processPdfFile(resolvedFile, workspaceDir)
823
855
  : await processOfficeFile(resolvedFile, workspaceDir, type)
856
+ updateDecksSourceMaterialIndex(workspaceDir, resolvedFile, result)
857
+ return result
824
858
  } catch (e) {
825
859
  return {
826
860
  status: "failed",
@@ -0,0 +1,93 @@
1
+ import { createHash } from "crypto"
2
+ import { existsSync, realpathSync, statSync } from "fs"
3
+ import { extname, isAbsolute, join, relative, resolve, sep } from "path"
4
+ import {
5
+ type DecksState,
6
+ type SourceMaterial,
7
+ } from "./decks-state"
8
+
9
+ export type SourceMaterialStatus = NonNullable<SourceMaterial["status"]>
10
+
11
/**
 * Resolves symlinks on the longest existing prefix of `target` and re-appends
 * the part that does not exist yet, so paths to files that have not been
 * created are compared in the same real-path namespace as existing ones.
 */
function realpathOfExistingPrefix(target: string): string {
  let existing = target
  while (!existsSync(existing)) {
    const parent = resolve(existing, "..")
    if (parent === existing) return target // reached the filesystem root
    existing = parent
  }
  const realPrefix = realpathSync(existing)
  const missingTail = relative(existing, target)
  return missingTail ? join(realPrefix, missingTail) : realPrefix
}

/**
 * Resolves `filePath` (absolute, or relative to `workspaceRoot`) to a real
 * path and verifies that it is the workspace root itself or lies inside it.
 *
 * Symlinks are resolved for the workspace root and for every existing
 * ancestor of the file. A not-yet-created file under a symlinked workspace is
 * therefore accepted, and a not-yet-created file below a symlink that leaves
 * the workspace is rejected.
 *
 * @param filePath Absolute path, or path relative to `workspaceRoot`.
 * @param workspaceRoot Workspace directory; must exist.
 * @returns The resolved path of the file.
 * @throws Error "file must be within workspace" when the path escapes the root.
 */
export function ensureWorkspaceFile(filePath: string, workspaceRoot: string): string {
  const resolvedWorkspace = realpathSync(resolve(workspaceRoot))
  const candidate = isAbsolute(filePath) ? resolve(filePath) : resolve(workspaceRoot, filePath)
  const resolvedFile = realpathOfExistingPrefix(candidate)

  const isRoot = resolvedFile === resolvedWorkspace
  const isInsideRoot = resolvedFile.startsWith(resolvedWorkspace + sep)
  if (!isRoot && !isInsideRoot) {
    throw new Error("file must be within workspace")
  }

  return resolvedFile
}
22
+
23
/**
 * Expresses `filePath` relative to the real path of `workspaceRoot`, always
 * using forward slashes. Symlinks in `filePath` are resolved only when the
 * path exists.
 */
export function workspaceRelativePath(filePath: string, workspaceRoot: string): string {
  const root = realpathSync(resolve(workspaceRoot))
  let target = resolve(filePath)
  if (existsSync(target)) {
    target = realpathSync(target)
  }
  const nativeRelative = relative(root, target)
  return nativeRelative.split("\\").join("/")
}
29
+
30
/**
 * Derives a lowercase type label from the file extension, without the leading
 * dot. Paths with no extension yield "other".
 */
export function sourceMaterialType(filePath: string): string {
  const extension = extname(filePath)
  const bare = extension.startsWith(".") ? extension.slice(1) : extension
  const label = bare.toLowerCase()
  return label === "" ? "other" : label
}
33
+
34
/**
 * Computes a SHA-1 hex digest over the file's resolved path, modification time
 * (ms) and size, joined with ":". File contents are not read.
 *
 * Throws whatever `statSync` throws when the file cannot be stat'ed.
 */
export function computeSourceFingerprint(filePath: string): string {
  const { mtimeMs, size } = statSync(filePath)
  const identity = [resolve(filePath), mtimeMs, size].join(":")
  const hasher = createHash("sha1")
  hasher.update(identity)
  return hasher.digest("hex")
}
40
+
41
/**
 * Builds the basic index record for a workspace file: workspace-relative path,
 * type label, size in bytes and fingerprint.
 *
 * The path is first validated with `ensureWorkspaceFile`, so a path outside
 * the workspace throws.
 */
export function sourceMaterialMetadata(filePath: string, workspaceRoot: string): SourceMaterial {
  const absolutePath = ensureWorkspaceFile(filePath, workspaceRoot)
  const { size } = statSync(absolutePath)
  const metadata: SourceMaterial = {
    path: workspaceRelativePath(absolutePath, workspaceRoot),
    type: sourceMaterialType(absolutePath),
    size,
    fingerprint: computeSourceFingerprint(absolutePath),
  }
  return metadata
}
51
+
52
/**
 * Reports whether a material's recorded extraction output can be reused:
 * the manifest path, text path and cache directory must all be recorded and
 * must all exist when joined onto `workspaceRoot`.
 */
export function hasValidExtraction(material: SourceMaterial, workspaceRoot: string): boolean {
  const { manifestPath, textPath, cacheDir } = material.extraction ?? {}
  if (!manifestPath || !textPath || !cacheDir) return false

  for (const recorded of [manifestPath, textPath, cacheDir]) {
    if (!existsSync(join(workspaceRoot, recorded))) return false
  }
  return true
}
58
+
59
/**
 * Inserts or updates one source material record in `state`, matched by path
 * (backslashes normalised to "/"), and keeps the list sorted by path.
 *
 * Merge rules:
 * - Fields of an existing record are kept unless `material` provides a value.
 *   Keys present in `material` but set to `undefined` are ignored, so a caller
 *   that builds its argument from optional values cannot erase stored data.
 * - If both records carry a fingerprint and they differ, the status becomes
 *   "extracted" only when the requested status is "extracted"; otherwise it
 *   drops to "discovered" and the stored extraction info is removed.
 * - With an unchanged fingerprint, a requested status of "discovered" never
 *   downgrades a status that is already recorded.
 * - `firstSeen` is set once; `lastChecked` is always set to the current time.
 *
 * @param state Decks state to update; mutated in place.
 * @param material Record to merge in.
 * @param status Requested processing status; defaults to `material.status`,
 *   then "discovered".
 * @returns The same `state` object.
 */
export function upsertSourceMaterial(
  state: DecksState,
  material: SourceMaterial,
  status: SourceMaterialStatus = material.status ?? "discovered",
): DecksState {
  const now = new Date().toISOString()
  const list = state.workspace.sourceMaterials ?? []
  const path = material.path.replace(/\\/g, "/")
  const existingIndex = list.findIndex((entry) => entry.path === path)
  const existing = existingIndex >= 0 ? list[existingIndex] : undefined
  const changedFingerprint = Boolean(
    existing?.fingerprint && material.fingerprint && existing.fingerprint !== material.fingerprint,
  )

  let nextStatus: SourceMaterialStatus
  if (changedFingerprint) {
    // New file contents invalidate summaries/research done on the old version.
    nextStatus = status === "extracted" ? "extracted" : "discovered"
  } else if (status === "discovered" && existing?.status) {
    nextStatus = existing.status
  } else {
    nextStatus = status
  }

  // Object spread copies explicit `undefined` values, which would overwrite
  // stored fields; strip those keys before merging.
  const provided: Partial<SourceMaterial> = { ...material }
  for (const key of Object.keys(provided) as (keyof SourceMaterial)[]) {
    if (provided[key] === undefined) delete provided[key]
  }

  const next: SourceMaterial = {
    ...existing,
    ...provided,
    path,
    type: material.type ?? existing?.type,
    status: nextStatus,
    firstSeen: existing?.firstSeen ?? material.firstSeen ?? now,
    lastChecked: now,
  }

  if (changedFingerprint && status !== "extracted") {
    delete next.extraction
    delete next.lastExtracted
  }

  if (existingIndex >= 0) list[existingIndex] = next
  else list.push(next)
  state.workspace.sourceMaterials = list.sort((a, b) => a.path.localeCompare(b.path))
  return state
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cyber-dash-tech/revela",
3
- "version": "0.8.4",
3
+ "version": "0.8.5",
4
4
  "description": "OpenCode plugin that turns AI into an HTML slide deck generator",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
package/skill/SKILL.md CHANGED
@@ -169,7 +169,14 @@ The subagent writes exactly one file through `revela-research-save`:
169
169
 
170
170
  Use `revela-decks` action `read` before scanning from scratch. Its
171
171
  `workspace.sourceMaterials` state is the workspace material index created by
172
- `/revela init`. Use it to choose candidate files and avoid repeated deep reading.
172
+ `/revela init` and refreshed by document extraction. Use it to choose candidate
173
+ files and avoid repeated deep reading.
174
+
175
+ Before extracting or deeply reading a workspace document, check
176
+ `DECKS.json.workspace.sourceMaterials`. If the same path has an unchanged
177
+ fingerprint and valid `extraction.manifestPath`, `extraction.textPath`, and
178
+ `extraction.cacheDir`, reuse those materials instead of extracting or reading
179
+ the original document again.
173
180
 
174
181
  Use `revela-workspace-scan` or file tools as a freshness check when needed:
175
182
  - discover files added after `/revela init`
@@ -177,7 +184,9 @@ Use `revela-workspace-scan` or file tools as a freshness check when needed:
177
184
  - find user-provided attachments or topic-specific files not in `DECKS.json`
178
185
 
179
186
  Avoid repeated expensive work. Only call `revela-extract-document-materials` or
180
- deep-read files that are relevant to the current Research Brief.
187
+ deep-read files that are relevant to the current Research Brief. If the user
188
+ adds material mid-project, run `revela-workspace-scan` as a freshness check and
189
+ register new `sourceMaterial` records before deciding which ones need analysis.
181
190
 
182
191
  #### After Agents Complete
183
192
 
package/tools/decks.ts CHANGED
@@ -12,8 +12,10 @@ import {
12
12
  type DeckSpec,
13
13
  type RequiredInputs,
14
14
  type ResearchAxis,
15
+ type SourceMaterial,
15
16
  type SlideSpec,
16
17
  } from "../lib/decks-state"
18
+ import { upsertSourceMaterial } from "../lib/source-materials"
17
19
 
18
20
  export default tool({
19
21
  description:
@@ -44,6 +46,24 @@ export default tool({
44
46
  slidePlanConfirmed: tool.schema.boolean().optional(),
45
47
  designLayoutsFetched: tool.schema.boolean().optional(),
46
48
  }).optional().describe("For upsertDeck: checklist state. Only set true for explicit completed prerequisites."),
49
+ sourceMaterials: tool.schema.array(tool.schema.object({
50
+ path: tool.schema.string().describe("Workspace-relative source material path."),
51
+ type: tool.schema.string().optional().describe("File type such as pdf, pptx, docx, xlsx, csv, md, or txt."),
52
+ size: tool.schema.number().optional().describe("File size in bytes."),
53
+ fingerprint: tool.schema.string().optional().describe("File fingerprint for the current version."),
54
+ status: tool.schema.enum(["discovered", "extracted", "summarized", "researched"]).optional().describe("How far this source has been processed."),
55
+ summary: tool.schema.string().optional().describe("Conservative source summary if already known."),
56
+ bestUsedFor: tool.schema.string().optional().describe("Short note on deck sections this material is best used for."),
57
+ firstSeen: tool.schema.string().optional().describe("ISO timestamp when first seen."),
58
+ lastChecked: tool.schema.string().optional().describe("ISO timestamp when last checked."),
59
+ lastExtracted: tool.schema.string().optional().describe("ISO timestamp when last extracted."),
60
+ lastSummarized: tool.schema.string().optional().describe("ISO timestamp when last summarized."),
61
+ extraction: tool.schema.object({
62
+ manifestPath: tool.schema.string().optional(),
63
+ textPath: tool.schema.string().optional(),
64
+ cacheDir: tool.schema.string().optional(),
65
+ }).optional().describe("Reusable extraction output paths, if any."),
66
+ })).optional().describe("For init/readiness refresh: source material records discovered in the workspace."),
47
67
  researchPlan: tool.schema.array(tool.schema.object({
48
68
  axis: tool.schema.string().describe("Research axis name."),
49
69
  needed: tool.schema.boolean().describe("Whether this research axis is needed for the deck."),
@@ -87,6 +107,9 @@ export default tool({
87
107
  const defaultSlug = workspaceDeckSlug(workspaceRoot)
88
108
 
89
109
  if (args.action === "init") {
110
+ for (const material of (args.sourceMaterials ?? []) as SourceMaterial[]) {
111
+ upsertSourceMaterial(state, material, material.status ?? "discovered")
112
+ }
90
113
  writeDecksState(workspaceRoot, state)
91
114
  return JSON.stringify({ ok: true, path: DECKS_STATE_FILE, state }, null, 2)
92
115
  }
@@ -1,6 +1,8 @@
1
1
  import { tool } from "@opencode-ai/plugin"
2
2
  import { readdirSync, statSync, existsSync } from "fs"
3
3
  import { join, relative, extname, resolve, sep, isAbsolute } from "path"
4
+ import { sourceMaterialMetadata } from "../lib/source-materials"
5
+ import type { SourceMaterial } from "../lib/decks-state"
4
6
 
5
7
  const DOC_EXTENSIONS = new Set([
6
8
  ".pdf", ".docx", ".doc", ".xlsx", ".xls",
@@ -18,6 +20,8 @@ type FileEntry = {
18
20
  path: string
19
21
  type: string
20
22
  size: string
23
+ sizeBytes: number
24
+ sourceMaterial: SourceMaterial
21
25
  }
22
26
 
23
27
  /**
@@ -80,10 +84,13 @@ function scanDir(dir: string, rootDir: string, results: FileEntry[], maxDepth: n
80
84
  } else if (stat.isFile()) {
81
85
  const ext = extname(entry).toLowerCase()
82
86
  if (DOC_EXTENSIONS.has(ext)) {
87
+ const sourceMaterial = sourceMaterialMetadata(fullPath, rootDir)
83
88
  results.push({
84
89
  path: relative(rootDir, fullPath),
85
90
  type: typeLabel(ext),
86
91
  size: formatSize(stat.size),
92
+ sizeBytes: stat.size,
93
+ sourceMaterial: { ...sourceMaterial, status: "discovered" },
87
94
  })
88
95
  }
89
96
  }