npm - @cyber-dash-tech/revela - Versions diffs - 0.8.4 → 0.8.5 - Mend

@cyber-dash-tech/revela 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/lib/agents/research-prompt.ts +8 -1
package/lib/commands/init.ts +8 -7
package/lib/decks-state.ts +11 -0
package/lib/document-materials/extract.ts +42 -8
package/lib/source-materials.ts +93 -0
package/package.json +1 -1
package/skill/SKILL.md +11 -2
package/tools/decks.ts +23 -0
package/tools/workspace-scan.ts +7 -0

package/lib/agents/research-prompt.ts CHANGED Viewed

@@ -49,6 +49,12 @@ by the brief. Use \`workspace.sourceMaterials\`, \`workspace.deckMemory\`, and
 \`workspace.openQuestions\` as workspace context. Treat sourceMaterials as a
 candidate index, not as proof by itself.
+Before extracting or deeply reading a workspace document, check whether its
+\`workspace.sourceMaterials\` record has the same fingerprint and valid
+\`extraction.manifestPath\`, \`extraction.textPath\`, and \`extraction.cacheDir\`.
+When those paths are present, reuse them instead of re-extracting or rereading
+the original document.
 Do not write or patch \`DECKS.json\`. You only write research findings through
 \`revela-research-save\`; the primary agent decides which stable deck state to preserve.
@@ -63,7 +69,8 @@ Use **\`revela-workspace-scan\`** as a lightweight freshness check when needed:
 Do not deep-read the whole workspace. Select only files relevant to your axis.
-For every selected file, call **\`revela-extract-document-materials\`** first.
+For every selected PDF/PPTX/DOCX/XLSX without valid reusable extraction paths,
+call **\`revela-extract-document-materials\`** first.
 - \`pdf\`, \`pptx\`, \`docx\`, and \`xlsx\` will produce a manifest plus extracted text and any available embedded materials
 - unsupported file types will be skipped automatically

package/lib/commands/init.ts CHANGED Viewed

@@ -42,13 +42,14 @@ Workflow:
    - \`decks/**/*.pdf\`
    - \`slides/**/*.pdf\`
    Run these searches only inside the current workspace root. These are generated/output decks, not necessarily source materials. If \`decks/\` contains exactly one HTML file, treat it as the current deck artifact. If \`decks/\` contains multiple HTML files, stop and ask the user to move extra decks to separate workspaces before adopting one.
-3. Select the files that look most relevant for future slide decks. Prioritize source decks, PDFs, Word docs, spreadsheets, CSVs, Markdown, text notes, and relevant existing generated decks.
-4. For selected PDF/PPTX/DOCX/XLSX files, call \`revela-extract-document-materials\` before deciding what to summarize.
-5. Read only the materials needed to form a conservative workspace memory. Do not exhaustively read every file if the workspace is large.
-6. Call \`revela-decks\` with action \`init\` to create ${DECKS_STATE_FILE} if needed.
-7. If this conversation or the workspace contains a concrete deck task or an existing deck artifact, call \`revela-decks\` with action \`upsertDeck\` and later \`upsertSlides\` for explicit deck information. Do not pass or ask for a deck key; the tool uses the workspace folder name internally. Do not mark readiness ready during init.
-8. When adopting an existing HTML deck, analyze the artifact and create one conservative \`SlideSpec\` per identifiable slide/page. The \`SlideSpec[]\` itself is the worklist; do not create a separate target slide count.
-9. Report what was initialized or updated and list any open questions.
+3. Register or refresh source material records by passing the scan result's \`sourceMaterial\` objects to \`revela-decks\` action \`init\`. Preserve unchanged existing records; the tool will upsert by path and fingerprint.
+4. Select the files that look most relevant for future slide decks. Prioritize source decks, PDFs, Word docs, spreadsheets, CSVs, Markdown, text notes, and relevant existing generated decks.
+5. Do not automatically extract every PDF/PPTX/DOCX/XLSX during init. Call \`revela-extract-document-materials\` only for selected files that are clearly needed to form conservative workspace memory, or when the user explicitly asked to analyze the material now.
+6. Before extracting or deeply reading a selected document, check \`DECKS.json.workspace.sourceMaterials\`. If the same path has the same fingerprint and valid extraction paths, reuse those paths instead of repeating extraction.
+7. Read only the materials needed to form a conservative workspace memory. Do not exhaustively read every file if the workspace is large.
+8. If this conversation or the workspace contains a concrete deck task or an existing deck artifact, call \`revela-decks\` with action \`upsertDeck\` and later \`upsertSlides\` for explicit deck information. Do not pass or ask for a deck key; the tool uses the workspace folder name internally. Do not mark readiness ready during init.
+9. When adopting an existing HTML deck, analyze the artifact and create one conservative \`SlideSpec\` per identifiable slide/page. The \`SlideSpec[]\` itself is the worklist; do not create a separate target slide count.
+10. Report what was initialized or updated and list any open questions.
 Memory rules:
 - Only write facts supported by workspace files into ${DECKS_STATE_FILE} workspace state, source materials, deck memory, and open questions.

package/lib/decks-state.ts CHANGED Viewed

@@ -26,9 +26,20 @@ export interface DecksState {
 export interface SourceMaterial {
   path: string
   type?: string
+  size?: number
+  fingerprint?: string
+  status?: "discovered" | "extracted" | "summarized" | "researched"
+  extraction?: {
+    manifestPath?: string
+    textPath?: string
+    cacheDir?: string
+  }
   summary?: string
   bestUsedFor?: string
+  firstSeen?: string
   lastChecked?: string
+  lastExtracted?: string
+  lastSummarized?: string
 }
 export interface DeckMemoryEntry {

package/lib/document-materials/extract.ts CHANGED Viewed

@@ -1,5 +1,4 @@
-import { createHash } from "crypto"
-import { existsSync, mkdirSync, readFileSync, realpathSync, statSync, writeFileSync } from "fs"
+import { existsSync, mkdirSync, readFileSync, realpathSync, writeFileSync } from "fs"
 import { basename, dirname, extname, isAbsolute, join, relative, resolve } from "path"
 import { DOMParser } from "@xmldom/xmldom"
 import { unzipSync } from "fflate"
@@ -9,6 +8,8 @@ import { extractDocx } from "../read-hooks/extractors/docx"
 import { extractPdfText } from "../read-hooks/extractors/pdf"
 import { extractPptx } from "../read-hooks/extractors/pptx"
 import { extractXlsx } from "../read-hooks/extractors/xlsx"
+import { hasDecksState, readDecksState, writeDecksState } from "../decks-state"
+import { computeSourceFingerprint, sourceMaterialMetadata, upsertSourceMaterial } from "../source-materials"
 export type DocumentMaterial = {
   path: string
@@ -50,6 +51,7 @@ export type PptxSlide = {
 export type DocumentMaterialsResult = {
   status: "processed" | "skipped" | "failed"
+  cache_status?: "hit" | "miss"
   source: string
   type: "pptx" | "docx" | "xlsx" | "pdf" | "other"
   cache_dir?: string
@@ -142,10 +144,7 @@ function workspaceRelative(filePath: string, workspaceDir: string): string {
 }
 function buildFingerprint(filePath: string): string {
-  const stat = statSync(filePath)
-  return createHash("sha1")
-    .update(`${resolve(filePath)}:${stat.mtimeMs}:${stat.size}`)
-    .digest("hex")
+  return computeSourceFingerprint(filePath)
 }
 function writeCachedBuffer(targetPath: string, buf: Uint8Array): void {
@@ -157,6 +156,33 @@ function materialPath(cacheDir: string, workspaceDir: string, ...segments: strin
   return workspaceRelative(join(cacheDir, ...segments), workspaceDir)
 }
+/**
+ * Record the outcome of an extraction in the DECKS state source-material index.
+ *
+ * Returns without touching anything when `hasDecksState` reports no state for
+ * the workspace. A "processed" result that carries manifest, text, and cache
+ * paths is stored with status "extracted" together with those paths; any other
+ * result only registers the file as "discovered".
+ *
+ * @param workspaceDir Workspace root whose DECKS state is read and rewritten.
+ * @param filePath Source document the result belongs to.
+ * @param result Outcome produced by the extraction pipeline for that document.
+ */
+function updateDecksSourceMaterialIndex(
+  workspaceDir: string,
+  filePath: string,
+  result: DocumentMaterialsResult,
+): void {
+  if (!hasDecksState(workspaceDir)) return
+  const base = sourceMaterialMetadata(filePath, workspaceDir)
+  const state = readDecksState(workspaceDir)
+  // Guard the list the same way upsertSourceMaterial does; it may be absent.
+  const existing = (state.workspace.sourceMaterials ?? []).find((entry) => entry.path === base.path)
+  const { manifest_path: manifestPath, text_path: textPath, cache_dir: cacheDir } = result
+  if (result.status !== "processed" || !manifestPath || !textPath || !cacheDir) {
+    // Nothing reusable was produced. Leave the extraction fields out entirely:
+    // explicit `undefined` values would be spread over the stored record and
+    // erase extraction paths that are still valid for an unchanged file.
+    upsertSourceMaterial(state, { ...base, status: "discovered" }, "discovered")
+    writeDecksState(workspaceDir, state)
+    return
+  }
+  const now = new Date().toISOString()
+  // A cache hit did not re-run extraction, so keep the earlier timestamp when one exists.
+  const lastExtracted = result.cache_status === "hit" ? existing?.lastExtracted ?? now : now
+  upsertSourceMaterial(state, {
+    ...base,
+    status: "extracted",
+    extraction: { manifestPath, textPath, cacheDir },
+    lastExtracted,
+  }, "extracted")
+  writeDecksState(workspaceDir, state)
+}
 function toRgbaBuffer(image: PdfImageData): Buffer {
   const pixelCount = image.width * image.height
@@ -672,6 +698,7 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
     const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")) as CachedManifest
     return {
       status: "processed",
+      cache_status: "hit",
       source: manifest.source,
       type: manifest.type,
       cache_dir: manifest.cache_dir,
@@ -696,6 +723,7 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
   const result: DocumentMaterialsResult = {
     status: "processed",
+    cache_status: "miss",
     source: relativeSource,
     type: "pdf",
     cache_dir: workspaceRelative(cacheDir, workspaceDir),
@@ -734,6 +762,7 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
     const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")) as CachedManifest
     return {
       status: "processed",
+      cache_status: "hit",
       source: manifest.source,
       type: manifest.type,
       cache_dir: manifest.cache_dir,
@@ -775,6 +804,7 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
   const result: DocumentMaterialsResult = {
     status: "processed",
+    cache_status: "miss",
     source: relativeSource,
     type,
     cache_dir: workspaceRelative(cacheDir, workspaceDir),
@@ -810,17 +840,21 @@ export async function extractDocumentMaterials(filePath: string, workspaceDir: s
     const type = SUPPORTED_EXTENSIONS[extname(resolvedFile).toLowerCase()]
     if (!type) {
-      return {
+      const result: DocumentMaterialsResult = {
         status: "skipped",
         source: relativeSource,
         type: "other",
         reason: "unsupported_file_type",
       }
+      updateDecksSourceMaterialIndex(workspaceDir, resolvedFile, result)
+      return result
     }
-    return type === "pdf"
+    const result = type === "pdf"
       ? await processPdfFile(resolvedFile, workspaceDir)
       : await processOfficeFile(resolvedFile, workspaceDir, type)
+    updateDecksSourceMaterialIndex(workspaceDir, resolvedFile, result)
+    return result
   } catch (e) {
     return {
       status: "failed",

package/lib/source-materials.ts ADDED Viewed

@@ -0,0 +1,93 @@
+import { createHash } from "crypto"
+import { existsSync, realpathSync, statSync } from "fs"
+import { extname, isAbsolute, join, relative, resolve, sep } from "path"
+import {
+  type DecksState,
+  type SourceMaterial,
+} from "./decks-state"
+export type SourceMaterialStatus = NonNullable<SourceMaterial["status"]>
+/**
+ * Resolve `filePath` to an absolute path and verify that it lies inside the workspace.
+ *
+ * Relative input is anchored at the canonical (symlink-resolved) workspace
+ * path. When the target exists its own symlinks are resolved as well, so a
+ * link that points outside the workspace is rejected.
+ *
+ * @param filePath Absolute path, or a path relative to the workspace root.
+ * @param workspaceRoot Workspace root directory; must exist.
+ * @returns The resolved absolute path (canonical when the target exists).
+ * @throws Error with message "file must be within workspace" when the path escapes the root.
+ */
+export function ensureWorkspaceFile(filePath: string, workspaceRoot: string): string {
+  const resolvedWorkspace = realpathSync(resolve(workspaceRoot))
+  // Anchor relative input at the canonical root so a workspace reached through
+  // a symlink still matches when the target file does not exist yet.
+  const candidate = isAbsolute(filePath) ? resolve(filePath) : resolve(resolvedWorkspace, filePath)
+  const resolvedFile = existsSync(candidate) ? realpathSync(candidate) : candidate
+  // A relative-path check also works when the workspace is a filesystem root,
+  // where `root + sep` would never be a prefix of anything.
+  const fromWorkspace = relative(resolvedWorkspace, resolvedFile)
+  const escapes =
+    fromWorkspace === ".." || fromWorkspace.startsWith(".." + sep) || isAbsolute(fromWorkspace)
+  if (escapes) {
+    throw new Error("file must be within workspace")
+  }
+  return resolvedFile
+}
+/**
+ * Express `filePath` relative to the canonical workspace root, using forward slashes.
+ *
+ * Relative input is interpreted against the workspace root (not the process
+ * working directory), matching `ensureWorkspaceFile`. No containment check is
+ * made here: a path outside the workspace yields a result starting with "..".
+ *
+ * @param filePath Absolute path, or a path relative to the workspace root.
+ * @param workspaceRoot Workspace root directory; must exist.
+ * @returns The workspace-relative path with backslashes replaced by "/".
+ */
+export function workspaceRelativePath(filePath: string, workspaceRoot: string): string {
+  const resolvedWorkspace = realpathSync(resolve(workspaceRoot))
+  const candidate = isAbsolute(filePath) ? resolve(filePath) : resolve(resolvedWorkspace, filePath)
+  // Only an existing target can be canonicalised; otherwise keep the lexical path.
+  const resolvedFile = existsSync(candidate) ? realpathSync(candidate) : candidate
+  return relative(resolvedWorkspace, resolvedFile).replace(/\\/g, "/")
+}
+/**
+ * Derive a type label from a file name: the lower-cased extension without its
+ * leading dot, or "other" when the name has no usable extension.
+ *
+ * @param filePath Path or file name to inspect.
+ * @returns For example "pdf" for "Report.PDF", and "other" for "notes".
+ */
+export function sourceMaterialType(filePath: string): string {
+  const extension = extname(filePath)
+  const label = (extension.startsWith(".") ? extension.slice(1) : extension).toLowerCase()
+  return label || "other"
+}
+/**
+ * Build a change-detection fingerprint for a file.
+ *
+ * The value is the SHA-1 hex digest of "<absolute path>:<mtimeMs>:<size>".
+ * File contents are not read, so it changes whenever the file is moved,
+ * touched, or resized.
+ *
+ * @param filePath File to fingerprint; `statSync` throws if it does not exist.
+ * @returns 40-character lowercase hex digest.
+ */
+export function computeSourceFingerprint(filePath: string): string {
+  const { mtimeMs, size } = statSync(filePath)
+  const hasher = createHash("sha1")
+  hasher.update([resolve(filePath), mtimeMs, size].join(":"))
+  return hasher.digest("hex")
+}
+/**
+ * Collect the basic index fields for a workspace file: its workspace-relative
+ * path, type label, size in bytes, and fingerprint.
+ *
+ * @param filePath Absolute or workspace-relative path of the file.
+ * @param workspaceRoot Workspace root the file must belong to.
+ * @returns A `SourceMaterial` with `path`, `type`, `size`, and `fingerprint` set.
+ * @throws Whatever `ensureWorkspaceFile` or `statSync` throws, e.g. for a path
+ *   outside the workspace or a file that does not exist.
+ */
+export function sourceMaterialMetadata(filePath: string, workspaceRoot: string): SourceMaterial {
+  const absoluteFile = ensureWorkspaceFile(filePath, workspaceRoot)
+  const { size } = statSync(absoluteFile)
+  const metadata: SourceMaterial = {
+    path: workspaceRelativePath(absoluteFile, workspaceRoot),
+    type: sourceMaterialType(absoluteFile),
+    size,
+    fingerprint: computeSourceFingerprint(absoluteFile),
+  }
+  return metadata
+}
+/**
+ * Tell whether a source-material record points at extraction output that is
+ * still present on disk.
+ *
+ * All three of `manifestPath`, `textPath`, and `cacheDir` must be non-empty and
+ * must exist when joined onto `workspaceRoot`. Only existence is checked; the
+ * record's fingerprint is not compared here.
+ *
+ * @param material Record whose `extraction` paths are checked.
+ * @param workspaceRoot Directory the recorded paths are joined onto.
+ * @returns `true` only when every recorded path exists.
+ */
+export function hasValidExtraction(material: SourceMaterial, workspaceRoot: string): boolean {
+  const { manifestPath, textPath, cacheDir } = material.extraction ?? {}
+  if (!manifestPath || !textPath || !cacheDir) return false
+  for (const recorded of [manifestPath, textPath, cacheDir]) {
+    if (!existsSync(join(workspaceRoot, recorded))) return false
+  }
+  return true
+}
+/**
+ * Insert or merge a source-material record into `state.workspace.sourceMaterials`.
+ *
+ * Records are matched by path (backslashes normalised to "/"). Fields supplied
+ * in `material` win over stored ones, except that fields explicitly set to
+ * `undefined` are ignored so they cannot erase stored values. `firstSeen` is
+ * kept from the stored record when present and `lastChecked` is always set to
+ * the current time.
+ *
+ * Status rules:
+ * - fingerprint changed: "extracted" stays "extracted", anything else resets to
+ *   "discovered", and extraction data recorded for the old fingerprint is dropped;
+ * - fingerprint unchanged: a plain "discovered" never downgrades a stored status.
+ *
+ * The list is re-sorted by path. `state` is mutated in place.
+ *
+ * @param state DECKS state to update.
+ * @param material Record to insert or merge; `path` is required.
+ * @param status Requested status; defaults to `material.status`, then "discovered".
+ * @returns The same `state` object, for chaining.
+ */
+export function upsertSourceMaterial(
+  state: DecksState,
+  material: SourceMaterial,
+  status: SourceMaterialStatus = material.status ?? "discovered",
+): DecksState {
+  const now = new Date().toISOString()
+  const list = state.workspace.sourceMaterials ?? []
+  const path = material.path.replace(/\\/g, "/")
+  const existingIndex = list.findIndex((entry) => entry.path === path)
+  const existing = existingIndex >= 0 ? list[existingIndex] : undefined
+  const changedFingerprint = Boolean(
+    existing?.fingerprint && material.fingerprint && existing.fingerprint !== material.fingerprint,
+  )
+
+  let nextStatus: SourceMaterialStatus
+  if (changedFingerprint) {
+    nextStatus = status === "extracted" ? "extracted" : "discovered"
+  } else {
+    nextStatus = status === "discovered" && existing?.status ? existing.status : status
+  }
+
+  // Keys explicitly set to `undefined` would otherwise overwrite stored values
+  // through the spread below (e.g. wiping `extraction` on a plain re-check).
+  const provided = Object.fromEntries(
+    Object.entries(material).filter(([, value]) => value !== undefined),
+  ) as SourceMaterial
+
+  // Extraction output describes one specific file version; never carry it
+  // across a fingerprint change.
+  const carried: Partial<SourceMaterial> = { ...existing }
+  if (changedFingerprint) {
+    delete carried.extraction
+    delete carried.lastExtracted
+  }
+
+  const next: SourceMaterial = {
+    ...carried,
+    ...provided,
+    path,
+    type: provided.type ?? existing?.type,
+    status: nextStatus,
+    firstSeen: existing?.firstSeen ?? provided.firstSeen ?? now,
+    lastChecked: now,
+  }
+  if (changedFingerprint && status !== "extracted") {
+    delete next.extraction
+    delete next.lastExtracted
+  }
+  if (existingIndex >= 0) list[existingIndex] = next
+  else list.push(next)
+  state.workspace.sourceMaterials = list.sort((a, b) => a.path.localeCompare(b.path))
+  return state
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@cyber-dash-tech/revela",
-  "version": "0.8.4",
+  "version": "0.8.5",
   "description": "OpenCode plugin that turns AI into an HTML slide deck generator",
   "type": "module",
   "main": "./index.ts",

package/skill/SKILL.md CHANGED Viewed

@@ -169,7 +169,14 @@ The subagent writes exactly one file through `revela-research-save`:
 Use `revela-decks` action `read` before scanning from scratch. Its
 `workspace.sourceMaterials` state is the workspace material index created by
-`/revela init`. Use it to choose candidate files and avoid repeated deep reading.
+`/revela init` and refreshed by document extraction. Use it to choose candidate
+files and avoid repeated deep reading.
+Before extracting or deeply reading a workspace document, check
+`DECKS.json.workspace.sourceMaterials`. If the same path has an unchanged
+fingerprint and valid `extraction.manifestPath`, `extraction.textPath`, and
+`extraction.cacheDir`, reuse those materials instead of extracting or reading
+the original document again.
 Use `revela-workspace-scan` or file tools as a freshness check when needed:
 - discover files added after `/revela init`
@@ -177,7 +184,9 @@ Use `revela-workspace-scan` or file tools as a freshness check when needed:
 - find user-provided attachments or topic-specific files not in `DECKS.json`
 Avoid repeated expensive work. Only call `revela-extract-document-materials` or
-deep-read files that are relevant to the current Research Brief.
+deep-read files that are relevant to the current Research Brief. If the user
+adds material mid-project, run `revela-workspace-scan` as a freshness check and
+register new `sourceMaterial` records before deciding which ones need analysis.
 #### After Agents Complete

package/tools/decks.ts CHANGED Viewed

@@ -12,8 +12,10 @@ import {
   type DeckSpec,
   type RequiredInputs,
   type ResearchAxis,
+  type SourceMaterial,
   type SlideSpec,
 } from "../lib/decks-state"
+import { upsertSourceMaterial } from "../lib/source-materials"
 export default tool({
   description:
@@ -44,6 +46,24 @@ export default tool({
       slidePlanConfirmed: tool.schema.boolean().optional(),
       designLayoutsFetched: tool.schema.boolean().optional(),
     }).optional().describe("For upsertDeck: checklist state. Only set true for explicit completed prerequisites."),
+    sourceMaterials: tool.schema.array(tool.schema.object({
+      path: tool.schema.string().describe("Workspace-relative source material path."),
+      type: tool.schema.string().optional().describe("File type such as pdf, pptx, docx, xlsx, csv, md, or txt."),
+      size: tool.schema.number().optional().describe("File size in bytes."),
+      fingerprint: tool.schema.string().optional().describe("File fingerprint for the current version."),
+      status: tool.schema.enum(["discovered", "extracted", "summarized", "researched"]).optional().describe("How far this source has been processed."),
+      summary: tool.schema.string().optional().describe("Conservative source summary if already known."),
+      bestUsedFor: tool.schema.string().optional().describe("Short note on deck sections this material is best used for."),
+      firstSeen: tool.schema.string().optional().describe("ISO timestamp when first seen."),
+      lastChecked: tool.schema.string().optional().describe("ISO timestamp when last checked."),
+      lastExtracted: tool.schema.string().optional().describe("ISO timestamp when last extracted."),
+      lastSummarized: tool.schema.string().optional().describe("ISO timestamp when last summarized."),
+      extraction: tool.schema.object({
+        manifestPath: tool.schema.string().optional(),
+        textPath: tool.schema.string().optional(),
+        cacheDir: tool.schema.string().optional(),
+      }).optional().describe("Reusable extraction output paths, if any."),
+    })).optional().describe("For init/readiness refresh: source material records discovered in the workspace."),
     researchPlan: tool.schema.array(tool.schema.object({
       axis: tool.schema.string().describe("Research axis name."),
       needed: tool.schema.boolean().describe("Whether this research axis is needed for the deck."),
@@ -87,6 +107,9 @@ export default tool({
       const defaultSlug = workspaceDeckSlug(workspaceRoot)
       if (args.action === "init") {
+        for (const material of (args.sourceMaterials ?? []) as SourceMaterial[]) {
+          upsertSourceMaterial(state, material, material.status ?? "discovered")
+        }
         writeDecksState(workspaceRoot, state)
         return JSON.stringify({ ok: true, path: DECKS_STATE_FILE, state }, null, 2)
       }

package/tools/workspace-scan.ts CHANGED Viewed

@@ -1,6 +1,8 @@
 import { tool } from "@opencode-ai/plugin"
 import { readdirSync, statSync, existsSync } from "fs"
 import { join, relative, extname, resolve, sep, isAbsolute } from "path"
+import { sourceMaterialMetadata } from "../lib/source-materials"
+import type { SourceMaterial } from "../lib/decks-state"
 const DOC_EXTENSIONS = new Set([
   ".pdf", ".docx", ".doc", ".xlsx", ".xls",
@@ -18,6 +20,8 @@ type FileEntry = {
   path: string
   type: string
   size: string
+  sizeBytes: number
+  sourceMaterial: SourceMaterial
 }
 /**
@@ -80,10 +84,13 @@ function scanDir(dir: string, rootDir: string, results: FileEntry[], maxDepth: n
     } else if (stat.isFile()) {
       const ext = extname(entry).toLowerCase()
       if (DOC_EXTENSIONS.has(ext)) {
+        const sourceMaterial = sourceMaterialMetadata(fullPath, rootDir)
         results.push({
           path: relative(rootDir, fullPath),
           type: typeLabel(ext),
           size: formatSize(stat.size),
+          sizeBytes: stat.size,
+          sourceMaterial: { ...sourceMaterial, status: "discovered" },
         })
       }
     }