npm - @cyber-dash-tech/revela - Versions diffs - 0.17.21 → 0.17.22 - Mend

@cyber-dash-tech/revela 0.17.21 → 0.17.22

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (9)

package/lib/document-materials/extract.ts +189 -6
package/lib/material-intake.ts +494 -0
package/lib/runtime/index.ts +2 -0
package/package.json +1 -1
package/plugins/revela/.mcp.json +1 -1
package/plugins/revela/hooks/hooks.json +10 -0
package/plugins/revela/hooks/revela_material_notice.ts +58 -0
package/plugins/revela/mcp/revela-server.ts +66 -0
package/plugins/revela/skills/revela-init/SKILL.md +18 -8

package/lib/document-materials/extract.ts CHANGED Viewed

@@ -58,6 +58,7 @@ export type DocumentMaterialsResult = {
   cache_dir?: string
   manifest_path?: string
   text_path?: string
+  read_view_path?: string
   images?: DocumentMaterial[]
   skipped_assets?: SkippedAsset[]
   slides?: PptxSlide[]
@@ -74,6 +75,7 @@ type CachedManifest = {
   cache_dir: string
   manifest_path: string
   text_path: string
+  read_view_path?: string
   images: DocumentMaterial[]
   skipped_assets: SkippedAsset[]
   slides: PptxSlide[]
@@ -157,6 +159,145 @@ function materialPath(cacheDir: string, workspaceDir: string, ...segments: strin
   return workspaceRelative(join(cacheDir, ...segments), workspaceDir)
 }
+function buildReadView(input: { // Renders the Markdown read view for one extracted source and returns it as one newline-joined string.
+  source: string
+  type: SupportedType
+  fingerprint: string
+  text: string
+  manifestPath: string
+  textPath: string
+  images: DocumentMaterial[]
+  skippedAssets: SkippedAsset[]
+  tables: DocumentMaterial[]
+  slides: PptxSlide[] | undefined
+}): string {
+  const lines = [ // Fixed header: title, source metadata, the extracted text, then the images heading.
+    `# Extracted Material: ${basename(input.source)}`,
+    "",
+    "## Source",
+    "",
+    `- sourcePath: ${input.source}`,
+    `- type: ${input.type}`,
+    `- fingerprint: ${input.fingerprint}`,
+    `- manifestPath: ${input.manifestPath}`,
+    `- textPath: ${input.textPath}`,
+    "",
+    "## Text",
+    "",
+    input.text.trim() || "No text extracted.", // Placeholder is used when the trimmed text is empty.
+    "",
+    "## Extracted Images",
+    "",
+  ]
+  if (input.images.length === 0) lines.push("- None") // The images section is always present, even when empty.
+  else {
+    for (const image of input.images) {
+      const parts = [ // Optional fields collapse to null and are filtered out before joining.
+        image.page_or_slide ? `page_or_slide: ${image.page_or_slide}` : null,
+        `source_ref: ${image.source_ref}`,
+        image.note ? `note: ${image.note}` : null,
+      ].filter(Boolean).join("; ")
+      lines.push(`- ${image.path}${parts ? ` (${parts})` : ""}`)
+    }
+  }
+  if (input.skippedAssets.length > 0) { // Section is emitted only when at least one asset was skipped.
+    lines.push("", "## Skipped Or Unmapped Assets", "")
+    for (const asset of input.skippedAssets) {
+      const parts = [
+        asset.page_or_slide ? `page_or_slide: ${asset.page_or_slide}` : null,
+        `reason: ${asset.reason}`,
+        asset.kind ? `kind: ${asset.kind}` : null,
+      ].filter(Boolean).join("; ")
+      lines.push(`- ${asset.source_ref}${parts ? ` (${parts})` : ""}`)
+    }
+  }
+  if (input.tables.length > 0) { // Section is emitted only when tables exist.
+    lines.push("", "## Extracted Tables", "")
+    for (const table of input.tables) lines.push(`- ${table.path} (${table.note ?? table.source_ref})`) // The note is preferred; source_ref is the fallback label.
+  }
+  if (input.slides?.length) { // Skipped when slides is undefined or empty.
+    lines.push("", "## Slide Structure", "")
+    for (const slide of input.slides) { // One summary line per slide with element counts by kind.
+      const textCount = slide.elements.filter((element) => element.kind === "text").length
+      const imageCount = slide.elements.filter((element) => element.kind === "image").length
+      const shapeCount = slide.elements.filter((element) => element.kind === "shape").length
+      lines.push(`- ${slide.slide}: ${textCount} text, ${imageCount} image, ${shapeCount} shape`)
+    }
+  }
+  lines.push( // Closing guidance appended to every read view.
+    "",
+    "## Intake Rules",
+    "",
+    "- Treat this extracted material as source context until a material review records what was considered.",
+    "- Do not treat extracted images as interpreted evidence unless an explicit image review or user-provided meaning exists.",
+    "- Canonical evidence still requires source trace, quote/snippet, support scope, unsupported scope, caveat, strength, and relations in `revela-narrative/`.",
+  )
+  return lines.join("\n")
+}
+function writeReadView(input: { // Writes the rendered read view into cacheDir and returns its workspace-relative path.
+  cacheDir: string
+  workspaceDir: string
+  source: string
+  type: SupportedType
+  fingerprint: string
+  text: string
+  manifestPath: string
+  textPath: string
+  images: DocumentMaterial[]
+  skippedAssets: SkippedAsset[]
+  tables: DocumentMaterial[]
+  slides?: PptxSlide[]
+}): string {
+  const readViewPath = join(input.cacheDir, "read.md") // Fixed file name inside the cache directory.
+  writeFileSync(readViewPath, buildReadView({ // Only the rendering fields are forwarded; cacheDir and workspaceDir stay here.
+    source: input.source,
+    type: input.type,
+    fingerprint: input.fingerprint,
+    text: input.text,
+    manifestPath: input.manifestPath,
+    textPath: input.textPath,
+    images: input.images,
+    skippedAssets: input.skippedAssets,
+    tables: input.tables,
+    slides: input.slides,
+  }), "utf-8")
+  return workspaceRelative(readViewPath, input.workspaceDir)
+}
+function ensureCachedReadView( // Returns the read view path for a cached manifest, regenerating read.md when it is not recorded or missing on disk.
+  manifest: CachedManifest,
+  cacheDir: string,
+  workspaceDir: string,
+): string {
+  const existing = manifest.read_view_path
+  if (existing && existsSync(join(workspaceDir, existing))) return existing // Reuse the recorded read view only if the file still exists.
+  const text = existsSync(join(workspaceDir, manifest.text_path)) // Rebuild from the cached text file with its leading Extracted-from banner removed; empty string when that file is gone.
+    ? readFileSync(join(workspaceDir, manifest.text_path), "utf-8").replace(/^\[Extracted from: .*?\]\n\n/, "")
+    : ""
+  return writeReadView({ // NOTE(review): the new path is returned but not stored on the manifest here, so a manifest without read_view_path rebuilds read.md on every call.
+    cacheDir,
+    workspaceDir,
+    source: manifest.source,
+    type: manifest.type,
+    fingerprint: manifest.fingerprint,
+    text,
+    manifestPath: manifest.manifest_path,
+    textPath: manifest.text_path,
+    images: manifest.images,
+    skippedAssets: manifest.skipped_assets,
+    tables: manifest.tables,
+    slides: manifest.slides,
+  })
+}
 function updateDecksSourceMaterialIndex(
   workspaceDir: string,
   filePath: string,
@@ -716,6 +857,7 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
   if (existsSync(manifestPath)) {
     const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")) as CachedManifest
+    const readViewPath = ensureCachedReadView(manifest, cacheDir, workspaceDir)
     return {
       status: "processed",
       cache_status: "hit",
@@ -724,6 +866,7 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
       cache_dir: manifest.cache_dir,
       manifest_path: manifest.manifest_path,
       text_path: manifest.text_path,
+      read_view_path: readViewPath,
       images: manifest.images,
       skipped_assets: manifest.skipped_assets,
       slides: manifest.slides,
@@ -740,6 +883,22 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
   writeFileSync(textPath, `[Extracted from: ${basename(filePath)}]\n\n${text}`, "utf-8")
   const images = await extractPdfImages(buf, cacheDir, workspaceDir)
+  const relativeManifestPath = workspaceRelative(manifestPath, workspaceDir)
+  const relativeTextPath = workspaceRelative(textPath, workspaceDir)
+  const readViewPath = writeReadView({
+    cacheDir,
+    workspaceDir,
+    source: relativeSource,
+    type: "pdf",
+    fingerprint,
+    text,
+    manifestPath: relativeManifestPath,
+    textPath: relativeTextPath,
+    images,
+    skippedAssets: [],
+    tables: [],
+    slides: [],
+  })
   const result: DocumentMaterialsResult = {
     status: "processed",
@@ -747,8 +906,9 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
     source: relativeSource,
     type: "pdf",
     cache_dir: workspaceRelative(cacheDir, workspaceDir),
-    manifest_path: workspaceRelative(manifestPath, workspaceDir),
-    text_path: workspaceRelative(textPath, workspaceDir),
+    manifest_path: relativeManifestPath,
+    text_path: relativeTextPath,
+    read_view_path: readViewPath,
     images,
     skipped_assets: [],
     slides: [],
@@ -762,6 +922,7 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
     cache_dir: result.cache_dir!,
     manifest_path: result.manifest_path!,
     text_path: result.text_path!,
+    read_view_path: result.read_view_path,
     images: result.images ?? [],
     skipped_assets: [],
     slides: [],
@@ -780,6 +941,7 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
   if (existsSync(manifestPath)) {
     const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")) as CachedManifest
+    const readViewPath = ensureCachedReadView(manifest, cacheDir, workspaceDir)
     return {
       status: "processed",
       cache_status: "hit",
@@ -788,6 +950,7 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
       cache_dir: manifest.cache_dir,
       manifest_path: manifest.manifest_path,
       text_path: manifest.text_path,
+      read_view_path: readViewPath,
       images: manifest.images,
       skipped_assets: manifest.skipped_assets,
       slides: manifest.slides,
@@ -821,6 +984,24 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
   const slides = type === "pptx"
     ? extractPptxSlides(files, images, pptxAssets!.skipped_assets)
     : undefined
+  const relativeManifestPath = workspaceRelative(manifestPath, workspaceDir)
+  const relativeTextPath = workspaceRelative(textPath, workspaceDir)
+  const tables = extractTables(type, relativeTextPath)
+  const skippedAssets = pptxAssets?.skipped_assets ?? []
+  const readViewPath = writeReadView({
+    cacheDir,
+    workspaceDir,
+    source: relativeSource,
+    type,
+    fingerprint,
+    text,
+    manifestPath: relativeManifestPath,
+    textPath: relativeTextPath,
+    images,
+    skippedAssets,
+    tables,
+    slides,
+  })
   const result: DocumentMaterialsResult = {
     status: "processed",
@@ -828,12 +1009,13 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
     source: relativeSource,
     type,
     cache_dir: workspaceRelative(cacheDir, workspaceDir),
-    manifest_path: workspaceRelative(manifestPath, workspaceDir),
-    text_path: workspaceRelative(textPath, workspaceDir),
+    manifest_path: relativeManifestPath,
+    text_path: relativeTextPath,
+    read_view_path: readViewPath,
     images,
-    skipped_assets: pptxAssets?.skipped_assets ?? [],
+    skipped_assets: skippedAssets,
     slides,
-    tables: extractTables(type, workspaceRelative(textPath, workspaceDir)),
+    tables,
   }
   const manifest: CachedManifest = {
@@ -843,6 +1025,7 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
     cache_dir: result.cache_dir!,
     manifest_path: result.manifest_path!,
     text_path: result.text_path!,
+    read_view_path: result.read_view_path,
     images: result.images ?? [],
     skipped_assets: result.skipped_assets ?? [],
     slides: result.slides ?? [],

package/lib/material-intake.ts ADDED Viewed

@@ -0,0 +1,494 @@
+import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from "fs"
+import { basename, extname, isAbsolute, join, relative, resolve, sep } from "path"
+import { extractDocumentMaterials, type DocumentMaterialsResult } from "./document-materials/extract"
+import { sourceMaterialMetadata, sourceMaterialType } from "./source-materials"
+import type { SourceMaterial } from "./decks-state"
+export type MaterialIntakeStatus = // Intake state stored per source in the registry.
+  | "scanned" // Assigned by the workspace scan before any extraction.
+  | "extracted" // Extraction reported status processed.
+  | "reviewed" // A material review was recorded for the source.
+  | "text_only_read" // Read by the warning and recommendation helpers in this file; no code here assigns it.
+  | "skipped" // Extraction was neither processed nor failed, and the type is not flagged unsupported.
+  | "unsupported" // Extraction was neither processed nor failed, and the type is outside the supported extraction set.
+  | "failed" // Extraction reported status failed.
+export interface MaterialRegistryEntry { // One tracked source file in the material intake registry.
+  sourcePath: string // Registry key; entries are matched and sorted by this value.
+  type: string
+  fingerprint?: string // Compared on upsert to detect an unchanged source during a rescan.
+  size?: number
+  lastModified?: string
+  status: MaterialIntakeStatus
+  requiresExtraction: boolean // True when the type is in EXTRACTION_EXTENSIONS.
+  allowedReadPath?: string | null // The source itself for direct-read types; otherwise null until extraction yields a read view or text path.
+  extraction?: { // Populated only when extraction reported status processed; null otherwise.
+    manifestPath?: string
+    textPath?: string
+    readViewPath?: string
+    cacheDir?: string
+    imageCount: number
+    tableCount: number
+  } | null
+  review?: { // Written by recordMaterialReview.
+    reviewPath: string
+    reviewedAt: string
+    reviewedPaths: string[]
+    summary: string
+  } | null
+  warnings?: string[]
+  firstSeen: string // ISO timestamp; kept from the existing entry on upsert.
+  lastChecked: string // ISO timestamp of the latest scan, extraction, or review.
+}
+export interface MaterialRegistry { // Shape of the registry JSON document.
+  version: 1
+  updatedAt: string // ISO timestamp; rewritten on every upsert and on every write to disk.
+  sources: MaterialRegistryEntry[] // Kept sorted by sourcePath by upsertRegistryEntry.
+}
+export interface MaterialIngestTask { // Suggested next step for one registry entry, built by ingestTask.
+  path: string // Copied from the entry sourcePath.
+  materialType: string
+  needsExtraction: boolean
+  suggestedAction: "read_directly" | "extract_then_read" // Chosen from requiresExtraction.
+  status: MaterialIntakeStatus
+  allowedReadPath?: string | null
+  note: string
+}
+export interface PrepareLocalMaterialsInput { // Options for prepareLocalMaterials; every field is optional.
+  workspaceRoot?: string // Falls back to process.cwd() when omitted or empty.
+  path?: string // Scan root; must be relative to and inside the workspace root.
+  maxDepth?: number // Defaults to 2.
+  autoExtract?: boolean // Defaults to true.
+}
+export interface PrepareLocalMaterialsResult { // Outcome of one scan and optional extraction pass.
+  ok: true
+  workspaceRoot: string // Resolved absolute workspace root.
+  registryPath: string // Workspace-relative path of the written registry file.
+  found: number // Number of files found by this scan.
+  files: SourceMaterial[]
+  suggestedTasks: MaterialIngestTask[] // Built from every registry source, not only the files found by this scan.
+  extractions: DocumentMaterialsResult[]
+  warnings: string[] // Computed over every registry source.
+}
+export interface RecordMaterialReviewInput { // Input for recordMaterialReview.
+  workspaceRoot?: string // Falls back to process.cwd() when omitted or empty.
+  sourcePath: string // Backslashes are normalized to forward slashes before registry lookup.
+  reviewedPaths: string[]
+  reviewSummary: string
+  narrativeDecisions: Array<{ // Each decision becomes one bullet in the review Markdown.
+    kind: "merged" | "gap" | "ignored" | "deferred"
+    target?: string
+    rationale: string
+  }>
+}
+export interface RecordMaterialReviewResult { // Result returned by recordMaterialReview.
+  ok: true
+  path: string // Workspace-relative path of the review Markdown file.
+  registryPath: string
+  sourcePath: string
+}
+export interface CheckMaterialIntakeInput { // Options for checkMaterialIntake.
+  workspaceRoot?: string
+  strictness?: "authoring" | "readiness" | "render" // Defaults to authoring; it only changes the recommended action text.
+}
+export interface CheckMaterialIntakeResult { // Result returned by checkMaterialIntake.
+  ok: boolean // True only when no intake warnings were produced.
+  registryPath: string
+  warnings: string[]
+  sources: Array<MaterialRegistryEntry & { recommendedNextAction?: string }> // The extra field is present only when an action applies.
+}
+const DOC_EXTENSIONS = new Set([".pdf", ".docx", ".doc", ".xlsx", ".xls", ".pptx", ".ppt", ".csv", ".md", ".txt"]) // Dot-prefixed; matched against the lowercased extname during the scan.
+const EXCLUDE_DIRS = new Set(["node_modules", ".git", "dist", ".opencode", "researches", "revela-narrative", "designs", "domains"]) // Entry names skipped by the scan.
+const EXCLUDE_FILENAMES = new Set(["AGENTS.md", "DECKS.md", "README.md", "README.zh-CN.md"]) // File names never treated as source material.
+const EXTRACTION_EXTENSIONS = new Set(["pdf", "ppt", "pptx", "doc", "docx", "xls", "xlsx"]) // No leading dot; matched against the material type to decide requiresExtraction.
+const SUPPORTED_EXTRACTION_EXTENSIONS = new Set(["pdf", "pptx", "docx", "xlsx"]) // Extraction types not listed here are flagged unsupported.
+export function materialRegistryPath(workspaceRoot: string): string { // Builds the registry.json location under the given workspace root.
+  return join(workspaceRoot, ".opencode", "revela", "material-intake", "registry.json")
+}
+export function readMaterialRegistry(workspaceRoot: string): MaterialRegistry { // Loads the registry from disk, or returns an empty one when the file is absent.
+  const path = materialRegistryPath(workspaceRoot)
+  if (!existsSync(path)) return { version: 1, updatedAt: new Date(0).toISOString(), sources: [] } // Epoch timestamp marks a registry that was never written.
+  return JSON.parse(readFileSync(path, "utf-8")) as MaterialRegistry // NOTE(review): the parse is unguarded and the cast is unvalidated, so a malformed registry file throws here.
+}
+export function writeMaterialRegistry(workspaceRoot: string, registry: MaterialRegistry): string { // Persists the registry as indented JSON and returns its workspace-relative path.
+  const path = materialRegistryPath(workspaceRoot)
+  mkdirSync(join(workspaceRoot, ".opencode", "revela", "material-intake"), { recursive: true }) // Creates the parent directory if needed.
+  writeFileSync(path, JSON.stringify({ ...registry, updatedAt: new Date().toISOString() }, null, 2), "utf-8") // updatedAt is stamped at write time, replacing the value on the passed registry.
+  return workspaceRelative(path, workspaceRoot)
+}
+export async function prepareLocalMaterials(input: PrepareLocalMaterialsInput = {}): Promise<PrepareLocalMaterialsResult> { // Scans the workspace for source files, registers them, optionally extracts them, and writes the registry.
+  const workspaceRoot = root(input.workspaceRoot)
+  const scanRoot = scanRootFor(workspaceRoot, input.path) // Throws when path is absolute or escapes the workspace.
+  const files = scanWorkspaceSources(workspaceRoot, scanRoot, input.maxDepth ?? 2)
+  let registry = readMaterialRegistry(workspaceRoot)
+  const now = new Date().toISOString()
+  const extractions: DocumentMaterialsResult[] = []
+  for (const file of files) { // Register every found file as scanned first.
+    registry = upsertRegistryEntry(registry, materialEntryFromSource(file, now))
+  }
+  if (input.autoExtract ?? true) { // Extraction runs unless autoExtract is explicitly false.
+    for (const file of files) { // Files are extracted one at a time, each awaited before the next.
+      const type = (file.type || sourceMaterialType(file.path)).toLowerCase()
+      if (!EXTRACTION_EXTENSIONS.has(type)) continue // Direct-read types are never sent to extraction.
+      const result = await extractAndUpdateRegistry({ workspaceRoot, file: file.path }, registry)
+      registry = result.registry
+      extractions.push(result.extraction)
+    }
+  }
+  const registryPath = writeMaterialRegistry(workspaceRoot, registry) // Written once, after all updates.
+  return {
+    ok: true,
+    workspaceRoot,
+    registryPath,
+    found: files.length,
+    files,
+    suggestedTasks: registry.sources.map((entry) => ingestTask(entry)), // Covers every registry source, including ones from earlier runs.
+    extractions,
+    warnings: intakeWarnings(registry.sources),
+  }
+}
+export async function extractMaterial(input: { workspaceRoot?: string; file: string }): Promise<DocumentMaterialsResult> { // Extracts one file, saves the updated registry, and returns the extraction result.
+  const workspaceRoot = root(input.workspaceRoot)
+  const registry = readMaterialRegistry(workspaceRoot)
+  const result = await extractAndUpdateRegistry({ workspaceRoot, file: input.file }, registry)
+  writeMaterialRegistry(workspaceRoot, result.registry) // The registry is written whatever status the extraction returned.
+  return result.extraction
+}
+export function recordMaterialReview(input: RecordMaterialReviewInput): RecordMaterialReviewResult { // Writes a review Markdown file for a source and marks its registry entry as reviewed.
+  const workspaceRoot = root(input.workspaceRoot)
+  const registry = readMaterialRegistry(workspaceRoot)
+  const entry = registry.sources.find((item) => item.sourcePath === normalizePath(input.sourcePath))
+  const sourcePath = entry?.sourcePath ?? normalizePath(input.sourcePath)
+  const reviewPath = writeReviewMarkdown(workspaceRoot, { // The review file is written before the registry is updated.
+    sourcePath,
+    fingerprint: entry?.fingerprint,
+    extraction: entry?.extraction ?? null,
+    reviewedPaths: input.reviewedPaths.map(normalizePath),
+    reviewSummary: input.reviewSummary,
+    narrativeDecisions: input.narrativeDecisions,
+  })
+  const now = new Date().toISOString()
+  const nextEntry: MaterialRegistryEntry = {
+    ...(entry ?? { // Minimal entry for a source that is not yet in the registry.
+      sourcePath,
+      type: sourceMaterialType(sourcePath), // NOTE(review): not lowercased here, unlike materialEntryFromSource; confirm sourceMaterialType already returns lowercase.
+      status: "scanned",
+      requiresExtraction: EXTRACTION_EXTENSIONS.has(sourceMaterialType(sourcePath)),
+      firstSeen: now,
+      lastChecked: now,
+    }),
+    status: "reviewed",
+    review: {
+      reviewPath,
+      reviewedAt: now,
+      reviewedPaths: input.reviewedPaths.map(normalizePath),
+      summary: input.reviewSummary,
+    },
+    warnings: [], // Recording a review clears any stored warnings.
+    lastChecked: now,
+  }
+  const updated = upsertRegistryEntry(registry, nextEntry)
+  const registryPath = writeMaterialRegistry(workspaceRoot, updated)
+  return { ok: true, path: reviewPath, registryPath, sourcePath }
+}
+export function checkMaterialIntake(input: CheckMaterialIntakeInput = {}): CheckMaterialIntakeResult { // Reports intake warnings and a recommended next action per source; it does not write the registry.
+  const workspaceRoot = root(input.workspaceRoot)
+  const registry = readMaterialRegistry(workspaceRoot)
+  const sources = registry.sources.map((source) => {
+    const recommendedNextAction = recommendedAction(source, input.strictness ?? "authoring")
+    return recommendedNextAction ? { ...source, recommendedNextAction } : source // The field is added only when an action applies.
+  })
+  const warnings = intakeWarnings(registry.sources) // Warnings do not depend on strictness.
+  return {
+    ok: warnings.length === 0,
+    registryPath: workspaceRelative(materialRegistryPath(workspaceRoot), workspaceRoot),
+    warnings,
+    sources,
+  }
+}
+export function materialIntakeNoticeForCommand(input: { workspaceRoot?: string; command: string }): string | null { // Returns a multi-line notice when a command appears to read an Office or PDF source directly; otherwise null.
+  const workspaceRoot = root(input.workspaceRoot)
+  const registry = readMaterialRegistry(workspaceRoot)
+  const command = input.command // The first check below needs both a raw-read tool name and an Office or PDF extension in the command.
+  const rawOfficeRead = /\b(textutil|pandoc|strings|unzip)\b/.test(command) && /\.(docx|doc|pptx|ppt|xlsx|xls|pdf)\b/i.test(command)
+  const matched = registry.sources.filter((entry) => entry.requiresExtraction && command.includes(entry.sourcePath)) // Registered extraction sources whose path appears verbatim in the command.
+  if (!rawOfficeRead && matched.length === 0) return null
+  const paths = matched.length > 0 // Without a direct match, every registered extraction source is listed instead.
+    ? matched.map((entry) => entry.sourcePath)
+    : registry.sources.filter((entry) => entry.requiresExtraction).map((entry) => entry.sourcePath)
+  const unique = [...new Set(paths)].slice(0, 5) // Deduplicated and capped at five paths.
+  return [
+    "Revela material intake notice:",
+    unique.length > 0
+      ? `Scanned source(s) require Revela extraction before narrative intake: ${unique.map((path) => `\`${path}\``).join(", ")}.`
+      : "This command appears to read an Office/PDF source directly.",
+    "Use `revela_extract_document_materials` and read the returned `read_view_path` so embedded images and manifests are considered.",
+    "If this is intentionally text-only, mark it as degraded intake in the init report; do not treat it as complete material review.",
+  ].join("\n")
+}
+async function extractAndUpdateRegistry( // Runs extraction for one file and returns the result with a registry copy that reflects it; nothing is written to disk here.
+  input: { workspaceRoot: string; file: string },
+  registry: MaterialRegistry,
+): Promise<{ extraction: DocumentMaterialsResult; registry: MaterialRegistry }> {
+  const sourcePath = normalizePath(input.file)
+  const extraction = await extractDocumentMaterials(sourcePath, input.workspaceRoot)
+  const existing = registry.sources.find((entry) => entry.sourcePath === sourcePath)
+  const now = new Date().toISOString()
+  const type = extraction.type === "other" ? sourceMaterialType(sourcePath) : extraction.type // Falls back to the path-derived type when extraction reports other.
+  const unsupported = EXTRACTION_EXTENSIONS.has(type) && !SUPPORTED_EXTRACTION_EXTENSIONS.has(type) // An extraction type outside the supported set.
+  const status: MaterialIntakeStatus = extraction.status === "processed" // Processed and failed take precedence over the unsupported flag.
+    ? "extracted"
+    : extraction.status === "failed"
+      ? "failed"
+      : unsupported
+        ? "unsupported"
+        : "skipped"
+  return {
+    extraction,
+    registry: upsertRegistryEntry(registry, {
+      ...(existing ?? { // Seed fields for a source that was not registered by a scan.
+        sourcePath,
+        type,
+        requiresExtraction: EXTRACTION_EXTENSIONS.has(type),
+        firstSeen: now,
+      }),
+      sourcePath,
+      type,
+      status,
+      requiresExtraction: EXTRACTION_EXTENSIONS.has(type),
+      allowedReadPath: extraction.read_view_path ?? extraction.text_path ?? null, // The read view is preferred, then the text path, else null.
+      extraction: extraction.status === "processed" // Extraction details are stored only for processed results.
+        ? {
+            manifestPath: extraction.manifest_path,
+            textPath: extraction.text_path,
+            readViewPath: extraction.read_view_path,
+            cacheDir: extraction.cache_dir,
+            imageCount: extraction.images?.length ?? 0,
+            tableCount: extraction.tables?.length ?? 0,
+          }
+        : null,
+      warnings: extractionWarnings(extraction, unsupported),
+      lastChecked: now,
+    }),
+  }
+}
+function scanWorkspaceSources(workspaceRoot: string, scanRoot: string, maxDepth: number): SourceMaterial[] { // Collects source files under scanRoot and returns them sorted by path.
+  const results: SourceMaterial[] = []
+  scanDir(scanRoot, workspaceRoot, results, maxDepth, 0) // The scan root itself is depth 0.
+  return results.sort((a, b) => a.path.localeCompare(b.path))
+}
+function scanDir(dir: string, workspaceRoot: string, results: SourceMaterial[], maxDepth: number, depth: number): void { // Recursively appends matching files under dir to results, stopping once depth exceeds maxDepth.
+  if (depth > maxDepth || !existsSync(dir)) return
+  let entries: string[]
+  try {
+    entries = readdirSync(dir)
+  } catch { // An unreadable directory is skipped silently.
+    return
+  }
+  for (const entry of entries) {
+    if (entry.startsWith(".") || EXCLUDE_DIRS.has(entry)) continue // The name check runs before stat, so it applies to files and directories alike.
+    const fullPath = join(dir, entry)
+    let stat
+    try {
+      stat = statSync(fullPath)
+    } catch { // An entry that cannot be stat-ed is skipped.
+      continue
+    }
+    if (stat.isDirectory()) {
+      scanDir(fullPath, workspaceRoot, results, maxDepth, depth + 1)
+      continue
+    }
+    if (!stat.isFile() || EXCLUDE_FILENAMES.has(entry) || entry.startsWith("~$")) continue // Presumably the ~$ prefix marks Office temporary lock files; confirm.
+    if (!DOC_EXTENSIONS.has(extname(entry).toLowerCase())) continue
+    results.push({ ...sourceMaterialMetadata(fullPath, workspaceRoot), status: "discovered" })
+  }
+}
+function materialEntryFromSource(source: SourceMaterial, now: string): MaterialRegistryEntry { // Builds a fresh scanned registry entry from a discovered source file.
+  const type = (source.type || sourceMaterialType(source.path)).toLowerCase() // Prefers the recorded type, else derives it from the path.
+  return {
+    sourcePath: source.path,
+    type,
+    fingerprint: source.fingerprint,
+    size: source.size,
+    lastModified: source.lastModified,
+    status: "scanned",
+    requiresExtraction: EXTRACTION_EXTENSIONS.has(type),
+    allowedReadPath: EXTRACTION_EXTENSIONS.has(type) ? null : source.path, // Direct-read sources may be read in place; extraction sources start with no readable path.
+    extraction: null,
+    review: null,
+    warnings: [],
+    firstSeen: now,
+    lastChecked: now,
+  }
+}
+function upsertRegistryEntry(registry: MaterialRegistry, entry: MaterialRegistryEntry): MaterialRegistry { // Returns a new registry with entry merged over any existing entry for the same path, sorted by sourcePath.
+  const sourcePath = normalizePath(entry.sourcePath)
+  const existingIndex = registry.sources.findIndex((item) => item.sourcePath === sourcePath)
+  const existing = existingIndex >= 0 ? registry.sources[existingIndex] : undefined
+  const unchangedFingerprint = Boolean(existing?.fingerprint && entry.fingerprint && existing.fingerprint === entry.fingerprint) // True only when both fingerprints are present and equal.
+  const scanRefresh = entry.status === "scanned" && unchangedFingerprint // A rescan of an unchanged file must not reset earlier progress.
+  const next: MaterialRegistryEntry = {
+    ...existing,
+    ...entry, // NOTE(review): warnings are taken from entry even on a scan refresh, so a rescan replaces stored warnings.
+    sourcePath,
+    status: scanRefresh ? existing!.status : entry.status,
+    allowedReadPath: scanRefresh ? existing!.allowedReadPath : entry.allowedReadPath,
+    extraction: scanRefresh ? existing!.extraction : entry.extraction,
+    firstSeen: existing?.firstSeen ?? entry.firstSeen,
+    review: scanRefresh ? existing!.review : entry.review === undefined ? existing?.review : entry.review, // An undefined review keeps the stored one; an explicit null clears it.
+  }
+  const sources = [...registry.sources] // Copy so the input registry array is not mutated.
+  if (existingIndex >= 0) sources[existingIndex] = next
+  else sources.push(next)
+  return { version: 1, updatedAt: new Date().toISOString(), sources: sources.sort((a, b) => a.sourcePath.localeCompare(b.sourcePath)) }
+}
+function ingestTask(entry: MaterialRegistryEntry): MaterialIngestTask { // Maps a registry entry to the task shape returned by prepareLocalMaterials.
+  return {
+    path: entry.sourcePath,
+    materialType: entry.type,
+    needsExtraction: entry.requiresExtraction,
+    suggestedAction: entry.requiresExtraction ? "extract_then_read" : "read_directly",
+    status: entry.status,
+    allowedReadPath: entry.allowedReadPath ?? null, // Undefined is normalized to null.
+    note: entry.requiresExtraction
+      ? "Read the extracted read_view_path after Revela extraction; do not read the original Office/PDF source for narrative intake."
+      : "Read directly when relevant and record narrative meaning only after source content is actually inspected.",
+  }
+}
+function extractionWarnings(result: DocumentMaterialsResult, unsupported: boolean): string[] { // Returns at most one warning describing why extraction produced no usable output.
+  if (unsupported) return ["This source type is discovered but not supported for Revela extraction in v1."] // Checked first, so it wins over failed and skipped.
+  if (result.status === "failed") return [`Extraction failed: ${result.reason ?? "unknown error"}`]
+  if (result.status === "skipped") return [`Extraction skipped: ${result.reason ?? "unsupported file type"}`]
+  return []
+}
+function intakeWarnings(sources: MaterialRegistryEntry[]): string[] { // Builds one warning per extraction-required source whose status shows incomplete intake.
+  const warnings: string[] = []
+  for (const source of sources) {
+    if (!source.requiresExtraction) continue // Direct-read sources never produce warnings.
+    if (source.status === "scanned") warnings.push(`${source.sourcePath} was scanned but not extracted through Revela material extraction.`)
+    else if (source.status === "extracted") warnings.push(`${source.sourcePath} was extracted but has no recorded material review.`)
+    else if (source.status === "text_only_read") warnings.push(`${source.sourcePath} was read as text-only; embedded images or structure may not have been considered.`)
+    else if (source.status === "unsupported") warnings.push(`${source.sourcePath} is not supported for extraction; convert it to a supported format such as .docx/.pptx/.xlsx when needed.`)
+    else if (source.status === "failed") warnings.push(`${source.sourcePath} extraction failed and should not be treated as complete intake.`)
+  } // The reviewed and skipped statuses add no warning.
+  return warnings
+}
+function recommendedAction(source: MaterialRegistryEntry, strictness: "authoring" | "readiness" | "render"): string | undefined { // Returns the suggested next step for a source, or undefined when none applies.
+  if (!source.requiresExtraction) return undefined
+  if (source.status === "scanned") return "Call `revela_extract_document_materials`, then read the returned `read_view_path`."
+  if (source.status === "extracted") return strictness === "authoring" // Only this status varies with strictness; readiness and render share one message.
+    ? "Read `allowedReadPath`, then call `revela_record_material_review`."
+    : "Record material review before treating this source as considered for narrative readiness."
+  if (source.status === "text_only_read") return "Use Revela extraction and review before treating this source as complete intake."
+  if (source.status === "unsupported") return "Convert to a supported format or keep the source as an explicit intake gap."
+  if (source.status === "failed") return "Fix extraction failure or record the source as an intake gap."
+  return undefined // The reviewed and skipped statuses have no recommendation.
+}
+function writeReviewMarkdown( // Writes the review Markdown file for a source under researches/local-materials and returns its workspace-relative path.
+  workspaceRoot: string,
+  input: {
+    sourcePath: string
+    fingerprint?: string
+    extraction: MaterialRegistryEntry["extraction"]
+    reviewedPaths: string[]
+    reviewSummary: string
+    narrativeDecisions: RecordMaterialReviewInput["narrativeDecisions"]
+  },
+): string {
+  const dir = join(workspaceRoot, "researches", "local-materials")
+  mkdirSync(dir, { recursive: true })
+  const fileName = `${slugify(input.sourcePath)}-review.md` // NOTE(review): the slug uses only the basename without extension, so same-named sources in different folders overwrite one review file.
+  const path = join(dir, fileName)
+  const lines = [ // Front matter values are quoted with JSON.stringify; optional fields become undefined and are filtered out below.
+    "---",
+    "type: local-material-review",
+    `sourcePath: ${JSON.stringify(input.sourcePath)}`,
+    input.fingerprint ? `fingerprint: ${JSON.stringify(input.fingerprint)}` : undefined,
+    input.extraction?.manifestPath ? `extractionManifestPath: ${JSON.stringify(input.extraction.manifestPath)}` : undefined,
+    input.extraction?.textPath ? `extractionTextPath: ${JSON.stringify(input.extraction.textPath)}` : undefined,
+    input.extraction?.readViewPath ? `readViewPath: ${JSON.stringify(input.extraction.readViewPath)}` : undefined,
+    `reviewedAt: ${JSON.stringify(new Date().toISOString())}`,
+    "status: reviewed",
+    "---",
+    "",
+    "# Local Material Review",
+    "",
+    "## Review Summary",
+    "",
+    input.reviewSummary.trim(),
+    "",
+    "## Reviewed Paths",
+    "",
+    ...input.reviewedPaths.map((path) => `- ${path}`), // An empty list leaves the section without bullets.
+    "",
+    "## Narrative Decisions",
+    "",
+    ...input.narrativeDecisions.map((decision) => `- ${decision.kind}${decision.target ? `: ${decision.target}` : ""} - ${decision.rationale}`),
+    "",
+    "## Extracted Images",
+    "",
+    input.extraction?.imageCount ? `- ${input.extraction.imageCount} extracted image(s); do not treat as interpreted evidence without explicit image review.` : "- None recorded.", // A zero or missing count falls back to the None line.
+    "",
+  ].filter((line): line is string => line !== undefined) // Only undefined is dropped; empty strings stay as blank lines.
+  writeFileSync(path, lines.join("\n"), "utf-8")
+  return workspaceRelative(path, workspaceRoot)
+}
+function scanRootFor(workspaceRoot: string, path?: string): string { // Resolves the scan root and throws an Error when path is absolute or resolves outside the workspace.
+  if (!path) return workspaceRoot // No path means scan the whole workspace.
+  if (isAbsolute(path)) throw new Error("path must be relative to workspace root")
+  const candidate = resolve(workspaceRoot, path)
+  const resolvedWorkspace = resolve(workspaceRoot)
+  if (candidate !== resolvedWorkspace && !candidate.startsWith(resolvedWorkspace + sep)) throw new Error("path must be within workspace") // The appended separator stops a sibling directory with the same name prefix from passing.
+  return candidate
+}
+function root(workspaceRoot?: string): string { // Resolves the workspace root to an absolute path, using process.cwd() when the argument is missing or empty.
+  return resolve(workspaceRoot || process.cwd())
+}
+function workspaceRelative(path: string, workspaceRoot: string): string { // Returns path relative to the workspace root, with backslashes turned into forward slashes.
+  return relative(resolve(workspaceRoot), resolve(path)).replace(/\\/g, "/")
+}
+function normalizePath(path: string): string { // Replaces every backslash with a forward slash; the path is not resolved or made relative.
+  return path.replace(/\\/g, "/")
+}
+function slugify(value: string): string { // Builds a lowercase hyphen-separated slug from the basename of value; returns material when nothing usable remains.
+  const base = basename(value).replace(/\.[^.]+$/, "") // Directory parts and the last extension are dropped.
+  return base.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "") || "material" // Runs outside a-z and 0-9 become one hyphen; edge hyphens are trimmed.
+}

package/lib/runtime/index.ts CHANGED Viewed

@@ -25,10 +25,12 @@ import { formatArtifactQaUserNotice, formatMarkdownQaUserNotice } from "../hook-
 import { readDeckPlanArtifact } from "../narrative-state/deck-plan-artifact"
 import { extractDesignClasses } from "../design/designs"
 import { recordRenderedArtifact, workspaceRelative } from "../workspace-state/rendered-artifacts"
+import { checkMaterialIntake, extractMaterial, materialIntakeNoticeForCommand, prepareLocalMaterials, recordMaterialReview } from "../material-intake"
 import type { ReviewDeckOpenInput, ReviewDeckReadInput } from "./review"
 import pkg from "../../package.json"
 export { bindResearchFindings, evaluateResearchFindings, researchSave, researchTargets } from "./research"
 export { storyRead } from "./story"
+export { checkMaterialIntake, extractMaterial, materialIntakeNoticeForCommand, prepareLocalMaterials, recordMaterialReview }
 export interface RuntimeWorkspaceInput {
   workspaceRoot?: string

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@cyber-dash-tech/revela",
-  "version": "0.17.21",
+  "version": "0.17.22",
   "description": "OpenCode plugin for trusted narrative artifacts from local sources, research, and evidence",
   "type": "module",
   "main": "./index.ts",

package/plugins/revela/.mcp.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "mcpServers": {
     "revela": {
       "command": "npx",
-      "args": ["-y", "@cyber-dash-tech/revela@0.17.21", "mcp"]
+      "args": ["-y", "@cyber-dash-tech/revela@0.17.22", "mcp"]
     }
   }
 }

package/plugins/revela/hooks/hooks.json CHANGED Viewed

@@ -1,6 +1,16 @@
 {
   "hooks": {
     "PreToolUse": [
+      {
+        "matcher": "exec_command",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bun ${PLUGIN_ROOT}/hooks/revela_material_notice.ts",
+            "statusMessage": "Checking Revela material intake"
+          }
+        ]
+      },
       {
         "matcher": "apply_patch",
         "hooks": [

package/plugins/revela/hooks/revela_material_notice.ts ADDED Viewed

@@ -0,0 +1,58 @@
+import { dirname, resolve } from "path"
+import { fileURLToPath, pathToFileURL } from "url"
+import { resolveRevelaRuntime } from "../mcp/runtime-resolver"
+import { workspaceRootFromInput } from "./revela_post_write_notice"
+/** Outcome of the material-read notice check returned by `runMaterialReadNotice`. */
+export interface MaterialNoticeResult {
+  /** Literal `true`; the result type has no failure variant. */
+  ok: true
+  /** Notice texts to surface; empty when there is nothing to report. */
+  messages: string[]
+}
+/**
+ * Produces the material-intake notice (if any) for the command described by `input`.
+ *
+ * Returns an empty message list when no command can be extracted from `input`
+ * or when the Revela runtime cannot be resolved. Otherwise the runtime module is
+ * loaded dynamically and its optional `materialIntakeNoticeForCommand` export is
+ * asked for a notice.
+ *
+ * @param input Raw hook payload (JSON or plain command text).
+ */
+export async function runMaterialReadNotice(input: string): Promise<MaterialNoticeResult> {
+  const nothingToReport = (): MaterialNoticeResult => ({ ok: true, messages: [] })
+  const command = commandFromInput(input)
+  if (!command) return nothingToReport()
+  // PLUGIN_ROOT wins when set; otherwise use the directory two levels above this file.
+  const configuredRoot = process.env.PLUGIN_ROOT
+  const pluginRoot = resolve(configuredRoot ? configuredRoot : dirname(dirname(fileURLToPath(import.meta.url))))
+  const runtime = resolveRevelaRuntime({ pluginRoot })
+  if (!(runtime.ok && runtime.runtimePath)) return nothingToReport()
+  const workspaceRoot = workspaceRootFromInput(input)
+  const runtimeUrl = pathToFileURL(runtime.runtimePath).href
+  const runtimeModule = await import(runtimeUrl)
+  // Optional call: runtimes without this export simply yield no notice.
+  const notice = runtimeModule.materialIntakeNoticeForCommand?.({ workspaceRoot, command })
+  const messages = notice ? [notice] : []
+  return { ok: true, messages }
+}
+/**
+ * Extracts the shell command from a hook payload.
+ *
+ * For a JSON object payload, the first non-blank string among `cmd` / `command`
+ * on the payload itself, then on `args`, `tool_input` and `toolInput`, is returned
+ * as-is (not trimmed). Input that is not valid JSON is treated as the command
+ * text itself and returned trimmed.
+ *
+ * @param input Raw hook payload.
+ * @returns The command, or `null` when none is present: blank input, a JSON
+ *   value that is not an object (including `null`), or an object with no
+ *   non-blank command field.
+ */
+export function commandFromInput(input: string): string | null {
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(input)
+  } catch {
+    // Not JSON: the payload is the command text itself.
+    return input.trim() || null
+  }
+  // Only the parse is guarded above. Property reads on JSON `null` used to throw
+  // inside the same try and wrongly fall back to returning the text "null".
+  if (typeof parsed !== "object" || parsed === null) return null
+  const payload = parsed as Record<string, unknown>
+  // Lookup order matters: top-level fields first, then each known wrapper key.
+  for (const holder of [payload, payload.args, payload.tool_input, payload.toolInput]) {
+    if (typeof holder !== "object" || holder === null) continue
+    const fields = holder as Record<string, unknown>
+    for (const candidate of [fields.cmd, fields.command]) {
+      if (typeof candidate === "string" && candidate.trim()) return candidate
+    }
+  }
+  return null
+}
+// Hook entry point: read the payload from stdin, print any notices to stderr.
+// Both the success and the failure path exit with status 0 (presumably so this
+// hook never blocks the command it inspects; confirm against the hook runner).
+if (import.meta.main) {
+  const stdinText = await new Response(Bun.stdin.stream()).text()
+  try {
+    const { messages } = await runMaterialReadNotice(stdinText)
+    if (messages.length > 0) console.error(messages.join("\n\n---\n\n"))
+    process.exit(0)
+  } catch (error) {
+    console.error("Revela material intake notice failed to run.")
+    const reason = error instanceof Error ? error.message : String(error)
+    console.error(reason)
+    process.exit(0)
+  }
+}

package/plugins/revela/mcp/revela-server.ts CHANGED Viewed

@@ -40,6 +40,10 @@ type RuntimeModule = {
   researchSave(input: any): any
   evaluateResearchFindings(input: any): any
   bindResearchFindings(input: any): any
+  prepareLocalMaterials(input: any): Promise<any>
+  extractMaterial(input: any): Promise<any>
+  recordMaterialReview(input: any): any
+  checkMaterialIntake(input: any): any
 }
 type MessageMode = "framed" | "raw"
@@ -287,6 +291,43 @@ const tools = [
       evidenceId: stringProp("Optional canonical evidence node id override."),
     }, ["findingsFile"]),
   },
+  {
+    name: "revela_prepare_local_materials",
+    description: "Scan local workspace source materials, create/update the material-intake registry, and optionally extract Office/PDF files into read views.",
+    inputSchema: objectSchema({
+      workspaceRoot: stringProp("Optional workspace root."),
+      path: stringProp("Optional workspace-relative subdirectory to scan."),
+      maxDepth: numberProp("Maximum scan depth. Defaults to 2."),
+      autoExtract: booleanProp("Whether to extract Office/PDF sources during prepare. Defaults to true."),
+    }),
+  },
+  {
+    name: "revela_extract_document_materials",
+    description: "Extract text, manifest, read view, and embedded images from a workspace document. Supports pdf, pptx, docx, and xlsx.",
+    inputSchema: objectSchema({
+      workspaceRoot: stringProp("Optional workspace root."),
+      file: requiredStringProp("Workspace-relative source file path."),
+    }, ["file"]),
+  },
+  {
+    name: "revela_record_material_review",
+    description: "Record that an LLM has read extracted local material and decided what was merged, deferred, ignored, or left as a gap.",
+    inputSchema: objectSchema({
+      workspaceRoot: stringProp("Optional workspace root."),
+      sourcePath: requiredStringProp("Workspace-relative source file path."),
+      reviewedPaths: arrayProp("Workspace-relative extracted paths actually reviewed."),
+      reviewSummary: requiredStringProp("Concise summary of the reviewed material."),
+      narrativeDecisions: arrayObjectProp("Narrative decisions with kind, optional target, and rationale."),
+    }, ["sourcePath", "reviewedPaths", "reviewSummary", "narrativeDecisions"]),
+  },
+  {
+    name: "revela_check_material_intake",
+    description: "Check whether scanned Office/PDF sources were extracted and reviewed before being treated as narrative intake.",
+    inputSchema: objectSchema({
+      workspaceRoot: stringProp("Optional workspace root."),
+      strictness: enumProp(["authoring", "readiness", "render"], "Check strictness."),
+    }),
+  },
 ]
 let runtimePromise: Promise<RuntimeModule> | undefined
@@ -374,6 +415,10 @@ async function callTool(name: string, args: any): Promise<any> {
   if (name === "revela_research_save") return r.researchSave(args)
   if (name === "revela_evaluate_research_findings") return r.evaluateResearchFindings(args)
   if (name === "revela_bind_research_findings") return r.bindResearchFindings(args)
+  if (name === "revela_prepare_local_materials") return r.prepareLocalMaterials(args)
+  if (name === "revela_extract_document_materials") return r.extractMaterial(args)
+  if (name === "revela_record_material_review") return r.recordMaterialReview(args)
+  if (name === "revela_check_material_intake") return r.checkMaterialIntake(args)
   throw new Error(`Unknown tool: ${name}`)
 }
@@ -401,6 +446,10 @@ function booleanProp(description: string) {
   return { type: "boolean", description }
 }
+/** JSON-schema fragment for a numeric tool parameter with the given description. */
+function numberProp(description: string) {
+  const schema = { type: "number", description }
+  return schema
+}
 /** JSON-schema fragment for a string parameter restricted to `values`. */
 function enumProp(values: string[], description: string) {
   const schema = { type: "string", enum: values, description }
   return schema
 }
@@ -409,6 +458,23 @@ function arrayProp(description: string) {
   return { type: "array", items: { type: "string" }, description }
 }
+/**
+ * JSON-schema fragment for an array of decision objects.
+ *
+ * Each item must carry `kind` (one of merged / gap / ignored / deferred) and
+ * `rationale`; `target` is optional and no other properties are allowed.
+ */
+function arrayObjectProp(description: string) {
+  const decisionKinds = ["merged", "gap", "ignored", "deferred"]
+  const itemSchema = {
+    type: "object",
+    properties: {
+      kind: { type: "string", enum: decisionKinds },
+      target: { type: "string" },
+      rationale: { type: "string" },
+    },
+    required: ["kind", "rationale"],
+    additionalProperties: false,
+  }
+  return { type: "array", description, items: itemSchema }
+}
 function writeMessage(message: any, mode: MessageMode = activeResponseMode): void {
   activeResponseMode = mode
   const body = JSON.stringify(message)

package/plugins/revela/skills/revela-init/SKILL.md CHANGED Viewed

@@ -18,14 +18,24 @@ Use this skill when the user asks to start Revela, initialize the workspace, ing
 ## Workflow
-1. Inspect the workspace with normal Codex file tools. Stay inside the current workspace root.
-2. Prefer local source materials first: Markdown, text, CSV, PDFs, Office files, existing `researches/`, existing `revela-narrative/`, `deck-plan/`, and `decks/`.
-3. Call `revela_domain_list` and `revela_domain_read` for active domain guidance before authoring narrative meaning. Treat domain guidance as framing guidance, never as evidence.
-4. If `revela-narrative/` exists, call `revela_markdown_qa` and `revela_compile_narrative`.
-5. If the narrative vault is missing, create the initial `revela-narrative/` Markdown nodes directly with valid frontmatter and plain wikilink relations.
-6. Evidence nodes must preserve source, quote/snippet, support scope, unsupported scope, caveat, and strength before being treated as support.
-7. After writing narrative Markdown, call `revela_markdown_qa` and `revela_compile_narrative` again.
-8. End with a concise init report: local materials found, active domain, narrative graph status, open gaps, Markdown QA status, and next command/action.
+1. Call `revela_prepare_local_materials` first. Treat scan results as an intake registry and task list, not as source content.
+2. For any registry entry with `requiresExtraction: true`, do not read the original Office/PDF file directly for narrative intake. Use the returned `allowedReadPath` / `read_view_path`; if missing, call `revela_extract_document_materials` first.
+3. Prefer local source materials first: Markdown, text, CSV, PDFs, Office files, existing `researches/`, existing `revela-narrative/`, `deck-plan/`, and `decks/`.
+4. After reading extracted material views, call `revela_record_material_review` for each considered Office/PDF source. Record what was merged, deferred, ignored, or left as a gap.
+5. Call `revela_domain_list` and `revela_domain_read` for active domain guidance before authoring narrative meaning. Treat domain guidance as framing guidance, never as evidence.
+6. If `revela-narrative/` exists, call `revela_markdown_qa` and `revela_compile_narrative`.
+7. If the narrative vault is missing, create the initial `revela-narrative/` Markdown nodes directly with valid frontmatter and plain wikilink relations.
+8. Evidence nodes must preserve source, quote/snippet, support scope, unsupported scope, caveat, and strength before being treated as support.
+9. After writing narrative Markdown, call `revela_markdown_qa` and `revela_compile_narrative` again.
+10. Before the final report, call `revela_check_material_intake` and surface any warnings about scanned-but-unextracted, extracted-but-unreviewed, unsupported, failed, or text-only sources.
+11. End with a concise init report: local materials found, active domain, narrative graph status, material intake status, open gaps, Markdown QA status, and next command/action.
+## Material Intake Rules
+- Scan results only prove that files exist; they do not prove file content.
+- For `.docx`, `.pptx`, `.xlsx`, and `.pdf`, read the extracted `read_view_path` instead of using Codex/textutil/raw reads of the original file.
+- Extracted images are candidate materials only. Do not interpret them as evidence unless image meaning is explicitly reviewed or supplied by the user.
+- If a user explicitly asks for text-only inspection, report it as degraded intake and do not treat it as complete source review.
 ## Markdown Rules