@cyber-dash-tech/revela 0.17.21 → 0.17.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -58,6 +58,7 @@ export type DocumentMaterialsResult = {
58
58
  cache_dir?: string
59
59
  manifest_path?: string
60
60
  text_path?: string
61
+ read_view_path?: string
61
62
  images?: DocumentMaterial[]
62
63
  skipped_assets?: SkippedAsset[]
63
64
  slides?: PptxSlide[]
@@ -74,6 +75,7 @@ type CachedManifest = {
74
75
  cache_dir: string
75
76
  manifest_path: string
76
77
  text_path: string
78
+ read_view_path?: string
77
79
  images: DocumentMaterial[]
78
80
  skipped_assets: SkippedAsset[]
79
81
  slides: PptxSlide[]
@@ -157,6 +159,145 @@ function materialPath(cacheDir: string, workspaceDir: string, ...segments: strin
157
159
  return workspaceRelative(join(cacheDir, ...segments), workspaceDir)
158
160
  }
159
161
 
162
/**
 * Renders the markdown "read view" for one extracted source document.
 *
 * The document always contains the Source, Text, Extracted Images and
 * Intake Rules sections; the Skipped Or Unmapped Assets, Extracted Tables
 * and Slide Structure sections only appear when they have content.
 *
 * @param input - Paths, extracted text and extraction artifacts to describe.
 * @returns The markdown document, with lines joined by "\n".
 */
function buildReadView(input: {
  source: string
  type: SupportedType
  fingerprint: string
  text: string
  manifestPath: string
  textPath: string
  images: DocumentMaterial[]
  skippedAssets: SkippedAsset[]
  tables: DocumentMaterial[]
  slides: PptxSlide[] | undefined
}): string {
  // Formats one bullet: the head followed by the non-empty details in parentheses.
  const bullet = (head: string, details: Array<string | null>): string => {
    const joined = details.filter(Boolean).join("; ")
    return joined ? `- ${head} (${joined})` : `- ${head}`
  }

  const header = [
    `# Extracted Material: ${basename(input.source)}`,
    "",
    "## Source",
    "",
    `- sourcePath: ${input.source}`,
    `- type: ${input.type}`,
    `- fingerprint: ${input.fingerprint}`,
    `- manifestPath: ${input.manifestPath}`,
    `- textPath: ${input.textPath}`,
    "",
    "## Text",
    "",
    input.text.trim() || "No text extracted.",
    "",
    "## Extracted Images",
    "",
  ]

  const imageLines = input.images.length === 0
    ? ["- None"]
    : input.images.map((image) => bullet(image.path, [
        image.page_or_slide ? `page_or_slide: ${image.page_or_slide}` : null,
        `source_ref: ${image.source_ref}`,
        image.note ? `note: ${image.note}` : null,
      ]))

  const skippedLines = input.skippedAssets.length > 0
    ? [
        "",
        "## Skipped Or Unmapped Assets",
        "",
        ...input.skippedAssets.map((asset) => bullet(asset.source_ref, [
          asset.page_or_slide ? `page_or_slide: ${asset.page_or_slide}` : null,
          `reason: ${asset.reason}`,
          asset.kind ? `kind: ${asset.kind}` : null,
        ])),
      ]
    : []

  const tableLines = input.tables.length > 0
    ? [
        "",
        "## Extracted Tables",
        "",
        ...input.tables.map((table) => `- ${table.path} (${table.note ?? table.source_ref})`),
      ]
    : []

  const slideLines: string[] = []
  if (input.slides?.length) {
    slideLines.push("", "## Slide Structure", "")
    for (const slide of input.slides) {
      // Tally the elements of this slide by kind in a single pass.
      let textCount = 0
      let imageCount = 0
      let shapeCount = 0
      for (const element of slide.elements) {
        if (element.kind === "text") textCount += 1
        else if (element.kind === "image") imageCount += 1
        else if (element.kind === "shape") shapeCount += 1
      }
      slideLines.push(`- ${slide.slide}: ${textCount} text, ${imageCount} image, ${shapeCount} shape`)
    }
  }

  const rules = [
    "",
    "## Intake Rules",
    "",
    "- Treat this extracted material as source context until a material review records what was considered.",
    "- Do not treat extracted images as interpreted evidence unless an explicit image review or user-provided meaning exists.",
    "- Canonical evidence still requires source trace, quote/snippet, support scope, unsupported scope, caveat, strength, and relations in `revela-narrative/`.",
  ]

  return [...header, ...imageLines, ...skippedLines, ...tableLines, ...slideLines, ...rules].join("\n")
}
243
+
244
/**
 * Builds the read view for an extraction and writes it to `read.md`
 * inside the cache directory.
 *
 * @param input - Cache/workspace locations plus everything `buildReadView` needs.
 * @returns The path of the written `read.md`, relative to `input.workspaceDir`.
 */
function writeReadView(input: {
  cacheDir: string
  workspaceDir: string
  source: string
  type: SupportedType
  fingerprint: string
  text: string
  manifestPath: string
  textPath: string
  images: DocumentMaterial[]
  skippedAssets: SkippedAsset[]
  tables: DocumentMaterial[]
  slides?: PptxSlide[]
}): string {
  const { cacheDir, workspaceDir, source, type, fingerprint, text, manifestPath, textPath, images, skippedAssets, tables, slides } = input
  const target = join(cacheDir, "read.md")
  const markdown = buildReadView({
    source,
    type,
    fingerprint,
    text,
    manifestPath,
    textPath,
    images,
    skippedAssets,
    tables,
    slides,
  })
  writeFileSync(target, markdown, "utf-8")
  return workspaceRelative(target, workspaceDir)
}
273
+
274
/**
 * Returns the read view path for a cached extraction, regenerating the
 * file when the manifest does not record one or the recorded file is gone.
 *
 * When regenerating, the cached text file is re-read and its leading
 * "[Extracted from: ...]" header is stripped; a missing text file yields
 * an empty text section.
 *
 * @param manifest - Manifest loaded from the extraction cache.
 * @param cacheDir - Cache directory in which `read.md` is written.
 * @param workspaceDir - Workspace root that manifest paths are relative to.
 * @returns Workspace-relative path of the read view.
 */
function ensureCachedReadView(
  manifest: CachedManifest,
  cacheDir: string,
  workspaceDir: string,
): string {
  const recorded = manifest.read_view_path
  if (recorded) {
    if (existsSync(join(workspaceDir, recorded))) return recorded
  }

  const cachedTextFile = join(workspaceDir, manifest.text_path)
  let text = ""
  if (existsSync(cachedTextFile)) {
    const raw = readFileSync(cachedTextFile, "utf-8")
    text = raw.replace(/^\[Extracted from: .*?\]\n\n/, "")
  }

  return writeReadView({
    cacheDir,
    workspaceDir,
    source: manifest.source,
    type: manifest.type,
    fingerprint: manifest.fingerprint,
    text,
    manifestPath: manifest.manifest_path,
    textPath: manifest.text_path,
    images: manifest.images,
    skippedAssets: manifest.skipped_assets,
    tables: manifest.tables,
    slides: manifest.slides,
  })
}
300
+
160
301
  function updateDecksSourceMaterialIndex(
161
302
  workspaceDir: string,
162
303
  filePath: string,
@@ -716,6 +857,7 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
716
857
 
717
858
  if (existsSync(manifestPath)) {
718
859
  const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")) as CachedManifest
860
+ const readViewPath = ensureCachedReadView(manifest, cacheDir, workspaceDir)
719
861
  return {
720
862
  status: "processed",
721
863
  cache_status: "hit",
@@ -724,6 +866,7 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
724
866
  cache_dir: manifest.cache_dir,
725
867
  manifest_path: manifest.manifest_path,
726
868
  text_path: manifest.text_path,
869
+ read_view_path: readViewPath,
727
870
  images: manifest.images,
728
871
  skipped_assets: manifest.skipped_assets,
729
872
  slides: manifest.slides,
@@ -740,6 +883,22 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
740
883
  writeFileSync(textPath, `[Extracted from: ${basename(filePath)}]\n\n${text}`, "utf-8")
741
884
 
742
885
  const images = await extractPdfImages(buf, cacheDir, workspaceDir)
886
+ const relativeManifestPath = workspaceRelative(manifestPath, workspaceDir)
887
+ const relativeTextPath = workspaceRelative(textPath, workspaceDir)
888
+ const readViewPath = writeReadView({
889
+ cacheDir,
890
+ workspaceDir,
891
+ source: relativeSource,
892
+ type: "pdf",
893
+ fingerprint,
894
+ text,
895
+ manifestPath: relativeManifestPath,
896
+ textPath: relativeTextPath,
897
+ images,
898
+ skippedAssets: [],
899
+ tables: [],
900
+ slides: [],
901
+ })
743
902
 
744
903
  const result: DocumentMaterialsResult = {
745
904
  status: "processed",
@@ -747,8 +906,9 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
747
906
  source: relativeSource,
748
907
  type: "pdf",
749
908
  cache_dir: workspaceRelative(cacheDir, workspaceDir),
750
- manifest_path: workspaceRelative(manifestPath, workspaceDir),
751
- text_path: workspaceRelative(textPath, workspaceDir),
909
+ manifest_path: relativeManifestPath,
910
+ text_path: relativeTextPath,
911
+ read_view_path: readViewPath,
752
912
  images,
753
913
  skipped_assets: [],
754
914
  slides: [],
@@ -762,6 +922,7 @@ async function processPdfFile(filePath: string, workspaceDir: string): Promise<D
762
922
  cache_dir: result.cache_dir!,
763
923
  manifest_path: result.manifest_path!,
764
924
  text_path: result.text_path!,
925
+ read_view_path: result.read_view_path,
765
926
  images: result.images ?? [],
766
927
  skipped_assets: [],
767
928
  slides: [],
@@ -780,6 +941,7 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
780
941
 
781
942
  if (existsSync(manifestPath)) {
782
943
  const manifest = JSON.parse(readFileSync(manifestPath, "utf-8")) as CachedManifest
944
+ const readViewPath = ensureCachedReadView(manifest, cacheDir, workspaceDir)
783
945
  return {
784
946
  status: "processed",
785
947
  cache_status: "hit",
@@ -788,6 +950,7 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
788
950
  cache_dir: manifest.cache_dir,
789
951
  manifest_path: manifest.manifest_path,
790
952
  text_path: manifest.text_path,
953
+ read_view_path: readViewPath,
791
954
  images: manifest.images,
792
955
  skipped_assets: manifest.skipped_assets,
793
956
  slides: manifest.slides,
@@ -821,6 +984,24 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
821
984
  const slides = type === "pptx"
822
985
  ? extractPptxSlides(files, images, pptxAssets!.skipped_assets)
823
986
  : undefined
987
+ const relativeManifestPath = workspaceRelative(manifestPath, workspaceDir)
988
+ const relativeTextPath = workspaceRelative(textPath, workspaceDir)
989
+ const tables = extractTables(type, relativeTextPath)
990
+ const skippedAssets = pptxAssets?.skipped_assets ?? []
991
+ const readViewPath = writeReadView({
992
+ cacheDir,
993
+ workspaceDir,
994
+ source: relativeSource,
995
+ type,
996
+ fingerprint,
997
+ text,
998
+ manifestPath: relativeManifestPath,
999
+ textPath: relativeTextPath,
1000
+ images,
1001
+ skippedAssets,
1002
+ tables,
1003
+ slides,
1004
+ })
824
1005
 
825
1006
  const result: DocumentMaterialsResult = {
826
1007
  status: "processed",
@@ -828,12 +1009,13 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
828
1009
  source: relativeSource,
829
1010
  type,
830
1011
  cache_dir: workspaceRelative(cacheDir, workspaceDir),
831
- manifest_path: workspaceRelative(manifestPath, workspaceDir),
832
- text_path: workspaceRelative(textPath, workspaceDir),
1012
+ manifest_path: relativeManifestPath,
1013
+ text_path: relativeTextPath,
1014
+ read_view_path: readViewPath,
833
1015
  images,
834
- skipped_assets: pptxAssets?.skipped_assets ?? [],
1016
+ skipped_assets: skippedAssets,
835
1017
  slides,
836
- tables: extractTables(type, workspaceRelative(textPath, workspaceDir)),
1018
+ tables,
837
1019
  }
838
1020
 
839
1021
  const manifest: CachedManifest = {
@@ -843,6 +1025,7 @@ async function processOfficeFile(filePath: string, workspaceDir: string, type: S
843
1025
  cache_dir: result.cache_dir!,
844
1026
  manifest_path: result.manifest_path!,
845
1027
  text_path: result.text_path!,
1028
+ read_view_path: result.read_view_path,
846
1029
  images: result.images ?? [],
847
1030
  skipped_assets: result.skipped_assets ?? [],
848
1031
  slides: result.slides ?? [],
@@ -0,0 +1,494 @@
1
+ import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from "fs"
2
+ import { basename, extname, isAbsolute, join, relative, resolve, sep } from "path"
3
+ import { extractDocumentMaterials, type DocumentMaterialsResult } from "./document-materials/extract"
4
+ import { sourceMaterialMetadata, sourceMaterialType } from "./source-materials"
5
+ import type { SourceMaterial } from "./decks-state"
6
+
7
/**
 * Intake state of a registry entry.
 *
 * Within this module "scanned" is assigned on discovery, "extracted",
 * "failed", "unsupported" and "skipped" after an extraction attempt, and
 * "reviewed" once a material review is recorded. "text_only_read" is only
 * reported on here, never assigned.
 */
export type MaterialIntakeStatus =
  | "scanned"
  | "extracted"
  | "reviewed"
  | "text_only_read"
  | "skipped"
  | "unsupported"
  | "failed"
15
+
16
/** One tracked source document in the material intake registry. */
export interface MaterialRegistryEntry {
  /** Source path with forward slashes; used as the registry key. */
  sourcePath: string
  /** Material type, e.g. "pdf" or "docx". */
  type: string
  /** Content fingerprint; an unchanged value lets a rescan keep prior state. */
  fingerprint?: string
  size?: number
  lastModified?: string
  status: MaterialIntakeStatus
  /** True when the type is one that must go through extraction before reading. */
  requiresExtraction: boolean
  /** Path that may be read for intake; null until an extraction provides one. */
  allowedReadPath?: string | null
  /** Extraction outputs; null when no successful extraction is recorded. */
  extraction?: {
    manifestPath?: string
    textPath?: string
    readViewPath?: string
    cacheDir?: string
    imageCount: number
    tableCount: number
  } | null
  /** Recorded material review; null when none exists. */
  review?: {
    reviewPath: string
    reviewedAt: string
    reviewedPaths: string[]
    summary: string
  } | null
  warnings?: string[]
  /** ISO timestamp of the first time the source was recorded. */
  firstSeen: string
  /** ISO timestamp of the most recent scan, extraction or review. */
  lastChecked: string
}
43
+
44
/** On-disk shape of the material intake registry file. */
export interface MaterialRegistry {
  /** Schema version; only version 1 exists. */
  version: 1
  /** ISO timestamp refreshed each time the registry is written. */
  updatedAt: string
  /** Tracked sources. */
  sources: MaterialRegistryEntry[]
}
49
+
50
/** Suggested next step for one registry entry, as returned to the caller. */
export interface MaterialIngestTask {
  /** Source path of the entry. */
  path: string
  materialType: string
  needsExtraction: boolean
  /** "extract_then_read" when extraction is required, else "read_directly". */
  suggestedAction: "read_directly" | "extract_then_read"
  status: MaterialIntakeStatus
  allowedReadPath?: string | null
  /** Human-readable guidance matching `suggestedAction`. */
  note: string
}
59
+
60
/** Options for `prepareLocalMaterials`. */
export interface PrepareLocalMaterialsInput {
  /** Workspace root; the current working directory is used when omitted. */
  workspaceRoot?: string
  /** Sub-path to scan, relative to and inside the workspace root. */
  path?: string
  /** Maximum directory depth to descend; defaults to 2. */
  maxDepth?: number
  /** Whether extraction runs for extractable sources; defaults to true. */
  autoExtract?: boolean
}
66
+
67
/** Outcome of a `prepareLocalMaterials` run. */
export interface PrepareLocalMaterialsResult {
  ok: true
  /** Resolved workspace root used for the scan. */
  workspaceRoot: string
  /** Workspace-relative path of the written registry file. */
  registryPath: string
  /** Number of source files found by the scan. */
  found: number
  files: SourceMaterial[]
  /** One task per registry entry, not only per scanned file. */
  suggestedTasks: MaterialIngestTask[]
  /** Extraction results produced during this run. */
  extractions: DocumentMaterialsResult[]
  warnings: string[]
}
77
+
78
/** Input for `recordMaterialReview`. */
export interface RecordMaterialReviewInput {
  /** Workspace root; the current working directory is used when omitted. */
  workspaceRoot?: string
  /** Source the review is about; backslashes are normalised to "/". */
  sourcePath: string
  /** Paths that were actually read during the review. */
  reviewedPaths: string[]
  reviewSummary: string
  /** What was decided for the narrative as a result of the review. */
  narrativeDecisions: Array<{
    kind: "merged" | "gap" | "ignored" | "deferred"
    target?: string
    rationale: string
  }>
}
89
+
90
/** Result of `recordMaterialReview`. */
export interface RecordMaterialReviewResult {
  ok: true
  /** Workspace-relative path of the written review markdown. */
  path: string
  /** Workspace-relative path of the updated registry file. */
  registryPath: string
  /** Normalised source path the review was recorded for. */
  sourcePath: string
}
96
+
97
/** Options for `checkMaterialIntake`. */
export interface CheckMaterialIntakeInput {
  /** Workspace root; the current working directory is used when omitted. */
  workspaceRoot?: string
  /** Selects the wording of recommended actions; defaults to "authoring". */
  strictness?: "authoring" | "readiness" | "render"
}
101
+
102
/** Result of `checkMaterialIntake`. */
export interface CheckMaterialIntakeResult {
  /** True only when no intake warnings were produced. */
  ok: boolean
  /** Workspace-relative path of the registry file. */
  registryPath: string
  warnings: string[]
  /** Registry entries, each with a recommended next action where one applies. */
  sources: Array<MaterialRegistryEntry & { recommendedNextAction?: string }>
}
108
+
109
/** File extensions (with leading dot) that the workspace scan collects. */
const DOC_EXTENSIONS = new Set([
  ".pdf",
  ".docx",
  ".doc",
  ".xlsx",
  ".xls",
  ".pptx",
  ".ppt",
  ".csv",
  ".md",
  ".txt",
])

/** Directory entry names the scan never descends into. */
const EXCLUDE_DIRS = new Set([
  "node_modules",
  ".git",
  "dist",
  ".opencode",
  "researches",
  "revela-narrative",
  "designs",
  "domains",
])

/** File names the scan ignores even though their extension matches. */
const EXCLUDE_FILENAMES = new Set([
  "AGENTS.md",
  "DECKS.md",
  "README.md",
  "README.zh-CN.md",
])

/** Material types (no dot) that must be extracted rather than read directly. */
const EXTRACTION_EXTENSIONS = new Set(["pdf", "ppt", "pptx", "doc", "docx", "xls", "xlsx"])

/** Subset of extraction types that is not reported as "unsupported". */
const SUPPORTED_EXTRACTION_EXTENSIONS = new Set(["pdf", "pptx", "docx", "xlsx"])
114
+
115
/**
 * Locates the material intake registry file for a workspace.
 *
 * @param workspaceRoot - Workspace root directory.
 * @returns `<workspaceRoot>/.opencode/revela/material-intake/registry.json`.
 */
export function materialRegistryPath(workspaceRoot: string): string {
  const segments = [".opencode", "revela", "material-intake", "registry.json"]
  return join(workspaceRoot, ...segments)
}
118
+
119
/**
 * Loads the material intake registry for a workspace.
 *
 * @param workspaceRoot - Workspace root directory.
 * @returns The parsed registry, or an empty version-1 registry (with an
 *   epoch `updatedAt`) when no registry file exists yet.
 * @throws SyntaxError when the file is not valid JSON; the message names the file.
 * @throws Error when the JSON is not an object with a `sources` array.
 */
export function readMaterialRegistry(workspaceRoot: string): MaterialRegistry {
  const path = materialRegistryPath(workspaceRoot)
  if (!existsSync(path)) return { version: 1, updatedAt: new Date(0).toISOString(), sources: [] }

  let parsed: unknown
  try {
    parsed = JSON.parse(readFileSync(path, "utf-8"))
  } catch (error: unknown) {
    // Keep the SyntaxError class but say which file is corrupt.
    const detail = error instanceof Error ? error.message : String(error)
    throw new SyntaxError(`Invalid material registry JSON at ${path}: ${detail}`)
  }

  // Fail here with a clear message instead of crashing later on `registry.sources`.
  const hasSources = typeof parsed === "object"
    && parsed !== null
    && Array.isArray((parsed as { sources?: unknown }).sources)
  if (!hasSources) {
    throw new Error(`Invalid material registry at ${path}: expected an object with a "sources" array`)
  }
  return parsed as MaterialRegistry
}
124
+
125
/**
 * Persists the registry as pretty-printed JSON, creating the containing
 * directory when needed and stamping `updatedAt` with the current time.
 *
 * @param workspaceRoot - Workspace root directory.
 * @param registry - Registry to persist; its own `updatedAt` is overridden.
 * @returns Workspace-relative path of the written registry file.
 */
export function writeMaterialRegistry(workspaceRoot: string, registry: MaterialRegistry): string {
  const target = materialRegistryPath(workspaceRoot)
  const directory = join(workspaceRoot, ".opencode", "revela", "material-intake")
  mkdirSync(directory, { recursive: true })

  const stamped = { ...registry, updatedAt: new Date().toISOString() }
  const serialized = JSON.stringify(stamped, null, 2)
  writeFileSync(target, serialized, "utf-8")

  return workspaceRelative(target, workspaceRoot)
}
131
+
132
/**
 * Scans the workspace for source documents, records them in the registry,
 * optionally runs extraction for extractable types, and persists the result.
 *
 * @param input - Scan options; see `PrepareLocalMaterialsInput`.
 * @returns Scan summary with per-entry tasks, extraction results and warnings.
 * @throws Error when `input.path` is absolute or leaves the workspace.
 */
export async function prepareLocalMaterials(input: PrepareLocalMaterialsInput = {}): Promise<PrepareLocalMaterialsResult> {
  const workspaceRoot = root(input.workspaceRoot)
  const scanRoot = scanRootFor(workspaceRoot, input.path)
  const depthLimit = input.maxDepth ?? 2
  const files = scanWorkspaceSources(workspaceRoot, scanRoot, depthLimit)

  const initialRegistry = readMaterialRegistry(workspaceRoot)
  const scannedAt = new Date().toISOString()

  // Pass 1: register every discovered file.
  let registry = files.reduce(
    (current, file) => upsertRegistryEntry(current, materialEntryFromSource(file, scannedAt)),
    initialRegistry,
  )

  // Pass 2: extract sequentially, because each step consumes the registry
  // produced by the previous one.
  const extractions: DocumentMaterialsResult[] = []
  const shouldExtract = input.autoExtract ?? true
  if (shouldExtract) {
    for (const file of files) {
      const materialType = (file.type || sourceMaterialType(file.path)).toLowerCase()
      if (EXTRACTION_EXTENSIONS.has(materialType)) {
        const outcome = await extractAndUpdateRegistry({ workspaceRoot, file: file.path }, registry)
        registry = outcome.registry
        extractions.push(outcome.extraction)
      }
    }
  }

  const registryPath = writeMaterialRegistry(workspaceRoot, registry)
  const suggestedTasks = registry.sources.map((entry) => ingestTask(entry))
  const warnings = intakeWarnings(registry.sources)

  return {
    ok: true,
    workspaceRoot,
    registryPath,
    found: files.length,
    files,
    suggestedTasks,
    extractions,
    warnings,
  }
}
166
+
167
/**
 * Extracts a single source file and saves the updated registry.
 *
 * @param input - Optional workspace root and the file to extract.
 * @returns The extraction result for the file.
 */
export async function extractMaterial(input: { workspaceRoot?: string; file: string }): Promise<DocumentMaterialsResult> {
  const workspaceRoot = root(input.workspaceRoot)
  const current = readMaterialRegistry(workspaceRoot)
  const { extraction, registry } = await extractAndUpdateRegistry({ workspaceRoot, file: input.file }, current)
  writeMaterialRegistry(workspaceRoot, registry)
  return extraction
}
174
+
175
/**
 * Records that a source was reviewed: writes the review markdown and marks
 * the registry entry as "reviewed".
 *
 * A source that is not yet in the registry gets a new entry derived from
 * its path.
 *
 * @param input - Source path, reviewed paths, summary and narrative decisions.
 * @returns Workspace-relative paths of the review file and the registry,
 *   plus the normalised source path.
 */
export function recordMaterialReview(input: RecordMaterialReviewInput): RecordMaterialReviewResult {
  const workspaceRoot = root(input.workspaceRoot)
  const registry = readMaterialRegistry(workspaceRoot)

  // Normalise once rather than on every `find` iteration.
  const sourcePath = normalizePath(input.sourcePath)
  const entry = registry.sources.find((item) => item.sourcePath === sourcePath)
  const reviewedPaths = input.reviewedPaths.map((path) => normalizePath(path))

  const reviewPath = writeReviewMarkdown(workspaceRoot, {
    sourcePath,
    fingerprint: entry?.fingerprint,
    extraction: entry?.extraction ?? null,
    reviewedPaths,
    reviewSummary: input.reviewSummary,
    narrativeDecisions: input.narrativeDecisions,
  })

  const now = new Date().toISOString()
  let base: MaterialRegistryEntry
  if (entry) {
    base = entry
  } else {
    // Lower-case the derived type, as the scan and extraction paths do, so
    // the extension lookup below cannot miss because of casing.
    const fallbackType = sourceMaterialType(sourcePath).toLowerCase()
    base = {
      sourcePath,
      type: fallbackType,
      status: "scanned",
      requiresExtraction: EXTRACTION_EXTENSIONS.has(fallbackType),
      firstSeen: now,
      lastChecked: now,
    }
  }

  const nextEntry: MaterialRegistryEntry = {
    ...base,
    status: "reviewed",
    review: {
      reviewPath,
      reviewedAt: now,
      reviewedPaths,
      summary: input.reviewSummary,
    },
    warnings: [],
    lastChecked: now,
  }

  const registryPath = writeMaterialRegistry(workspaceRoot, upsertRegistryEntry(registry, nextEntry))
  return { ok: true, path: reviewPath, registryPath, sourcePath }
}
212
+
213
/**
 * Reports the intake state of every registry entry without changing anything.
 *
 * @param input - Optional workspace root and strictness (default "authoring").
 * @returns Warnings, entries annotated with a recommended next action where
 *   one applies, and `ok` set to true only when there are no warnings.
 */
export function checkMaterialIntake(input: CheckMaterialIntakeInput = {}): CheckMaterialIntakeResult {
  const workspaceRoot = root(input.workspaceRoot)
  const { sources: entries } = readMaterialRegistry(workspaceRoot)
  const strictness = input.strictness ?? "authoring"

  const sources = entries.map((source) => {
    const recommendedNextAction = recommendedAction(source, strictness)
    if (!recommendedNextAction) return source
    return { ...source, recommendedNextAction }
  })

  const warnings = intakeWarnings(entries)
  const registryPath = workspaceRelative(materialRegistryPath(workspaceRoot), workspaceRoot)
  return { ok: warnings.length === 0, registryPath, warnings, sources }
}
228
+
229
/**
 * Produces a notice when a shell command appears to read an Office/PDF
 * source directly instead of going through Revela extraction.
 *
 * A notice is produced when the command mentions a registered source that
 * requires extraction, or when it invokes `textutil`, `pandoc`, `strings`
 * or `unzip` together with an Office/PDF file extension.
 *
 * @param input - Optional workspace root and the command line to inspect.
 * @returns The multi-line notice, or null when nothing matches.
 */
export function materialIntakeNoticeForCommand(input: { workspaceRoot?: string; command: string }): string | null {
  const workspaceRoot = root(input.workspaceRoot)
  const registry = readMaterialRegistry(workspaceRoot)
  const { command } = input

  const usesRawReader = /\b(textutil|pandoc|strings|unzip)\b/.test(command)
  const mentionsOfficeFile = /\.(docx|doc|pptx|ppt|xlsx|xls|pdf)\b/i.test(command)
  const rawOfficeRead = usesRawReader && mentionsOfficeFile

  const extractable = registry.sources.filter((entry) => entry.requiresExtraction)
  const matched = extractable.filter((entry) => command.includes(entry.sourcePath))
  if (matched.length === 0 && !rawOfficeRead) return null

  // Prefer the sources named in the command; otherwise list every
  // extractable source. At most five distinct paths are shown.
  const listed = matched.length > 0 ? matched : extractable
  const unique = [...new Set(listed.map((entry) => entry.sourcePath))].slice(0, 5)

  let headline: string
  if (unique.length > 0) {
    headline = `Scanned source(s) require Revela extraction before narrative intake: ${unique.map((path) => `\`${path}\``).join(", ")}.`
  } else {
    headline = "This command appears to read an Office/PDF source directly."
  }

  return [
    "Revela material intake notice:",
    headline,
    "Use `revela_extract_document_materials` and read the returned `read_view_path` so embedded images and manifests are considered.",
    "If this is intentionally text-only, mark it as degraded intake in the init report; do not treat it as complete material review.",
  ].join("\n")
}
250
+
251
/**
 * Runs document extraction for one file and folds the outcome into the
 * registry. The registry is returned, not written.
 *
 * Status mapping: a processed extraction becomes "extracted", a failed one
 * "failed", and anything else "unsupported" (for extraction types outside
 * the supported set) or "skipped". An entry that already has a recorded
 * review stays "reviewed" when the extraction was a cache hit, so repeated
 * preparation runs do not re-raise the "no recorded material review" warning.
 *
 * @param input - Workspace root and the file to extract.
 * @param registry - Registry to update.
 * @returns The extraction result and the updated registry.
 */
async function extractAndUpdateRegistry(
  input: { workspaceRoot: string; file: string },
  registry: MaterialRegistry,
): Promise<{ extraction: DocumentMaterialsResult; registry: MaterialRegistry }> {
  const sourcePath = normalizePath(input.file)
  const extraction = await extractDocumentMaterials(sourcePath, input.workspaceRoot)
  const existing = registry.sources.find((entry) => entry.sourcePath === sourcePath)
  const now = new Date().toISOString()

  // Lower-case the path-derived type, as the scan path does, so the
  // extension lookups below cannot miss because of casing.
  const type = extraction.type === "other" ? sourceMaterialType(sourcePath).toLowerCase() : extraction.type
  const requiresExtraction = EXTRACTION_EXTENSIONS.has(type)
  const unsupported = requiresExtraction && !SUPPORTED_EXTRACTION_EXTENSIONS.has(type)

  const processed = extraction.status === "processed"
  // A cache hit means the extraction output already existed, so an
  // existing review is kept rather than downgraded.
  const keepReviewed = processed
    && extraction.cache_status === "hit"
    && existing?.status === "reviewed"
    && Boolean(existing.review)

  let status: MaterialIntakeStatus
  if (processed) status = keepReviewed ? "reviewed" : "extracted"
  else if (extraction.status === "failed") status = "failed"
  else status = unsupported ? "unsupported" : "skipped"

  return {
    extraction,
    registry: upsertRegistryEntry(registry, {
      ...(existing ?? {
        sourcePath,
        type,
        requiresExtraction,
        firstSeen: now,
      }),
      sourcePath,
      type,
      status,
      requiresExtraction,
      allowedReadPath: extraction.read_view_path ?? extraction.text_path ?? null,
      extraction: processed
        ? {
            manifestPath: extraction.manifest_path,
            textPath: extraction.text_path,
            readViewPath: extraction.read_view_path,
            cacheDir: extraction.cache_dir,
            imageCount: extraction.images?.length ?? 0,
            tableCount: extraction.tables?.length ?? 0,
          }
        : null,
      warnings: extractionWarnings(extraction, unsupported),
      lastChecked: now,
    }),
  }
}
298
+
299
/**
 * Collects source documents under `scanRoot`, ordered by path.
 *
 * @param workspaceRoot - Workspace root passed through to the metadata lookup.
 * @param scanRoot - Directory where the walk starts.
 * @param maxDepth - Deepest directory level visited; the start directory is level 0.
 * @returns Discovered sources sorted with `localeCompare` on their path.
 */
function scanWorkspaceSources(workspaceRoot: string, scanRoot: string, maxDepth: number): SourceMaterial[] {
  const found: SourceMaterial[] = []
  scanDir(scanRoot, workspaceRoot, found, maxDepth, 0)
  found.sort((left, right) => left.path.localeCompare(right.path))
  return found
}
304
+
305
/**
 * Recursively walks `dir` and appends matching documents to `results`.
 *
 * Entries starting with "." or listed in `EXCLUDE_DIRS` are skipped, as
 * are excluded file names, files starting with "~$" and files whose
 * extension is not in `DOC_EXTENSIONS`. Directories and entries that
 * cannot be read are silently ignored.
 *
 * @param dir - Directory to read.
 * @param workspaceRoot - Workspace root passed to the metadata lookup.
 * @param results - Accumulator that is mutated in place.
 * @param maxDepth - Deepest level that is still read.
 * @param depth - Level of `dir`.
 */
function scanDir(dir: string, workspaceRoot: string, results: SourceMaterial[], maxDepth: number, depth: number): void {
  if (depth > maxDepth) return
  if (!existsSync(dir)) return

  let names: string[]
  try {
    names = readdirSync(dir)
  } catch {
    return
  }

  for (const name of names) {
    const hiddenOrExcluded = name.startsWith(".") || EXCLUDE_DIRS.has(name)
    if (hiddenOrExcluded) continue

    const fullPath = join(dir, name)
    let info
    try {
      info = statSync(fullPath)
    } catch {
      continue
    }

    if (info.isDirectory()) {
      scanDir(fullPath, workspaceRoot, results, maxDepth, depth + 1)
      continue
    }

    const isCandidate = info.isFile()
      && !EXCLUDE_FILENAMES.has(name)
      && !name.startsWith("~$")
      && DOC_EXTENSIONS.has(extname(name).toLowerCase())
    if (isCandidate) {
      results.push({ ...sourceMaterialMetadata(fullPath, workspaceRoot), status: "discovered" })
    }
  }
}
332
+
333
/**
 * Builds a fresh "scanned" registry entry for a discovered source.
 *
 * Types that require extraction start with no allowed read path; every
 * other type may be read directly from its own path.
 *
 * @param source - Source found by the workspace scan.
 * @param now - ISO timestamp used for both `firstSeen` and `lastChecked`.
 * @returns The new entry, with no extraction, review or warnings.
 */
function materialEntryFromSource(source: SourceMaterial, now: string): MaterialRegistryEntry {
  const type = (source.type || sourceMaterialType(source.path)).toLowerCase()
  const requiresExtraction = EXTRACTION_EXTENSIONS.has(type)
  const allowedReadPath = requiresExtraction ? null : source.path

  return {
    sourcePath: source.path,
    type,
    fingerprint: source.fingerprint,
    size: source.size,
    lastModified: source.lastModified,
    status: "scanned",
    requiresExtraction,
    allowedReadPath,
    extraction: null,
    review: null,
    warnings: [],
    firstSeen: now,
    lastChecked: now,
  }
}
351
+
352
/**
 * Inserts or replaces a registry entry, keyed by normalised source path,
 * and returns a new registry sorted by source path.
 *
 * A "scanned" entry whose fingerprint equals the stored one is a scan
 * refresh of an unchanged file. The stored status, allowed read path,
 * extraction, review and warnings are then kept, so a rescan neither
 * discards earlier intake work nor hides an earlier failure reason.
 * `firstSeen` always comes from the stored entry when there is one.
 *
 * @param registry - Registry to update; it is not mutated.
 * @param entry - Entry to merge in.
 * @returns The updated registry with a fresh `updatedAt`.
 */
function upsertRegistryEntry(registry: MaterialRegistry, entry: MaterialRegistryEntry): MaterialRegistry {
  const sourcePath = normalizePath(entry.sourcePath)
  const existingIndex = registry.sources.findIndex((item) => item.sourcePath === sourcePath)
  const existing = existingIndex >= 0 ? registry.sources[existingIndex] : undefined

  const unchangedFingerprint = Boolean(existing?.fingerprint && entry.fingerprint && existing.fingerprint === entry.fingerprint)
  // `kept` is the stored entry only for a scan refresh of an unchanged file.
  const kept = entry.status === "scanned" && unchangedFingerprint ? existing : undefined

  const next: MaterialRegistryEntry = {
    ...existing,
    ...entry,
    sourcePath,
    status: kept ? kept.status : entry.status,
    allowedReadPath: kept ? kept.allowedReadPath : entry.allowedReadPath,
    extraction: kept ? kept.extraction : entry.extraction,
    // Warnings describe the stored extraction state, so they are kept with it.
    ...(kept ? { warnings: kept.warnings } : {}),
    firstSeen: existing?.firstSeen ?? entry.firstSeen,
    review: kept ? kept.review : entry.review === undefined ? existing?.review : entry.review,
  }

  const sources = [...registry.sources]
  if (existingIndex >= 0) sources[existingIndex] = next
  else sources.push(next)
  sources.sort((a, b) => a.sourcePath.localeCompare(b.sourcePath))

  return { version: 1, updatedAt: new Date().toISOString(), sources }
}
373
+
374
/**
 * Converts a registry entry into the task suggestion shown to the caller.
 *
 * @param entry - Registry entry to describe.
 * @returns A task whose action and note depend on `entry.requiresExtraction`.
 */
function ingestTask(entry: MaterialRegistryEntry): MaterialIngestTask {
  const needsExtraction = entry.requiresExtraction

  let suggestedAction: "read_directly" | "extract_then_read"
  let note: string
  if (needsExtraction) {
    suggestedAction = "extract_then_read"
    note = "Read the extracted read_view_path after Revela extraction; do not read the original Office/PDF source for narrative intake."
  } else {
    suggestedAction = "read_directly"
    note = "Read directly when relevant and record narrative meaning only after source content is actually inspected."
  }

  return {
    path: entry.sourcePath,
    materialType: entry.type,
    needsExtraction,
    suggestedAction,
    status: entry.status,
    allowedReadPath: entry.allowedReadPath ?? null,
    note,
  }
}
387
+
388
/**
 * Derives the warnings stored on an entry after an extraction attempt.
 *
 * @param result - Extraction result; only `status` and `reason` are read.
 * @param unsupported - True when the type has no supported extraction; this takes precedence.
 * @returns Zero or one warning message.
 */
function extractionWarnings(result: DocumentMaterialsResult, unsupported: boolean): string[] {
  if (unsupported) {
    return ["This source type is discovered but not supported for Revela extraction in v1."]
  }
  switch (result.status) {
    case "failed":
      return [`Extraction failed: ${result.reason ?? "unknown error"}`]
    case "skipped":
      return [`Extraction skipped: ${result.reason ?? "unsupported file type"}`]
    default:
      return []
  }
}
394
+
395
/**
 * Lists intake problems for sources that require extraction.
 *
 * Entries that do not require extraction, and entries whose status is
 * "reviewed" or "skipped", produce no warning.
 *
 * @param sources - Registry entries to inspect.
 * @returns One warning per problematic entry, in input order.
 */
function intakeWarnings(sources: MaterialRegistryEntry[]): string[] {
  const describe = (source: MaterialRegistryEntry): string | undefined => {
    switch (source.status) {
      case "scanned":
        return `${source.sourcePath} was scanned but not extracted through Revela material extraction.`
      case "extracted":
        return `${source.sourcePath} was extracted but has no recorded material review.`
      case "text_only_read":
        return `${source.sourcePath} was read as text-only; embedded images or structure may not have been considered.`
      case "unsupported":
        return `${source.sourcePath} is not supported for extraction; convert it to a supported format such as .docx/.pptx/.xlsx when needed.`
      case "failed":
        return `${source.sourcePath} extraction failed and should not be treated as complete intake.`
      default:
        return undefined
    }
  }

  const warnings: string[] = []
  for (const source of sources) {
    if (!source.requiresExtraction) continue
    const message = describe(source)
    if (message !== undefined) warnings.push(message)
  }
  return warnings
}
407
+
408
/**
 * Suggests the next step for a source that requires extraction.
 *
 * Strictness only changes the wording for "extracted" entries:
 * "authoring" asks for read-then-review, any other level asks for the
 * review to be recorded.
 *
 * @param source - Registry entry to advise on.
 * @param strictness - Check level requested by the caller.
 * @returns The advice text, or undefined when no action is recommended.
 */
function recommendedAction(source: MaterialRegistryEntry, strictness: "authoring" | "readiness" | "render"): string | undefined {
  if (!source.requiresExtraction) return undefined

  switch (source.status) {
    case "scanned":
      return "Call `revela_extract_document_materials`, then read the returned `read_view_path`."
    case "extracted":
      if (strictness === "authoring") {
        return "Read `allowedReadPath`, then call `revela_record_material_review`."
      }
      return "Record material review before treating this source as considered for narrative readiness."
    case "text_only_read":
      return "Use Revela extraction and review before treating this source as complete intake."
    case "unsupported":
      return "Convert to a supported format or keep the source as an explicit intake gap."
    case "failed":
      return "Fix extraction failure or record the source as an intake gap."
    default:
      return undefined
  }
}
419
+
420
/**
 * Writes the review record for a source to
 * `researches/local-materials/<slug>-review.md`, creating the directory
 * when needed.
 *
 * The file has a front-matter block (source path, optional fingerprint and
 * extraction paths, review timestamp) followed by the summary, the
 * reviewed paths, the narrative decisions and an image note. Empty lists
 * are written as "- None recorded." so no section is left blank.
 *
 * NOTE(review): the file name comes from `slugify(sourcePath)`, which uses
 * only the base name, so sources sharing a base name appear to share one
 * review file — confirm whether that is intended.
 *
 * @param workspaceRoot - Workspace root directory.
 * @param input - Review content and the extraction metadata of the source.
 * @returns Workspace-relative path of the written review file.
 */
function writeReviewMarkdown(
  workspaceRoot: string,
  input: {
    sourcePath: string
    fingerprint?: string
    extraction: MaterialRegistryEntry["extraction"]
    reviewedPaths: string[]
    reviewSummary: string
    narrativeDecisions: RecordMaterialReviewInput["narrativeDecisions"]
  },
): string {
  const dir = join(workspaceRoot, "researches", "local-materials")
  mkdirSync(dir, { recursive: true })
  const path = join(dir, `${slugify(input.sourcePath)}-review.md`)

  const extraction = input.extraction
  // Tolerate callers that omit the decisions despite the declared type.
  const decisions = input.narrativeDecisions ?? []

  const reviewedPathLines = input.reviewedPaths.length > 0
    ? input.reviewedPaths.map((reviewed) => `- ${reviewed}`)
    : ["- None recorded."]
  const decisionLines = decisions.length > 0
    ? decisions.map((decision) => `- ${decision.kind}${decision.target ? `: ${decision.target}` : ""} - ${decision.rationale}`)
    : ["- None recorded."]

  const lines = [
    "---",
    "type: local-material-review",
    // JSON.stringify yields double-quoted scalars, which are valid YAML.
    `sourcePath: ${JSON.stringify(input.sourcePath)}`,
    input.fingerprint ? `fingerprint: ${JSON.stringify(input.fingerprint)}` : undefined,
    extraction?.manifestPath ? `extractionManifestPath: ${JSON.stringify(extraction.manifestPath)}` : undefined,
    extraction?.textPath ? `extractionTextPath: ${JSON.stringify(extraction.textPath)}` : undefined,
    extraction?.readViewPath ? `readViewPath: ${JSON.stringify(extraction.readViewPath)}` : undefined,
    `reviewedAt: ${JSON.stringify(new Date().toISOString())}`,
    "status: reviewed",
    "---",
    "",
    "# Local Material Review",
    "",
    "## Review Summary",
    "",
    input.reviewSummary.trim(),
    "",
    "## Reviewed Paths",
    "",
    ...reviewedPathLines,
    "",
    "## Narrative Decisions",
    "",
    ...decisionLines,
    "",
    "## Extracted Images",
    "",
    extraction?.imageCount ? `- ${extraction.imageCount} extracted image(s); do not treat as interpreted evidence without explicit image review.` : "- None recorded.",
    "",
  ].filter((line): line is string => line !== undefined)

  writeFileSync(path, lines.join("\n"), "utf-8")
  return workspaceRelative(path, workspaceRoot)
}
469
+
470
/**
 * Resolves the directory to scan and guarantees it lies inside the workspace.
 *
 * Containment is checked with `relative()` rather than a string prefix, so
 * it also works when the workspace is a filesystem root, where the
 * resolved path already ends in a separator.
 *
 * @param workspaceRoot - Workspace root directory.
 * @param path - Optional workspace-relative sub-path.
 * @returns `workspaceRoot` unchanged when `path` is empty, otherwise the
 *   resolved absolute path of the sub-directory.
 * @throws Error when `path` is absolute or resolves outside the workspace.
 */
function scanRootFor(workspaceRoot: string, path?: string): string {
  if (!path) return workspaceRoot
  if (isAbsolute(path)) throw new Error("path must be relative to workspace root")

  const resolvedWorkspace = resolve(workspaceRoot)
  const candidate = resolve(workspaceRoot, path)
  const offset = relative(resolvedWorkspace, candidate)

  // Outside means the relative path climbs out ("..") or, on Windows, lands
  // on another drive and therefore comes back absolute.
  const escapes = offset === ".." || offset.startsWith(".." + sep) || isAbsolute(offset)
  if (escapes) throw new Error("path must be within workspace")
  return candidate
}
478
+
479
/**
 * Resolves the workspace root to an absolute path.
 *
 * @param workspaceRoot - Requested root; when omitted or empty the current
 *   working directory is used.
 * @returns Absolute, normalised workspace root.
 */
function root(workspaceRoot?: string): string {
  const base = workspaceRoot ? workspaceRoot : process.cwd()
  return resolve(base)
}
482
+
483
/**
 * Expresses `path` relative to the workspace root, using forward slashes.
 *
 * @param path - Path to convert; resolved against the current working directory when relative.
 * @param workspaceRoot - Workspace root the result is relative to.
 * @returns Relative path with every backslash replaced by "/".
 */
function workspaceRelative(path: string, workspaceRoot: string): string {
  const from = resolve(workspaceRoot)
  const to = resolve(path)
  return relative(from, to).split("\\").join("/")
}
486
+
487
/**
 * Converts every backslash in a path to a forward slash.
 *
 * @param path - Path in either separator style.
 * @returns The same path using "/" only.
 */
function normalizePath(path: string): string {
  return path.split("\\").join("/")
}
490
+
491
/**
 * Turns a file path into a lower-case, dash-separated slug of its base name.
 *
 * The last extension is dropped, and every run of characters that is not a
 * Unicode letter, combining mark or digit becomes a single "-". Letters
 * from non-Latin scripts are kept, so such file names no longer all
 * collapse to the fallback. ASCII-only names give the same slug as before.
 *
 * @param value - File path or name.
 * @returns The slug, or "material" when nothing usable remains.
 */
function slugify(value: string): string {
  const stem = basename(value).replace(/\.[^.]+$/, "")
  const slug = stem
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}]+/gu, "-")
    .replace(/^-+|-+$/g, "")
  return slug || "material"
}
@@ -25,10 +25,12 @@ import { formatArtifactQaUserNotice, formatMarkdownQaUserNotice } from "../hook-
25
25
  import { readDeckPlanArtifact } from "../narrative-state/deck-plan-artifact"
26
26
  import { extractDesignClasses } from "../design/designs"
27
27
  import { recordRenderedArtifact, workspaceRelative } from "../workspace-state/rendered-artifacts"
28
+ import { checkMaterialIntake, extractMaterial, materialIntakeNoticeForCommand, prepareLocalMaterials, recordMaterialReview } from "../material-intake"
28
29
  import type { ReviewDeckOpenInput, ReviewDeckReadInput } from "./review"
29
30
  import pkg from "../../package.json"
30
31
  export { bindResearchFindings, evaluateResearchFindings, researchSave, researchTargets } from "./research"
31
32
  export { storyRead } from "./story"
33
+ export { checkMaterialIntake, extractMaterial, materialIntakeNoticeForCommand, prepareLocalMaterials, recordMaterialReview }
32
34
 
33
35
  export interface RuntimeWorkspaceInput {
34
36
  workspaceRoot?: string
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cyber-dash-tech/revela",
3
- "version": "0.17.21",
3
+ "version": "0.17.22",
4
4
  "description": "OpenCode plugin for trusted narrative artifacts from local sources, research, and evidence",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
@@ -2,7 +2,7 @@
2
2
  "mcpServers": {
3
3
  "revela": {
4
4
  "command": "npx",
5
- "args": ["-y", "@cyber-dash-tech/revela@0.17.21", "mcp"]
5
+ "args": ["-y", "@cyber-dash-tech/revela@0.17.22", "mcp"]
6
6
  }
7
7
  }
8
8
  }
@@ -1,6 +1,16 @@
1
1
  {
2
2
  "hooks": {
3
3
  "PreToolUse": [
4
+ {
5
+ "matcher": "exec_command",
6
+ "hooks": [
7
+ {
8
+ "type": "command",
9
+ "command": "bun ${PLUGIN_ROOT}/hooks/revela_material_notice.ts",
10
+ "statusMessage": "Checking Revela material intake"
11
+ }
12
+ ]
13
+ },
4
14
  {
5
15
  "matcher": "apply_patch",
6
16
  "hooks": [
@@ -0,0 +1,58 @@
1
+ import { dirname, resolve } from "path"
2
+ import { fileURLToPath, pathToFileURL } from "url"
3
+ import { resolveRevelaRuntime } from "../mcp/runtime-resolver"
4
+ import { workspaceRootFromInput } from "./revela_post_write_notice"
5
+
6
/** Outcome returned by the material-intake notice hook. */
export interface MaterialNoticeResult {
  /** Typed as the literal `true`; the shape has no failure variant. */
  ok: true
  /** Notice texts to surface; empty when there is nothing to report. */
  messages: Array<string>
}
10
+
11
/**
 * Builds the material-intake notice for a hook payload.
 *
 * The command is pulled out of `input`, the Revela runtime is located and
 * dynamically imported, and its optional `materialIntakeNoticeForCommand`
 * export is asked for a notice.
 *
 * @param input - Raw hook payload text (JSON or a bare command string).
 * @returns A result whose `messages` holds the notice, or is empty when no
 *   command is found, the runtime cannot be resolved, or no notice is produced.
 */
export async function runMaterialReadNotice(input: string): Promise<MaterialNoticeResult> {
  // Every quiet exit returns a fresh, empty result.
  const nothingToReport = (): MaterialNoticeResult => ({ ok: true, messages: [] })

  const command = commandFromInput(input)
  if (!command) return nothingToReport()

  // PLUGIN_ROOT wins when set; otherwise fall back to the directory two levels above this module.
  const configuredRoot = process.env.PLUGIN_ROOT
  const pluginRoot = resolve(configuredRoot ? configuredRoot : dirname(dirname(fileURLToPath(import.meta.url))))
  const runtime = resolveRevelaRuntime({ pluginRoot })
  if (!(runtime.ok && runtime.runtimePath)) return nothingToReport()

  const workspaceRoot = workspaceRootFromInput(input)
  const runtimeUrl = pathToFileURL(runtime.runtimePath).href
  const runtimeModule = await import(runtimeUrl)
  // The export is optional, so a runtime without it simply yields no notice.
  const notice = runtimeModule.materialIntakeNoticeForCommand?.({ workspaceRoot, command })
  if (!notice) return nothingToReport()
  return { ok: true, messages: [notice] }
}
24
+
25
/**
 * Extracts the shell command from a hook payload.
 *
 * The payload is expected to be JSON carrying the command under `cmd` or
 * `command`, either at the top level or nested in `args`, `tool_input`, or
 * `toolInput` (checked in that order). Input that is not valid JSON is treated
 * as the command text itself.
 *
 * @param input - Raw hook payload text.
 * @returns The first non-blank string candidate (returned untrimmed), the trimmed
 *   raw input when it is not JSON, or `null` when no command can be found.
 */
export function commandFromInput(input: string): string | null {
  let parsed: unknown
  try {
    parsed = JSON.parse(input)
  } catch {
    // Not JSON: fall back to treating the whole input as the command.
    return input.trim() || null
  }

  // Valid JSON that is not an object (null, numbers, strings, booleans) carries no command.
  // Keeping this outside the try block stops a property read on `null` from being
  // mistaken for a parse failure, which would return the literal text "null".
  if (typeof parsed !== "object" || parsed === null) return null

  const payload = parsed as Record<string, unknown>
  const containers: unknown[] = [payload, payload.args, payload.tool_input, payload.toolInput]
  for (const container of containers) {
    if (typeof container !== "object" || container === null) continue
    const fields = container as Record<string, unknown>
    for (const key of ["cmd", "command"]) {
      const candidate = fields[key]
      if (typeof candidate === "string" && candidate.trim()) return candidate
    }
  }
  return null
}
46
+
47
// Script entry point: runs only when this module is executed directly.
// The hook is best-effort, so the process always exits with status 0, including
// when the notice fails to run; failures are only reported on stderr.
if (import.meta.main) {
  const input = await new Response(Bun.stdin.stream()).text()
  try {
    const { messages } = await runMaterialReadNotice(input)
    // Multiple notices are separated by a horizontal-rule divider.
    if (messages.length !== 0) console.error(messages.join("\n\n---\n\n"))
  } catch (e) {
    const detail = e instanceof Error ? e.message : String(e)
    console.error("Revela material intake notice failed to run.")
    console.error(detail)
  }
  process.exit(0)
}
@@ -40,6 +40,10 @@ type RuntimeModule = {
40
40
  researchSave(input: any): any
41
41
  evaluateResearchFindings(input: any): any
42
42
  bindResearchFindings(input: any): any
43
+ prepareLocalMaterials(input: any): Promise<any>
44
+ extractMaterial(input: any): Promise<any>
45
+ recordMaterialReview(input: any): any
46
+ checkMaterialIntake(input: any): any
43
47
  }
44
48
 
45
49
  type MessageMode = "framed" | "raw"
@@ -287,6 +291,43 @@ const tools = [
287
291
  evidenceId: stringProp("Optional canonical evidence node id override."),
288
292
  }, ["findingsFile"]),
289
293
  },
294
+ {
295
+ name: "revela_prepare_local_materials",
296
+ description: "Scan local workspace source materials, create/update the material-intake registry, and optionally extract Office/PDF files into read views.",
297
+ inputSchema: objectSchema({
298
+ workspaceRoot: stringProp("Optional workspace root."),
299
+ path: stringProp("Optional workspace-relative subdirectory to scan."),
300
+ maxDepth: numberProp("Maximum scan depth. Defaults to 2."),
301
+ autoExtract: booleanProp("Whether to extract Office/PDF sources during prepare. Defaults to true."),
302
+ }),
303
+ },
304
+ {
305
+ name: "revela_extract_document_materials",
306
+ description: "Extract text, manifest, read view, and embedded images from a workspace document. Supports pdf, pptx, docx, and xlsx.",
307
+ inputSchema: objectSchema({
308
+ workspaceRoot: stringProp("Optional workspace root."),
309
+ file: requiredStringProp("Workspace-relative source file path."),
310
+ }, ["file"]),
311
+ },
312
+ {
313
+ name: "revela_record_material_review",
314
+ description: "Record that an LLM has read extracted local material and decided what was merged, deferred, ignored, or left as a gap.",
315
+ inputSchema: objectSchema({
316
+ workspaceRoot: stringProp("Optional workspace root."),
317
+ sourcePath: requiredStringProp("Workspace-relative source file path."),
318
+ reviewedPaths: arrayProp("Workspace-relative extracted paths actually reviewed."),
319
+ reviewSummary: requiredStringProp("Concise summary of the reviewed material."),
320
+ narrativeDecisions: arrayObjectProp("Narrative decisions with kind, optional target, and rationale."),
321
+ }, ["sourcePath", "reviewedPaths", "reviewSummary", "narrativeDecisions"]),
322
+ },
323
+ {
324
+ name: "revela_check_material_intake",
325
+ description: "Check whether scanned Office/PDF sources were extracted and reviewed before being treated as narrative intake.",
326
+ inputSchema: objectSchema({
327
+ workspaceRoot: stringProp("Optional workspace root."),
328
+ strictness: enumProp(["authoring", "readiness", "render"], "Check strictness."),
329
+ }),
330
+ },
290
331
  ]
291
332
 
292
333
  let runtimePromise: Promise<RuntimeModule> | undefined
@@ -374,6 +415,10 @@ async function callTool(name: string, args: any): Promise<any> {
374
415
  if (name === "revela_research_save") return r.researchSave(args)
375
416
  if (name === "revela_evaluate_research_findings") return r.evaluateResearchFindings(args)
376
417
  if (name === "revela_bind_research_findings") return r.bindResearchFindings(args)
418
+ if (name === "revela_prepare_local_materials") return r.prepareLocalMaterials(args)
419
+ if (name === "revela_extract_document_materials") return r.extractMaterial(args)
420
+ if (name === "revela_record_material_review") return r.recordMaterialReview(args)
421
+ if (name === "revela_check_material_intake") return r.checkMaterialIntake(args)
377
422
  throw new Error(`Unknown tool: ${name}`)
378
423
  }
379
424
 
@@ -401,6 +446,10 @@ function booleanProp(description: string) {
401
446
  return { type: "boolean", description }
402
447
  }
403
448
 
449
/**
 * Builds a JSON-schema fragment for a numeric tool argument.
 *
 * @param description - Human-readable description of the argument.
 * @returns A schema object with `type: "number"` and the given description.
 */
function numberProp(description: string) {
  const schema = { type: "number", description }
  return schema
}
452
+
404
453
  function enumProp(values: string[], description: string) {
405
454
  return { type: "string", enum: values, description }
406
455
  }
@@ -409,6 +458,23 @@ function arrayProp(description: string) {
409
458
  return { type: "array", items: { type: "string" }, description }
410
459
  }
411
460
 
461
/**
 * Builds a JSON-schema fragment for an array of narrative-decision objects.
 *
 * Each item requires `kind` (one of `merged`, `gap`, `ignored`, `deferred`) and
 * `rationale`, may carry an optional `target`, and allows no other properties.
 *
 * @param description - Human-readable description of the argument.
 * @returns The array schema with the decision-object item schema attached.
 */
function arrayObjectProp(description: string) {
  const decisionFields = {
    kind: { type: "string", enum: ["merged", "gap", "ignored", "deferred"] },
    target: { type: "string" },
    rationale: { type: "string" },
  }
  const decisionItem = {
    type: "object",
    properties: decisionFields,
    required: ["kind", "rationale"],
    additionalProperties: false,
  }
  return { type: "array", description, items: decisionItem }
}
477
+
412
478
  function writeMessage(message: any, mode: MessageMode = activeResponseMode): void {
413
479
  activeResponseMode = mode
414
480
  const body = JSON.stringify(message)
@@ -18,14 +18,24 @@ Use this skill when the user asks to start Revela, initialize the workspace, ing
18
18
 
19
19
  ## Workflow
20
20
 
21
- 1. Inspect the workspace with normal Codex file tools. Stay inside the current workspace root.
22
- 2. Prefer local source materials first: Markdown, text, CSV, PDFs, Office files, existing `researches/`, existing `revela-narrative/`, `deck-plan/`, and `decks/`.
23
- 3. Call `revela_domain_list` and `revela_domain_read` for active domain guidance before authoring narrative meaning. Treat domain guidance as framing guidance, never as evidence.
24
- 4. If `revela-narrative/` exists, call `revela_markdown_qa` and `revela_compile_narrative`.
25
- 5. If the narrative vault is missing, create the initial `revela-narrative/` Markdown nodes directly with valid frontmatter and plain wikilink relations.
26
- 6. Evidence nodes must preserve source, quote/snippet, support scope, unsupported scope, caveat, and strength before being treated as support.
27
- 7. After writing narrative Markdown, call `revela_markdown_qa` and `revela_compile_narrative` again.
28
- 8. End with a concise init report: local materials found, active domain, narrative graph status, open gaps, Markdown QA status, and next command/action.
21
+ 1. Call `revela_prepare_local_materials` first. Treat scan results as an intake registry and task list, not as source content.
22
+ 2. For any registry entry with `requiresExtraction: true`, do not read the original Office/PDF file directly for narrative intake. Use the returned `allowedReadPath` / `read_view_path`; if missing, call `revela_extract_document_materials` first.
23
+ 3. Prefer local source materials first: Markdown, text, CSV, PDFs, Office files, existing `researches/`, existing `revela-narrative/`, `deck-plan/`, and `decks/`.
24
+ 4. After reading extracted material views, call `revela_record_material_review` for each considered Office/PDF source. Record what was merged, deferred, ignored, or left as a gap.
25
+ 5. Call `revela_domain_list` and `revela_domain_read` for active domain guidance before authoring narrative meaning. Treat domain guidance as framing guidance, never as evidence.
26
+ 6. If `revela-narrative/` exists, call `revela_markdown_qa` and `revela_compile_narrative`.
27
+ 7. If the narrative vault is missing, create the initial `revela-narrative/` Markdown nodes directly with valid frontmatter and plain wikilink relations.
28
+ 8. Evidence nodes must preserve source, quote/snippet, support scope, unsupported scope, caveat, and strength before being treated as support.
29
+ 9. After writing narrative Markdown, call `revela_markdown_qa` and `revela_compile_narrative` again.
30
+ 10. Before the final report, call `revela_check_material_intake` and surface any warnings about scanned-but-unextracted, extracted-but-unreviewed, unsupported, failed, or text-only sources.
31
+ 11. End with a concise init report: local materials found, active domain, narrative graph status, material intake status, open gaps, Markdown QA status, and next command/action.
32
+
33
+ ## Material Intake Rules
34
+
35
+ - Scan results only prove that files exist; they do not prove file content.
36
+ - For `.docx`, `.pptx`, `.xlsx`, and `.pdf`, read the extracted `read_view_path` instead of using Codex/textutil/raw reads of the original file.
37
+ - Extracted images are candidate materials only. Do not interpret them as evidence unless image meaning is explicitly reviewed or supplied by the user.
38
+ - If a user explicitly asks for text-only inspection, report it as degraded intake and do not treat it as complete source review.
29
39
 
30
40
  ## Markdown Rules
31
41