@swarmvaultai/engine 0.2.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1729,7 +1729,7 @@ import matter3 from "gray-matter";
1729
1729
  import ignore from "ignore";
1730
1730
  import { JSDOM as JSDOM2 } from "jsdom";
1731
1731
  import mime from "mime-types";
1732
- import TurndownService from "turndown";
1732
+ import TurndownService2 from "turndown";
1733
1733
 
1734
1734
  // src/code-analysis.ts
1735
1735
  import fs6 from "fs/promises";
@@ -4504,8 +4504,11 @@ async function analyzeCodeSource(manifest, extractedText, schemaHash) {
4504
4504
  import fs7 from "fs/promises";
4505
4505
  import os from "os";
4506
4506
  import path7 from "path";
4507
+ import { Readable } from "stream";
4508
+ import { parse as parseCsvSync } from "csv-parse/sync";
4507
4509
  import { strFromU8, unzipSync } from "fflate";
4508
4510
  import { JSDOM } from "jsdom";
4511
+ import TurndownService from "turndown";
4509
4512
  import { z } from "zod";
4510
4513
  var imageVisionExtractionSchema = z.object({
4511
4514
  title: z.string().min(1).nullable().optional(),
@@ -4685,7 +4688,7 @@ function normalizePdfMetadata(raw) {
4685
4688
  function normalizeDocumentText(raw) {
4686
4689
  return raw.replace(/\r\n/g, "\n").split(/\n{2,}/).map((section) => normalizeWhitespace(section)).filter(Boolean).join("\n\n").trim();
4687
4690
  }
4688
- function parseDocxCoreMetadata(bytes) {
4691
+ function parseOfficeCoreMetadata(bytes) {
4689
4692
  try {
4690
4693
  const archive = unzipSync(new Uint8Array(bytes));
4691
4694
  const coreXml = archive["docProps/core.xml"];
@@ -4725,6 +4728,122 @@ function parseDocxCoreMetadata(bytes) {
4725
4728
  return void 0;
4726
4729
  }
4727
4730
  }
4731
/**
 * Decodes a byte buffer as UTF-8 and drops a leading byte-order mark, if any.
 *
 * @param {Buffer} bytes - Raw bytes; `toString("utf8")` is invoked on it.
 * @returns {string} The decoded text without a leading U+FEFF.
 */
function decodeTextBytes(bytes) {
  const decoded = bytes.toString("utf8");
  // 0xFEFF (65279) is the byte-order mark some editors prepend to text files.
  if (decoded.charCodeAt(0) === 0xfeff) {
    return decoded.slice(1);
  }
  return decoded;
}
4735
/**
 * Converts an arbitrary cell value to a whitespace-normalized string.
 * `null` and `undefined` become the empty string before normalization.
 *
 * @param {unknown} value - Raw cell value.
 * @returns {string} Result of `normalizeWhitespace` on the stringified value.
 */
function normalizeTableCell(value) {
  const present = value === null || value === void 0 ? "" : value;
  return normalizeWhitespace(String(present));
}
4738
/**
 * Reports whether a cell string represents a finite number.
 *
 * @param {string} value - Cell text.
 * @returns {boolean} `true` when the text is non-blank and `Number(value)` is finite.
 */
function isNumericCell(value) {
  // Number("") and Number("   ") both evaluate to 0, so blank or
  // whitespace-only cells must be rejected before the numeric conversion.
  if (value.trim().length === 0) {
    return false;
  }
  return Number.isFinite(Number(value));
}
4741
/**
 * Splits tabular rows into a header row and body rows.
 *
 * The first row is treated as a header when it has at least one non-empty
 * cell, all of its non-empty cells are distinct, at least half of them
 * (rounded up) are non-numeric, and there is more than one row. Otherwise
 * synthetic `column_N` headers are generated and every row is a body row.
 *
 * @param {string[][]} rows - Rows of already-normalized cell strings.
 * @returns {{ headers: string[], bodyRows: string[][] }}
 */
function detectHeaderRow(rows) {
  if (!rows.length) {
    return { headers: [], bodyRows: [] };
  }
  const firstRow = rows[0] ?? [];
  const nonEmpty = firstRow.filter(Boolean);
  const unique = new Set(nonEmpty);
  const nonNumeric = nonEmpty.filter((value) => !isNumericCell(value));
  const looksLikeHeader =
    nonEmpty.length > 0 &&
    unique.size === nonEmpty.length &&
    nonNumeric.length >= Math.ceil(nonEmpty.length / 2) &&
    rows.length > 1;
  if (looksLikeHeader) {
    return {
      // Blank header cells still get a stable positional name.
      headers: firstRow.map((value, index) => value || `column_${index + 1}`),
      bodyRows: rows.slice(1)
    };
  }
  // A loop is used instead of Math.max(...rows.map(...)): spreading one
  // argument per row throws RangeError once the table has enough rows.
  let columnCount = 0;
  for (const row of rows) {
    if (row.length > columnCount) {
      columnCount = row.length;
    }
  }
  return {
    headers: Array.from({ length: columnCount }, (_, index) => `column_${index + 1}`),
    bodyRows: rows
  };
}
4762
/**
 * Builds one markdown bullet per column that is either entirely numeric or
 * has at most six distinct non-empty values.
 *
 * @param {string[]} headers - Column names, positionally aligned with the rows.
 * @param {string[][]} rows - Body rows.
 * @returns {string[]} Hint lines; columns with no non-empty values or no
 *   matching pattern contribute nothing.
 */
function columnHints(headers, rows) {
  const hints = [];
  headers.forEach((header, columnIndex) => {
    const cells = [];
    for (const row of rows) {
      const cell = normalizeTableCell(row[columnIndex] ?? "");
      if (cell) {
        cells.push(cell);
      }
    }
    if (cells.length === 0) {
      return;
    }
    if (cells.every((cell) => isNumericCell(cell))) {
      hints.push(`- ${header}: numeric`);
      return;
    }
    const distinct = Array.from(new Set(cells));
    if (distinct.length <= 6) {
      hints.push(`- ${header}: low-cardinality (${distinct.slice(0, 6).join(", ")})`);
    }
  });
  return hints;
}
4778
/**
 * Renders headers and the first `rowLimit` rows as markdown table lines.
 *
 * @param {string[]} headers - Column names; an empty list yields a placeholder line.
 * @param {unknown[][]} rows - Body rows; short rows are padded with empty cells
 *   and extra cells beyond the header width are dropped.
 * @param {number} [rowLimit=20] - Maximum number of body rows to render.
 * @returns {string[]} Table lines (header, separator, then body rows).
 */
function markdownTable(headers, rows, rowLimit = 20) {
  if (!headers.length) {
    return ["No tabular preview available."];
  }
  // A literal "|" inside a cell would be read as a column boundary, so it is
  // backslash-escaped to keep the table shape intact.
  const escapeCell = (cell) => String(cell).replace(/\|/g, "\\|");
  const width = headers.length;
  const lines = [
    `| ${headers.map(escapeCell).join(" | ")} |`,
    `| ${headers.map(() => "---").join(" | ")} |`
  ];
  for (const row of rows.slice(0, rowLimit)) {
    const cells = Array.from({ length: width }, (_, index) => escapeCell(normalizeTableCell(row[index] ?? "")));
    lines.push(`| ${cells.join(" | ")} |`);
  }
  return lines;
}
4790
/**
 * Reads one archive entry as a string.
 *
 * @param {Record<string, Uint8Array>} archive - Map of entry path to bytes.
 * @param {string} entryPath - Path of the entry inside the archive.
 * @returns {string | undefined} Decoded text, or `undefined` when the entry is absent or falsy.
 */
function zipEntryText(archive, entryPath) {
  const bytes = archive[entryPath];
  if (!bytes) {
    return void 0;
  }
  return strFromU8(bytes);
}
4794
/**
 * Parses an XML string with JSDOM using the `text/xml` content type.
 *
 * @param {string} xml - XML source text.
 * @returns {Document} The parsed document.
 */
function parseXmlDocument(xml) {
  const dom = new JSDOM(xml, { contentType: "text/xml" });
  return dom.window.document;
}
4797
/**
 * Returns everything before the last "/" of an archive path.
 *
 * @param {string} value - Slash-separated archive path.
 * @returns {string} The directory part, or "" when the path has no "/".
 */
function zipDirname(value) {
  const slashAt = value.lastIndexOf("/");
  if (slashAt < 0) {
    return "";
  }
  return value.slice(0, slashAt);
}
4801
/**
 * Resolves a reference found in one archive part to a normalized archive path.
 *
 * @param {string} basePath - Archive path of the part that contains the reference.
 * @param {string} target - Reference target, relative to `basePath`'s directory
 *   or root-absolute (leading "/").
 * @returns {string} Normalized POSIX-style path inside the archive.
 */
function resolveZipTarget(basePath, target) {
  // A leading "/" addresses the archive root. posix.join would instead append
  // it to the base directory ("ppt" + "/ppt/slides/x.xml" -> "ppt/ppt/slides/x.xml"),
  // so the slash is stripped and the rest is used as-is.
  if (target.startsWith("/")) {
    return path7.posix.normalize(target.replace(/^\/+/, ""));
  }
  return path7.posix.normalize(path7.posix.join(zipDirname(basePath), target));
}
4804
/**
 * Collects `Relationship` elements from a relationships XML part.
 *
 * @param {string} xml - Relationships XML text.
 * @param {string} basePath - Archive path that relative targets are resolved against.
 * @returns {Map<string, { target: string, type: string }>} Relationship id mapped
 *   to its resolved target and its `Type` attribute ("" when absent). Elements
 *   missing an `Id` or `Target` are skipped.
 */
function relationshipTargets(xml, basePath) {
  const targetsById = /* @__PURE__ */ new Map();
  const elements = Array.from(parseXmlDocument(xml).getElementsByTagName("*"));
  const relationships = elements.filter((element) => element.localName === "Relationship");
  for (const relationship of relationships) {
    const id = relationship.getAttribute("Id")?.trim();
    const target = relationship.getAttribute("Target")?.trim();
    const type = relationship.getAttribute("Type")?.trim() ?? "";
    if (id && target) {
      targetsById.set(id, { target: resolveZipTarget(basePath, target), type });
    }
  }
  return targetsById;
}
4821
/**
 * Returns the normalized text of every element with the given local name,
 * in document order, omitting elements whose text normalizes to "".
 *
 * @param {string} xml - XML source text.
 * @param {string} localName - Element local name to match (namespace prefix ignored).
 * @returns {string[]}
 */
function xmlTextNodes(xml, localName) {
  return Array.from(parseXmlDocument(xml).getElementsByTagName("*"))
    .filter((element) => element.localName === localName)
    .map((element) => normalizeWhitespace(element.textContent ?? ""))
    .filter((text) => Boolean(text));
}
4835
/**
 * Finds the first `h1`, `h2` or `h3` element in an HTML string.
 *
 * @param {string} html - HTML source text.
 * @returns {string | undefined} The heading's normalized text, or `undefined`
 *   when there is no such heading or its text is empty.
 */
function firstHtmlHeading(html) {
  const { document } = new JSDOM(html).window;
  const headingText = document.querySelector("h1, h2, h3")?.textContent ?? "";
  const normalized = normalizeWhitespace(headingText);
  return normalized ? normalized : void 0;
}
4841
/**
 * Converts HTML to markdown with Turndown (ATX headings, fenced code blocks).
 *
 * @param {string} html - HTML source text.
 * @returns {string} Trimmed markdown for the document body, or for the raw
 *   input when the parsed document has no body.
 */
function htmlToMarkdown(html) {
  const { document } = new JSDOM(html).window;
  const converter = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
  const markup = document.body?.innerHTML ?? html;
  const markdown = converter.turndown(markup);
  return markdown.trim();
}
4728
4847
  async function extractPdfText(input) {
4729
4848
  try {
4730
4849
  const pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs");
@@ -4765,39 +4884,793 @@ async function extractPdfText(input) {
4765
4884
  };
4766
4885
  } catch (error) {
4767
4886
  return {
4768
- artifact: {
4769
- ...extractionMetadata("pdf", input.mimeType, "pdf_text"),
4770
- warnings: [`PDF text extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
4771
- }
4887
+ artifact: {
4888
+ ...extractionMetadata("pdf", input.mimeType, "pdf_text"),
4889
+ warnings: [`PDF text extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
4890
+ }
4891
+ };
4892
+ }
4893
+ }
4894
/**
 * Extracts raw text from a DOCX buffer with mammoth.
 *
 * @param {{ bytes: Buffer, mimeType: string }} input - Source bytes and MIME type.
 * @returns {Promise<{ extractedText?: string, artifact: object }>} Normalized
 *   text (omitted when empty) plus an artifact carrying core metadata and any
 *   warnings. Failures are reported as a warning on the artifact, not thrown.
 */
async function extractDocxText(input) {
  try {
    const mammoth = await import("mammoth");
    const { value, messages } = await mammoth.extractRawText({ buffer: input.bytes });
    const extractedText = normalizeDocumentText(value);
    // Converter messages become warnings, each capped at 240 characters.
    const warnings = [];
    for (const entry of messages) {
      const text = normalizeWhitespace(entry.message);
      if (text) {
        warnings.push(truncate(text, 240));
      }
    }
    const base = extractionMetadata("docx", input.mimeType, "docx_text");
    const metadata = parseOfficeCoreMetadata(input.bytes);
    if (!extractedText) {
      warnings.push("DOCX text extraction completed but produced no extractable text.");
    }
    return {
      extractedText: extractedText || void 0,
      artifact: {
        ...base,
        metadata,
        warnings: warnings.length ? warnings : void 0
      }
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...extractionMetadata("docx", input.mimeType, "docx_text"),
        warnings: [`DOCX text extraction failed: ${reason}`]
      }
    };
  }
}
4923
/**
 * Parses CSV or TSV bytes and renders a markdown summary with a table preview.
 *
 * Input is treated as TSV when the file name ends in ".tsv" or the MIME type
 * contains "tab-separated"; otherwise it is parsed as comma-separated.
 *
 * @param {{ bytes: Buffer, mimeType: string, fileName?: string }} input
 * @returns {Promise<{ title?: string, extractedText?: string, artifact: object }>}
 *   Summary text and artifact metadata (format, row/column counts, headers).
 *   Failures are reported as a warning on the artifact, not thrown.
 */
async function extractCsvText(input) {
  try {
    const rawText = decodeTextBytes(input.bytes);
    const isTsv = Boolean(input.fileName?.toLowerCase().endsWith(".tsv") || input.mimeType.includes("tab-separated"));
    // TSV fields are separated by a tab; the escape sequence keeps that
    // explicit instead of relying on an invisible whitespace literal.
    const delimiter = isTsv ? "\t" : ",";
    const parsed = parseCsvSync(rawText, {
      delimiter,
      relax_column_count: true,
      skip_empty_lines: true,
      trim: true
    });
    const rows = parsed.map((row) => row.map((value) => normalizeTableCell(value)));
    const { headers, bodyRows } = detectHeaderRow(rows);
    const hintLines = columnHints(headers, bodyRows);
    const title = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0;
    // Falsy entries (including the "" spacer lines) are removed by the filter below.
    const extractedText = [
      title ? `# ${title}` : null,
      `Format: ${isTsv ? "TSV" : "CSV"}`,
      `Rows: ${bodyRows.length}`,
      `Columns: ${headers.length}`,
      headers.length ? `Headers: ${headers.join(", ")}` : null,
      "",
      hintLines.length ? "## Column Hints" : null,
      hintLines.length ? hintLines.join("\n") : null,
      hintLines.length ? "" : null,
      "## Preview",
      ...markdownTable(headers, bodyRows)
    ].filter((item) => Boolean(item)).join("\n").trim();
    const artifact = {
      ...extractionMetadata("csv", input.mimeType, "csv_text"),
      metadata: {
        format: isTsv ? "tsv" : "csv",
        row_count: String(bodyRows.length),
        column_count: String(headers.length),
        headers: headers.join(", ")
      }
    };
    return {
      title,
      extractedText,
      artifact
    };
  } catch (error) {
    return {
      artifact: {
        ...extractionMetadata("csv", input.mimeType, "csv_text"),
        warnings: [`CSV extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
      }
    };
  }
}
4973
/**
 * Reads a workbook with the `xlsx` package and renders a markdown preview of
 * up to the first 10 sheets.
 *
 * @param {{ bytes: Buffer, mimeType: string, fileName?: string }} input
 * @returns {Promise<{ title?: string, extractedText?: string, artifact: object }>}
 *   Title (workbook `Props.Title`, else the file's base name), preview text,
 *   and an artifact with sheet count/names. Failures are reported as a warning
 *   on the artifact, not thrown.
 */
async function extractXlsxText(input) {
  try {
    const XLSX = await import("xlsx");
    const workbook = XLSX.read(input.bytes, { type: "buffer", cellFormula: false, cellHTML: false, cellStyles: false });
    const allSheetNames = workbook.SheetNames;
    const previewNames = allSheetNames.slice(0, 10);
    const metadata = {
      sheet_count: String(allSheetNames.length),
      sheet_names: allSheetNames.join(", ")
    };
    const sheetSections = [];
    for (const sheetName of previewNames) {
      const sheet = workbook.Sheets[sheetName];
      if (sheet) {
        // header: 1 asks for array-of-arrays rows rather than keyed objects.
        const rawRows = XLSX.utils.sheet_to_json(sheet, { header: 1, raw: false, defval: "" });
        const rows = rawRows.map((row) => row.map((value) => normalizeTableCell(value)));
        const { headers, bodyRows } = detectHeaderRow(rows);
        sheetSections.push(
          `## Sheet: ${sheetName}`,
          `Rows: ${bodyRows.length}`,
          `Columns: ${headers.length}`,
          ...markdownTable(headers, bodyRows),
          ""
        );
      }
    }
    let title = normalizeWhitespace(String(workbook.Props?.Title ?? ""));
    if (!title) {
      title = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0;
    }
    const lines = [
      title ? `# ${title}` : null,
      `Sheets: ${allSheetNames.length}`,
      allSheetNames.length ? `Sheet Names: ${allSheetNames.join(", ")}` : null,
      "",
      ...sheetSections
    ];
    // Falsy entries (including "" spacers) are removed before joining.
    const extractedText = lines.filter((line) => Boolean(line)).join("\n").trim();
    const wasTruncated = allSheetNames.length > previewNames.length;
    return {
      title,
      extractedText,
      artifact: {
        ...extractionMetadata("xlsx", input.mimeType, "xlsx_text"),
        metadata,
        warnings: wasTruncated ? ["Workbook preview truncated to the first 10 sheets."] : void 0
      }
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...extractionMetadata("xlsx", input.mimeType, "xlsx_text"),
        warnings: [`XLSX extraction failed: ${reason}`]
      }
    };
  }
}
5028
/**
 * Extracts slide text and speaker notes from a PPTX archive (first 60 slides).
 *
 * Slides are taken in the order of the `sldId` elements in
 * `ppt/presentation.xml` and resolved through the presentation's
 * relationships part.
 *
 * @param {{ bytes: Buffer, mimeType: string, fileName?: string }} input
 * @returns {Promise<{ title?: string, extractedText?: string, artifact: object }>}
 *   Failures (including missing required parts) are reported as a warning on
 *   the artifact, not thrown.
 */
async function extractPptxText(input) {
  try {
    const archive = unzipSync(new Uint8Array(input.bytes));
    const presentationXml = zipEntryText(archive, "ppt/presentation.xml");
    if (!presentationXml) {
      throw new Error("Missing ppt/presentation.xml");
    }
    const relsXml = zipEntryText(archive, "ppt/_rels/presentation.xml.rels");
    if (!relsXml) {
      throw new Error("Missing ppt/_rels/presentation.xml.rels");
    }
    const rels = relationshipTargets(relsXml, "ppt/presentation.xml");
    const presentation = parseXmlDocument(presentationXml);
    const slideIdNodes = Array.from(presentation.getElementsByTagName("*")).filter((node) => node.localName === "sldId");
    const resolvedTargets = [];
    for (const node of slideIdNodes) {
      const relationshipId = node.getAttribute("r:id")?.trim();
      const target = relationshipId ? rels.get(relationshipId)?.target : void 0;
      if (target) {
        resolvedTargets.push(target);
      }
    }
    const slideTargets = resolvedTargets.slice(0, 60);
    const slideSections = [];
    for (const [index, slidePath] of slideTargets.entries()) {
      const slideXml = zipEntryText(archive, slidePath);
      if (!slideXml) {
        continue;
      }
      const slideNumber = index + 1;
      const slideTexts = xmlTextNodes(slideXml, "t");
      // The first text run doubles as the slide title.
      const slideTitle = slideTexts[0] ?? `Slide ${slideNumber}`;
      slideSections.push(`## Slide ${slideNumber}: ${slideTitle}`);
      if (slideTexts.length) {
        slideSections.push(slideTexts.join("\n"));
      }
      const slideRelsPath = `${zipDirname(slidePath)}/_rels/${path7.posix.basename(slidePath)}.rels`;
      const slideRelsXml = zipEntryText(archive, slideRelsPath);
      if (slideRelsXml) {
        const slideRels = relationshipTargets(slideRelsXml, slidePath);
        const notesRel = Array.from(slideRels.values()).find((entry) => entry.type.endsWith("/notesSlide"));
        const notesTarget = notesRel?.target;
        if (notesTarget) {
          const notesXml = zipEntryText(archive, notesTarget);
          const noteTexts = notesXml ? xmlTextNodes(notesXml, "t") : [];
          if (noteTexts.length) {
            slideSections.push("Notes:", noteTexts.join("\n"));
          }
        }
      }
      slideSections.push("");
    }
    const metadata = parseOfficeCoreMetadata(input.bytes);
    let title = metadata?.title;
    if (!title) {
      title = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0;
    }
    const lines = [title ? `# ${title}` : null, `Slides: ${slideTargets.length}`, "", ...slideSections];
    const extractedText = lines.filter((line) => Boolean(line)).join("\n").trim();
    // The warning fires whenever fewer targets were kept than sldId elements exist.
    const droppedSlides = slideIdNodes.length > slideTargets.length;
    return {
      title,
      extractedText,
      artifact: {
        ...extractionMetadata("pptx", input.mimeType, "pptx_text"),
        metadata: {
          ...(metadata ?? {}),
          slide_count: String(slideTargets.length)
        },
        warnings: droppedSlides ? ["Slide extraction truncated to the first 60 slides."] : void 0
      }
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...extractionMetadata("pptx", input.mimeType, "pptx_text"),
        warnings: [`PPTX extraction failed: ${reason}`]
      }
    };
  }
}
5095
/**
 * Extracts chapter-like spine entries from an EPUB archive as markdown.
 *
 * The package document is located through `META-INF/container.xml`; spine
 * items are visited in order. Skipped: non-HTML items, items flagged `nav`,
 * missing entries, entries with empty markdown, and entries titled
 * "Table of Contents".
 *
 * @param {{ bytes: Buffer, fileName?: string }} input
 * @returns {Promise<{ title?: string, author?: string, chapters: object[], warnings?: string[] }>}
 *   Failures are reported through `warnings` with an empty chapter list.
 */
async function extractEpubChapters(input) {
  try {
    const archive = unzipSync(new Uint8Array(input.bytes));
    const containerXml = zipEntryText(archive, "META-INF/container.xml");
    if (!containerXml) {
      throw new Error("Missing META-INF/container.xml");
    }
    const elementsOf = (doc) => Array.from(doc.getElementsByTagName("*"));
    const attribute = (node, name) => node.getAttribute(name)?.trim() ?? "";
    const rootfile = elementsOf(parseXmlDocument(containerXml)).find((node) => node.localName === "rootfile");
    const packagePath = rootfile?.getAttribute("full-path")?.trim();
    if (!packagePath) {
      throw new Error("EPUB container did not declare a package document.");
    }
    const packageXml = zipEntryText(archive, packagePath);
    if (!packageXml) {
      throw new Error(`Missing EPUB package document: ${packagePath}`);
    }
    const packageElements = elementsOf(parseXmlDocument(packageXml));
    // Manifest: item id -> href / media type / properties; incomplete items are dropped.
    const manifestEntries = new Map();
    for (const node of packageElements) {
      if (node.localName !== "item") {
        continue;
      }
      const id = attribute(node, "id");
      const href = attribute(node, "href");
      if (id && href) {
        manifestEntries.set(id, {
          href,
          mediaType: attribute(node, "media-type"),
          properties: attribute(node, "properties")
        });
      }
    }
    const spineIds = packageElements
      .filter((node) => node.localName === "itemref")
      .map((node) => node.getAttribute("idref")?.trim())
      .filter((value) => Boolean(value));
    let bookTitle = xmlTextNodes(packageXml, "title")[0];
    if (!bookTitle) {
      bookTitle = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0;
    }
    const author = xmlTextNodes(packageXml, "creator")[0];
    const chapters = [];
    for (const spineId of spineIds) {
      const item = manifestEntries.get(spineId);
      // "xhtml" media types also contain "html", so one check covers both.
      if (!item || !item.mediaType.includes("html")) {
        continue;
      }
      if (item.properties.split(/\s+/).includes("nav")) {
        continue;
      }
      const html = zipEntryText(archive, resolveZipTarget(packagePath, item.href));
      if (!html) {
        continue;
      }
      const markdown = htmlToMarkdown(html);
      if (!markdown) {
        continue;
      }
      const chapterTitle = firstHtmlHeading(html) || markdown.match(/^#\s+(.+)$/m)?.[1]?.trim() || item.href;
      const normalizedTitle = normalizeWhitespace(chapterTitle);
      if (!normalizedTitle || /^table of contents$/i.test(normalizedTitle)) {
        continue;
      }
      chapters.push({
        partKey: item.href,
        title: normalizedTitle,
        markdown,
        metadata: {
          book_title: bookTitle ?? "",
          chapter_title: normalizedTitle,
          author: author ?? ""
        }
      });
    }
    return {
      title: bookTitle,
      author,
      chapters,
      warnings: chapters.length ? void 0 : ["EPUB extraction completed but found no chapter-like spine entries."]
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      chapters: [],
      warnings: [`EPUB extraction failed: ${reason}`]
    };
  }
}
5175
/**
 * Formats a millisecond offset as `HH:MM:SS.mmm`.
 * Negative values clamp to zero and fractional milliseconds are floored.
 *
 * @param {number} value - Offset in milliseconds.
 * @returns {string}
 */
function timestampFromMs(value) {
  const pad = (amount, width) => String(amount).padStart(width, "0");
  const totalMs = Math.max(0, Math.floor(value));
  const totalSeconds = Math.floor(totalMs / 1000);
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor((totalSeconds % 3600) / 60);
  const seconds = totalSeconds % 60;
  const milliseconds = totalMs % 1000;
  return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)}.${pad(milliseconds, 3)}`;
}
5186
/**
 * Normalizes, de-duplicates (first occurrence wins) and joins values with ", ".
 *
 * @param {string[]} values - Raw values.
 * @returns {string | undefined} The joined list, or `undefined` when nothing remains.
 */
function normalizeDelimitedList(values) {
  const cleaned = values.map((value) => normalizeWhitespace(value)).filter(Boolean);
  const distinct = Array.from(new Set(cleaned));
  if (distinct.length === 0) {
    return void 0;
  }
  return distinct.join(", ");
}
5190
/**
 * Converts a `Date` or a non-blank date string to an ISO-8601 string.
 *
 * @param {unknown} value - Candidate date.
 * @returns {string | undefined} `toISOString()` output, or `undefined` for
 *   invalid dates, blank strings and any other input type.
 */
function normalizeIsoDate(value) {
  const toIso = (date) => (Number.isFinite(date.getTime()) ? date.toISOString() : void 0);
  if (value instanceof Date) {
    return toIso(value);
  }
  if (typeof value === "string" && value.trim()) {
    return toIso(new Date(value));
  }
  return void 0;
}
5202
/**
 * Lists a display label for each address in a parsed address header.
 *
 * @param {unknown} value - An object with a `value` array of `{ name, address }`
 *   entries, or an array of such objects. Anything else yields `[]`.
 * @returns {string[]} For each entry, the normalized display name, or the
 *   normalized address when the name is missing or blank. Entries with neither
 *   are omitted.
 */
function addressNames(value) {
  // NOTE(review): mailparser appears to return an array of address objects
  // when a header repeats; confirm against the mailparser docs.
  if (Array.isArray(value)) {
    return value.flatMap((item) => addressNames(item));
  }
  if (!value || typeof value !== "object" || !("value" in value) || !Array.isArray(value.value)) {
    return [];
  }
  return value.value
    .map((entry) => {
      // `??` is not enough here: a missing display name may arrive as "",
      // which is not nullish, so the address would never be used as fallback.
      const name = normalizeWhitespace(entry?.name ?? "");
      return name || normalizeWhitespace(entry?.address ?? "");
    })
    .filter(Boolean);
}
5208
/**
 * Renders a parsed address header as a comma-separated, de-duplicated list.
 *
 * @param {unknown} value - Parsed address header (see `addressNames`).
 * @returns {string | undefined} The joined labels, or `undefined` when there are none.
 */
function addressList(value) {
  const labels = addressNames(value);
  return normalizeDelimitedList(labels);
}
5211
/**
 * Picks an identifier for the conversation an email belongs to.
 *
 * Candidates are tried in order: `messageId`, the first `inReplyTo` value,
 * then the first `references` value. The first one that is non-empty after
 * whitespace normalization wins.
 *
 * @param {object} parsed - Parsed email.
 * @returns {string | undefined}
 */
function emailConversationId(parsed) {
  // Header values may be a single string or an array of strings.
  const firstOf = (value) => {
    if (Array.isArray(value)) {
      return value[0];
    }
    return typeof value === "string" ? value : void 0;
  };
  const candidates = [
    () => parsed.messageId,
    () => firstOf(parsed.inReplyTo),
    () => firstOf(parsed.references)
  ];
  for (const readCandidate of candidates) {
    const id = normalizeWhitespace(readCandidate() ?? "");
    if (id) {
      return id;
    }
  }
  return void 0;
}
5215
/**
 * Produces the body text for an email, preferring the plain-text part and
 * falling back to the HTML part converted to markdown.
 *
 * @param {object} parsed - Parsed email with optional `text` and `html`.
 * @returns {string} Normalized body text, or "" when neither part has content.
 */
function emailBodyMarkdown(parsed) {
  const plain = normalizeDocumentText(parsed.text ?? "");
  if (plain) {
    return plain;
  }
  const hasHtml = typeof parsed.html === "string" && Boolean(parsed.html.trim());
  return hasHtml ? normalizeDocumentText(htmlToMarkdown(parsed.html)) : "";
}
5225
/**
 * Converts a parsed email into a title, conversation id, metadata record and
 * markdown rendering.
 *
 * @param {object} parsed - Parsed email (subject, from, to, cc, date, ...).
 * @param {string} fallbackTitle - Title used when the subject is empty.
 * @returns {{ title: string, conversationId?: string, metadata: Record<string, string>, markdown: string }}
 *   Metadata and header lines only include fields that have a value.
 */
function normalizeParsedEmail(parsed, fallbackTitle) {
  const title = normalizeWhitespace(parsed.subject ?? "") || fallbackTitle;
  const sender = addressList(parsed.from);
  const recipients = addressList(parsed.to);
  const cc = addressList(parsed.cc);
  const occurredAt = normalizeIsoDate(parsed.date);
  const participants = normalizeDelimitedList([
    ...addressNames(parsed.from),
    ...addressNames(parsed.to),
    ...addressNames(parsed.cc)
  ]);
  const conversationId = emailConversationId(parsed);
  const body = emailBodyMarkdown(parsed);
  const attachmentCount = Array.isArray(parsed.attachments) ? parsed.attachments.length : 0;
  const messageId = normalizeWhitespace(parsed.messageId ?? "");

  // Insertion order below fixes the key order of the metadata object.
  const metadataFields = [
    ["occurred_at", occurredAt],
    ["sender", sender],
    ["recipients", recipients],
    ["cc", cc],
    ["participants", participants],
    ["conversation_id", conversationId],
    ["message_id", messageId],
    ["attachment_count", attachmentCount ? String(attachmentCount) : void 0]
  ];
  const metadata = {};
  for (const [key, fieldValue] of metadataFields) {
    if (fieldValue) {
      metadata[key] = fieldValue;
    }
  }

  const headerFields = [
    ["Date", occurredAt],
    ["From", sender],
    ["To", recipients],
    ["CC", cc],
    ["Conversation ID", conversationId],
    ["Attachments", attachmentCount]
  ];
  const headerLines = headerFields
    .filter(([, fieldValue]) => Boolean(fieldValue))
    .map(([label, fieldValue]) => `${label}: ${fieldValue}`);

  const markdown = [
    `# ${title}`,
    "",
    ...headerLines,
    "",
    "## Message",
    "",
    body || "No readable body content was extracted from this email.",
    ""
  ].join("\n");

  return { title, conversationId, metadata, markdown };
}
5265
/**
 * Lists a label for each calendar attendee.
 *
 * @param {unknown} value - One attendee object or an array of them; each is
 *   read for `params.CN` (name) and `val` / `value` (address).
 * @returns {string[]} The name when present, otherwise the address. Non-object
 *   entries and entries with neither are omitted.
 */
function calendarAttendees(value) {
  if (!value) {
    return [];
  }
  const entries = Array.isArray(value) ? value : [value];
  const labels = [];
  for (const entry of entries) {
    if (!entry || typeof entry !== "object") {
      continue;
    }
    const name = normalizeWhitespace(String(entry.params?.CN ?? ""));
    const address = normalizeWhitespace(String(entry.val ?? entry.value ?? ""));
    const label = name || address;
    if (label) {
      labels.push(label);
    }
  }
  return labels;
}
5280
/**
 * Maps a Slack user id to its display name when one is known.
 *
 * @param {string} input - User id.
 * @param {Map<string, string>} usersById - Known user id to name mapping.
 * @returns {string} The mapped name, or `input` unchanged when unmapped.
 */
function slackFormatSpeakerId(input, usersById) {
  const displayName = usersById.get(input);
  return displayName ?? input;
}
5283
/**
 * Rewrites Slack message markup into plain text.
 *
 * Handles, in order: `<@USERID>` mentions, `<#ID|name>` channel references,
 * `<url|label>` links and bare `<url>` links, then normalizes whitespace.
 *
 * @param {string} text - Raw Slack message text.
 * @param {Map<string, string>} usersById - Known user id to name mapping.
 * @returns {string}
 */
function slackNormalizeText(text, usersById) {
  const withMentions = text.replace(/<@([A-Z0-9]+)>/g, (_, userId) => `@${slackFormatSpeakerId(userId, usersById)}`);
  const withChannels = withMentions.replace(/<#[A-Z0-9]+\|([^>]+)>/g, "#$1");
  const withLabeledLinks = withChannels.replace(/<(https?:\/\/[^>|]+)\|([^>]+)>/g, "$2 ($1)");
  const withBareLinks = withLabeledLinks.replace(/<(https?:\/\/[^>]+)>/g, "$1");
  return normalizeWhitespace(withBareLinks);
}
5288
/**
 * Converts a Slack `ts` value (seconds, possibly fractional) to an ISO timestamp.
 *
 * @param {string | number} ts2 - Slack message timestamp.
 * @param {string} fallbackDate - `YYYY-MM-DD` date used at midnight UTC when
 *   `ts2` is not a positive finite number.
 * @returns {string} ISO-8601 timestamp.
 */
function slackMessageTimestamp(ts2, fallbackDate) {
  const seconds = Number(ts2);
  const isUsable = Number.isFinite(seconds) && seconds > 0;
  const moment = isUsable ? new Date(seconds * 1000) : /* @__PURE__ */ new Date(`${fallbackDate}T00:00:00.000Z`);
  return moment.toISOString();
}
5295
/**
 * Splits mailbox bytes into individual message buffers using `node-mbox`.
 *
 * @param {Buffer} bytes - Mailbox contents, fed to the splitter as one chunk.
 * @returns {Promise<Buffer[]>} One buffer per emitted message; rejects on a stream error.
 */
async function loadZipMessageBuffers(bytes) {
  const { MboxStream } = await import("node-mbox");
  const stream = MboxStream(Readable.from([bytes]));
  return await new Promise((resolve, reject) => {
    const collected = [];
    const settle = () => resolve(collected);
    stream.on("data", (chunk) => {
      collected.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    });
    stream.on("error", reject);
    // Either event settles the promise; the second resolve call is a no-op.
    stream.on("finish", settle);
    stream.on("end", settle);
  });
}
5308
/**
 * Decodes every truthy archive entry to a string.
 *
 * @param {Record<string, Uint8Array>} archive - Map of entry path to bytes.
 * @returns {Map<string, string>} Entry path mapped to decoded text.
 */
function archiveEntriesAsText(archive) {
  const textByPath = new Map();
  for (const [entryPath, bytes] of Object.entries(archive)) {
    if (bytes) {
      textByPath.set(entryPath, strFromU8(bytes));
    }
  }
  return textByPath;
}
5313
/**
 * Heuristically detects a Slack export from its entry paths.
 *
 * @param {Iterable<string>} entries - Archive entry paths.
 * @returns {boolean} `true` when there is both a top-level index file
 *   (channels/groups/dms/mpims `.json`) and at least one
 *   `<folder>/YYYY-MM-DD.json` file.
 */
function looksLikeSlackEntries(entries) {
  const indexFiles = ["channels.json", "groups.json", "dms.json", "mpims.json"];
  const dayFilePattern = /^[^/]+\/\d{4}-\d{2}-\d{2}\.json$/i;
  const paths = Array.from(entries);
  if (!paths.some((entry) => indexFiles.some((indexFile) => indexFile === entry))) {
    return false;
  }
  return paths.some((entry) => dayFilePattern.test(entry));
}
5321
/**
 * Builds a lookup of conversations from a parsed Slack index file.
 *
 * @param {unknown} raw - Parsed JSON; anything other than an array yields an empty map.
 * @param {Map<string, string>} usersById - Known user id to name mapping.
 * @returns {Map<string, { id: string, title: string, members: string[] }>}
 *   Keyed by the conversation's normalized `name`; items without a name are skipped.
 */
function slackEntriesFromChannelIndex(raw, usersById) {
  const byTitle = /* @__PURE__ */ new Map();
  const items = Array.isArray(raw) ? raw : [];
  for (const item of items) {
    if (!item || typeof item !== "object") {
      continue;
    }
    const id = normalizeWhitespace(item.id ?? "");
    const title = normalizeWhitespace(item.name ?? "");
    if (!title) {
      continue;
    }
    // Members come from `members` when it is an array, else from a single `user`.
    let memberIds = [];
    if (Array.isArray(item.members)) {
      memberIds = item.members;
    } else if (item.user) {
      memberIds = [item.user];
    }
    const members = memberIds.map((member) => slackFormatSpeakerId(member, usersById)).filter(Boolean);
    byTitle.set(title, { id, title, members });
  }
  return byTitle;
}
5341
/**
 * Parses subtitle text with the `subtitle` package and renders a markdown transcript.
 *
 * The format label is "WebVTT"/"vtt" when the file name ends in ".vtt",
 * otherwise "SRT"/"srt".
 *
 * @param {{ bytes: Buffer, mimeType: string, fileName?: string }} input
 * @returns {Promise<{ title?: string, extractedText?: string, artifact: object }>}
 *   Failures are reported as a warning on the artifact, not thrown.
 */
async function extractTranscriptText(input) {
  try {
    const { parseSync } = await import("subtitle");
    const rawText = decodeTextBytes(input.bytes);
    const cues = [];
    for (const node of parseSync(rawText)) {
      if (node.type !== "cue" || !node.data) {
        continue;
      }
      // Multi-line cue text is collapsed onto a single line.
      const text = normalizeWhitespace((node.data?.text ?? "").replace(/\s*\n+\s*/g, " "));
      if (!text) {
        continue;
      }
      cues.push({
        start: Math.max(0, node.data?.start ?? 0),
        end: Math.max(0, node.data?.end ?? 0),
        text
      });
    }
    const title = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : void 0;
    const isVtt = input.fileName?.toLowerCase().endsWith(".vtt");
    const hasCues = cues.length > 0;
    const firstStart = hasCues ? timestampFromMs(cues[0].start) : void 0;
    const lastEnd = hasCues ? timestampFromMs(cues[cues.length - 1].end) : void 0;
    const transcriptLines = hasCues
      ? cues.map((cue) => `- [${timestampFromMs(cue.start)} - ${timestampFromMs(cue.end)}] ${cue.text}`)
      : ["- No transcript segments were extracted."];
    const lines = [
      title ? `# ${title}` : null,
      `Format: ${isVtt ? "WebVTT" : "SRT"}`,
      `Segments: ${cues.length}`,
      ...(hasCues ? [`Start: ${firstStart}`, `End: ${lastEnd}`] : []),
      "",
      "## Transcript",
      "",
      ...transcriptLines,
      ""
    ];
    // Falsy entries (including "" spacers) are removed before joining.
    const extractedText = lines.filter((line) => Boolean(line)).join("\n");
    return {
      title,
      extractedText,
      artifact: {
        ...extractionMetadata("transcript", input.mimeType, "transcript_text"),
        metadata: {
          format: isVtt ? "vtt" : "srt",
          segment_count: String(cues.length),
          ...(hasCues ? { started_at: firstStart, ended_at: lastEnd } : {})
        }
      }
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...extractionMetadata("transcript", input.mimeType, "transcript_text"),
        warnings: [`Transcript extraction failed: ${reason}`]
      }
    };
  }
}
5383
/**
 * Parses a single email with mailparser and renders it as markdown.
 *
 * @param {{ bytes: Buffer, mimeType: string, fileName?: string }} input
 * @returns {Promise<{ title?: string, extractedText?: string, artifact: object }>}
 *   The title falls back to the file's base name, or "Email". Failures are
 *   reported as a warning on the artifact, not thrown.
 */
async function extractEmailText(input) {
  try {
    const { simpleParser } = await import("mailparser");
    const fallbackTitle = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : "Email";
    const { title, markdown, metadata } = normalizeParsedEmail(await simpleParser(input.bytes), fallbackTitle);
    return {
      title,
      extractedText: markdown,
      artifact: {
        ...extractionMetadata("email", input.mimeType, "email_text"),
        metadata
      }
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...extractionMetadata("email", input.mimeType, "email_text"),
        warnings: [`Email extraction failed: ${reason}`]
      }
    };
  }
}
5406
/**
 * Splits a mailbox into messages and renders each one as markdown.
 *
 * @param {{ bytes: Buffer, fileName?: string }} input
 * @returns {Promise<{ title?: string, messages: object[], warnings?: string[] }>}
 *   Each message carries a `partKey` of `<conversationId>-<position>` and
 *   metadata that includes the mailbox title and its 1-based position.
 *   Failures are reported through `warnings` with an empty message list.
 */
async function extractMboxMessages(input) {
  try {
    const title = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : "Mailbox";
    const { simpleParser } = await import("mailparser");
    const messages = await loadZipMessageBuffers(input.bytes);
    const extracted = [];
    // Messages are parsed one at a time, in mailbox order.
    for (const [index, message] of messages.entries()) {
      const position = index + 1;
      const normalized = normalizeParsedEmail(await simpleParser(message), `Message ${position}`);
      const conversationId = normalized.conversationId || `${position}`;
      extracted.push({
        partKey: `${conversationId}-${position}`,
        title: normalized.title,
        markdown: normalized.markdown,
        metadata: {
          ...normalized.metadata,
          container_title: title,
          mailbox_title: title,
          part_index: String(position),
          part_count: String(messages.length)
        }
      });
    }
    return {
      title,
      messages: extracted,
      warnings: extracted.length ? void 0 : ["Mailbox extraction completed but found no readable messages."]
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      messages: [],
      warnings: [`Mailbox extraction failed: ${reason}`]
    };
  }
}
5441
/**
 * Parses an iCalendar file with node-ical and renders each VEVENT as markdown.
 *
 * @param {{ bytes: Buffer, fileName?: string }} input
 * @returns {Promise<{ title?: string, events: object[], warnings?: string[] }>}
 *   Each event carries a `partKey` (the event UID, or a title/start-based
 *   fallback), metadata and markdown. Failures are reported through `warnings`
 *   with an empty event list.
 */
async function extractCalendarEvents(input) {
  try {
    const ical = await import("node-ical");
    const calendarTitle = input.fileName ? path7.basename(input.fileName, path7.extname(input.fileName)) : "Calendar";
    const parsed = ical.default.sync.parseICS(decodeTextBytes(input.bytes));
    const events = [];
    for (const item of Object.values(parsed)) {
      if (!item || typeof item !== "object" || item.type !== "VEVENT") {
        continue;
      }
      const event = item;
      const title = normalizeWhitespace(event.summary ?? "") || "Calendar Event";
      const occurredAt = normalizeIsoDate(event.start);
      const endsAt = normalizeIsoDate(event.end);
      const organizer = event.organizer ? normalizeWhitespace(String(event.organizer.params?.CN ?? event.organizer.val ?? "")) : void 0;
      // NOTE(review): node-ical appears to expose ATTENDEE lines as
      // `attendee` (singular) — confirm against its type definitions.
      // `attendees` is still read as a fallback for existing callers.
      const attendees = calendarAttendees(event.attendee ?? event.attendees);
      const participants = normalizeDelimitedList([organizer ?? "", ...attendees]);
      const location = normalizeWhitespace(event.location ?? "") || void 0;
      const description = normalizeDocumentText(event.description ?? "");
      // Without a UID, fall back to the title plus the start time or position.
      const conversationId = normalizeWhitespace(event.uid ?? "") || `${title}-${occurredAt ?? events.length + 1}`;
      events.push({
        partKey: conversationId,
        title,
        metadata: {
          container_title: calendarTitle,
          ...(occurredAt ? { occurred_at: occurredAt } : {}),
          ...(endsAt ? { ends_at: endsAt } : {}),
          ...(organizer ? { organizer } : {}),
          ...(location ? { location } : {}),
          ...(participants ? { participants } : {}),
          conversation_id: conversationId
        },
        markdown: [
          `# ${title}`,
          "",
          ...(occurredAt ? [`Start: ${occurredAt}`] : []),
          ...(endsAt ? [`End: ${endsAt}`] : []),
          ...(organizer ? [`Organizer: ${organizer}`] : []),
          ...(attendees.length ? [`Attendees: ${attendees.join(", ")}`] : []),
          ...(location ? [`Location: ${location}`] : []),
          ...(conversationId ? [`Event ID: ${conversationId}`] : []),
          "",
          "## Description",
          "",
          description || "No event description was provided.",
          ""
        ].join("\n")
      });
    }
    return {
      title: calendarTitle,
      events,
      warnings: events.length ? void 0 : ["Calendar extraction completed but found no VEVENT entries."]
    };
  } catch (error) {
    return {
      events: [],
      warnings: [`Calendar extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
    };
  }
}
5502
/**
 * Converts the text entries of a Slack export into one markdown part per
 * `<channel>/<YYYY-MM-DD>.json` day file.
 *
 * @param {Map<string, string>} entries - Export-relative, `/`-separated paths
 *   mapped to the raw text of each file.
 * @param {string} exportTitle - Title recorded as the workspace/container name.
 * @returns {{title: string, conversations: object[], warnings: (string[]|undefined)}}
 *   Parsed conversations, with a warning when no day files were found.
 * @throws {SyntaxError} When an index or day file is not valid JSON (callers
 *   in this file wrap the call in try/catch).
 */
function parseSlackExportEntries(entries, exportTitle) {
  const usersById = new Map();
  const rawUsers = entries.get("users.json");
  if (rawUsers) {
    const parsed = JSON.parse(rawUsers);
    for (const user of parsed) {
      const id = normalizeWhitespace(user.id ?? "");
      // `||` rather than `??`: an empty-string field must fall through to the
      // next candidate, otherwise the user ends up without a display name.
      const name = normalizeWhitespace(user.profile?.display_name || user.real_name || user.profile?.real_name || user.name || "");
      if (id && name) {
        usersById.set(id, name);
      }
    }
  }
  const channelIndex = new Map();
  for (const indexPath of ["channels.json", "groups.json", "dms.json", "mpims.json"]) {
    const rawIndex = entries.get(indexPath);
    if (!rawIndex) {
      continue;
    }
    const parsed = JSON.parse(rawIndex);
    for (const [key, value] of slackEntriesFromChannelIndex(parsed, usersById)) {
      channelIndex.set(key, value);
    }
  }
  // Day files live exactly one directory deep: "<channel>/<YYYY-MM-DD>.json".
  const conversationPaths = [...entries.keys()].filter((entryPath) => /^[^/]+\/\d{4}-\d{2}-\d{2}\.json$/i.test(entryPath)).sort((left, right) => left.localeCompare(right));
  const conversations = [];
  for (const entryPath of conversationPaths) {
    const raw = entries.get(entryPath);
    if (!raw) {
      continue;
    }
    const messages = JSON.parse(raw);
    if (!Array.isArray(messages)) {
      continue;
    }
    const [channelName, dateFile] = entryPath.split("/");
    const date = dateFile?.replace(/\.json$/i, "") ?? "";
    // Channels missing from the index files fall back to their directory name.
    const channel = channelIndex.get(channelName ?? "") ?? {
      id: channelName ?? "",
      title: channelName ?? "channel",
      members: []
    };
    const participants = new Set(channel.members);
    const lines = [];
    const threadIds = new Set();
    const sortedMessages = [...messages].sort((left, right) => Number(left.ts ?? 0) - Number(right.ts ?? 0));
    let occurredAt;
    for (const message of sortedMessages) {
      // Same empty-string fallthrough as for user names above.
      const speaker = normalizeWhitespace(
        message.username || message.bot_profile?.name || (message.user ? slackFormatSpeakerId(message.user, usersById) : "")
      ) || "unknown";
      participants.add(speaker);
      const messageTime = slackMessageTimestamp(message.ts, date);
      // The earliest message of the day stamps the conversation.
      occurredAt ??= messageTime;
      const attachmentLines = Array.isArray(message.files) ? message.files.map((file) => normalizeWhitespace(file.title || file.name || "")).filter(Boolean).map((label) => `Attachment: ${label}`) : [];
      const normalizedText = slackNormalizeText([message.text ?? "", ...attachmentLines].join("\n"), usersById);
      // Only replies (thread_ts differs from the message's own ts) count a thread.
      if (message.thread_ts && message.thread_ts !== message.ts) {
        threadIds.add(message.thread_ts);
      }
      const threadTag = message.thread_ts ? ` {thread:${message.thread_ts}}` : "";
      const idTag = message.ts ? ` {id:${message.ts}}` : "";
      const body = normalizedText || normalizeWhitespace(message.subtype ?? "") || "[no text]";
      lines.push(`- [${messageTime}] ${speaker}${threadTag}${idTag}: ${body}`);
    }
    const participantsList = normalizeDelimitedList([...participants]);
    const conversationId = `${channel.id || channel.title}:${date}`;
    conversations.push({
      partKey: `${channel.title}-${date}`,
      title: `#${channel.title} - ${date}`,
      metadata: {
        workspace_title: exportTitle,
        channel: channel.title,
        ...channel.id ? { channel_id: channel.id } : {},
        ...occurredAt ? { occurred_at: occurredAt } : {},
        ...participantsList ? { participants: participantsList } : {},
        container_title: `${exportTitle} / #${channel.title}`,
        conversation_id: conversationId,
        date,
        message_count: String(sortedMessages.length),
        thread_count: String(threadIds.size)
      },
      markdown: [
        `# #${channel.title} - ${date}`,
        "",
        `Workspace: ${exportTitle}`,
        `Messages: ${sortedMessages.length}`,
        `Threads: ${threadIds.size}`,
        ...participantsList ? [`Participants: ${participantsList}`] : [],
        "",
        "## Messages",
        "",
        ...lines.length ? lines : ["- No messages were extracted."],
        ""
      ].join("\n")
    });
  }
  return {
    title: exportTitle,
    conversations,
    warnings: conversations.length ? void 0 : ["Slack export parsing completed but found no channel day files."]
  };
}
5608
/**
 * Reports whether a zip payload looks like a Slack export.
 *
 * @param {*} bytes - Raw archive bytes, wrapped in a Uint8Array before unzipping.
 * @returns {boolean} Result of `looksLikeSlackEntries` over the archive's entry
 *   names; `false` when unzipping or the check itself throws.
 */
function isSlackExportArchive(bytes) {
  try {
    const unpacked = unzipSync(new Uint8Array(bytes));
    const entryNames = Object.keys(unpacked);
    return looksLikeSlackEntries(entryNames);
  } catch {
    // Anything that cannot be unzipped is simply not a Slack export.
    return false;
  }
}
5616
/**
 * Reports whether a directory is laid out like an unpacked Slack export: at
 * least one index file at the top level plus at least one subdirectory that
 * contains a `YYYY-MM-DD.json` day file.
 *
 * @param {string} directoryPath - Directory to inspect.
 * @returns {Promise<boolean>} `true` when both conditions hold; unreadable
 *   directories and entries are treated as empty/absent.
 */
async function isSlackExportDirectory(directoryPath) {
  const topLevelNames = await fs7.readdir(directoryPath).catch(() => []);
  if (!topLevelNames.length) {
    return false;
  }
  const knownNames = new Set(topLevelNames);
  const indexFiles = ["channels.json", "groups.json", "dms.json", "mpims.json"];
  const indexPresent = indexFiles.some((indexName) => knownNames.has(indexName));
  if (!indexPresent) {
    return false;
  }
  const isDayFile = (fileName) => /^\d{4}-\d{2}-\d{2}\.json$/i.test(fileName);
  for (const name of topLevelNames) {
    const candidateDir = path7.join(directoryPath, name);
    const info = await fs7.stat(candidateDir).catch(() => null);
    if (info?.isDirectory()) {
      const dayCandidates = await fs7.readdir(candidateDir).catch(() => []);
      if (dayCandidates.some(isDayFile)) {
        return true;
      }
    }
  }
  return false;
}
5639
/**
 * Unzips a Slack export archive and parses its entries into conversations.
 *
 * @param {{bytes: *, fileName?: string}} input - Archive bytes and an optional
 *   file name; the name without its extension becomes the export title.
 * @returns {Promise<object>} The result of `parseSlackExportEntries`, or an
 *   empty conversation list with a warning when extraction throws.
 */
async function extractSlackExportArchive(input) {
  try {
    const archive = unzipSync(new Uint8Array(input.bytes));
    let title = "Slack Export";
    if (input.fileName) {
      title = path7.basename(input.fileName, path7.extname(input.fileName));
    }
    const textEntries = archiveEntriesAsText(archive);
    return parseSlackExportEntries(textEntries, title);
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      conversations: [],
      warnings: [`Slack export extraction failed: ${reason}`]
    };
  }
}
4775
- async function extractDocxText(input) {
5651
/**
 * Reads an unpacked Slack export from disk and parses it into conversations.
 *
 * Every file below `directoryPath` is loaded as UTF-8 text and keyed by its
 * export-relative path with `/` separators, which is the key shape
 * `parseSlackExportEntries` matches day files against.
 *
 * @param {string} directoryPath - Root directory of the unpacked export.
 * @returns {Promise<object>} The result of `parseSlackExportEntries`, or an
 *   empty conversation list with a warning when reading or parsing throws.
 */
async function extractSlackExportDirectory(directoryPath) {
  const title = path7.basename(directoryPath) || "Slack Export";
  try {
    const entries = new Map();
    // Breadth-first walk over the export tree.
    const queue = [directoryPath];
    while (queue.length > 0) {
      const current = queue.shift();
      const children = await fs7.readdir(current, { withFileTypes: true });
      for (const child of children) {
        const absoluteChild = path7.join(current, child.name);
        if (child.isDirectory()) {
          queue.push(absoluteChild);
          continue;
        }
        // Compute the relative path with the platform's own rules first and
        // only then convert separators. Feeding a native-separator root into
        // path.posix.relative yields unusable keys on Windows.
        const relativeChild = path7.relative(directoryPath, absoluteChild).split(path7.sep).join(path7.posix.sep);
        entries.set(relativeChild, await fs7.readFile(absoluteChild, "utf8"));
      }
    }
    return parseSlackExportEntries(entries, title);
  } catch (error) {
    return {
      conversations: [],
      warnings: [`Slack export extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
    };
  }
}
@@ -5230,21 +6103,42 @@ function inferKind(mimeType, filePath) {
5230
6103
  if (isRstFilePath(filePath)) {
5231
6104
  return "text";
5232
6105
  }
6106
+ if (isTranscriptFilePath(filePath) || mimeType === "application/x-subrip" || mimeType === "text/vtt") {
6107
+ return "transcript";
6108
+ }
5233
6109
  if (mimeType.includes("markdown")) {
5234
6110
  return "markdown";
5235
6111
  }
5236
6112
  if (mimeType.includes("html")) {
5237
6113
  return "html";
5238
6114
  }
5239
- if (mimeType.startsWith("text/")) {
5240
- return "text";
5241
- }
5242
6115
  if (mimeType === "application/pdf" || filePath.toLowerCase().endsWith(".pdf")) {
5243
6116
  return "pdf";
5244
6117
  }
5245
6118
  if (mimeType === "application/vnd.openxmlformats-officedocument.wordprocessingml.document" || filePath.toLowerCase().endsWith(".docx")) {
5246
6119
  return "docx";
5247
6120
  }
6121
+ if (isEmailFilePath(filePath) || mimeType === "message/rfc822" || mimeType === "application/mbox") {
6122
+ return "email";
6123
+ }
6124
+ if (isCalendarFilePath(filePath) || mimeType === "text/calendar") {
6125
+ return "calendar";
6126
+ }
6127
+ if (mimeType === "application/epub+zip" || filePath.toLowerCase().endsWith(".epub")) {
6128
+ return "epub";
6129
+ }
6130
+ if (mimeType === "text/csv" || mimeType === "text/tab-separated-values" || filePath.toLowerCase().endsWith(".csv") || filePath.toLowerCase().endsWith(".tsv")) {
6131
+ return "csv";
6132
+ }
6133
+ if (mimeType.startsWith("text/")) {
6134
+ return "text";
6135
+ }
6136
+ if (mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" || filePath.toLowerCase().endsWith(".xlsx")) {
6137
+ return "xlsx";
6138
+ }
6139
+ if (mimeType === "application/vnd.openxmlformats-officedocument.presentationml.presentation" || filePath.toLowerCase().endsWith(".pptx")) {
6140
+ return "pptx";
6141
+ }
5248
6142
  if (mimeType.startsWith("image/")) {
5249
6143
  return "image";
5250
6144
  }
@@ -5254,6 +6148,17 @@ function isRstFilePath(filePath) {
5254
6148
  const extension = path12.extname(filePath).toLowerCase();
5255
6149
  return extension === ".rst" || extension === ".rest";
5256
6150
  }
6151
/**
 * Reports whether a path has a subtitle/transcript extension (`.srt` or `.vtt`),
 * compared case-insensitively.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {boolean}
 */
function isTranscriptFilePath(filePath) {
  const transcriptExtensions = [".srt", ".vtt"];
  return transcriptExtensions.includes(path12.extname(filePath).toLowerCase());
}
6155
/**
 * Reports whether a path has an email extension (`.eml` or `.mbox`),
 * compared case-insensitively.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {boolean}
 */
function isEmailFilePath(filePath) {
  const emailExtensions = [".eml", ".mbox"];
  return emailExtensions.includes(path12.extname(filePath).toLowerCase());
}
6159
/**
 * Reports whether a path has the iCalendar extension (`.ics`),
 * compared case-insensitively.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {boolean}
 */
function isCalendarFilePath(filePath) {
  const extension = path12.extname(filePath);
  return extension.toLowerCase() === ".ics";
}
5257
6162
  function titleFromText(fallback, content, filePath) {
5258
6163
  if (filePath && isRstFilePath(filePath)) {
5259
6164
  const rstTitle = titleFromRst(fallback, content);
@@ -5270,6 +6175,57 @@ function guessMimeType(target) {
5270
6175
  }
5271
6176
  return mime.lookup(target) || "application/octet-stream";
5272
6177
  }
6178
/**
 * Builds the group identifier shared by all parts of one multi-part source:
 * the slugified title followed by the first 8 characters of the origin hash.
 *
 * @param {{title: string, originType: string, originalPath?: string, url?: string}} prepared
 *   Origin description; the URL is hashed for `"url"` origins, the original
 *   path otherwise, and the title when the chosen field is null/undefined.
 * @returns {string}
 */
function sourceGroupIdFor(prepared) {
  const { title, originType } = prepared;
  const originField = originType === "url" ? prepared.url : prepared.originalPath;
  const originKey = originField ?? title;
  const slug = slugify(title);
  const digest = sha256(originKey).slice(0, 8);
  return `${slug}-${digest}`;
}
6182
/**
 * Expands one multi-part source (mailbox, calendar, chat export, ...) into a
 * finalized prepared input per part, all sharing a single source group id.
 *
 * @param {object} input - Group description: `title`, origin fields,
 *   `sourceKind`, `sourceClass`, `mimeType`, `storedExtension`, optional
 *   `warnings`/`logDetails`, and `parts` (each with `partKey`, `title`,
 *   `markdown` and `metadata`).
 * @returns {object[]} One `finalizePreparedInput` result per part, in order.
 */
function groupedPreparedInputsFor(input) {
  const { title: groupTitle, originType, originalPath, url, sourceKind, parts } = input;
  const groupId = sourceGroupIdFor({
    title: groupTitle,
    originType,
    originalPath,
    url
  });
  // Each call returns a fresh object so the artifact metadata and the
  // manifest details never share a reference.
  const partDetails = (part, position) => ({
    ...part.metadata,
    part_index: String(position),
    part_count: String(parts.length)
  });
  const prepareOne = (part, index) => {
    const position = index + 1;
    return finalizePreparedInput({
      title: `${groupTitle} - ${part.title}`,
      originType,
      sourceKind,
      sourceClass: input.sourceClass,
      originalPath,
      repoRelativePath: input.repoRelativePath,
      url,
      // Every part is stored as rendered markdown regardless of the container type.
      mimeType: "text/markdown",
      storedExtension: input.storedExtension,
      payloadBytes: Buffer.from(part.markdown, "utf8"),
      extractedText: part.markdown,
      extractionArtifact: {
        extractor: `${sourceKind}_text`,
        sourceKind,
        // The artifact keeps the container's original MIME type.
        mimeType: input.mimeType,
        producedAt: new Date().toISOString(),
        metadata: partDetails(part, position),
        warnings: input.warnings
      },
      sourceGroupId: groupId,
      sourceGroupTitle: groupTitle,
      sourcePartKey: part.partKey,
      partIndex: position,
      partCount: parts.length,
      partTitle: part.title,
      details: partDetails(part, position),
      logDetails: input.logDetails
    });
  };
  return parts.map(prepareOne);
}
5273
6229
  function rstAdornmentLine(line) {
5274
6230
  const trimmed = line.trim();
5275
6231
  if (trimmed.length < 3) {
@@ -5500,6 +6456,13 @@ async function findNearestGitRoot2(startPath) {
5500
6456
  current = parent;
5501
6457
  }
5502
6458
  }
6459
/**
 * Picks the repository root to associate with an input path.
 *
 * @param {string} rootDir - Workspace root.
 * @param {string} inputPath - Path whose nearest git root is looked up.
 * @param {string} fallbackRoot - Root returned when no git root is found, or
 *   when the input lies inside `rootDir` but its git root does not.
 * @returns {Promise<string>} The detected git root or `fallbackRoot`.
 */
async function detectScopedRepoRoot(rootDir, inputPath, fallbackRoot) {
  const gitRoot = await findNearestGitRoot2(inputPath);
  if (!gitRoot) {
    return fallbackRoot;
  }
  // An enclosing repository outside the workspace must not capture an input
  // that itself lives inside the workspace.
  const gitRootEscapesWorkspace = withinRoot(rootDir, inputPath) && !withinRoot(rootDir, gitRoot);
  if (gitRootEscapesWorkspace) {
    return fallbackRoot;
  }
  return gitRoot;
}
5503
6466
  function withinRoot(rootPath, targetPath) {
5504
6467
  const relative = path12.relative(rootPath, targetPath);
5505
6468
  return relative === "" || !relative.startsWith("..") && !path12.isAbsolute(relative);
@@ -5844,6 +6807,9 @@ function manifestMatchesOrigin(manifest, prepared) {
5844
6807
  }
5845
6808
  return Boolean(prepared.originalPath && manifest.originalPath && toPosix(manifest.originalPath) === toPosix(prepared.originalPath));
5846
6809
  }
6810
/**
 * Reports whether a manifest refers to the same origin AND the same part of
 * that origin as a prepared input. A missing part key on either side is
 * treated as the empty string.
 *
 * @param {object} manifest - Stored manifest.
 * @param {object} prepared - Prepared input being matched.
 * @returns {*} The falsy result of `manifestMatchesOrigin` when the origins
 *   differ, otherwise a boolean for the part-key comparison.
 */
function manifestMatchesOriginPart(manifest, prepared) {
  const originMatches = manifestMatchesOrigin(manifest, prepared);
  if (!originMatches) {
    return originMatches;
  }
  const storedPartKey = manifest.sourcePartKey ?? "";
  const incomingPartKey = prepared.sourcePartKey ?? "";
  return storedPartKey === incomingPartKey;
}
5847
6813
  function buildCompositeHash(payloadBytes, attachments = []) {
5848
6814
  if (!attachments.length) {
5849
6815
  return sha256(payloadBytes);
@@ -5941,7 +6907,7 @@ function extractMarkdownImageReferences(content, baseUrl) {
5941
6907
  async function convertHtmlToMarkdown(html, url) {
5942
6908
  const dom = new JSDOM2(html, { url });
5943
6909
  const article = new Readability(dom.window.document).parse();
5944
- const turndown = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
6910
+ const turndown = new TurndownService2({ headingStyle: "atx", codeBlockStyle: "fenced" });
5945
6911
  const body = article?.content ?? dom.window.document.body.innerHTML;
5946
6912
  const markdown = turndown.turndown(body);
5947
6913
  return {
@@ -5965,21 +6931,26 @@ async function readManifestByHash(manifestsDir, contentHash) {
5965
6931
  }
5966
6932
  return null;
5967
6933
  }
5968
- async function readManifestByOrigin(manifestsDir, prepared) {
6934
/**
 * Loads every stored manifest whose origin matches a prepared input.
 *
 * @param {string} manifestsDir - Directory holding `*.json` manifests; an
 *   unreadable directory is treated as empty.
 * @param {object} prepared - Prepared input whose origin is matched.
 * @returns {Promise<object[]>} Matching manifests in directory-listing order,
 *   each with `semanticHash` defaulted to its `contentHash`.
 */
async function readManifestsByOrigin(manifestsDir, prepared) {
  const dirEntries = await fs11.readdir(manifestsDir, { withFileTypes: true }).catch(() => []);
  const manifestFiles = dirEntries.filter((entry) => entry.isFile() && entry.name.endsWith(".json"));
  const matches = [];
  for (const { name } of manifestFiles) {
    const manifest = await readJsonFile(path12.join(manifestsDir, name));
    if (!manifest || !manifestMatchesOrigin(manifest, prepared)) {
      continue;
    }
    matches.push({
      ...manifest,
      // Manifests without a semantic hash fall back to the content hash.
      semanticHash: manifest.semanticHash ?? manifest.contentHash
    });
  }
  return matches;
}
6951
/**
 * Finds the stored manifest for the exact origin and part of a prepared input.
 *
 * @param {string} manifestsDir - Directory holding the manifests.
 * @param {object} prepared - Prepared input to look up.
 * @returns {Promise<object|null>} The first matching manifest, or `null`.
 */
async function readManifestByOrigin(manifestsDir, prepared) {
  const sameOrigin = await readManifestsByOrigin(manifestsDir, prepared);
  const samePart = sameOrigin.find((candidate) => manifestMatchesOriginPart(candidate, prepared));
  return samePart ?? null;
}
5984
6955
  async function loadGitignoreMatcher(repoRoot, enabled) {
5985
6956
  if (!enabled) {
@@ -6046,7 +7017,13 @@ async function collectDirectoryFiles(rootDir, inputDir, repoRoot, options) {
6046
7017
  continue;
6047
7018
  }
6048
7019
  const mimeType = guessMimeType(absolutePath);
6049
- const sourceKind = inferKind(mimeType, absolutePath);
7020
+ let sourceKind = inferKind(mimeType, absolutePath);
7021
+ if (sourceKind === "binary" && path12.extname(absolutePath).toLowerCase() === ".zip") {
7022
+ const bytes = await fs11.readFile(absolutePath);
7023
+ if (isSlackExportArchive(bytes)) {
7024
+ sourceKind = "chat_export";
7025
+ }
7026
+ }
6050
7027
  const sourceClass = sourceClassForRelativePath(relativePath, options);
6051
7028
  if (!supportedDirectoryKind(sourceKind)) {
6052
7029
  skipped.push({ path: toPosix(path12.relative(rootDir, absolutePath)), reason: `unsupported_kind:${sourceKind}` });
@@ -6228,8 +7205,8 @@ async function persistPreparedInput(rootDir, prepared, paths) {
6228
7205
  const semanticHash = prepared.semanticHash ?? contentHash;
6229
7206
  const extractionHash = prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact);
6230
7207
  const existingByOrigin = await readManifestByOrigin(paths.manifestsDir, prepared);
6231
- const existingByHash = existingByOrigin ? null : await readManifestByHash(paths.manifestsDir, contentHash);
6232
- if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.semanticHash === semanticHash && existingByOrigin.extractionHash === extractionHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.sourceType === prepared.sourceType && existingByOrigin.sourceClass === prepared.sourceClass && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
7208
+ const existingByHash = existingByOrigin || prepared.sourcePartKey ? null : await readManifestByHash(paths.manifestsDir, contentHash);
7209
+ if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.semanticHash === semanticHash && existingByOrigin.extractionHash === extractionHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.sourceType === prepared.sourceType && existingByOrigin.sourceClass === prepared.sourceClass && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath && existingByOrigin.sourceGroupId === prepared.sourceGroupId && existingByOrigin.sourceGroupTitle === prepared.sourceGroupTitle && existingByOrigin.sourcePartKey === prepared.sourcePartKey && existingByOrigin.partIndex === prepared.partIndex && existingByOrigin.partCount === prepared.partCount && existingByOrigin.partTitle === prepared.partTitle && JSON.stringify(existingByOrigin.details ?? {}) === JSON.stringify(prepared.details ?? {})) {
6233
7210
  return { manifest: existingByOrigin, isNew: false, wasUpdated: false };
6234
7211
  }
6235
7212
  if (existingByHash) {
@@ -6288,6 +7265,13 @@ async function persistPreparedInput(rootDir, prepared, paths) {
6288
7265
  mimeType: prepared.mimeType,
6289
7266
  contentHash,
6290
7267
  semanticHash,
7268
+ sourceGroupId: prepared.sourceGroupId,
7269
+ sourceGroupTitle: prepared.sourceGroupTitle,
7270
+ sourcePartKey: prepared.sourcePartKey,
7271
+ partIndex: prepared.partIndex,
7272
+ partCount: prepared.partCount,
7273
+ partTitle: prepared.partTitle,
7274
+ details: prepared.details,
6291
7275
  createdAt: previous?.createdAt ?? now,
6292
7276
  updatedAt: now,
6293
7277
  attachments: manifestAttachments.length ? manifestAttachments : void 0
@@ -6309,6 +7293,42 @@ async function persistPreparedInput(rootDir, prepared, paths) {
6309
7293
  }
6310
7294
  return { manifest, isNew: !previous, wasUpdated: Boolean(previous) };
6311
7295
  }
7296
/**
 * Persists every part prepared from one input and removes manifests of parts
 * that existed for the same origin before but were not produced this time.
 *
 * @param {string} rootDir - Workspace root.
 * @param {*} input - Identifier of the ingested input, echoed in the result.
 * @param {object[]} preparedInputs - Prepared parts; the first one is used to
 *   look up the manifests already stored for this origin.
 * @param {object} paths - Workspace paths (uses `manifestsDir`).
 * @returns {Promise<object>} Summary with `created`, `updated`, `unchanged`
 *   and `removed` manifests plus `scannedCount` and an empty `skipped` list.
 */
async function persistPreparedInputs(rootDir, input, preparedInputs, paths) {
  const [firstPrepared] = preparedInputs;
  // Snapshot what is stored for this origin before anything is written.
  const previousManifests = firstPrepared ? await readManifestsByOrigin(paths.manifestsDir, firstPrepared) : [];
  const created = [];
  const updated = [];
  const unchanged = [];
  const touchedSourceIds = new Set();
  for (const prepared of preparedInputs) {
    const { manifest, isNew, wasUpdated } = await persistPreparedInput(rootDir, prepared, paths);
    let bucket = unchanged;
    if (isNew) {
      bucket = created;
    } else if (wasUpdated) {
      bucket = updated;
    }
    bucket.push(manifest);
    touchedSourceIds.add(manifest.sourceId);
  }
  const removed = [];
  const orphaned = previousManifests.filter((manifest) => !touchedSourceIds.has(manifest.sourceId));
  for (const manifest of orphaned) {
    await removeManifestArtifacts(rootDir, manifest, paths);
    removed.push(manifest);
  }
  return {
    input,
    scannedCount: preparedInputs.length,
    created,
    updated,
    unchanged,
    removed,
    skipped: []
  };
}
6312
7332
  async function removeManifestArtifacts(rootDir, manifest, paths) {
6313
7333
  await fs11.rm(path12.join(paths.manifestsDir, `${manifest.sourceId}.json`), { force: true });
6314
7334
  await fs11.rm(path12.resolve(rootDir, manifest.storedPath), { force: true });
@@ -6335,10 +7355,10 @@ function repoSyncWorkspaceIgnorePaths(rootDir, paths, repoRoot) {
6335
7355
  return candidates.map((candidate) => path12.resolve(candidate)).filter((candidate, index, items) => items.indexOf(candidate) === index).filter((candidate) => withinRoot(repoRoot, candidate));
6336
7356
  }
6337
7357
/**
 * Reports whether a stored manifest already reflects a prepared input.
 *
 * @param {object} manifest - Stored manifest.
 * @param {object} prepared - Freshly prepared input.
 * @param {string} contentHash - Content hash computed for `prepared`.
 * @returns {boolean} `true` only when the hashes, the descriptive fields, the
 *   grouping/part fields and the JSON-serialized `details` all agree.
 */
function preparedMatchesManifest(manifest, prepared, contentHash) {
  if (manifest.contentHash !== contentHash) {
    return false;
  }
  const expectedExtractionHash = prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact);
  if (manifest.extractionHash !== expectedExtractionHash) {
    return false;
  }
  const expectedSemanticHash = prepared.semanticHash ?? contentHash;
  if (manifest.semanticHash !== expectedSemanticHash) {
    return false;
  }
  // Fields that must be strictly identical on both sides.
  const mirroredFields = [
    "title",
    "sourceKind",
    "sourceType",
    "sourceClass",
    "language",
    "mimeType",
    "repoRelativePath",
    "sourceGroupId",
    "sourceGroupTitle",
    "sourcePartKey",
    "partIndex",
    "partCount",
    "partTitle"
  ];
  for (const field of mirroredFields) {
    if (manifest[field] !== prepared[field]) {
      return false;
    }
  }
  // Details are compared by serialized form; a missing value counts as {}.
  return JSON.stringify(manifest.details ?? {}) === JSON.stringify(prepared.details ?? {});
}
6340
7360
/**
 * Reports whether a source kind is one of the document-like kinds listed
 * below; per the function name, these have their semantic refresh deferred
 * in watch mode.
 *
 * @param {string} sourceKind - Source kind to classify.
 * @returns {boolean}
 */
function shouldDeferWatchSemanticRefresh(sourceKind) {
  const deferredKinds = [
    "markdown",
    "text",
    "html",
    "pdf",
    "docx",
    "epub",
    "csv",
    "xlsx",
    "pptx",
    "transcript",
    "chat_export",
    "email",
    "calendar",
    "image"
  ];
  return deferredKinds.includes(sourceKind);
}
6343
7363
  function pendingSemanticRefreshId(changeType, repoRoot, relativePath) {
6344
7364
  return `pending:${changeType}:${sha256(`${toPosix(repoRoot)}:${relativePath}`).slice(0, 12)}`;
@@ -6404,13 +7424,16 @@ async function syncTrackedRepos(rootDir, options, repoRoots) {
6404
7424
  const currentPaths = new Set(files.map((absolutePath) => path12.resolve(absolutePath)));
6405
7425
  for (const absolutePath of files) {
6406
7426
  const relativePath = repoRelativePathFor(absolutePath, repoRoot) ?? toPosix(path12.relative(repoRoot, absolutePath));
6407
- const prepared = await prepareFileInput(rootDir, absolutePath, repoRoot, sourceClassForRelativePath(relativePath, normalizedOptions));
6408
- const result = await persistPreparedInput(rootDir, prepared, paths);
6409
- if (result.isNew) {
6410
- imported.push(result.manifest);
6411
- } else if (result.wasUpdated) {
6412
- updated.push(result.manifest);
6413
- }
7427
+ const preparedInputs = await prepareFileInputs(
7428
+ rootDir,
7429
+ absolutePath,
7430
+ repoRoot,
7431
+ sourceClassForRelativePath(relativePath, normalizedOptions)
7432
+ );
7433
+ const result = await persistPreparedInputs(rootDir, absolutePath, preparedInputs, paths);
7434
+ imported.push(...result.created);
7435
+ updated.push(...result.updated);
7436
+ removed.push(...result.removed);
6414
7437
  progress.tick();
6415
7438
  }
6416
7439
  progress.finish(`repo=${toPosix(path12.relative(rootDir, repoRoot)) || "."}`);
@@ -6469,9 +7492,6 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
6469
7492
  let scannedCount = 0;
6470
7493
  for (const repoRoot of uniqueRoots) {
6471
7494
  const repoManifests = manifestsByRepoRoot.get(repoRoot) ?? [];
6472
- const manifestsByOriginalPath = new Map(
6473
- repoManifests.filter((manifest) => manifest.originalPath).map((manifest) => [path12.resolve(manifest.originalPath), manifest])
6474
- );
6475
7495
  if (!await fileExists(repoRoot)) {
6476
7496
  for (const manifest of repoManifests) {
6477
7497
  if (shouldDeferWatchSemanticRefresh(manifest.sourceKind)) {
@@ -6507,38 +7527,50 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
6507
7527
  const currentPaths = new Set(files.map((absolutePath) => path12.resolve(absolutePath)));
6508
7528
  for (const absolutePath of files) {
6509
7529
  const relativePath = repoRelativePathFor(absolutePath, repoRoot) ?? toPosix(path12.relative(repoRoot, absolutePath));
6510
- const prepared = await prepareFileInput(rootDir, absolutePath, repoRoot, sourceClassForRelativePath(relativePath, normalizedOptions));
6511
- if (shouldDeferWatchSemanticRefresh(prepared.sourceKind)) {
6512
- const existing = manifestsByOriginalPath.get(path12.resolve(absolutePath));
6513
- const contentHash = buildCompositeHash(prepared.payloadBytes, prepared.attachments);
6514
- const changed = !existing || !preparedMatchesManifest(existing, prepared, contentHash);
7530
+ const preparedInputs = await prepareFileInputs(
7531
+ rootDir,
7532
+ absolutePath,
7533
+ repoRoot,
7534
+ sourceClassForRelativePath(relativePath, normalizedOptions)
7535
+ );
7536
+ const firstPrepared = preparedInputs[0];
7537
+ if (firstPrepared && shouldDeferWatchSemanticRefresh(firstPrepared.sourceKind)) {
7538
+ const existing = repoManifests.filter(
7539
+ (manifest) => manifest.originalPath && path12.resolve(manifest.originalPath) === path12.resolve(absolutePath)
7540
+ );
7541
+ const existingByPartKey = new Map(existing.map((manifest) => [manifest.sourcePartKey ?? "__single__", manifest]));
7542
+ const changed = existing.length !== preparedInputs.length || preparedInputs.some((prepared) => {
7543
+ const match = existingByPartKey.get(prepared.sourcePartKey ?? "__single__");
7544
+ const contentHash = buildCompositeHash(prepared.payloadBytes, prepared.attachments);
7545
+ return !match || !preparedMatchesManifest(match, prepared, contentHash);
7546
+ }) || existing.some(
7547
+ (manifest) => !preparedInputs.some((prepared) => (prepared.sourcePartKey ?? "") === (manifest.sourcePartKey ?? ""))
7548
+ );
6515
7549
  if (changed) {
6516
7550
  pendingSemanticRefresh.push({
6517
7551
  id: pendingSemanticRefreshId(
6518
- existing ? "modified" : "added",
7552
+ existing.length ? "modified" : "added",
6519
7553
  repoRoot,
6520
- prepared.repoRelativePath ?? toPosix(path12.relative(repoRoot, absolutePath))
7554
+ firstPrepared.repoRelativePath ?? toPosix(path12.relative(repoRoot, absolutePath))
6521
7555
  ),
6522
7556
  repoRoot,
6523
7557
  path: toPosix(path12.relative(rootDir, absolutePath)),
6524
- changeType: existing ? "modified" : "added",
7558
+ changeType: existing.length ? "modified" : "added",
6525
7559
  detectedAt: (/* @__PURE__ */ new Date()).toISOString(),
6526
- sourceId: existing?.sourceId,
6527
- sourceKind: prepared.sourceKind
7560
+ sourceId: existing[0]?.sourceId,
7561
+ sourceKind: firstPrepared.sourceKind
6528
7562
  });
6529
- if (existing?.sourceId) {
6530
- staleSourceIds.add(existing.sourceId);
7563
+ for (const manifest of existing) {
7564
+ staleSourceIds.add(manifest.sourceId);
6531
7565
  }
6532
7566
  }
6533
7567
  progress.tick();
6534
7568
  continue;
6535
7569
  }
6536
- const result = await persistPreparedInput(rootDir, prepared, paths);
6537
- if (result.isNew) {
6538
- imported.push(result.manifest);
6539
- } else if (result.wasUpdated) {
6540
- updated.push(result.manifest);
6541
- }
7570
+ const result = await persistPreparedInputs(rootDir, absolutePath, preparedInputs, paths);
7571
+ imported.push(...result.created);
7572
+ updated.push(...result.updated);
7573
+ removed.push(...result.removed);
6542
7574
  progress.tick();
6543
7575
  }
6544
7576
  progress.finish(`repo=${toPosix(path12.relative(rootDir, repoRoot)) || "."}`);
@@ -6592,8 +7624,25 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
6592
7624
  staleSourceIds: [...staleSourceIds]
6593
7625
  };
6594
7626
  }
6595
- async function prepareFileInput(rootDir, absoluteInput, repoRoot, sourceClass) {
7627
+ async function prepareFileInputs(rootDir, absoluteInput, repoRoot, sourceClass) {
6596
7628
  const payloadBytes = await fs11.readFile(absoluteInput);
7629
+ if (path12.extname(absoluteInput).toLowerCase() === ".zip" && isSlackExportArchive(payloadBytes)) {
7630
+ const slackExport = await extractSlackExportArchive({ mimeType: "application/zip", bytes: payloadBytes, fileName: absoluteInput });
7631
+ if (slackExport.conversations.length) {
7632
+ return groupedPreparedInputsFor({
7633
+ title: slackExport.title?.trim() || path12.basename(absoluteInput, path12.extname(absoluteInput)),
7634
+ originType: "file",
7635
+ sourceKind: "chat_export",
7636
+ sourceClass,
7637
+ originalPath: toPosix(absoluteInput),
7638
+ repoRelativePath: repoRelativePathFor(absoluteInput, repoRoot),
7639
+ mimeType: "application/zip",
7640
+ storedExtension: ".md",
7641
+ warnings: slackExport.warnings,
7642
+ parts: slackExport.conversations
7643
+ });
7644
+ }
7645
+ }
6597
7646
  const mimeType = guessMimeType(absoluteInput);
6598
7647
  const sourceKind = inferKind(mimeType, absoluteInput);
6599
7648
  const language = inferCodeLanguage(absoluteInput, mimeType);
@@ -6623,6 +7672,118 @@ async function prepareFileInput(rootDir, absoluteInput, repoRoot, sourceClass) {
6623
7672
  title = extracted.artifact.metadata?.title?.trim() || title;
6624
7673
  extractedText = extracted.extractedText;
6625
7674
  extractionArtifact = extracted.artifact;
7675
+ } else if (sourceKind === "transcript") {
7676
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
7677
+ const extracted = await extractTranscriptText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
7678
+ title = extracted.title?.trim() || title;
7679
+ extractedText = extracted.extractedText;
7680
+ extractionArtifact = extracted.artifact;
7681
+ } else if (sourceKind === "email" && path12.extname(absoluteInput).toLowerCase() === ".eml") {
7682
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
7683
+ const extracted = await extractEmailText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
7684
+ title = extracted.title?.trim() || title;
7685
+ extractedText = extracted.extractedText;
7686
+ extractionArtifact = extracted.artifact;
7687
+ } else if (sourceKind === "email" && path12.extname(absoluteInput).toLowerCase() === ".mbox") {
7688
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
7689
+ const extracted = await extractMboxMessages({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
7690
+ title = extracted.title?.trim() || title;
7691
+ if (extracted.messages.length) {
7692
+ return groupedPreparedInputsFor({
7693
+ title,
7694
+ originType: "file",
7695
+ sourceKind: "email",
7696
+ sourceClass,
7697
+ originalPath: toPosix(absoluteInput),
7698
+ repoRelativePath: repoRelativePathFor(absoluteInput, repoRoot),
7699
+ mimeType,
7700
+ storedExtension: ".md",
7701
+ warnings: extracted.warnings,
7702
+ parts: extracted.messages
7703
+ });
7704
+ }
7705
+ extractionArtifact = {
7706
+ extractor: "email_text",
7707
+ sourceKind: "email",
7708
+ mimeType,
7709
+ producedAt: (/* @__PURE__ */ new Date()).toISOString(),
7710
+ warnings: extracted.warnings ?? ["Mailbox extraction completed but produced no readable messages."]
7711
+ };
7712
+ } else if (sourceKind === "calendar") {
7713
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
7714
+ const extracted = await extractCalendarEvents({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
7715
+ title = extracted.title?.trim() || title;
7716
+ if (extracted.events.length) {
7717
+ return groupedPreparedInputsFor({
7718
+ title,
7719
+ originType: "file",
7720
+ sourceKind: "calendar",
7721
+ sourceClass,
7722
+ originalPath: toPosix(absoluteInput),
7723
+ repoRelativePath: repoRelativePathFor(absoluteInput, repoRoot),
7724
+ mimeType,
7725
+ storedExtension: ".md",
7726
+ warnings: extracted.warnings,
7727
+ parts: extracted.events
7728
+ });
7729
+ }
7730
+ extractionArtifact = {
7731
+ extractor: "calendar_text",
7732
+ sourceKind: "calendar",
7733
+ mimeType,
7734
+ producedAt: (/* @__PURE__ */ new Date()).toISOString(),
7735
+ warnings: extracted.warnings ?? ["Calendar extraction completed but found no events."]
7736
+ };
7737
+ } else if (sourceKind === "csv") {
7738
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
7739
+ const extracted = await extractCsvText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
7740
+ title = extracted.title?.trim() || title;
7741
+ extractedText = extracted.extractedText;
7742
+ extractionArtifact = extracted.artifact;
7743
+ } else if (sourceKind === "xlsx") {
7744
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
7745
+ const extracted = await extractXlsxText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
7746
+ title = extracted.title?.trim() || title;
7747
+ extractedText = extracted.extractedText;
7748
+ extractionArtifact = extracted.artifact;
7749
+ } else if (sourceKind === "pptx") {
7750
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
7751
+ const extracted = await extractPptxText({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
7752
+ title = extracted.title?.trim() || title;
7753
+ extractedText = extracted.extractedText;
7754
+ extractionArtifact = extracted.artifact;
7755
+ } else if (sourceKind === "epub") {
7756
+ title = path12.basename(absoluteInput, path12.extname(absoluteInput));
7757
+ const extracted = await extractEpubChapters({ mimeType, bytes: payloadBytes, fileName: absoluteInput });
7758
+ title = extracted.title?.trim() || title;
7759
+ if (extracted.chapters.length) {
7760
+ return groupedPreparedInputsFor({
7761
+ title,
7762
+ originType: "file",
7763
+ sourceKind: "epub",
7764
+ sourceClass,
7765
+ originalPath: toPosix(absoluteInput),
7766
+ repoRelativePath: repoRelativePathFor(absoluteInput, repoRoot),
7767
+ mimeType,
7768
+ storedExtension: ".md",
7769
+ warnings: extracted.warnings,
7770
+ parts: extracted.chapters.map((chapter) => ({
7771
+ ...chapter,
7772
+ metadata: {
7773
+ ...chapter.metadata,
7774
+ ...extracted.author ? { author: extracted.author } : {}
7775
+ }
7776
+ }))
7777
+ });
7778
+ }
7779
+ extractedText = void 0;
7780
+ extractionArtifact = {
7781
+ extractor: "epub_text",
7782
+ sourceKind: "epub",
7783
+ mimeType,
7784
+ producedAt: (/* @__PURE__ */ new Date()).toISOString(),
7785
+ warnings: extracted.warnings ?? ["EPUB extraction completed but produced no chapter content."]
7786
+ };
6626
7787
  } else if (sourceKind === "image") {
6627
7788
  title = path12.basename(absoluteInput, path12.extname(absoluteInput));
6628
7789
  const extracted = await extractImageWithVision(rootDir, {
@@ -6636,23 +7797,33 @@ async function prepareFileInput(rootDir, absoluteInput, repoRoot, sourceClass) {
6636
7797
  } else {
6637
7798
  title = path12.basename(absoluteInput, path12.extname(absoluteInput));
6638
7799
  }
6639
- return finalizePreparedInput({
6640
- title,
6641
- originType: "file",
6642
- sourceKind,
6643
- sourceClass,
6644
- language,
6645
- originalPath: toPosix(absoluteInput),
6646
- repoRelativePath: repoRelativePathFor(absoluteInput, repoRoot),
6647
- mimeType,
6648
- storedExtension,
6649
- payloadBytes,
6650
- extractedText,
6651
- extractionArtifact,
6652
- extractionHash: buildExtractionHash(extractedText, extractionArtifact)
6653
- });
7800
+ return [
7801
+ finalizePreparedInput({
7802
+ title,
7803
+ originType: "file",
7804
+ sourceKind,
7805
+ sourceClass,
7806
+ language,
7807
+ originalPath: toPosix(absoluteInput),
7808
+ repoRelativePath: repoRelativePathFor(absoluteInput, repoRoot),
7809
+ mimeType,
7810
+ storedExtension,
7811
+ payloadBytes,
7812
+ extractedText,
7813
+ extractionArtifact,
7814
+ extractionHash: buildExtractionHash(extractedText, extractionArtifact),
7815
+ details: extractionArtifact?.metadata
7816
+ })
7817
+ ];
6654
7818
  }
6655
- async function prepareUrlInput(rootDir, input, options) {
7819
+ async function prepareFileInput(rootDir, absoluteInput, repoRoot, sourceClass) {
7820
+ const prepared = await prepareFileInputs(rootDir, absoluteInput, repoRoot, sourceClass);
7821
+ if (!prepared.length) {
7822
+ throw new Error(`No ingestable sources were extracted from ${absoluteInput}.`);
7823
+ }
7824
+ return prepared[0];
7825
+ }
7826
+ async function prepareUrlInputs(rootDir, input, options) {
6656
7827
  await validateUrlSafety(input);
6657
7828
  const response = await fetch(input);
6658
7829
  if (!response.ok) {
@@ -6661,6 +7832,25 @@ async function prepareUrlInput(rootDir, input, options) {
6661
7832
  const finalUrl = normalizeOriginUrl(response.url || input);
6662
7833
  const inputUrl = new URL(finalUrl);
6663
7834
  const originalPayloadBytes = Buffer.from(await response.arrayBuffer());
7835
+ if (path12.extname(inputUrl.pathname).toLowerCase() === ".zip" && isSlackExportArchive(originalPayloadBytes)) {
7836
+ const slackExport = await extractSlackExportArchive({
7837
+ mimeType: "application/zip",
7838
+ bytes: originalPayloadBytes,
7839
+ fileName: inputUrl.pathname
7840
+ });
7841
+ if (slackExport.conversations.length) {
7842
+ return groupedPreparedInputsFor({
7843
+ title: slackExport.title?.trim() || inputUrl.hostname,
7844
+ originType: "url",
7845
+ sourceKind: "chat_export",
7846
+ url: finalUrl,
7847
+ mimeType: "application/zip",
7848
+ storedExtension: ".md",
7849
+ warnings: slackExport.warnings,
7850
+ parts: slackExport.conversations
7851
+ });
7852
+ }
7853
+ }
6664
7854
  let payloadBytes = originalPayloadBytes;
6665
7855
  let mimeType = resolveUrlMimeType(input, response);
6666
7856
  let sourceKind = inferKind(mimeType, inputUrl.pathname);
@@ -6747,6 +7937,104 @@ async function prepareUrlInput(rootDir, input, options) {
6747
7937
  title = extracted.artifact.metadata?.title?.trim() || title;
6748
7938
  extractedText = extracted.extractedText;
6749
7939
  extractionArtifact = extracted.artifact;
7940
+ } else if (sourceKind === "transcript") {
7941
+ const extracted = await extractTranscriptText({ mimeType, bytes: payloadBytes, fileName: inputUrl.pathname });
7942
+ title = extracted.title?.trim() || title;
7943
+ extractedText = extracted.extractedText;
7944
+ extractionArtifact = extracted.artifact;
7945
+ } else if (sourceKind === "email" && path12.extname(inputUrl.pathname).toLowerCase() === ".eml") {
7946
+ const extracted = await extractEmailText({ mimeType, bytes: payloadBytes, fileName: inputUrl.pathname });
7947
+ title = extracted.title?.trim() || title;
7948
+ extractedText = extracted.extractedText;
7949
+ extractionArtifact = extracted.artifact;
7950
+ } else if (sourceKind === "email" && path12.extname(inputUrl.pathname).toLowerCase() === ".mbox") {
7951
+ const extracted = await extractMboxMessages({ mimeType, bytes: payloadBytes, fileName: inputUrl.pathname });
7952
+ title = extracted.title?.trim() || title;
7953
+ if (extracted.messages.length) {
7954
+ return groupedPreparedInputsFor({
7955
+ title,
7956
+ originType: "url",
7957
+ sourceKind: "email",
7958
+ url: finalUrl,
7959
+ mimeType,
7960
+ storedExtension: ".md",
7961
+ warnings: extracted.warnings,
7962
+ parts: extracted.messages
7963
+ });
7964
+ }
7965
+ extractionArtifact = {
7966
+ extractor: "email_text",
7967
+ sourceKind: "email",
7968
+ mimeType,
7969
+ producedAt: (/* @__PURE__ */ new Date()).toISOString(),
7970
+ warnings: extracted.warnings ?? ["Mailbox extraction completed but produced no readable messages."]
7971
+ };
7972
+ } else if (sourceKind === "calendar") {
7973
+ const extracted = await extractCalendarEvents({ mimeType, bytes: payloadBytes, fileName: inputUrl.pathname });
7974
+ title = extracted.title?.trim() || title;
7975
+ if (extracted.events.length) {
7976
+ return groupedPreparedInputsFor({
7977
+ title,
7978
+ originType: "url",
7979
+ sourceKind: "calendar",
7980
+ url: finalUrl,
7981
+ mimeType,
7982
+ storedExtension: ".md",
7983
+ warnings: extracted.warnings,
7984
+ parts: extracted.events
7985
+ });
7986
+ }
7987
+ extractionArtifact = {
7988
+ extractor: "calendar_text",
7989
+ sourceKind: "calendar",
7990
+ mimeType,
7991
+ producedAt: (/* @__PURE__ */ new Date()).toISOString(),
7992
+ warnings: extracted.warnings ?? ["Calendar extraction completed but found no events."]
7993
+ };
7994
+ } else if (sourceKind === "csv") {
7995
+ const extracted = await extractCsvText({ mimeType, bytes: payloadBytes, fileName: inputUrl.pathname });
7996
+ title = extracted.title?.trim() || title;
7997
+ extractedText = extracted.extractedText;
7998
+ extractionArtifact = extracted.artifact;
7999
+ } else if (sourceKind === "xlsx") {
8000
+ const extracted = await extractXlsxText({ mimeType, bytes: payloadBytes, fileName: inputUrl.pathname });
8001
+ title = extracted.title?.trim() || title;
8002
+ extractedText = extracted.extractedText;
8003
+ extractionArtifact = extracted.artifact;
8004
+ } else if (sourceKind === "pptx") {
8005
+ const extracted = await extractPptxText({ mimeType, bytes: payloadBytes, fileName: inputUrl.pathname });
8006
+ title = extracted.title?.trim() || title;
8007
+ extractedText = extracted.extractedText;
8008
+ extractionArtifact = extracted.artifact;
8009
+ } else if (sourceKind === "epub") {
8010
+ const extracted = await extractEpubChapters({ mimeType, bytes: payloadBytes, fileName: inputUrl.pathname });
8011
+ title = extracted.title?.trim() || title;
8012
+ if (extracted.chapters.length) {
8013
+ return groupedPreparedInputsFor({
8014
+ title,
8015
+ originType: "url",
8016
+ sourceKind: "epub",
8017
+ url: finalUrl,
8018
+ mimeType,
8019
+ storedExtension: ".md",
8020
+ warnings: extracted.warnings,
8021
+ parts: extracted.chapters.map((chapter) => ({
8022
+ ...chapter,
8023
+ metadata: {
8024
+ ...chapter.metadata,
8025
+ ...extracted.author ? { author: extracted.author } : {}
8026
+ }
8027
+ })),
8028
+ logDetails
8029
+ });
8030
+ }
8031
+ extractionArtifact = {
8032
+ extractor: "epub_text",
8033
+ sourceKind: "epub",
8034
+ mimeType,
8035
+ producedAt: (/* @__PURE__ */ new Date()).toISOString(),
8036
+ warnings: extracted.warnings ?? ["EPUB extraction completed but produced no chapter content."]
8037
+ };
6750
8038
  } else if (sourceKind === "image") {
6751
8039
  const extracted = await extractImageWithVision(rootDir, {
6752
8040
  title,
@@ -6758,22 +8046,32 @@ async function prepareUrlInput(rootDir, input, options) {
6758
8046
  extractionArtifact = extracted.artifact;
6759
8047
  }
6760
8048
  }
6761
- return finalizePreparedInput({
6762
- title,
6763
- originType: "url",
6764
- sourceKind,
6765
- language,
6766
- url: finalUrl,
6767
- mimeType,
6768
- storedExtension,
6769
- payloadBytes,
6770
- extractedText,
6771
- extractionArtifact,
6772
- extractionHash: buildExtractionHash(extractedText, extractionArtifact),
6773
- attachments,
6774
- contentHash,
6775
- logDetails
6776
- });
8049
+ return [
8050
+ finalizePreparedInput({
8051
+ title,
8052
+ originType: "url",
8053
+ sourceKind,
8054
+ language,
8055
+ url: finalUrl,
8056
+ mimeType,
8057
+ storedExtension,
8058
+ payloadBytes,
8059
+ extractedText,
8060
+ extractionArtifact,
8061
+ extractionHash: buildExtractionHash(extractedText, extractionArtifact),
8062
+ attachments,
8063
+ contentHash,
8064
+ details: extractionArtifact?.metadata,
8065
+ logDetails
8066
+ })
8067
+ ];
8068
+ }
8069
+ async function prepareUrlInput(rootDir, input, options) {
8070
+ const prepared = await prepareUrlInputs(rootDir, input, options);
8071
+ if (!prepared.length) {
8072
+ throw new Error(`No ingestable sources were extracted from ${input}.`);
8073
+ }
8074
+ return prepared[0];
6777
8075
  }
6778
8076
  async function collectInboxAttachmentRefs(inputDir, files) {
6779
8077
  const refsBySource = /* @__PURE__ */ new Map();
@@ -6905,18 +8203,38 @@ async function prepareInboxHtmlInput(absolutePath, attachmentRefs) {
6905
8203
  };
6906
8204
  }
6907
8205
  function isSupportedInboxKind(sourceKind) {
6908
- return ["markdown", "text", "html", "pdf", "docx", "image"].includes(sourceKind);
8206
+ return [
8207
+ "markdown",
8208
+ "text",
8209
+ "html",
8210
+ "pdf",
8211
+ "docx",
8212
+ "epub",
8213
+ "csv",
8214
+ "xlsx",
8215
+ "pptx",
8216
+ "transcript",
8217
+ "chat_export",
8218
+ "email",
8219
+ "calendar",
8220
+ "image"
8221
+ ].includes(sourceKind);
6909
8222
  }
6910
8223
  async function ingestInputDetailed(rootDir, input, options) {
6911
8224
  const { paths } = await initWorkspace(rootDir);
6912
8225
  const normalizedOptions = normalizeIngestOptions(options);
6913
8226
  const absoluteInput = path12.resolve(rootDir, input);
6914
- const repoRoot = isHttpUrl(input) || normalizedOptions.repoRoot ? normalizedOptions.repoRoot : await findNearestGitRoot2(absoluteInput).then((value) => value ?? path12.dirname(absoluteInput));
6915
- const prepared = isHttpUrl(input) ? await prepareUrlInput(rootDir, input, normalizedOptions) : await prepareFileInput(rootDir, absoluteInput, repoRoot);
6916
- return await persistPreparedInput(rootDir, prepared, paths);
8227
+ const repoRoot = isHttpUrl(input) || normalizedOptions.repoRoot ? normalizedOptions.repoRoot : await detectScopedRepoRoot(rootDir, absoluteInput, path12.dirname(absoluteInput));
8228
+ const prepared = isHttpUrl(input) ? await prepareUrlInputs(rootDir, input, normalizedOptions) : await prepareFileInputs(rootDir, absoluteInput, repoRoot);
8229
+ return await persistPreparedInputs(rootDir, input, prepared, paths);
6917
8230
  }
6918
8231
  async function ingestInput(rootDir, input, options) {
6919
- return (await ingestInputDetailed(rootDir, input, options)).manifest;
8232
+ const result = await ingestInputDetailed(rootDir, input, options);
8233
+ const manifest = [...result.created, ...result.updated, ...result.unchanged][0];
8234
+ if (!manifest) {
8235
+ throw new Error(`No source manifests were created or updated for ${input}.`);
8236
+ }
8237
+ return manifest;
6920
8238
  }
6921
8239
  async function addInput(rootDir, input, options = {}) {
6922
8240
  const { paths } = await initWorkspace(rootDir);
@@ -7004,23 +8322,59 @@ async function ingestDirectory(rootDir, inputDir, options) {
7004
8322
  const { paths } = await initWorkspace(rootDir);
7005
8323
  const normalizedOptions = await resolveRepoIngestOptions(rootDir, options);
7006
8324
  const absoluteInputDir = path12.resolve(rootDir, inputDir);
7007
- const repoRoot = normalizedOptions.repoRoot ?? await findNearestGitRoot2(absoluteInputDir) ?? absoluteInputDir;
8325
+ const repoRoot = normalizedOptions.repoRoot ?? await detectScopedRepoRoot(rootDir, absoluteInputDir, absoluteInputDir);
7008
8326
  if (!await fileExists(absoluteInputDir)) {
7009
8327
  throw new Error(`Directory not found: ${absoluteInputDir}`);
7010
8328
  }
8329
+ if (await isSlackExportDirectory(absoluteInputDir)) {
8330
+ const extracted = await extractSlackExportDirectory(absoluteInputDir);
8331
+ const preparedInputs = groupedPreparedInputsFor({
8332
+ title: extracted.title?.trim() || path12.basename(absoluteInputDir),
8333
+ originType: "file",
8334
+ sourceKind: "chat_export",
8335
+ originalPath: toPosix(absoluteInputDir),
8336
+ mimeType: "application/json",
8337
+ storedExtension: ".md",
8338
+ warnings: extracted.warnings,
8339
+ parts: extracted.conversations
8340
+ });
8341
+ const result = await persistPreparedInputs(rootDir, absoluteInputDir, preparedInputs, paths);
8342
+ await appendLogEntry(rootDir, "ingest_directory", toPosix(path12.relative(rootDir, absoluteInputDir)) || ".", [
8343
+ `repo_root=${toPosix(path12.relative(rootDir, repoRoot)) || "."}`,
8344
+ `scanned=${preparedInputs.length}`,
8345
+ `imported=${result.created.length}`,
8346
+ `updated=${result.updated.length}`,
8347
+ `skipped=${result.skipped.length}`
8348
+ ]);
8349
+ return {
8350
+ inputDir: absoluteInputDir,
8351
+ repoRoot,
8352
+ scannedCount: preparedInputs.length,
8353
+ imported: result.created,
8354
+ updated: result.updated,
8355
+ skipped: result.skipped
8356
+ };
8357
+ }
7011
8358
  const { files, skipped } = await collectDirectoryFiles(rootDir, absoluteInputDir, repoRoot, normalizedOptions);
7012
8359
  const imported = [];
7013
8360
  const updated = [];
7014
8361
  const progress = createProgressReporter("ingest", files.length);
7015
8362
  for (const absolutePath of files) {
7016
8363
  const relativePath = repoRelativePathFor(absolutePath, repoRoot) ?? toPosix(path12.relative(repoRoot, absolutePath));
7017
- const prepared = await prepareFileInput(rootDir, absolutePath, repoRoot, sourceClassForRelativePath(relativePath, normalizedOptions));
7018
- const result = await persistPreparedInput(rootDir, prepared, paths);
7019
- if (result.isNew) {
7020
- imported.push(result.manifest);
7021
- } else if (result.wasUpdated) {
7022
- updated.push(result.manifest);
7023
- } else {
8364
+ const preparedInputs = await prepareFileInputs(
8365
+ rootDir,
8366
+ absolutePath,
8367
+ repoRoot,
8368
+ sourceClassForRelativePath(relativePath, normalizedOptions)
8369
+ );
8370
+ const result = await persistPreparedInputs(rootDir, absolutePath, preparedInputs, paths);
8371
+ if (result.created.length) {
8372
+ imported.push(...result.created);
8373
+ }
8374
+ if (result.updated.length) {
8375
+ updated.push(...result.updated);
8376
+ }
8377
+ if (!result.created.length && !result.updated.length && !result.removed.length) {
7024
8378
  skipped.push({ path: toPosix(path12.relative(rootDir, absolutePath)), reason: "duplicate_content" });
7025
8379
  }
7026
8380
  progress.tick();
@@ -7065,19 +8419,25 @@ async function importInbox(rootDir, inputDir) {
7065
8419
  continue;
7066
8420
  }
7067
8421
  const mimeType = guessMimeType(absolutePath);
7068
- const sourceKind = inferKind(mimeType, absolutePath);
8422
+ let sourceKind = inferKind(mimeType, absolutePath);
8423
+ if (sourceKind === "binary" && path12.extname(absolutePath).toLowerCase() === ".zip") {
8424
+ const bytes = await fs11.readFile(absolutePath);
8425
+ if (isSlackExportArchive(bytes)) {
8426
+ sourceKind = "chat_export";
8427
+ }
8428
+ }
7069
8429
  if (!isSupportedInboxKind(sourceKind)) {
7070
8430
  skipped.push({ path: toPosix(path12.relative(rootDir, absolutePath)), reason: `unsupported_kind:${sourceKind}` });
7071
8431
  continue;
7072
8432
  }
7073
8433
  const prepared = sourceKind === "markdown" && refsBySource.has(absolutePath) ? await prepareInboxMarkdownInput(absolutePath, refsBySource.get(absolutePath) ?? []) : sourceKind === "html" && refsBySource.has(absolutePath) ? await prepareInboxHtmlInput(absolutePath, refsBySource.get(absolutePath) ?? []) : await prepareFileInput(rootDir, absolutePath);
7074
- const result = await persistPreparedInput(rootDir, prepared, paths);
7075
- if (!result.isNew) {
8434
+ const result = await persistPreparedInputs(rootDir, absolutePath, [prepared], paths);
8435
+ if (!result.created.length) {
7076
8436
  skipped.push({ path: toPosix(path12.relative(rootDir, absolutePath)), reason: "duplicate_content" });
7077
8437
  continue;
7078
8438
  }
7079
- attachmentCount += result.manifest.attachments?.length ?? 0;
7080
- imported.push(result.manifest);
8439
+ attachmentCount += result.created.reduce((total, manifest) => total + (manifest.attachments?.length ?? 0), 0);
8440
+ imported.push(...result.created);
7081
8441
  }
7082
8442
  await appendLogEntry(rootDir, "inbox_import", toPosix(path12.relative(rootDir, effectiveInputDir)) || ".", [
7083
8443
  `scanned=${files.length}`,
@@ -9292,7 +10652,20 @@ function relatedOutputsSection(relatedOutputs) {
9292
10652
  if (!relatedOutputs.length) {
9293
10653
  return [];
9294
10654
  }
9295
- return ["## Related Outputs", "", ...relatedOutputs.map((page) => `- ${pageLink(page)}`), ""];
10655
+ return ["## Related Outputs", "", ...relatedOutputs.map((page) => `- ${pageLink(page)}`), ""];
10656
+ }
10657
+ function detailValue(manifest, key) {
10658
+ const value = manifest.details?.[key];
10659
+ const normalized = typeof value === "string" ? value.trim() : "";
10660
+ return normalized || void 0;
10661
+ }
10662
+ function detailList(manifest, key) {
10663
+ const value = detailValue(manifest, key);
10664
+ if (!value) {
10665
+ return void 0;
10666
+ }
10667
+ const items = value.split(",").map((item) => item.trim()).filter(Boolean);
10668
+ return items.length ? items : void 0;
9296
10669
  }
9297
10670
  function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutputs = [], modulePage, decorations) {
9298
10671
  const relativePath = pagePathFor("source", manifest.sourceId);
@@ -9317,6 +10690,10 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
9317
10690
  title: analysis.title,
9318
10691
  ...manifest.sourceType ? { source_type: manifest.sourceType } : {},
9319
10692
  ...manifest.sourceClass ? { source_class: manifest.sourceClass } : {},
10693
+ ...detailValue(manifest, "occurred_at") ? { occurred_at: detailValue(manifest, "occurred_at") } : {},
10694
+ ...detailList(manifest, "participants") ? { participants: detailList(manifest, "participants") } : {},
10695
+ ...detailValue(manifest, "container_title") ? { container_title: detailValue(manifest, "container_title") } : {},
10696
+ ...detailValue(manifest, "conversation_id") ? { conversation_id: detailValue(manifest, "conversation_id") } : {},
9320
10697
  tags: decoratedTags(analysis.code ? ["source", "code"] : ["source"], decorations),
9321
10698
  source_ids: [manifest.sourceId],
9322
10699
  project_ids: decorations?.projectIds ?? [],
@@ -9336,9 +10713,19 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
9336
10713
  `# ${analysis.title}`,
9337
10714
  "",
9338
10715
  `Source ID: \`${manifest.sourceId}\``,
10716
+ `Source Kind: \`${manifest.sourceKind}\``,
9339
10717
  manifest.url ? `Source URL: ${manifest.url}` : `Source Path: \`${manifest.originalPath ?? manifest.storedPath}\``,
9340
10718
  ...manifest.sourceType ? [`Source Type: \`${manifest.sourceType}\``, ""] : [""],
9341
10719
  ...manifest.sourceClass ? [`Source Class: \`${manifest.sourceClass}\``, ""] : [],
10720
+ ...manifest.sourceGroupTitle ? [`Source Group: ${manifest.sourceGroupTitle}`] : [],
10721
+ ...manifest.partTitle ? [`Part: ${manifest.partIndex ?? "?"}/${manifest.partCount ?? "?"} - ${manifest.partTitle}`] : [],
10722
+ ...manifest.details && Object.keys(manifest.details).length ? [
10723
+ "",
10724
+ "## Source Details",
10725
+ "",
10726
+ ...Object.entries(manifest.details).map(([key, value]) => `- ${key.replace(/_/g, " ")}: ${value}`),
10727
+ ""
10728
+ ] : [],
9342
10729
  "",
9343
10730
  "## Summary",
9344
10731
  "",
@@ -9639,6 +11026,9 @@ function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
9639
11026
  const outputs = pages.filter((page) => page.kind === "output");
9640
11027
  const insights = pages.filter((page) => page.kind === "insight");
9641
11028
  const graphPages = pages.filter((page) => page.kind === "graph_report" || page.kind === "community_summary");
11029
+ const dashboards = pages.filter(
11030
+ (page) => page.kind === "index" && page.path.startsWith("dashboards/") && page.path !== "dashboards/index.md"
11031
+ );
9642
11032
  return [
9643
11033
  "---",
9644
11034
  "page_id: index",
@@ -9684,6 +11074,10 @@ function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
9684
11074
  "",
9685
11075
  ...outputs.length ? outputs.map((page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]]`) : ["- No saved outputs yet."],
9686
11076
  "",
11077
+ "## Dashboards",
11078
+ "",
11079
+ ...dashboards.length ? dashboards.map((page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]]`) : ["- No dashboards yet."],
11080
+ "",
9687
11081
  "## Graph",
9688
11082
  "",
9689
11083
  ...graphPages.length ? graphPages.map((page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]]`) : ["- No graph reports yet."],
@@ -11160,15 +12554,37 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
11160
12554
  const insertPage = db.prepare(
11161
12555
  "INSERT INTO pages (id, path, title, body, kind, status, source_type, source_class, project_ids, project_key) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
11162
12556
  );
12557
+ const rootDir = path21.dirname(wikiDir);
11163
12558
  for (const page of pages) {
11164
12559
  const absolutePath = path21.join(wikiDir, page.path);
11165
12560
  const content = await fs17.readFile(absolutePath, "utf8");
11166
12561
  const parsed = matter8(content);
12562
+ let body = parsed.content;
12563
+ const primarySourceId = Array.isArray(parsed.data.source_ids) && typeof parsed.data.source_ids[0] === "string" ? parsed.data.source_ids[0] : page.sourceIds[0];
12564
+ if ((page.kind === "source" || page.kind === "module") && primarySourceId) {
12565
+ try {
12566
+ const manifest = JSON.parse(
12567
+ await fs17.readFile(path21.join(rootDir, "state", "manifests", `${primarySourceId}.json`), "utf8")
12568
+ );
12569
+ const excerptPath = manifest.extractedTextPath ?? manifest.storedPath;
12570
+ if (excerptPath) {
12571
+ const excerpt = await fs17.readFile(path21.join(rootDir, excerptPath), "utf8");
12572
+ if (excerpt.trim()) {
12573
+ body = `${body}
12574
+
12575
+ ## Source Excerpt
12576
+
12577
+ ${excerpt.trim()}`.trim();
12578
+ }
12579
+ }
12580
+ } catch {
12581
+ }
12582
+ }
11167
12583
  insertPage.run(
11168
12584
  page.id,
11169
12585
  page.path,
11170
12586
  page.title,
11171
- parsed.content,
12587
+ body,
11172
12588
  page.kind,
11173
12589
  page.status,
11174
12590
  typeof parsed.data.source_type === "string" ? parsed.data.source_type : "",
@@ -11229,7 +12645,25 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
11229
12645
  FROM page_search
11230
12646
  JOIN pages ON pages.rowid = page_search.rowid
11231
12647
  WHERE ${clauses.join(" AND ")}
11232
- ORDER BY rank
12648
+ ORDER BY
12649
+ CASE pages.status
12650
+ WHEN 'active' THEN 0
12651
+ WHEN 'draft' THEN 1
12652
+ WHEN 'candidate' THEN 2
12653
+ ELSE 3
12654
+ END,
12655
+ CASE pages.kind
12656
+ WHEN 'source' THEN 0
12657
+ WHEN 'module' THEN 1
12658
+ WHEN 'output' THEN 2
12659
+ WHEN 'insight' THEN 3
12660
+ WHEN 'graph_report' THEN 4
12661
+ WHEN 'community_summary' THEN 5
12662
+ WHEN 'concept' THEN 6
12663
+ WHEN 'entity' THEN 7
12664
+ ELSE 8
12665
+ END,
12666
+ rank
11233
12667
  LIMIT ?
11234
12668
  `);
11235
12669
  params.push(options.limit ?? 5);
@@ -11926,6 +13360,267 @@ async function buildManagedContent(absolutePath, defaults, build) {
11926
13360
  }
11927
13361
  return content;
11928
13362
  }
13363
/**
 * Reads one string entry from a manifest's optional `details` map.
 *
 * @param {object} manifest - Manifest whose `details` property may be absent.
 * @param {string} key - Name of the detail to look up.
 * @returns {string|undefined} The trimmed value, or `undefined` when the entry
 *   is missing, is not a string, or is blank after trimming.
 */
function manifestDetailValue(manifest, key) {
  const raw = manifest.details?.[key];
  if (typeof raw !== "string") {
    return undefined;
  }
  const trimmed = raw.trim();
  return trimmed ? trimmed : undefined;
}
13367
+ async function loadAnalysesBySourceIds(paths, sourceIds) {
13368
+ const analyses = await Promise.all(
13369
+ sourceIds.map(async (sourceId) => await readJsonFile(path22.join(paths.analysesDir, `${sourceId}.json`)))
13370
+ );
13371
+ return analyses.filter((analysis) => Boolean(analysis?.sourceId));
13372
+ }
13373
/**
 * Builds the managed pages under `dashboards/`: an index plus the recent-sources,
 * timeline, contradictions and open-questions dashboards.
 *
 * Each dashboard is rendered through `buildManagedContent` (managed by "system")
 * and paired with a graph page record of kind "index".
 *
 * @param {object} paths - Vault paths; `wikiDir` and `analysesDir` are read.
 * @param {object} graph - Graph-like object; `pages` and `sources` (manifests) are read.
 * @param {string} schemaHash - Stored in every dashboard's frontmatter and page record.
 * @param {object|null|undefined} report - Graph report; only `contradictions` is read.
 * @returns {Promise<Array<{page: object, content: *}>>} One record per dashboard,
 *   in a fixed order (index, recent sources, timeline, contradictions, open questions).
 */
async function buildDashboardRecords(paths, graph, schemaHash, report) {
  const wikiLink = (page) => `[[${page.path.replace(/\.md$/, "")}|${page.title}]]`;
  const bulletsOr = (bullets, emptyLine) => (bullets.length ? bullets : [emptyLine]);

  const sourcePages = graph.pages.filter((page) => page.kind === "source");
  const reviewPages = graph.pages.filter((page) => page.kind === "output" && page.path.startsWith("outputs/source-reviews/"));
  const briefPages = graph.pages.filter((page) => page.kind === "output" && page.path.startsWith("outputs/source-briefs/"));
  const manifests = graph.sources;
  const manifestBySourceId = new Map(manifests.map((manifest) => [manifest.sourceId, manifest]));

  // First source page per source id, so the timeline does not rescan every page per manifest.
  const sourcePageBySourceId = new Map();
  for (const page of sourcePages) {
    for (const sourceId of page.sourceIds) {
      if (!sourcePageBySourceId.has(sourceId)) {
        sourcePageBySourceId.set(sourceId, page);
      }
    }
  }

  // Resolve `occurred_at` once per manifest; newest first, capped at 25 entries.
  const timelineEntries = manifests
    .map((manifest) => ({ manifest, occurredAt: manifestDetailValue(manifest, "occurred_at") }))
    .filter((entry) => entry.occurredAt)
    .sort((left, right) => right.occurredAt.localeCompare(left.occurredAt))
    .slice(0, 25);
  const timelineSourceIds = timelineEntries.map((entry) => entry.manifest.sourceId);

  const recentSourcePages = [...sourcePages].sort((left, right) => right.updatedAt.localeCompare(left.updatedAt)).slice(0, 20);
  const recentSourceIds = uniqueStrings3(recentSourcePages.flatMap((page) => page.sourceIds));

  const analyses = await loadAnalysesBySourceIds(paths, uniqueStrings3(sourcePages.flatMap((page) => page.sourceIds)));
  const analysisSourceIds = analyses.map((analysis) => analysis.sourceId);
  // Analyses come from JSON on disk; tolerate one without a `questions` array.
  const openQuestions = uniqueStrings3(
    analyses.flatMap((analysis) => (analysis.questions ?? []).map((question) => `${analysis.title}: ${question}`))
  ).slice(0, 20);

  const contradictions = report?.contradictions ?? [];
  // One source can appear in several contradictions; list each id once.
  const contradictionSourceIds = uniqueStrings3(contradictions.flatMap((item) => [item.sourceIdA, item.sourceIdB]));

  const relatedPages = [...reviewPages, ...briefPages].slice(0, 12);
  const relatedReviewLines = relatedPages.length ? ["## Related Reviews", "", ...relatedPages.map((page) => `- ${wikiLink(page)}`), ""] : [];

  // Shared frontmatter shape; key order is kept stable because it is serialized as-is.
  // Array fields are copied so no two fields share one array instance
  // (js-yaml-style serializers emit anchors/aliases for repeated references).
  const frontmatter = (dashboard, metadata) => ({
    page_id: `dashboards:${dashboard.slug}`,
    kind: "index",
    title: dashboard.title,
    tags: [...dashboard.tags],
    source_ids: [...dashboard.sourceIds],
    project_ids: [],
    node_ids: [],
    freshness: "fresh",
    status: metadata.status,
    confidence: 1,
    created_at: metadata.createdAt,
    updated_at: metadata.updatedAt,
    // The index page has no source list of its own and records what the caller supplied.
    compiled_from: dashboard.compiledFrom ? [...dashboard.compiledFrom] : metadata.compiledFrom,
    managed_by: metadata.managedBy,
    backlinks: [],
    schema_hash: schemaHash,
    source_hashes: {},
    source_semantic_hashes: {}
  });

  const dashboards = [
    {
      slug: "index",
      title: "Dashboards",
      tags: ["index", "dashboards"],
      sourceIds: [],
      compiledFrom: undefined,
      pageSourceIds: [],
      lines: [
        "# Dashboards",
        "",
        "- [[dashboards/recent-sources|Recent Sources]]",
        "- [[dashboards/timeline|Timeline]]",
        "- [[dashboards/contradictions|Contradictions]]",
        "- [[dashboards/open-questions|Open Questions]]",
        "",
        "```dataview",
        "TABLE file.mtime AS updated",
        'FROM "dashboards"',
        'WHERE file.name != "index"',
        "SORT file.mtime desc",
        "```",
        ""
      ]
    },
    {
      slug: "recent-sources",
      title: "Recent Sources",
      tags: ["index", "dashboard", "recent-sources"],
      sourceIds: recentSourceIds,
      compiledFrom: recentSourceIds,
      // NOTE(review): only this dashboard carries source ids on its graph page record; confirm that is intended.
      pageSourceIds: recentSourceIds,
      lines: [
        "# Recent Sources",
        "",
        ...bulletsOr(
          recentSourcePages.map((page) => `- ${page.updatedAt}: ${wikiLink(page)}`),
          "- No source pages yet."
        ),
        "",
        "```dataview",
        "TABLE source_type, occurred_at, participants",
        'FROM "sources"',
        "SORT updated_at desc",
        "LIMIT 25",
        "```",
        ""
      ]
    },
    {
      slug: "timeline",
      title: "Timeline",
      tags: ["index", "dashboard", "timeline"],
      sourceIds: timelineSourceIds,
      compiledFrom: timelineSourceIds,
      pageSourceIds: [],
      lines: [
        "# Timeline",
        "",
        ...bulletsOr(
          timelineEntries.map(({ manifest, occurredAt }) => {
            const sourcePage = sourcePageBySourceId.get(manifest.sourceId);
            return `- ${occurredAt}: ${sourcePage ? wikiLink(sourcePage) : manifest.title}`;
          }),
          "- No timeline-aware sources yet."
        ),
        "",
        "```dataview",
        "TABLE occurred_at, participants, container_title",
        'FROM "sources"',
        "WHERE occurred_at",
        "SORT occurred_at desc",
        "```",
        ""
      ]
    },
    {
      slug: "contradictions",
      title: "Contradictions",
      tags: ["index", "dashboard", "contradictions"],
      sourceIds: contradictionSourceIds,
      compiledFrom: contradictionSourceIds,
      pageSourceIds: [],
      lines: [
        "# Contradictions",
        "",
        ...bulletsOr(
          contradictions.map((contradiction) => {
            const left = manifestBySourceId.get(contradiction.sourceIdA)?.title ?? contradiction.sourceIdA;
            const right = manifestBySourceId.get(contradiction.sourceIdB)?.title ?? contradiction.sourceIdB;
            return `- ${left} / ${right}: ${contradiction.claimA} <> ${contradiction.claimB}`;
          }),
          "- No contradictions are currently flagged."
        ),
        "",
        ...relatedReviewLines,
        "```dataview",
        'LIST FROM "outputs/source-reviews"',
        "SORT file.mtime desc",
        "```",
        ""
      ]
    },
    {
      slug: "open-questions",
      title: "Open Questions",
      tags: ["index", "dashboard", "open-questions"],
      sourceIds: analysisSourceIds,
      compiledFrom: analysisSourceIds,
      pageSourceIds: [],
      lines: [
        "# Open Questions",
        "",
        ...bulletsOr(
          openQuestions.map((question) => `- ${question}`),
          "- No open questions are currently extracted."
        ),
        "",
        "```dataview",
        'LIST FROM "outputs/source-briefs" OR "outputs/source-reviews"',
        "SORT file.mtime desc",
        "```",
        ""
      ]
    }
  ];

  const records = [];
  for (const dashboard of dashboards) {
    const relativePath = `dashboards/${dashboard.slug}.md`;
    const content = await buildManagedContent(
      path22.join(paths.wikiDir, relativePath),
      {
        managedBy: "system",
        compiledFrom: dashboard.pageSourceIds
      },
      (metadata) => matter9.stringify(dashboard.lines.join("\n"), frontmatter(dashboard, metadata))
    );
    records.push({
      page: emptyGraphPage({
        // NOTE(review): the graph id is "dashboard:dashboards/<slug>" while the frontmatter
        // page_id is "dashboards:<slug>"; kept as-is, confirm the difference is intended.
        id: `dashboard:${relativePath.replace(/\.md$/, "")}`,
        path: relativePath,
        title: dashboard.title,
        kind: "index",
        sourceIds: dashboard.pageSourceIds,
        nodeIds: [],
        schemaHash,
        sourceHashes: {},
        confidence: 1
      }),
      content
    });
  }
  return records;
}
11929
13624
  function indexCompiledFrom(pages) {
11930
13625
  return uniqueStrings3(pages.flatMap((page) => page.sourceIds));
11931
13626
  }
@@ -12951,8 +14646,19 @@ async function syncVaultArtifacts(rootDir, input) {
12951
14646
  input.previousState?.generatedAt,
12952
14647
  contradictions
12953
14648
  );
12954
- records.push(...graphOrientation.records);
12955
- const allPages = [...basePages, ...graphOrientation.records.map((record) => record.page)];
14649
+ const preliminaryPages = [...basePages, ...graphOrientation.records.map((record) => record.page)];
14650
+ const dashboardRecords = await buildDashboardRecords(
14651
+ paths,
14652
+ {
14653
+ ...baseGraph,
14654
+ sources: input.manifests,
14655
+ pages: preliminaryPages
14656
+ },
14657
+ globalSchemaHash,
14658
+ graphOrientation.report
14659
+ );
14660
+ records.push(...graphOrientation.records, ...dashboardRecords);
14661
+ const allPages = uniqueBy([...preliminaryPages, ...dashboardRecords.map((record) => record.page)], (page) => page.id);
12956
14662
  const graph = {
12957
14663
  ...baseGraph,
12958
14664
  pages: allPages
@@ -13056,6 +14762,11 @@ async function syncVaultArtifacts(rootDir, input) {
13056
14762
  ["concepts/index.md", "concepts", activeConceptPages],
13057
14763
  ["entities/index.md", "entities", activeEntityPages],
13058
14764
  ["outputs/index.md", "outputs", allPages.filter((page) => page.kind === "output")],
14765
+ [
14766
+ "dashboards/index.md",
14767
+ "dashboards",
14768
+ allPages.filter((page) => page.kind === "index" && page.path.startsWith("dashboards/") && page.path !== "dashboards/index.md")
14769
+ ],
13059
14770
  ["candidates/index.md", "candidates", candidatePages],
13060
14771
  ["graph/index.md", "graph", allPages.filter((page) => page.kind === "graph_report" || page.kind === "community_summary")]
13061
14772
  ]) {
@@ -13156,17 +14867,40 @@ async function refreshIndexesAndSearch(rootDir, pages) {
13156
14867
  const compileState = await readJsonFile(paths.compileStatePath);
13157
14868
  const globalSchemaHash = schemas.effective.global.hash;
13158
14869
  const currentGraph = await readJsonFile(paths.graphPath);
13159
- const basePages = pages.filter((page) => page.kind !== "graph_report" && page.kind !== "community_summary");
14870
+ const orientationPages = uniqueBy(
14871
+ pages.filter((page) => page.kind !== "graph_report" && page.kind !== "community_summary"),
14872
+ (page) => page.id
14873
+ );
14874
+ const basePages = uniqueBy(
14875
+ pages.filter(
14876
+ (page) => page.kind !== "graph_report" && page.kind !== "community_summary" && !(page.kind === "index" && page.path.startsWith("dashboards/"))
14877
+ ),
14878
+ (page) => page.id
14879
+ );
13160
14880
  const graphOrientation = currentGraph ? await buildGraphOrientationPages(
13161
14881
  {
13162
14882
  ...currentGraph,
13163
- pages: basePages
14883
+ pages: orientationPages
13164
14884
  },
13165
14885
  paths,
13166
14886
  globalSchemaHash,
13167
14887
  compileState?.generatedAt
13168
14888
  ) : { records: [], report: null };
13169
- const pagesWithGraph = sortGraphPages([...basePages, ...graphOrientation.records.map((record) => record.page)]);
14889
+ const dashboardRecords = currentGraph ? await buildDashboardRecords(
14890
+ paths,
14891
+ {
14892
+ ...currentGraph,
14893
+ pages: [...basePages, ...graphOrientation.records.map((record) => record.page)]
14894
+ },
14895
+ globalSchemaHash,
14896
+ graphOrientation.report
14897
+ ) : [];
14898
+ const pagesWithGraph = sortGraphPages(
14899
+ uniqueBy(
14900
+ [...basePages, ...graphOrientation.records.map((record) => record.page), ...dashboardRecords.map((record) => record.page)],
14901
+ (page) => page.id
14902
+ )
14903
+ );
13170
14904
  if (currentGraph) {
13171
14905
  await writeJsonFile(paths.graphPath, {
13172
14906
  ...currentGraph,
@@ -13194,6 +14928,7 @@ async function refreshIndexesAndSearch(rootDir, pages) {
13194
14928
  ensureDir(path22.join(paths.wikiDir, "concepts")),
13195
14929
  ensureDir(path22.join(paths.wikiDir, "entities")),
13196
14930
  ensureDir(path22.join(paths.wikiDir, "outputs")),
14931
+ ensureDir(path22.join(paths.wikiDir, "dashboards")),
13197
14932
  ensureDir(path22.join(paths.wikiDir, "graph")),
13198
14933
  ensureDir(path22.join(paths.wikiDir, "graph", "communities")),
13199
14934
  ensureDir(path22.join(paths.wikiDir, "projects")),
@@ -13256,6 +14991,11 @@ async function refreshIndexesAndSearch(rootDir, pages) {
13256
14991
  ["concepts/index.md", "concepts", pagesWithGraph.filter((page) => page.kind === "concept" && page.status !== "candidate")],
13257
14992
  ["entities/index.md", "entities", pagesWithGraph.filter((page) => page.kind === "entity" && page.status !== "candidate")],
13258
14993
  ["outputs/index.md", "outputs", pagesWithGraph.filter((page) => page.kind === "output")],
14994
+ [
14995
+ "dashboards/index.md",
14996
+ "dashboards",
14997
+ pagesWithGraph.filter((page) => page.kind === "index" && page.path.startsWith("dashboards/") && page.path !== "dashboards/index.md")
14998
+ ],
13259
14999
  ["candidates/index.md", "candidates", pagesWithGraph.filter((page) => page.status === "candidate")],
13260
15000
  ["graph/index.md", "graph", pagesWithGraph.filter((page) => page.kind === "graph_report" || page.kind === "community_summary")]
13261
15001
  ]) {
@@ -13275,6 +15015,9 @@ async function refreshIndexesAndSearch(rootDir, pages) {
13275
15015
  for (const record of graphOrientation.records) {
13276
15016
  await writeFileIfChanged(path22.join(paths.wikiDir, record.page.path), record.content);
13277
15017
  }
15018
+ for (const record of dashboardRecords) {
15019
+ await writeFileIfChanged(path22.join(paths.wikiDir, record.page.path), record.content);
15020
+ }
13278
15021
  if (graphOrientation.report) {
13279
15022
  await writeJsonFile(path22.join(paths.wikiDir, "graph", "report.json"), graphOrientation.report);
13280
15023
  }
@@ -13291,6 +15034,11 @@ async function refreshIndexesAndSearch(rootDir, pages) {
13291
15034
  await Promise.all(
13292
15035
  existingGraphPages.filter((relativePath) => !allowedGraphPages.has(relativePath)).map((relativePath) => fs18.rm(path22.join(paths.wikiDir, relativePath), { force: true }))
13293
15036
  );
15037
+ const existingDashboardPages = (await listFilesRecursive(path22.join(paths.wikiDir, "dashboards")).catch(() => [])).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path22.relative(paths.wikiDir, absolutePath)));
15038
+ const allowedDashboardPages = /* @__PURE__ */ new Set(["dashboards/index.md", ...dashboardRecords.map((record) => record.page.path)]);
15039
+ await Promise.all(
15040
+ existingDashboardPages.filter((relativePath) => !allowedDashboardPages.has(relativePath)).map((relativePath) => fs18.rm(path22.join(paths.wikiDir, relativePath), { force: true }))
15041
+ );
13294
15042
  await rebuildSearchIndex(paths.searchDbPath, pagesWithGraph, paths.wikiDir);
13295
15043
  }
13296
15044
  async function prepareOutputPageSave(rootDir, input) {
@@ -13426,6 +15174,9 @@ async function stageOutputApprovalBundle(rootDir, stagedPages) {
13426
15174
  });
13427
15175
  return { approvalId, approvalDir };
13428
15176
  }
15177
/**
 * Stages generated output pages for approval.
 *
 * Delegates unchanged to `stageOutputApprovalBundle`.
 *
 * @param {string} rootDir - Vault root directory.
 * @param {Array<{page: object, content: string}>} stagedPages - Pages to stage.
 * @returns {Promise<object>} Whatever `stageOutputApprovalBundle` resolves to.
 */
async function stageGeneratedOutputPages(rootDir, stagedPages) {
  const bundle = await stageOutputApprovalBundle(rootDir, stagedPages);
  return bundle;
}
13429
15180
  async function executeQuery(rootDir, question, format) {
13430
15181
  const { paths } = await loadVaultConfig(rootDir);
13431
15182
  const schemas = await loadVaultSchemas(rootDir);
@@ -14767,7 +16518,17 @@ async function benchmarkVault(rootDir, options = {}) {
14767
16518
  });
14768
16519
  await writeJsonFile(paths.benchmarkPath, artifact);
14769
16520
  await refreshIndexesAndSearch(rootDir, graph.pages);
14770
- return artifact;
16521
+ const refreshedGraph = await readJsonFile(paths.graphPath) ?? graph;
16522
+ const refreshedHash = graphHash(refreshedGraph);
16523
+ if (artifact.graphHash === refreshedHash) {
16524
+ return artifact;
16525
+ }
16526
+ const refreshedArtifact = {
16527
+ ...artifact,
16528
+ graphHash: refreshedHash
16529
+ };
16530
+ await writeJsonFile(paths.benchmarkPath, refreshedArtifact);
16531
+ return refreshedArtifact;
14771
16532
  }
14772
16533
  async function pathGraphVault(rootDir, from, to) {
14773
16534
  const graph = await ensureCompiledGraph(rootDir);
@@ -14987,7 +16748,7 @@ async function bootstrapDemo(rootDir, input) {
14987
16748
  }
14988
16749
 
14989
16750
  // src/mcp.ts
14990
- var SERVER_VERSION = "0.2.2";
16751
+ var SERVER_VERSION = "0.4.0";
14991
16752
  async function createMcpServer(rootDir) {
14992
16753
  const server = new McpServer({
14993
16754
  name: "swarmvault",
@@ -15165,8 +16926,8 @@ async function createMcpServer(rootDir) {
15165
16926
  }
15166
16927
  },
15167
16928
  async ({ input }) => {
15168
- const manifest = await ingestInput(rootDir, input);
15169
- return asToolText(manifest);
16929
+ const result = await ingestInputDetailed(rootDir, input);
16930
+ return asToolText(result);
15170
16931
  }
15171
16932
  );
15172
16933
  server.registerTool(
@@ -15843,7 +17604,7 @@ function matchesManagedSourceSpec(existing, input) {
15843
17604
  if (existing.kind !== input.kind) {
15844
17605
  return false;
15845
17606
  }
15846
- if (input.kind === "directory") {
17607
+ if (input.kind === "directory" || input.kind === "file") {
15847
17608
  return path25.resolve(existing.path ?? "") === path25.resolve(input.path);
15848
17609
  }
15849
17610
  return (existing.url ?? "") === input.url;
@@ -15855,10 +17616,15 @@ async function resolveManagedSourceInput(rootDir, input) {
15855
17616
  if (!stat) {
15856
17617
  throw new Error(`Source not found: ${input}`);
15857
17618
  }
17619
+ if (stat.isFile()) {
17620
+ return {
17621
+ kind: "file",
17622
+ path: absoluteInput,
17623
+ title: path25.basename(absoluteInput, path25.extname(absoluteInput)) || absoluteInput
17624
+ };
17625
+ }
15858
17626
  if (!stat.isDirectory()) {
15859
- throw new Error(
15860
- "`swarmvault source add` supports directories, public GitHub repo root URLs, and docs hubs. Use `swarmvault ingest` for single files."
15861
- );
17627
+ throw new Error("`swarmvault source add` supports local files, directories, public GitHub repo root URLs, and docs hubs.");
15862
17628
  }
15863
17629
  const detectedRepoRoot = await findNearestGitRoot3(absoluteInput);
15864
17630
  const repoRoot = detectedRepoRoot && !(withinRoot2(rootDir, absoluteInput) && !withinRoot2(rootDir, detectedRepoRoot)) ? detectedRepoRoot : absoluteInput;
@@ -15891,6 +17657,10 @@ async function resolveManagedSourceInput(rootDir, input) {
15891
17657
  function directorySourceIdsFor(manifests, inputPath) {
15892
17658
  return manifests.filter((manifest) => manifest.originalPath && withinRoot2(path25.resolve(inputPath), path25.resolve(manifest.originalPath))).map((manifest) => manifest.sourceId).sort((left, right) => left.localeCompare(right));
15893
17659
  }
17660
/**
 * Lists the source ids of manifests that were ingested from one specific file.
 *
 * @param {object[]} manifests - Manifests to scan; those without `originalPath` are ignored.
 * @param {string} inputPath - File path; compared after resolving both sides to absolute paths.
 * @returns {string[]} Matching source ids, sorted with `localeCompare`.
 */
function fileSourceIdsFor(manifests, inputPath) {
  const targetPath = path25.resolve(inputPath);
  const matchingIds = [];
  for (const manifest of manifests) {
    if (manifest.originalPath && path25.resolve(manifest.originalPath) === targetPath) {
      matchingIds.push(manifest.sourceId);
    }
  }
  matchingIds.sort((left, right) => left.localeCompare(right));
  return matchingIds;
}
15894
17664
  async function syncDirectorySource(rootDir, inputPath, repoRoot) {
15895
17665
  const manifestsBefore = await listManifests(rootDir);
15896
17666
  const previousInScope = manifestsBefore.filter(
@@ -15924,6 +17694,22 @@ async function syncDirectorySource(rootDir, inputPath, repoRoot) {
15924
17694
  changed: result.imported.length + result.updated.length + removed.length > 0
15925
17695
  };
15926
17696
  }
17697
/**
 * Re-ingests a single managed file and summarizes what changed.
 *
 * @param {string} rootDir - Vault root directory.
 * @param {string} inputPath - Path of the managed file.
 * @returns {Promise<object>} `{ title, sourceIds, counts, changed }`, where `title` is the
 *   file name without its extension (falling back to `inputPath`), `sourceIds` are the
 *   manifests recorded for this file after ingestion, and `changed` is true when anything
 *   was created, updated or removed.
 */
async function syncFileSource(rootDir, inputPath) {
  const ingest = await ingestInputDetailed(rootDir, inputPath);
  const manifestsAfter = await listManifests(rootDir);

  const fileStem = path25.basename(inputPath, path25.extname(inputPath));
  const importedCount = ingest.created.length;
  const updatedCount = ingest.updated.length;
  const removedCount = ingest.removed.length;

  return {
    title: fileStem || inputPath,
    sourceIds: fileSourceIdsFor(manifestsAfter, inputPath),
    counts: {
      scannedCount: ingest.scannedCount,
      importedCount,
      updatedCount,
      removedCount,
      skippedCount: ingest.skipped.length
    },
    changed: importedCount + updatedCount + removedCount > 0
  };
}
15927
17713
  async function runGitCommand(cwd, args) {
15928
17714
  await new Promise((resolve, reject) => {
15929
17715
  const child = spawn2("git", args, {
@@ -15970,12 +17756,11 @@ async function syncCrawlSource(rootDir, entry, options) {
15970
17756
  let updatedCount = 0;
15971
17757
  for (const pageUrl of crawl.pages) {
15972
17758
  const persisted = await ingestInputDetailed(rootDir, pageUrl);
15973
- currentSourceIds.push(persisted.manifest.sourceId);
15974
- if (persisted.isNew) {
15975
- importedCount += 1;
15976
- } else if (persisted.wasUpdated) {
15977
- updatedCount += 1;
15978
- }
17759
+ currentSourceIds.push(...persisted.created.map((manifest) => manifest.sourceId));
17760
+ currentSourceIds.push(...persisted.updated.map((manifest) => manifest.sourceId));
17761
+ currentSourceIds.push(...persisted.unchanged.map((manifest) => manifest.sourceId));
17762
+ importedCount += persisted.created.length;
17763
+ updatedCount += persisted.updated.length;
15979
17764
  }
15980
17765
  let removedCount = 0;
15981
17766
  for (const sourceId of previousSourceIds) {
@@ -16019,6 +17804,22 @@ async function syncManagedSource(rootDir, entry, options) {
16019
17804
  };
16020
17805
  }
16021
17806
  sync = await syncDirectorySource(rootDir, entry.path, entry.repoRoot);
17807
+ } else if (entry.kind === "file") {
17808
+ if (!entry.path) {
17809
+ throw new Error(`Managed source ${entry.id} is missing its file path.`);
17810
+ }
17811
+ if (!await fileExists(entry.path)) {
17812
+ return {
17813
+ ...entry,
17814
+ status: "missing",
17815
+ updatedAt: now,
17816
+ lastSyncAt: now,
17817
+ lastSyncStatus: "error",
17818
+ lastError: `File not found: ${entry.path}`,
17819
+ changed: false
17820
+ };
17821
+ }
17822
+ sync = await syncFileSource(rootDir, entry.path);
16022
17823
  } else if (entry.kind === "github_repo") {
16023
17824
  sync = await syncGitHubRepoSource(rootDir, entry);
16024
17825
  } else {
@@ -16237,6 +18038,179 @@ async function generateBriefsForSources(rootDir, sources) {
16237
18038
  }
16238
18039
  return briefPaths;
16239
18040
  }
18041
/**
 * Renders a source review as markdown without calling any model provider.
 *
 * @param {object} input
 * @param {object} input.scope - Review scope; `title` and `sourceIds` are read.
 * @param {object[]} input.sourcePages - Pages linked to the scope.
 * @param {object[]} input.analyses - Analyses providing title, summary, questions,
 *   concepts and entities.
 * @param {object|null|undefined} input.report - Graph report; only `contradictions` is read.
 * @returns {string} Markdown with the sections What This Source Contains, Likely Canonical
 *   Pages To Update, Important Topics And Entities, Contradictions To Inspect, Open Questions
 *   and Suggested Next Steps.
 */
function renderDeterministicSourceReview(input) {
  const { sourcePages, analyses, report, scope } = input;
  const canonicalKinds = ["source", "concept", "entity"];
  const wikiLink = (page) => `[[${page.path.replace(/\.md$/, "")}|${page.title}]]`;
  const firstEightUnique = (values) => uniqueStrings4(values).slice(0, 8);

  const canonicalPages = sourcePages.filter((page) => canonicalKinds.includes(page.kind)).slice(0, 10);
  const modulePages = sourcePages.filter((page) => page.kind === "module").slice(0, 8);
  const questions = firstEightUnique(analyses.flatMap((analysis) => analysis.questions));
  const concepts = firstEightUnique(analyses.flatMap((analysis) => analysis.concepts.map((concept) => concept.name)));
  const entities = firstEightUnique(analyses.flatMap((analysis) => analysis.entities.map((entity) => entity.name)));
  // Keep only contradictions where at least one side belongs to this scope.
  const touchesScope = (contradiction) =>
    scope.sourceIds.includes(contradiction.sourceIdA) || scope.sourceIds.includes(contradiction.sourceIdB);
  const contradictions = report?.contradictions.filter(touchesScope) ?? [];

  const lines = [`# Source Review: ${scope.title}`, ""];

  lines.push("## What This Source Contains", "");
  if (analyses.length) {
    lines.push(...analyses.map((analysis) => `- ${analysis.title}: ${analysis.summary}`));
  } else {
    lines.push("- This source has not been analyzed yet. Compile the vault before trusting downstream pages.");
  }

  lines.push("", "## Likely Canonical Pages To Update", "");
  if (canonicalPages.length) {
    lines.push(...canonicalPages.map((page) => `- ${wikiLink(page)}`));
  } else {
    lines.push("- No canonical source, concept, or entity pages are linked to this source yet.");
  }

  lines.push("", "## Important Topics And Entities", "");
  lines.push(concepts.length ? `Concepts: ${concepts.join(", ")}` : "Concepts: none detected.");
  lines.push(entities.length ? `Entities: ${entities.join(", ")}` : "Entities: none detected.");
  if (modulePages.length) {
    lines.push("", ...modulePages.map((page) => `- Module: ${wikiLink(page)}`));
  }

  lines.push("", "## Contradictions To Inspect", "");
  if (contradictions.length) {
    lines.push(...contradictions.map((contradiction) => `- ${contradiction.claimA} / ${contradiction.claimB}`));
  } else {
    lines.push("- No contradictions are currently flagged for this source scope.");
  }

  lines.push("", "## Open Questions", "");
  if (questions.length) {
    lines.push(...questions.map((question) => `- ${question}`));
  } else {
    lines.push("- No extracted open questions yet.");
  }

  lines.push("", "## Suggested Next Steps", "");
  if (canonicalPages.length) {
    lines.push(...canonicalPages.slice(0, 5).map((page) => `- Review ${wikiLink(page)} for canonical updates.`));
  } else {
    lines.push("- Review the source page and decide which canonical pages should exist.");
  }

  lines.push("");
  return lines.join("\n");
}
18081
/**
 * Produces the markdown body of a source review for a scope.
 *
 * Compiles the vault first when no graph file can be read. A deterministic draft is always
 * rendered; it is returned directly for the "heuristic" provider, and is also the fallback
 * when the provider call fails or returns blank text.
 *
 * @param {string} rootDir - Vault root directory.
 * @param {object} scope - Review scope; `id`, `title` and `sourceIds` are read.
 * @returns {Promise<string|null>} The review markdown, or `null` when no graph is available
 *   even after compiling.
 */
async function generateSourceReviewMarkdown(rootDir, scope) {
  const { paths } = await loadVaultConfig(rootDir);
  const readGraph = () => readJsonFile(paths.graphPath);

  let graph = await readGraph();
  if (!graph) {
    await compileVault(rootDir, {});
    graph = await readGraph();
    if (!graph) {
      return null;
    }
  }

  const sourcePages = scopedSourcePages(graph, scope.sourceIds);
  const analyses = await loadSourceAnalyses(rootDir, scope.sourceIds);
  const report = await readGraphReport(rootDir);
  const fallback = renderDeterministicSourceReview({ scope, sourcePages, graph, analyses, report });

  const provider = await getProviderForTask(rootDir, "queryProvider");
  if (provider.type === "heuristic") {
    return fallback;
  }

  try {
    const schemas = await loadVaultSchemas(rootDir);
    const pageContext = sourcePages
      .slice(0, 12)
      .map((page) => `- ${page.title} (${page.kind}) -> ${page.path}`)
      .join("\n");
    const describeAnalysis = (analysis) =>
      [
        `# ${analysis.title}`,
        `Summary: ${analysis.summary}`,
        `Questions: ${analysis.questions.join(" | ") || "none"}`,
        `Concepts: ${analysis.concepts.map((concept) => concept.name).join(", ") || "none"}`,
        `Entities: ${analysis.entities.map((entity) => entity.name).join(", ") || "none"}`
      ].join("\n");
    const analysisContext = analyses.slice(0, 8).map(describeAnalysis).join("\n\n---\n\n");

    const system = buildSchemaPrompt(
      schemas.effective.global,
      "Write a concise markdown source review with sections: What This Source Contains, Likely Canonical Pages To Update, Important Topics And Entities, Contradictions To Inspect, Open Questions, Suggested Next Steps. Focus on helping a human decide what to keep, update, or question in the wiki."
    );
    const promptLines = [
      `Source scope: ${scope.title}`,
      `Scope id: ${scope.id}`,
      `Tracked source ids: ${scope.sourceIds.join(", ") || "none"}`,
      "",
      "Pages:",
      pageContext || "- none",
      "",
      "Analyses:",
      analysisContext || "No analysis context available.",
      "",
      "Deterministic fallback draft:",
      fallback
    ];

    const response = await provider.generateText({ system, prompt: promptLines.join("\n") });
    const generated = response.text?.trim();
    return generated ? generated : fallback;
  } catch {
    // Best effort: any provider or schema failure falls back to the deterministic draft.
    return fallback;
  }
}
18140
/**
 * Generates a source review for a scope and stages it as a draft output page for approval.
 *
 * @param {string} rootDir - Vault root directory.
 * @param {object} scope - Review scope; `id`, `title` and `sourceIds` are read.
 * @returns {Promise<object>} `{ sourceId, pageId, reviewPath, staged, approvalId, approvalDir }`,
 *   where `reviewPath` points at the staged page inside the approval directory.
 * @throws {Error} When no review markdown could be generated for the scope.
 */
async function stageSourceReviewForScope(rootDir, scope) {
  const { paths } = await loadVaultConfig(rootDir);
  const markdown = await generateSourceReviewMarkdown(rootDir, scope);
  if (!markdown) {
    throw new Error(`Could not generate a source review for ${scope.id}.`);
  }
  const graph = await readJsonFile(paths.graphPath);
  // The page's schema hash must be the effective global schema hash; the graph's
  // `generatedAt` timestamp (used previously) is not a schema hash.
  const schemas = await loadVaultSchemas(rootDir);
  const relatedPages = graph ? scopedSourcePages(graph, scope.sourceIds) : [];
  const relatedPageIds = relatedPages.slice(0, 16).map((page) => page.id);
  const relatedNodeIds = graph ? scopedNodeIds(graph, scope.sourceIds).slice(0, 24) : [];
  const projectIds = uniqueStrings4(relatedPages.flatMap((page) => page.projectIds));
  const now = new Date().toISOString();
  const output = buildOutputPage({
    title: `Source Review: ${scope.title}`,
    question: `Review ${scope.title}`,
    answer: markdown,
    citations: scope.sourceIds,
    schemaHash: schemas.effective.global.hash,
    outputFormat: "report",
    relatedPageIds,
    relatedNodeIds,
    relatedSourceIds: scope.sourceIds,
    projectIds,
    extraTags: ["source-review"],
    origin: "query",
    slug: `source-reviews/${scope.id}`,
    metadata: {
      status: "draft",
      createdAt: now,
      updatedAt: now,
      compiledFrom: scope.sourceIds,
      managedBy: "system",
      confidence: 0.79
    }
  });
  const approval = await stageGeneratedOutputPages(rootDir, [{ page: output.page, content: output.content }]);
  return {
    sourceId: scope.id,
    pageId: output.page.id,
    reviewPath: path25.join(approval.approvalDir, "wiki", output.page.path),
    staged: true,
    approvalId: approval.approvalId,
    approvalDir: approval.approvalDir
  };
}
18185
/**
 * Narrows a managed source record to the fields a review scope needs.
 *
 * @param {object} source - Managed source record.
 * @returns {{id: *, title: *, sourceIds: *}} A new object holding the source's `id`,
 *   `title` and `sourceIds` (the `sourceIds` value is passed through, not copied).
 */
function scopeFromManagedSource(source) {
  const { id, title, sourceIds } = source;
  return { id, title, sourceIds };
}
18192
/**
 * Stages a source review for an explicit scope.
 *
 * Delegates unchanged to `stageSourceReviewForScope`.
 *
 * @param {string} rootDir - Vault root directory.
 * @param {object} scope - Review scope to stage.
 * @returns {Promise<object>} Whatever `stageSourceReviewForScope` resolves to.
 */
async function reviewSourceScope(rootDir, scope) {
  const staged = await stageSourceReviewForScope(rootDir, scope);
  return staged;
}
18195
/**
 * Stages a source review for either a managed source or a single ingested source.
 *
 * The id is first matched against managed sources; if none matches it is matched against
 * manifest source ids. For a managed source the vault is compiled first when the graph
 * file does not exist.
 *
 * @param {string} rootDir - Vault root directory.
 * @param {string} id - Managed source id or manifest source id.
 * @returns {Promise<object>} The staging result from `stageSourceReviewForScope`.
 * @throws {Error} When the id matches neither a managed source nor a manifest.
 */
async function reviewManagedSource(rootDir, id) {
  const managedSources = await loadManagedSources(rootDir);
  const managedSource = managedSources.find((source) => source.id === id);

  if (!managedSource) {
    const manifests = await listManifests(rootDir);
    const manifest = manifests.find((candidate) => candidate.sourceId === id);
    if (!manifest) {
      throw new Error(`Managed source or source id not found: ${id}`);
    }
    const manifestScope = {
      id: manifest.sourceId,
      title: manifest.title,
      sourceIds: [manifest.sourceId]
    };
    return await stageSourceReviewForScope(rootDir, manifestScope);
  }

  const { paths } = await loadVaultConfig(rootDir);
  const graphExists = await fileExists(paths.graphPath);
  if (!graphExists) {
    await compileVault(rootDir, {});
  }
  return await stageSourceReviewForScope(rootDir, scopeFromManagedSource(managedSource));
}
16240
18214
  function shouldCompile(changedSources, graphExists, compileRequested) {
16241
18215
  return compileRequested && (!graphExists || changedSources.length > 0);
16242
18216
  }
@@ -16247,17 +18221,18 @@ async function listManagedSourceRecords(rootDir) {
16247
18221
  async function addManagedSource(rootDir, input, options = {}) {
16248
18222
  const compileRequested = options.compile ?? true;
16249
18223
  const briefRequested = options.brief ?? true;
18224
+ const reviewRequested = options.review ?? false;
16250
18225
  const sources = await loadManagedSources(rootDir);
16251
18226
  const resolved = await resolveManagedSourceInput(rootDir, input);
16252
18227
  const existing = sources.find((candidate) => matchesManagedSourceSpec(candidate, resolved));
16253
18228
  const now = (/* @__PURE__ */ new Date()).toISOString();
16254
18229
  const source = existing ?? {
16255
- id: resolved.kind === "directory" ? stableManagedSourceId("directory", path25.resolve(resolved.path), resolved.title) : stableManagedSourceId(resolved.kind, resolved.url, resolved.title),
18230
+ id: resolved.kind === "directory" || resolved.kind === "file" ? stableManagedSourceId(resolved.kind, path25.resolve(resolved.path), resolved.title) : stableManagedSourceId(resolved.kind, resolved.url, resolved.title),
16256
18231
  kind: resolved.kind,
16257
18232
  title: resolved.title,
16258
- path: resolved.kind === "directory" ? resolved.path : void 0,
18233
+ path: resolved.kind === "directory" || resolved.kind === "file" ? resolved.path : void 0,
16259
18234
  repoRoot: resolved.kind === "directory" ? resolved.repoRoot : void 0,
16260
- url: resolved.kind === "directory" ? void 0 : resolved.url,
18235
+ url: resolved.kind === "directory" || resolved.kind === "file" ? void 0 : resolved.url,
16261
18236
  createdAt: now,
16262
18237
  updatedAt: now,
16263
18238
  status: "ready",
@@ -16286,15 +18261,18 @@ async function addManagedSource(rootDir, input, options = {}) {
16286
18261
  };
16287
18262
  const nextSources = existing ? sources.map((candidate) => candidate.id === nextSource.id ? nextSource : candidate) : [...sources, nextSource];
16288
18263
  await saveManagedSources(rootDir, nextSources);
18264
+ const review = reviewRequested && nextSource.status === "ready" ? await stageSourceReviewForScope(rootDir, scopeFromManagedSource(nextSource)) : void 0;
16289
18265
  return {
16290
18266
  source: nextSource,
16291
18267
  compile,
16292
- briefGenerated
18268
+ briefGenerated,
18269
+ review
16293
18270
  };
16294
18271
  }
16295
18272
  async function reloadManagedSources(rootDir, options = {}) {
16296
18273
  const compileRequested = options.compile ?? true;
16297
18274
  const briefRequested = options.brief ?? true;
18275
+ const reviewRequested = options.review ?? false;
16298
18276
  const sources = await loadManagedSources(rootDir);
16299
18277
  const selected = options.all || !options.id ? sources : sources.filter((source) => source.id === options.id);
16300
18278
  if (!selected.length) {
@@ -16330,10 +18308,14 @@ async function reloadManagedSources(rootDir, options = {}) {
16330
18308
  };
16331
18309
  });
16332
18310
  await saveManagedSources(rootDir, nextSources);
18311
+ const reviews = reviewRequested ? await Promise.all(
18312
+ nextSources.filter((source) => selected.some((candidate) => candidate.id === source.id)).filter((source) => source.status === "ready").map(async (source) => await stageSourceReviewForScope(rootDir, scopeFromManagedSource(source)))
18313
+ ) : [];
16333
18314
  return {
16334
18315
  sources: nextSources.filter((source) => selected.some((candidate) => candidate.id === source.id)),
16335
18316
  compile,
16336
- briefPaths: [...briefPaths.values()]
18317
+ briefPaths: [...briefPaths.values()],
18318
+ reviews
16337
18319
  };
16338
18320
  }
16339
18321
  async function deleteManagedSource(rootDir, id) {
@@ -17237,6 +19219,7 @@ export {
17237
19219
  importInbox,
17238
19220
  ingestDirectory,
17239
19221
  ingestInput,
19222
+ ingestInputDetailed,
17240
19223
  initVault,
17241
19224
  initWorkspace,
17242
19225
  installAgent,
@@ -17267,10 +19250,13 @@ export {
17267
19250
  rejectApproval,
17268
19251
  reloadManagedSources,
17269
19252
  resolvePaths,
19253
+ reviewManagedSource,
19254
+ reviewSourceScope,
17270
19255
  runSchedule,
17271
19256
  runWatchCycle,
17272
19257
  searchVault,
17273
19258
  serveSchedules,
19259
+ stageGeneratedOutputPages,
17274
19260
  startGraphServer,
17275
19261
  startMcpServer,
17276
19262
  syncTrackedRepos,