@swarmvaultai/engine 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -117,9 +117,10 @@ This matters because many "OpenAI-compatible" backends only implement part of th
117
117
 
118
118
  ### Ingest
119
119
 
120
- - `ingestInput(rootDir, input)` ingests a local path or URL
120
+ - `ingestInput(rootDir, input, { includeAssets, maxAssetSize })` ingests a local path or URL
121
121
  - `importInbox(rootDir, inputDir?)` recursively imports supported inbox files and browser-clipper style bundles
122
122
  - `.js`, `.jsx`, `.ts`, and `.tsx` inputs are treated as code sources and compiled into both source pages and `wiki/code/` module pages
123
+ - HTML and markdown URL ingests localize remote image references into `raw/assets/<sourceId>/` by default and rewrite the stored markdown to local relative paths
123
124
 
124
125
  ### Compile + Query
125
126
 
@@ -160,8 +161,8 @@ Running the engine produces a local workspace with these main areas:
160
161
  - `swarmvault.schema.md`: vault-specific compile and query instructions
161
162
  - `inbox/`: capture staging area for markdown bundles and imported files
162
163
  - `raw/sources/`: immutable source copies
163
- - `raw/assets/`: copied attachments referenced by ingested markdown bundles
164
- - `wiki/`: generated markdown pages, staged candidates, saved query outputs, exploration hub pages, and a human-only `insights/` area
164
+ - `raw/assets/`: copied attachments referenced by ingested markdown bundles and remote URL ingests
165
+ - `wiki/`: generated markdown pages, the append-only `log.md` activity trail, staged candidates, saved query outputs, exploration hub pages, and a human-only `insights/` area
165
166
  - `wiki/outputs/assets/`: local chart/image artifacts and JSON manifests for saved visual outputs
166
167
  - `wiki/code/`: generated module pages for ingested JS/TS sources
167
168
  - `wiki/projects/`: generated project rollups over canonical pages
@@ -178,6 +179,7 @@ Running the engine produces a local workspace with these main areas:
178
179
 
179
180
  Saved outputs are immediately indexed into the graph page registry and search index and linked back into compiled source, concept, and entity pages through the lightweight artifact sync path. New concept and entity pages stage into `wiki/candidates/` first and promote to active pages on the next matching compile. Insight pages are indexed into search and page reads, but compile does not mutate them. Project-scoped pages receive `project_ids`, project tags, and layered root-plus-project schema hashes when all contributing sources resolve to the same configured project.
180
181
  JS/TS code sources also emit module and symbol nodes into `state/graph.json`, so local imports, exports, inheritance, and same-module call edges are queryable through the same viewer and search pipeline.
182
+ Ingest, inbox import, compile, query, lint, review, and candidate operations also append human-readable entries to `wiki/log.md`.
181
183
 
182
184
  ## Notes
183
185
 
package/dist/index.d.ts CHANGED
@@ -189,6 +189,10 @@ interface SourceAttachment {
189
189
  mimeType: string;
190
190
  originalPath?: string;
191
191
  }
192
// Optional settings accepted as the third argument of ingestInput.
+ interface IngestOptions {
193
// Whether remote image references of URL ingests are downloaded; the implementation treats an omitted value as true.
+ includeAssets?: boolean;
194
// Per-asset size cap in bytes; the implementation floors it, clamps negatives to 0, and skips downloads entirely when it is 0.
+ maxAssetSize?: number;
195
+ }
192
196
  interface SourceManifest {
193
197
  sourceId: string;
194
198
  title: string;
@@ -674,7 +678,7 @@ declare function initWorkspace(rootDir: string): Promise<{
674
678
  paths: ResolvedPaths;
675
679
  }>;
676
680
 
677
- declare function ingestInput(rootDir: string, input: string): Promise<SourceManifest>;
681
+ declare function ingestInput(rootDir: string, input: string, options?: IngestOptions): Promise<SourceManifest>;
678
682
  declare function importInbox(rootDir: string, inputDir?: string): Promise<InboxImportResult>;
679
683
  declare function listManifests(rootDir: string): Promise<SourceManifest[]>;
680
684
  declare function readExtractedText(rootDir: string, manifest: SourceManifest): Promise<string | undefined>;
@@ -756,4 +760,4 @@ declare function watchVault(rootDir: string, options?: WatchOptions): Promise<Wa
756
760
  declare function createWebSearchAdapter(id: string, config: WebSearchProviderConfig, rootDir: string): Promise<WebSearchAdapter>;
757
761
  declare function getWebSearchAdapterForTask(rootDir: string, task: "deepLintProvider"): Promise<WebSearchAdapter>;
758
762
 
759
- export { type AnalyzedTerm, type ApprovalChangeType, type ApprovalDetail, type ApprovalEntry, type ApprovalEntryDetail, type ApprovalEntryStatus, type ApprovalManifest, type ApprovalSummary, type CandidateRecord, type ChartDatum, type ChartSpec, type ClaimStatus, type CodeAnalysis, type CodeDiagnostic, type CodeImport, type CodeLanguage, type CodeSymbol, type CodeSymbolKind, type CommandRoleExecutorConfig, type CompileOptions, type CompileResult, type CompileState, type ExploreOptions, type ExploreResult, type ExploreStepResult, type Freshness, type GenerationAttachment, type GenerationRequest, type GenerationResponse, type GraphArtifact, type GraphEdge, type GraphNode, type GraphPage, type ImageGenerationRequest, type ImageGenerationResponse, type InboxImportResult, type InboxImportSkip, type InitOptions, type LintFinding, type LintOptions, type OrchestrationConfig, type OrchestrationFinding, type OrchestrationProposal, type OrchestrationRole, type OrchestrationRoleConfig, type OrchestrationRoleResult, type OutputAsset, type OutputAssetRole, type OutputFormat, type OutputOrigin, type PageKind, type PageManager, type PageStatus, type Polarity, type ProviderAdapter, type ProviderCapability, type ProviderConfig, type ProviderRoleExecutorConfig, type ProviderType, type QueryOptions, type QueryResult, type ResolvedPaths, type ReviewActionResult, type RoleExecutorConfig, type SceneElement, type SceneSpec, type ScheduleController, type ScheduleJobConfig, type ScheduleStateRecord, type ScheduleTriggerConfig, type ScheduledCompileTask, type ScheduledExploreTask, type ScheduledLintTask, type ScheduledQueryTask, type ScheduledRunResult, type ScheduledTaskConfig, type SearchResult, type SourceAnalysis, type SourceAttachment, type SourceClaim, type SourceKind, type SourceManifest, type VaultConfig, type WatchController, type WatchOptions, type WatchRunRecord, type WebSearchAdapter, type WebSearchProviderConfig, type WebSearchProviderType, type WebSearchResult, 
acceptApproval, archiveCandidate, assertProviderCapability, bootstrapDemo, compileVault, createMcpServer, createProvider, createWebSearchAdapter, defaultVaultConfig, defaultVaultSchema, exploreVault, exportGraphHtml, getProviderForTask, getWebSearchAdapterForTask, getWorkspaceInfo, importInbox, ingestInput, initVault, initWorkspace, installAgent, installConfiguredAgents, lintVault, listApprovals, listCandidates, listManifests, listPages, listSchedules, loadVaultConfig, loadVaultSchema, loadVaultSchemas, promoteCandidate, providerCapabilitySchema, providerTypeSchema, queryVault, readApproval, readExtractedText, readPage, rejectApproval, resolvePaths, runSchedule, searchVault, serveSchedules, startGraphServer, startMcpServer, watchVault, webSearchProviderTypeSchema };
763
+ export { type AnalyzedTerm, type ApprovalChangeType, type ApprovalDetail, type ApprovalEntry, type ApprovalEntryDetail, type ApprovalEntryStatus, type ApprovalManifest, type ApprovalSummary, type CandidateRecord, type ChartDatum, type ChartSpec, type ClaimStatus, type CodeAnalysis, type CodeDiagnostic, type CodeImport, type CodeLanguage, type CodeSymbol, type CodeSymbolKind, type CommandRoleExecutorConfig, type CompileOptions, type CompileResult, type CompileState, type ExploreOptions, type ExploreResult, type ExploreStepResult, type Freshness, type GenerationAttachment, type GenerationRequest, type GenerationResponse, type GraphArtifact, type GraphEdge, type GraphNode, type GraphPage, type ImageGenerationRequest, type ImageGenerationResponse, type InboxImportResult, type InboxImportSkip, type IngestOptions, type InitOptions, type LintFinding, type LintOptions, type OrchestrationConfig, type OrchestrationFinding, type OrchestrationProposal, type OrchestrationRole, type OrchestrationRoleConfig, type OrchestrationRoleResult, type OutputAsset, type OutputAssetRole, type OutputFormat, type OutputOrigin, type PageKind, type PageManager, type PageStatus, type Polarity, type ProviderAdapter, type ProviderCapability, type ProviderConfig, type ProviderRoleExecutorConfig, type ProviderType, type QueryOptions, type QueryResult, type ResolvedPaths, type ReviewActionResult, type RoleExecutorConfig, type SceneElement, type SceneSpec, type ScheduleController, type ScheduleJobConfig, type ScheduleStateRecord, type ScheduleTriggerConfig, type ScheduledCompileTask, type ScheduledExploreTask, type ScheduledLintTask, type ScheduledQueryTask, type ScheduledRunResult, type ScheduledTaskConfig, type SearchResult, type SourceAnalysis, type SourceAttachment, type SourceClaim, type SourceKind, type SourceManifest, type VaultConfig, type WatchController, type WatchOptions, type WatchRunRecord, type WebSearchAdapter, type WebSearchProviderConfig, type WebSearchProviderType, type 
WebSearchResult, acceptApproval, archiveCandidate, assertProviderCapability, bootstrapDemo, compileVault, createMcpServer, createProvider, createWebSearchAdapter, defaultVaultConfig, defaultVaultSchema, exploreVault, exportGraphHtml, getProviderForTask, getWebSearchAdapterForTask, getWorkspaceInfo, importInbox, ingestInput, initVault, initWorkspace, installAgent, installConfiguredAgents, lintVault, listApprovals, listCandidates, listManifests, listPages, listSchedules, loadVaultConfig, loadVaultSchema, loadVaultSchemas, promoteCandidate, providerCapabilitySchema, providerTypeSchema, queryVault, readApproval, readExtractedText, readPage, rejectApproval, resolvePaths, runSchedule, searchVault, serveSchedules, startGraphServer, startMcpServer, watchVault, webSearchProviderTypeSchema };
package/dist/index.js CHANGED
@@ -670,6 +670,7 @@ async function appendWatchRun(rootDir, run) {
670
670
  }
671
671
 
672
672
  // src/ingest.ts
673
// Default per-asset download cap in bytes (10 * 1024 * 1024); used when no maxAssetSize option is supplied.
+ var DEFAULT_MAX_ASSET_SIZE = 10 * 1024 * 1024;
673
674
  function inferKind(mimeType, filePath) {
674
675
  if (inferCodeLanguage(filePath, mimeType)) {
675
676
  return "code";
@@ -698,6 +699,12 @@ function titleFromText(fallback, content) {
698
699
  function guessMimeType(target) {
699
700
  return mime.lookup(target) || "application/octet-stream";
700
701
  }
702
+ function normalizeIngestOptions(options) {
703
+ return {
704
+ includeAssets: options?.includeAssets ?? true,
705
+ maxAssetSize: Math.max(0, Math.floor(options?.maxAssetSize ?? DEFAULT_MAX_ASSET_SIZE))
706
+ };
707
+ }
701
708
  function buildCompositeHash(payloadBytes, attachments = []) {
702
709
  if (!attachments.length) {
703
710
  return sha256(payloadBytes);
@@ -742,6 +749,40 @@ function extractMarkdownReferences(content) {
742
749
  }
743
750
  return references;
744
751
  }
752
/**
 * Turns a raw image target (markdown link destination or HTML `src`) into an
 * absolute http(s) URL without a fragment.
 *
 * @param value Raw target text; may carry surrounding whitespace, an
 *   angle-bracketed destination, and a trailing quoted title.
 * @param baseUrl URL that relative targets are resolved against.
 * @returns The absolute URL string, or null when the target is empty, is a
 *   `data:`/`mailto:` reference, cannot be parsed, or is not http/https.
 */
function normalizeRemoteReference(value, baseUrl) {
  const trimmed = value.trim();
  // An angle-bracketed destination ends at its closing ">", so take the
  // bracket content first. Stripping brackets before removing the title left
  // a stray ">" on inputs like `<url> "title"`, which was then percent-encoded.
  const bracketed = /^<([^<>]*)>/.exec(trimmed);
  const destination = bracketed
    ? bracketed[1]
    : trimmed.replace(/^<|>$/g, "").split(/\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/, 1)[0];
  // Drop any fragment; a fragment-only target leaves nothing to fetch.
  const candidate = destination.split("#")[0]?.trim();
  if (!candidate) {
    return null;
  }
  const lowered = candidate.toLowerCase();
  if (lowered.startsWith("data:") || lowered.startsWith("mailto:")) {
    return null;
  }
  let resolved;
  try {
    resolved = new URL(candidate, baseUrl);
  } catch {
    return null;
  }
  if (!/^https?:$/i.test(resolved.protocol)) {
    return null;
  }
  resolved.hash = "";
  return resolved.toString();
}
775
// Collects the normalized absolute URLs of all `![alt](target)` images found in `content`.
// Targets that normalizeRemoteReference rejects (returns null for) are left out.
// Duplicates are kept; this function does not de-duplicate.
+ function extractMarkdownImageReferences(content, baseUrl) {
776
+ const references = [];
777
// The target group is `[^)]+`, so a target is cut at its first ")" character.
+ const imagePattern = /!\[[^\]]*]\(([^)]+)\)/g;
778
+ for (const match of content.matchAll(imagePattern)) {
779
+ const normalized = normalizeRemoteReference(match[1] ?? "", baseUrl);
780
+ if (normalized) {
781
+ references.push(normalized);
782
+ }
783
+ }
784
+ return references;
785
+ }
745
786
  async function convertHtmlToMarkdown(html, url) {
746
787
  const dom = new JSDOM(html, { url });
747
788
  const article = new Readability(dom.window.document).parse();
@@ -766,6 +807,142 @@ async function readManifestByHash(manifestsDir, contentHash) {
766
807
  }
767
808
  return null;
768
809
  }
810
// Chooses a MIME type for a fetched URL from the response's Content-Type header
// (parameters after ";" are dropped) and a guess based on the URL pathname.
+ function resolveUrlMimeType(input, response) {
811
+ const headerMimeType = response.headers.get("content-type")?.split(";")[0]?.trim();
812
+ const guessedMimeType = guessMimeType(new URL(input).pathname);
813
// No usable header: rely on the pathname-based guess.
+ if (!headerMimeType) {
814
+ return guessedMimeType;
815
+ }
816
// A generic header ("text/plain" or "application/octet-stream") loses to a more specific pathname guess.
+ if ((headerMimeType === "text/plain" || headerMimeType === "application/octet-stream") && guessedMimeType !== "application/octet-stream") {
817
+ return guessedMimeType;
818
+ }
819
+ return headerMimeType;
820
+ }
821
// Builds the relative storage path for a downloaded asset from its URL:
// "<hostname><pathname>" passed through sanitizeAssetRelativePath, with the file
// name suffixed by the first 8 characters of sha256(assetUrl).
+ function buildRemoteAssetRelativePath(assetUrl, mimeType) {
822
+ const url = new URL(assetUrl);
823
+ const normalized = sanitizeAssetRelativePath(`${url.hostname}${url.pathname || "/asset"}`);
824
+ const extension = path4.posix.extname(normalized);
825
+ const directory = path4.posix.dirname(normalized);
826
+ const basename = extension ? path4.posix.basename(normalized, extension) : path4.posix.basename(normalized);
827
// When the path has no extension, derive one from the MIME type, falling back to ".bin".
+ const resolvedExtension = extension || `.${mime.extension(mimeType) || "bin"}`;
828
+ const hashedName = `${basename || "asset"}-${sha256(assetUrl).slice(0, 8)}${resolvedExtension}`;
829
+ return directory === "." ? hashedName : path4.posix.join(directory, hashedName);
830
+ }
831
+ async function readResponseBytesWithinLimit(response, maxBytes) {
832
+ const contentLength = Number.parseInt(response.headers.get("content-length") ?? "", 10);
833
+ if (Number.isFinite(contentLength) && contentLength > maxBytes) {
834
+ throw new Error(`asset exceeds max size (${contentLength} > ${maxBytes})`);
835
+ }
836
+ if (!response.body) {
837
+ const bytes = Buffer.from(await response.arrayBuffer());
838
+ if (bytes.length > maxBytes) {
839
+ throw new Error(`asset exceeds max size (${bytes.length} > ${maxBytes})`);
840
+ }
841
+ return bytes;
842
+ }
843
+ const reader = response.body.getReader();
844
+ const chunks = [];
845
+ let total = 0;
846
+ while (true) {
847
+ const { done, value } = await reader.read();
848
+ if (done) {
849
+ break;
850
+ }
851
+ total += value.byteLength;
852
+ if (total > maxBytes) {
853
+ await reader.cancel("asset exceeds configured size limit");
854
+ throw new Error(`asset exceeds max size (${total} > ${maxBytes})`);
855
+ }
856
+ chunks.push(Buffer.from(value));
857
+ }
858
+ return Buffer.concat(chunks);
859
+ }
860
/**
 * Downloads one remote image and describes it as an attachment.
 *
 * @param assetUrl Absolute URL of the image.
 * @param maxAssetSize Maximum accepted body size in bytes.
 * @returns `{ relativePath, mimeType, originalPath, bytes }` for the image.
 * @throws Error when the response is not ok, the resolved MIME type is not
 *   `image/*`, or the body exceeds `maxAssetSize`.
 */
async function fetchRemoteImageAttachment(assetUrl, maxAssetSize) {
  const response = await fetch(assetUrl);
  if (!response.ok) {
    // Release the unread body before bailing out so the connection is freed.
    await response.body?.cancel().catch(() => {});
    throw new Error(`failed with ${response.status} ${response.statusText}`);
  }
  const headerMimeType = response.headers.get("content-type")?.split(";")[0]?.trim();
  const guessedMimeType = guessMimeType(new URL(assetUrl).pathname);
  // Same precedence as resolveUrlMimeType: a generic header must not override
  // a specific extension, otherwise images served as
  // "application/octet-stream" would be rejected below.
  const headerIsGeneric = headerMimeType === "text/plain" || headerMimeType === "application/octet-stream";
  const mimeType =
    !headerMimeType || (headerIsGeneric && guessedMimeType !== "application/octet-stream")
      ? guessedMimeType
      : headerMimeType;
  if (!mimeType.startsWith("image/")) {
    await response.body?.cancel().catch(() => {});
    throw new Error(`unsupported mime type ${mimeType}`);
  }
  const bytes = await readResponseBytesWithinLimit(response, maxAssetSize);
  return {
    relativePath: buildRemoteAssetRelativePath(assetUrl, mimeType),
    mimeType,
    originalPath: assetUrl,
    bytes
  };
}
877
// Downloads the given asset URLs one after another and returns
// `{ attachments, skippedCount }`. Nothing is fetched when assets are disabled,
// the size cap is 0, or the URL list is empty.
+ async function collectRemoteImageAttachments(assetUrls, options) {
878
+ if (!options.includeAssets || options.maxAssetSize === 0 || !assetUrls.length) {
879
+ return { attachments: [], skippedCount: 0 };
880
+ }
881
+ const attachments = [];
882
+ let skippedCount = 0;
883
// The Set removes duplicate URLs so each asset is requested once.
+ for (const assetUrl of [...new Set(assetUrls)]) {
884
+ try {
885
+ attachments.push(await fetchRemoteImageAttachment(assetUrl, options.maxAssetSize));
886
// Best effort: any failure for a single asset is only counted; the error itself is discarded.
+ } catch {
887
+ skippedCount += 1;
888
+ }
889
+ }
890
+ return { attachments, skippedCount };
891
+ }
892
// Parses `html` with JSDOM and returns the normalized absolute URLs of every
// `<img>` element that has a `src` attribute. Sources that
// normalizeRemoteReference rejects are left out; duplicates are kept.
+ function extractHtmlImageReferences(html, baseUrl) {
893
+ const dom = new JSDOM(html, { url: baseUrl });
894
+ const document = dom.window.document;
895
+ const references = [];
896
+ for (const image of [...document.querySelectorAll("img[src]")]) {
897
+ const src = image.getAttribute("src");
898
// An empty `src=""` still matches the selector, so skip it explicitly.
+ if (!src) {
899
+ continue;
900
+ }
901
+ const normalized = normalizeRemoteReference(src, baseUrl);
902
+ if (normalized) {
903
+ references.push(normalized);
904
+ }
905
+ }
906
+ return references;
907
+ }
908
// Re-parses `html` with JSDOM, replaces the `src` of each `<img>` whose
// normalized URL is a key of `replacements` (a Map of normalized URL -> new
// src), and returns the serialized document. Images without a mapping are
// left untouched.
+ function rewriteHtmlImageReferences(html, baseUrl, replacements) {
909
+ const dom = new JSDOM(html, { url: baseUrl });
910
+ const document = dom.window.document;
911
+ for (const image of [...document.querySelectorAll("img[src]")]) {
912
+ const src = image.getAttribute("src");
913
+ if (!src) {
914
+ continue;
915
+ }
916
+ const normalized = normalizeRemoteReference(src, baseUrl);
917
+ const replacement = normalized ? replacements.get(normalized) : void 0;
918
+ if (replacement) {
919
+ image.setAttribute("src", replacement);
920
+ }
921
+ }
922
+ return dom.serialize();
923
+ }
924
// Rewrites `![alt](target)` images in `content`: when the target, normalized
// against `baseUrl`, is a key of `replacements`, the whole parenthesized
// target is replaced by the mapped value. Other images are returned unchanged.
+ function rewriteMarkdownImageReferences(content, baseUrl, replacements) {
925
+ return content.replace(/(!\[[^\]]*]\()([^)]+)(\))/g, (fullMatch, prefix, target, suffix) => {
926
+ const normalized = normalizeRemoteReference(target, baseUrl);
927
+ const replacement = normalized ? replacements.get(normalized) : void 0;
928
+ if (!replacement) {
929
+ return fullMatch;
930
+ }
931
// NOTE(review): the entire captured target is replaced, so a quoted title inside the parentheses is dropped.
+ return `${prefix}${replacement}${suffix}`;
932
+ });
933
+ }
934
// Rewrites `![alt](target)` images in `content` by exact lookup: the target is
// trimmed, stripped of a leading "<" / trailing ">", cut at the first
// whitespace that precedes a quoted title, and then used as a key into
// `replacements`. Unlike rewriteMarkdownImageReferences, no URL resolution is done.
+ function rewriteMarkdownImageTargets(content, replacements) {
935
+ return content.replace(/(!\[[^\]]*]\()([^)]+)(\))/g, (fullMatch, prefix, target, suffix) => {
936
+ const trimmed = target.trim().replace(/^<|>$/g, "");
937
+ const [withoutTitle] = trimmed.split(/\s+(?=(?:[^"]*"[^"]*")*[^"]*$)/, 1);
938
+ const candidate = withoutTitle.trim();
939
+ const replacement = replacements.get(candidate);
940
+ if (!replacement) {
941
+ return fullMatch;
942
+ }
943
+ return `${prefix}${replacement}${suffix}`;
944
+ });
945
+ }
769
946
  async function persistPreparedInput(rootDir, prepared, paths) {
770
947
  await ensureDir(paths.rawSourcesDir);
771
948
  await ensureDir(paths.rawAssetsDir);
@@ -817,7 +994,8 @@ async function persistPreparedInput(rootDir, prepared, paths) {
817
994
  await appendLogEntry(rootDir, "ingest", prepared.title, [
818
995
  `source_id=${sourceId}`,
819
996
  `kind=${prepared.sourceKind}`,
820
- `attachments=${manifestAttachments.length}`
997
+ `attachments=${manifestAttachments.length}`,
998
+ ...prepared.logDetails ?? []
821
999
  ]);
822
1000
  return { manifest, isNew: true };
823
1001
  }
@@ -847,33 +1025,86 @@ async function prepareFileInput(_rootDir, absoluteInput) {
847
1025
  extractedText
848
1026
  };
849
1027
  }
850
- async function prepareUrlInput(input) {
1028
+ async function prepareUrlInput(input, options) {
851
1029
  const response = await fetch(input);
852
1030
  if (!response.ok) {
853
1031
  throw new Error(`Failed to fetch ${input}: ${response.status} ${response.statusText}`);
854
1032
  }
855
- let payloadBytes = Buffer.from(await response.arrayBuffer());
856
- let mimeType = response.headers.get("content-type")?.split(";")[0]?.trim() || guessMimeType(input);
857
- let sourceKind = inferKind(mimeType, input);
858
- const language = inferCodeLanguage(input, mimeType);
1033
+ const inputUrl = new URL(input);
1034
+ const originalPayloadBytes = Buffer.from(await response.arrayBuffer());
1035
+ let payloadBytes = originalPayloadBytes;
1036
+ let mimeType = resolveUrlMimeType(input, response);
1037
+ let sourceKind = inferKind(mimeType, inputUrl.pathname);
1038
+ const language = inferCodeLanguage(inputUrl.pathname, mimeType);
859
1039
  let storedExtension = ".bin";
860
- let title = new URL(input).hostname + new URL(input).pathname;
1040
+ let title = inputUrl.hostname + inputUrl.pathname;
861
1041
  let extractedText;
1042
+ let attachments;
1043
+ let contentHash;
1044
+ const logDetails = [];
862
1045
  if (sourceKind === "html" || mimeType.startsWith("text/html")) {
863
- const html = payloadBytes.toString("utf8");
864
- const converted = await convertHtmlToMarkdown(html, input);
865
- title = converted.title;
1046
+ const html = originalPayloadBytes.toString("utf8");
1047
+ const initialConversion = await convertHtmlToMarkdown(html, input);
1048
+ title = initialConversion.title;
1049
+ let localizedHtml = html;
1050
+ let localAssetReplacements;
1051
+ if (options.includeAssets) {
1052
+ const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
1053
+ extractHtmlImageReferences(html, input),
1054
+ options
1055
+ );
1056
+ if (remoteAttachments.length) {
1057
+ attachments = remoteAttachments;
1058
+ contentHash = buildCompositeHash(originalPayloadBytes, remoteAttachments);
1059
+ const sourceId = `${slugify(title)}-${contentHash.slice(0, 8)}`;
1060
+ localAssetReplacements = new Map(
1061
+ remoteAttachments.map((attachment) => [attachment.originalPath ?? "", `../assets/${sourceId}/${attachment.relativePath}`])
1062
+ );
1063
+ localizedHtml = rewriteHtmlImageReferences(html, input, localAssetReplacements);
1064
+ logDetails.push(`remote_assets=${remoteAttachments.length}`);
1065
+ }
1066
+ if (skippedCount) {
1067
+ logDetails.push(`remote_asset_skips=${skippedCount}`);
1068
+ }
1069
+ }
1070
+ const converted = localizedHtml === html && !attachments?.length ? initialConversion : await convertHtmlToMarkdown(localizedHtml, input);
866
1071
  extractedText = converted.markdown;
867
- payloadBytes = Buffer.from(converted.markdown, "utf8");
1072
+ if (localAssetReplacements?.size) {
1073
+ const absoluteLocalAssetReplacements = new Map(
1074
+ [...localAssetReplacements.values()].map((replacement) => [new URL(replacement, input).toString(), replacement])
1075
+ );
1076
+ extractedText = rewriteMarkdownImageTargets(extractedText, absoluteLocalAssetReplacements);
1077
+ }
1078
+ payloadBytes = Buffer.from(extractedText, "utf8");
868
1079
  mimeType = "text/markdown";
869
1080
  sourceKind = "markdown";
870
1081
  storedExtension = ".md";
871
1082
  } else {
872
- const extension = path4.extname(new URL(input).pathname);
1083
+ const extension = path4.extname(inputUrl.pathname);
873
1084
  storedExtension = extension || `.${mime.extension(mimeType) || "bin"}`;
874
1085
  if (sourceKind === "markdown" || sourceKind === "text" || sourceKind === "code") {
875
1086
  extractedText = payloadBytes.toString("utf8");
876
- title = titleFromText(title || new URL(input).hostname, extractedText);
1087
+ title = titleFromText(title || inputUrl.hostname, extractedText);
1088
+ if (sourceKind === "markdown" && options.includeAssets) {
1089
+ const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
1090
+ extractMarkdownImageReferences(extractedText, input),
1091
+ options
1092
+ );
1093
+ if (remoteAttachments.length) {
1094
+ attachments = remoteAttachments;
1095
+ contentHash = buildCompositeHash(originalPayloadBytes, remoteAttachments);
1096
+ const sourceId = `${slugify(title)}-${contentHash.slice(0, 8)}`;
1097
+ const replacements = new Map(
1098
+ remoteAttachments.map((attachment) => [attachment.originalPath ?? "", `../assets/${sourceId}/${attachment.relativePath}`])
1099
+ );
1100
+ extractedText = rewriteMarkdownImageReferences(extractedText, input, replacements);
1101
+ payloadBytes = Buffer.from(extractedText, "utf8");
1102
+ logDetails.push(`remote_assets=${remoteAttachments.length}`);
1103
+ }
1104
+ if (skippedCount) {
1105
+ logDetails.push(`remote_asset_skips=${skippedCount}`);
1106
+ }
1107
+ }
877
1108
  }
878
1109
  }
879
1110
  return {
@@ -885,7 +1116,10 @@ async function prepareUrlInput(input) {
885
1116
  mimeType,
886
1117
  storedExtension,
887
1118
  payloadBytes,
888
- extractedText
1119
+ extractedText,
1120
+ attachments,
1121
+ contentHash,
1122
+ logDetails
889
1123
  };
890
1124
  }
891
1125
  async function collectInboxAttachmentRefs(inputDir, files) {
@@ -975,9 +1209,10 @@ async function prepareInboxMarkdownInput(absolutePath, attachmentRefs) {
975
1209
  function isSupportedInboxKind(sourceKind) {
976
1210
  return ["markdown", "text", "html", "pdf", "image"].includes(sourceKind);
977
1211
  }
978
// Ingests a single input into the workspace at `rootDir` and returns the
// manifest from persistPreparedInput. Inputs matching /^https?:\/\// are
// fetched as URLs; anything else is resolved as a path relative to `rootDir`.
// In the new version the normalized `options` are passed to the URL branch only.
- async function ingestInput(rootDir, input) {
1212
+ async function ingestInput(rootDir, input, options) {
979
1213
  const { paths } = await initWorkspace(rootDir);
980
- const prepared = /^https?:\/\//i.test(input) ? await prepareUrlInput(input) : await prepareFileInput(rootDir, path4.resolve(rootDir, input));
1214
+ const normalizedOptions = normalizeIngestOptions(options);
1215
+ const prepared = /^https?:\/\//i.test(input) ? await prepareUrlInput(input, normalizedOptions) : await prepareFileInput(rootDir, path4.resolve(rootDir, input));
981
1216
  const result = await persistPreparedInput(rootDir, prepared, paths);
982
1217
  return result.manifest;
983
1218
  }
@@ -4605,6 +4840,7 @@ async function syncVaultArtifacts(rootDir, input) {
4605
4840
  const promotedPageIds = [];
4606
4841
  const candidateHistory = {};
4607
4842
  const records = [];
4843
+ const promoteCandidates = input.promoteCandidates ?? true;
4608
4844
  for (const manifest of input.manifests) {
4609
4845
  const analysis = input.analyses.find((item) => item.sourceId === manifest.sourceId);
4610
4846
  if (!analysis) {
@@ -4723,7 +4959,7 @@ async function syncVaultArtifacts(rootDir, input) {
4723
4959
  const projectIds = scopedProjectIdsFromSources(sourceIds, input.sourceProjects);
4724
4960
  const schemaHash = effectiveHashForProject(input.schemas, projectIds[0] ?? null);
4725
4961
  const previousEntry = input.previousState?.candidateHistory?.[pageId];
4726
- const promoted = previousEntry?.status === "active" || shouldPromoteCandidate(previousEntry, sourceIds);
4962
+ const promoted = previousEntry?.status === "active" || promoteCandidates && shouldPromoteCandidate(previousEntry, sourceIds);
4727
4963
  const relativePath = promoted ? activeAggregatePath(itemKind, slug) : candidatePagePathFor(itemKind, slug);
4728
4964
  const fallbackPaths = [
4729
4965
  path13.join(paths.wikiDir, activeAggregatePath(itemKind, slug)),
@@ -5355,7 +5591,8 @@ async function refreshVaultAfterOutputSave(rootDir) {
5355
5591
  outputHashes: pageHashes(storedOutputs),
5356
5592
  insightHashes: pageHashes(storedInsights),
5357
5593
  previousState: await readJsonFile(paths.compileStatePath),
5358
- approve: false
5594
+ approve: false,
5595
+ promoteCandidates: false
5359
5596
  });
5360
5597
  }
5361
5598
  function resolveApprovalTargets(manifest, targets) {
@@ -6529,7 +6766,7 @@ async function bootstrapDemo(rootDir, input) {
6529
6766
  }
6530
6767
 
6531
6768
  // src/mcp.ts
6532
- var SERVER_VERSION = "0.1.10";
6769
+ var SERVER_VERSION = "0.1.12";
6533
6770
  async function createMcpServer(rootDir) {
6534
6771
  const server = new McpServer({
6535
6772
  name: "swarmvault",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@swarmvaultai/engine",
3
- "version": "0.1.10",
3
+ "version": "0.1.12",
4
4
  "description": "Core engine for SwarmVault: ingest, compile, query, lint, and provider abstractions.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",