@swarmvaultai/engine 0.2.2 → 0.4.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -186,17 +186,21 @@ This matters because many "OpenAI-compatible" backends only implement part of th
186
186
  - `reloadManagedSources(rootDir, { id, all, compile, brief, maxPages, maxDepth })` re-syncs one managed source or the full registry
187
187
  - `deleteManagedSource(rootDir, id)` removes a managed-source registry entry and transient sync state without deleting canonical vault artifacts
188
188
  - `ingestInput(rootDir, input, { includeAssets, maxAssetSize })` ingests a local file path or URL
189
+ - `ingestInputDetailed(rootDir, input, { includeAssets, maxAssetSize })` returns a summary envelope with `created`, `updated`, `unchanged`, and `removed` manifests when one input expands into multiple sources
189
190
  - `addInput(rootDir, input, { author, contributor })` captures supported URLs into normalized markdown before ingesting them, or falls back to generic URL ingest
190
191
  - `ingestDirectory(rootDir, inputDir, { repoRoot, include, exclude, maxFiles, gitignore, extractClasses })` recursively ingests a local directory as a repo-aware code/content source tree
191
192
  - `importInbox(rootDir, inputDir?)` recursively imports supported inbox files plus markdown and HTML browser-clipper style bundles
192
193
  - managed sources support local directories, public GitHub repo root URLs, and bounded same-domain docs hubs
193
194
  - registry data lives in `state/sources.json`, working state lives under `state/sources/<id>/`, and source briefs are written to `wiki/outputs/source-briefs/<id>.md`
195
+ - EPUB inputs split into chapter-level manifests with shared group metadata so books stay navigable instead of becoming one giant source
196
+ - CSV and TSV inputs produce bounded tabular summaries with delimiter-aware previews and compact column hints
197
+ - XLSX inputs extract workbook-level and sheet-level previews, while PPTX inputs extract slide text plus speaker notes when present
194
198
  - JavaScript, JSX, TypeScript, TSX, Python, Go, Rust, Java, Kotlin, Scala, Lua, Zig, C#, C, C++, PHP, Ruby, and PowerShell inputs are treated as code sources and compiled into both source pages and `wiki/code/` module pages
195
199
  - `.rst` and `.rest` inputs are treated as first-class text sources with lightweight heading and directive normalization before analysis
196
200
  - code manifests can carry `repoRelativePath`, and compile writes `state/code-index.json` so local imports can resolve across an ingested repo tree
197
201
  - repo-aware manifests, graph nodes, and graph pages can also carry `sourceClass` so first-party, third-party, resource, and generated material can be filtered and reported separately
198
202
  - HTML and markdown URL ingests localize remote image references into `raw/assets/<sourceId>/` by default and rewrite the stored markdown to local relative paths
199
- - PDF and DOCX ingests now write extracted-text and metadata sidecars under `state/extracts/`, and image ingest keeps the same sidecar model for vision extraction
203
+ - PDF, DOCX, EPUB, CSV/TSV, XLSX, and PPTX ingests write extracted-text and metadata sidecars under `state/extracts/`, and image ingest keeps the same sidecar model for vision extraction
200
204
  - Tree-sitter-backed languages now verify runtime and grammar compatibility per language; failures stay local to the affected source and surface as diagnostics instead of aborting the whole compile
201
205
 
202
206
  ### Compile + Query
package/dist/index.d.ts CHANGED
@@ -54,10 +54,10 @@ type PageStatus = "draft" | "candidate" | "active" | "archived";
54
54
  type PageManager = "system" | "human";
55
55
  type ApprovalEntryStatus = "pending" | "accepted" | "rejected";
56
56
  type ApprovalChangeType = "create" | "update" | "delete" | "promote";
57
- type SourceKind = "markdown" | "text" | "pdf" | "image" | "html" | "docx" | "binary" | "code";
57
+ type SourceKind = "markdown" | "text" | "pdf" | "image" | "html" | "docx" | "epub" | "csv" | "xlsx" | "pptx" | "transcript" | "chat_export" | "email" | "calendar" | "binary" | "code";
58
58
  type SourceCaptureType = "arxiv" | "doi" | "tweet" | "article" | "url";
59
59
  type SourceClass = "first_party" | "third_party" | "resource" | "generated";
60
- type ManagedSourceKind = "directory" | "github_repo" | "crawl_url";
60
+ type ManagedSourceKind = "directory" | "file" | "github_repo" | "crawl_url";
61
61
  type ManagedSourceStatus = "ready" | "missing" | "error";
62
62
  type CodeLanguage = "javascript" | "jsx" | "typescript" | "tsx" | "python" | "go" | "rust" | "java" | "kotlin" | "scala" | "lua" | "zig" | "csharp" | "c" | "cpp" | "php" | "ruby" | "powershell";
63
63
  type CodeSymbolKind = "function" | "class" | "interface" | "type_alias" | "enum" | "variable" | "struct" | "trait";
@@ -231,7 +231,7 @@ interface SourceAttachment {
231
231
  mimeType: string;
232
232
  originalPath?: string;
233
233
  }
234
- type ExtractionKind = "plain_text" | "html_readability" | "pdf_text" | "docx_text" | "image_vision";
234
+ type ExtractionKind = "plain_text" | "html_readability" | "pdf_text" | "docx_text" | "epub_text" | "csv_text" | "xlsx_text" | "pptx_text" | "transcript_text" | "chat_export_text" | "email_text" | "calendar_text" | "image_vision";
235
235
  interface ExtractionTerm {
236
236
  name: string;
237
237
  description: string;
@@ -284,6 +284,15 @@ interface DirectoryIngestResult {
284
284
  updated: SourceManifest[];
285
285
  skipped: DirectoryIngestSkip[];
286
286
  }
287
+ interface InputIngestResult {
288
+ input: string;
289
+ scannedCount: number;
290
+ created: SourceManifest[];
291
+ updated: SourceManifest[];
292
+ unchanged: SourceManifest[];
293
+ removed: SourceManifest[];
294
+ skipped: DirectoryIngestSkip[];
295
+ }
287
296
  interface SourceManifest {
288
297
  sourceId: string;
289
298
  title: string;
@@ -302,6 +311,13 @@ interface SourceManifest {
302
311
  mimeType: string;
303
312
  contentHash: string;
304
313
  semanticHash: string;
314
+ sourceGroupId?: string;
315
+ sourceGroupTitle?: string;
316
+ sourcePartKey?: string;
317
+ partIndex?: number;
318
+ partCount?: number;
319
+ partTitle?: string;
320
+ details?: Record<string, string>;
305
321
  createdAt: string;
306
322
  updatedAt: string;
307
323
  attachments?: SourceAttachment[];
@@ -746,6 +762,7 @@ interface InstallAgentResult {
746
762
  interface ManagedSourceAddOptions {
747
763
  compile?: boolean;
748
764
  brief?: boolean;
765
+ review?: boolean;
749
766
  maxPages?: number;
750
767
  maxDepth?: number;
751
768
  }
@@ -757,15 +774,25 @@ interface ManagedSourceAddResult {
757
774
  source: ManagedSourceRecord;
758
775
  compile?: CompileResult;
759
776
  briefGenerated: boolean;
777
+ review?: SourceReviewResult;
760
778
  }
761
779
  interface ManagedSourceReloadResult {
762
780
  sources: ManagedSourceRecord[];
763
781
  compile?: CompileResult;
764
782
  briefPaths: string[];
783
+ reviews: SourceReviewResult[];
765
784
  }
766
785
  interface ManagedSourceDeleteResult {
767
786
  removed: ManagedSourceRecord;
768
787
  }
788
+ interface SourceReviewResult {
789
+ sourceId: string;
790
+ pageId: string;
791
+ reviewPath: string;
792
+ staged: boolean;
793
+ approvalId?: string;
794
+ approvalDir?: string;
795
+ }
769
796
  interface GitHookStatus {
770
797
  repoRoot: string | null;
771
798
  postCommit: "installed" | "not_installed" | "other_content";
@@ -1206,6 +1233,7 @@ declare function uninstallGitHooks(rootDir: string): Promise<GitHookStatus>;
1206
1233
  declare function listTrackedRepoRoots(rootDir: string): Promise<string[]>;
1207
1234
  declare function syncTrackedRepos(rootDir: string, options?: IngestOptions, repoRoots?: string[]): Promise<RepoSyncResult>;
1208
1235
  declare function syncTrackedReposForWatch(rootDir: string, options?: IngestOptions, repoRoots?: string[]): Promise<WatchRepoSyncResult>;
1236
+ declare function ingestInputDetailed(rootDir: string, input: string, options?: IngestOptions): Promise<InputIngestResult>;
1209
1237
  declare function ingestInput(rootDir: string, input: string, options?: IngestOptions): Promise<SourceManifest>;
1210
1238
  declare function addInput(rootDir: string, input: string, options?: AddOptions): Promise<AddResult>;
1211
1239
  declare function ingestDirectory(rootDir: string, inputDir: string, options?: IngestOptions): Promise<DirectoryIngestResult>;
@@ -1242,11 +1270,35 @@ interface LoadedVaultSchemas {
1242
1270
  declare function loadVaultSchemas(rootDir: string): Promise<LoadedVaultSchemas>;
1243
1271
  declare function loadVaultSchema(rootDir: string): Promise<VaultSchema>;
1244
1272
 
1273
+ type SourceReviewScope = {
1274
+ id: string;
1275
+ title: string;
1276
+ sourceIds: string[];
1277
+ };
1278
+ declare function reviewSourceScope(rootDir: string, scope: SourceReviewScope): Promise<SourceReviewResult>;
1279
+ declare function reviewManagedSource(rootDir: string, id: string): Promise<SourceReviewResult>;
1245
1280
  declare function listManagedSourceRecords(rootDir: string): Promise<ManagedSourceRecord[]>;
1246
1281
  declare function addManagedSource(rootDir: string, input: string, options?: ManagedSourceAddOptions): Promise<ManagedSourceAddResult>;
1247
1282
  declare function reloadManagedSources(rootDir: string, options?: ManagedSourceReloadOptions): Promise<ManagedSourceReloadResult>;
1248
1283
  declare function deleteManagedSource(rootDir: string, id: string): Promise<ManagedSourceDeleteResult>;
1249
1284
 
1285
+ type GeneratedOutputArtifacts = {
1286
+ answer: string;
1287
+ outputAssets: OutputAsset[];
1288
+ assetFiles: Array<{
1289
+ relativePath: string;
1290
+ content: string | Uint8Array;
1291
+ encoding?: BufferEncoding;
1292
+ }>;
1293
+ };
1294
+ declare function stageGeneratedOutputPages(rootDir: string, stagedPages: Array<{
1295
+ page: GraphPage;
1296
+ content: string;
1297
+ assetFiles?: GeneratedOutputArtifacts["assetFiles"];
1298
+ }>): Promise<{
1299
+ approvalId: string;
1300
+ approvalDir: string;
1301
+ }>;
1250
1302
  declare function listApprovals(rootDir: string): Promise<ApprovalSummary[]>;
1251
1303
  declare function readApproval(rootDir: string, approvalId: string, options?: {
1252
1304
  diff?: boolean;
@@ -1327,4 +1379,4 @@ declare function getWatchStatus(rootDir: string): Promise<WatchStatusResult>;
1327
1379
  declare function createWebSearchAdapter(id: string, config: WebSearchProviderConfig, rootDir: string): Promise<WebSearchAdapter>;
1328
1380
  declare function getWebSearchAdapterForTask(rootDir: string, task: "deepLintProvider"): Promise<WebSearchAdapter>;
1329
1381
 
1330
- export { type AddOptions, type AddResult, type AgentType, type AnalyzedTerm, type ApprovalChangeType, type ApprovalDetail, type ApprovalEntry, type ApprovalEntryDetail, type ApprovalEntryStatus, type ApprovalManifest, type ApprovalSummary, type BenchmarkArtifact, type BenchmarkOptions, type BenchmarkQuestionResult, type BenchmarkSummary, type CandidateRecord, type ChartDatum, type ChartSpec, type ClaimStatus, type CodeAnalysis, type CodeDiagnostic, type CodeImport, type CodeIndexArtifact, type CodeIndexEntry, type CodeLanguage, type CodeSymbol, type CodeSymbolKind, type CommandRoleExecutorConfig, type CompileOptions, type CompileResult, type CompileState, type DirectoryIngestResult, type DirectoryIngestSkip, type EmbeddingCacheArtifact, type EmbeddingCacheEntry, type EvidenceClass, type ExploreOptions, type ExploreResult, type ExploreStepResult, type ExtractionClaim, type ExtractionKind, type ExtractionTerm, type Freshness, type GenerationAttachment, type GenerationRequest, type GenerationResponse, type GitHookStatus, type GraphArtifact, type GraphEdge, type GraphExplainNeighbor, type GraphExplainResult, type GraphExportFormat, type GraphExportResult, type GraphHyperedge, type GraphNode, type GraphPage, type GraphPathResult, type GraphPushCounts, type GraphPushNeo4jOptions, type GraphPushResult, type GraphQueryMatch, type GraphQueryResult, type GraphReportArtifact, type ImageGenerationRequest, type ImageGenerationResponse, type ImageVisionExtraction, type InboxImportResult, type InboxImportSkip, type IngestOptions, type InitOptions, type InstallAgentOptions, type InstallAgentResult, type LintFinding, type LintOptions, type ManagedSourceAddOptions, type ManagedSourceAddResult, type ManagedSourceDeleteResult, type ManagedSourceKind, type ManagedSourceRecord, type ManagedSourceReloadOptions, type ManagedSourceReloadResult, type ManagedSourceStatus, type ManagedSourceSyncCounts, type ManagedSourcesArtifact, type Neo4jGraphSinkConfig, type OrchestrationConfig, type 
OrchestrationFinding, type OrchestrationProposal, type OrchestrationRole, type OrchestrationRoleConfig, type OrchestrationRoleResult, type OutputAsset, type OutputAssetRole, type OutputFormat, type OutputOrigin, type PageKind, type PageManager, type PageStatus, type PendingSemanticRefreshEntry, type Polarity, type ProviderAdapter, type ProviderCapability, type ProviderConfig, type ProviderRoleExecutorConfig, type ProviderType, type QueryOptions, type QueryResult, type RepoSyncResult, type ResolvedPaths, type ReviewActionResult, type RoleExecutorConfig, type SceneElement, type SceneSpec, type ScheduleController, type ScheduleJobConfig, type ScheduleStateRecord, type ScheduleTriggerConfig, type ScheduledCompileTask, type ScheduledExploreTask, type ScheduledLintTask, type ScheduledQueryTask, type ScheduledRunResult, type ScheduledTaskConfig, type SearchResult, type SourceAnalysis, type SourceAttachment, type SourceCaptureType, type SourceClaim, type SourceClass, type SourceExtractionArtifact, type SourceKind, type SourceManifest, type SourceRationale, type VaultConfig, type WatchController, type WatchOptions, type WatchRepoSyncResult, type WatchRunRecord, type WatchStatusResult, type WebSearchAdapter, type WebSearchProviderConfig, type WebSearchProviderType, type WebSearchResult, acceptApproval, addInput, addManagedSource, agentTypeSchema, archiveCandidate, assertProviderCapability, benchmarkVault, bootstrapDemo, compileVault, createMcpServer, createProvider, createWebSearchAdapter, defaultVaultConfig, defaultVaultSchema, deleteManagedSource, explainGraphVault, exploreVault, exportGraphFormat, exportGraphHtml, getGitHookStatus, getProviderForTask, getWatchStatus, getWebSearchAdapterForTask, getWorkspaceInfo, importInbox, ingestDirectory, ingestInput, initVault, initWorkspace, installAgent, installConfiguredAgents, installGitHooks, lintVault, listApprovals, listCandidates, listGodNodes, listGraphHyperedges, listManagedSourceRecords, listManifests, listPages, 
listSchedules, listTrackedRepoRoots, loadVaultConfig, loadVaultSchema, loadVaultSchemas, pathGraphVault, promoteCandidate, providerCapabilitySchema, providerTypeSchema, pushGraphNeo4j, queryGraphVault, queryVault, readApproval, readExtractedText, readGraphReport, readPage, rejectApproval, reloadManagedSources, resolvePaths, runSchedule, runWatchCycle, searchVault, serveSchedules, startGraphServer, startMcpServer, syncTrackedRepos, syncTrackedReposForWatch, uninstallGitHooks, watchVault, webSearchProviderTypeSchema };
1382
+ export { type AddOptions, type AddResult, type AgentType, type AnalyzedTerm, type ApprovalChangeType, type ApprovalDetail, type ApprovalEntry, type ApprovalEntryDetail, type ApprovalEntryStatus, type ApprovalManifest, type ApprovalSummary, type BenchmarkArtifact, type BenchmarkOptions, type BenchmarkQuestionResult, type BenchmarkSummary, type CandidateRecord, type ChartDatum, type ChartSpec, type ClaimStatus, type CodeAnalysis, type CodeDiagnostic, type CodeImport, type CodeIndexArtifact, type CodeIndexEntry, type CodeLanguage, type CodeSymbol, type CodeSymbolKind, type CommandRoleExecutorConfig, type CompileOptions, type CompileResult, type CompileState, type DirectoryIngestResult, type DirectoryIngestSkip, type EmbeddingCacheArtifact, type EmbeddingCacheEntry, type EvidenceClass, type ExploreOptions, type ExploreResult, type ExploreStepResult, type ExtractionClaim, type ExtractionKind, type ExtractionTerm, type Freshness, type GenerationAttachment, type GenerationRequest, type GenerationResponse, type GitHookStatus, type GraphArtifact, type GraphEdge, type GraphExplainNeighbor, type GraphExplainResult, type GraphExportFormat, type GraphExportResult, type GraphHyperedge, type GraphNode, type GraphPage, type GraphPathResult, type GraphPushCounts, type GraphPushNeo4jOptions, type GraphPushResult, type GraphQueryMatch, type GraphQueryResult, type GraphReportArtifact, type ImageGenerationRequest, type ImageGenerationResponse, type ImageVisionExtraction, type InboxImportResult, type InboxImportSkip, type IngestOptions, type InitOptions, type InputIngestResult, type InstallAgentOptions, type InstallAgentResult, type LintFinding, type LintOptions, type ManagedSourceAddOptions, type ManagedSourceAddResult, type ManagedSourceDeleteResult, type ManagedSourceKind, type ManagedSourceRecord, type ManagedSourceReloadOptions, type ManagedSourceReloadResult, type ManagedSourceStatus, type ManagedSourceSyncCounts, type ManagedSourcesArtifact, type Neo4jGraphSinkConfig, type 
OrchestrationConfig, type OrchestrationFinding, type OrchestrationProposal, type OrchestrationRole, type OrchestrationRoleConfig, type OrchestrationRoleResult, type OutputAsset, type OutputAssetRole, type OutputFormat, type OutputOrigin, type PageKind, type PageManager, type PageStatus, type PendingSemanticRefreshEntry, type Polarity, type ProviderAdapter, type ProviderCapability, type ProviderConfig, type ProviderRoleExecutorConfig, type ProviderType, type QueryOptions, type QueryResult, type RepoSyncResult, type ResolvedPaths, type ReviewActionResult, type RoleExecutorConfig, type SceneElement, type SceneSpec, type ScheduleController, type ScheduleJobConfig, type ScheduleStateRecord, type ScheduleTriggerConfig, type ScheduledCompileTask, type ScheduledExploreTask, type ScheduledLintTask, type ScheduledQueryTask, type ScheduledRunResult, type ScheduledTaskConfig, type SearchResult, type SourceAnalysis, type SourceAttachment, type SourceCaptureType, type SourceClaim, type SourceClass, type SourceExtractionArtifact, type SourceKind, type SourceManifest, type SourceRationale, type SourceReviewResult, type VaultConfig, type WatchController, type WatchOptions, type WatchRepoSyncResult, type WatchRunRecord, type WatchStatusResult, type WebSearchAdapter, type WebSearchProviderConfig, type WebSearchProviderType, type WebSearchResult, acceptApproval, addInput, addManagedSource, agentTypeSchema, archiveCandidate, assertProviderCapability, benchmarkVault, bootstrapDemo, compileVault, createMcpServer, createProvider, createWebSearchAdapter, defaultVaultConfig, defaultVaultSchema, deleteManagedSource, explainGraphVault, exploreVault, exportGraphFormat, exportGraphHtml, getGitHookStatus, getProviderForTask, getWatchStatus, getWebSearchAdapterForTask, getWorkspaceInfo, importInbox, ingestDirectory, ingestInput, ingestInputDetailed, initVault, initWorkspace, installAgent, installConfiguredAgents, installGitHooks, lintVault, listApprovals, listCandidates, listGodNodes, 
listGraphHyperedges, listManagedSourceRecords, listManifests, listPages, listSchedules, listTrackedRepoRoots, loadVaultConfig, loadVaultSchema, loadVaultSchemas, pathGraphVault, promoteCandidate, providerCapabilitySchema, providerTypeSchema, pushGraphNeo4j, queryGraphVault, queryVault, readApproval, readExtractedText, readGraphReport, readPage, rejectApproval, reloadManagedSources, resolvePaths, reviewManagedSource, reviewSourceScope, runSchedule, runWatchCycle, searchVault, serveSchedules, stageGeneratedOutputPages, startGraphServer, startMcpServer, syncTrackedRepos, syncTrackedReposForWatch, uninstallGitHooks, watchVault, webSearchProviderTypeSchema };