@swarmvaultai/engine 0.2.1 → 0.3.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -186,17 +186,21 @@ This matters because many "OpenAI-compatible" backends only implement part of th
186
186
  - `reloadManagedSources(rootDir, { id, all, compile, brief, maxPages, maxDepth })` re-syncs one managed source or the full registry
187
187
  - `deleteManagedSource(rootDir, id)` removes a managed-source registry entry and transient sync state without deleting canonical vault artifacts
188
188
  - `ingestInput(rootDir, input, { includeAssets, maxAssetSize })` ingests a local file path or URL
189
+ - `ingestInputDetailed(rootDir, input, { includeAssets, maxAssetSize })` returns a summary envelope with `created`, `updated`, `unchanged`, and `removed` manifests when one input expands into multiple sources
189
190
  - `addInput(rootDir, input, { author, contributor })` captures supported URLs into normalized markdown before ingesting them, or falls back to generic URL ingest
190
191
  - `ingestDirectory(rootDir, inputDir, { repoRoot, include, exclude, maxFiles, gitignore, extractClasses })` recursively ingests a local directory as a repo-aware code/content source tree
191
192
  - `importInbox(rootDir, inputDir?)` recursively imports supported inbox files plus markdown and HTML browser-clipper style bundles
192
193
  - managed sources support local directories, public GitHub repo root URLs, and bounded same-domain docs hubs
193
194
  - registry data lives in `state/sources.json`, working state lives under `state/sources/<id>/`, and source briefs are written to `wiki/outputs/source-briefs/<id>.md`
195
+ - EPUB inputs split into chapter-level manifests with shared group metadata so books stay navigable instead of becoming one giant source
196
+ - CSV and TSV inputs produce bounded tabular summaries with delimiter-aware previews and compact column hints
197
+ - XLSX inputs extract workbook-level and sheet-level previews, while PPTX inputs extract slide text plus speaker notes when present
194
198
  - JavaScript, JSX, TypeScript, TSX, Python, Go, Rust, Java, Kotlin, Scala, Lua, Zig, C#, C, C++, PHP, Ruby, and PowerShell inputs are treated as code sources and compiled into both source pages and `wiki/code/` module pages
195
199
  - `.rst` and `.rest` inputs are treated as first-class text sources with lightweight heading and directive normalization before analysis
196
200
  - code manifests can carry `repoRelativePath`, and compile writes `state/code-index.json` so local imports can resolve across an ingested repo tree
197
201
  - repo-aware manifests, graph nodes, and graph pages can also carry `sourceClass` so first-party, third-party, resource, and generated material can be filtered and reported separately
198
202
  - HTML and markdown URL ingests localize remote image references into `raw/assets/<sourceId>/` by default and rewrite the stored markdown to local relative paths
199
- - PDF and DOCX ingests now write extracted-text and metadata sidecars under `state/extracts/`, and image ingest keeps the same sidecar model for vision extraction
203
+ - PDF, DOCX, EPUB, CSV/TSV, XLSX, and PPTX ingests write extracted-text and metadata sidecars under `state/extracts/`, and image ingest keeps the same sidecar model for vision extraction
200
204
  - Tree-sitter-backed languages now verify runtime and grammar compatibility per language; failures stay local to the affected source and surface as diagnostics instead of aborting the whole compile
201
205
 
202
206
  ### Compile + Query
package/dist/index.d.ts CHANGED
@@ -54,7 +54,7 @@ type PageStatus = "draft" | "candidate" | "active" | "archived";
54
54
  type PageManager = "system" | "human";
55
55
  type ApprovalEntryStatus = "pending" | "accepted" | "rejected";
56
56
  type ApprovalChangeType = "create" | "update" | "delete" | "promote";
57
- type SourceKind = "markdown" | "text" | "pdf" | "image" | "html" | "docx" | "binary" | "code";
57
+ type SourceKind = "markdown" | "text" | "pdf" | "image" | "html" | "docx" | "epub" | "csv" | "xlsx" | "pptx" | "binary" | "code";
58
58
  type SourceCaptureType = "arxiv" | "doi" | "tweet" | "article" | "url";
59
59
  type SourceClass = "first_party" | "third_party" | "resource" | "generated";
60
60
  type ManagedSourceKind = "directory" | "github_repo" | "crawl_url";
@@ -231,7 +231,7 @@ interface SourceAttachment {
231
231
  mimeType: string;
232
232
  originalPath?: string;
233
233
  }
234
- type ExtractionKind = "plain_text" | "html_readability" | "pdf_text" | "docx_text" | "image_vision";
234
+ type ExtractionKind = "plain_text" | "html_readability" | "pdf_text" | "docx_text" | "epub_text" | "csv_text" | "xlsx_text" | "pptx_text" | "image_vision";
235
235
  interface ExtractionTerm {
236
236
  name: string;
237
237
  description: string;
@@ -284,6 +284,15 @@ interface DirectoryIngestResult {
284
284
  updated: SourceManifest[];
285
285
  skipped: DirectoryIngestSkip[];
286
286
  }
287
+ interface InputIngestResult {
288
+ input: string;
289
+ scannedCount: number;
290
+ created: SourceManifest[];
291
+ updated: SourceManifest[];
292
+ unchanged: SourceManifest[];
293
+ removed: SourceManifest[];
294
+ skipped: DirectoryIngestSkip[];
295
+ }
287
296
  interface SourceManifest {
288
297
  sourceId: string;
289
298
  title: string;
@@ -301,6 +310,14 @@ interface SourceManifest {
301
310
  extractionHash?: string;
302
311
  mimeType: string;
303
312
  contentHash: string;
313
+ semanticHash: string;
314
+ sourceGroupId?: string;
315
+ sourceGroupTitle?: string;
316
+ sourcePartKey?: string;
317
+ partIndex?: number;
318
+ partCount?: number;
319
+ partTitle?: string;
320
+ details?: Record<string, string>;
304
321
  createdAt: string;
305
322
  updatedAt: string;
306
323
  attachments?: SourceAttachment[];
@@ -410,6 +427,7 @@ interface SourceAnalysis {
410
427
  analysisVersion: number;
411
428
  sourceId: string;
412
429
  sourceHash: string;
430
+ semanticHash: string;
413
431
  extractionHash?: string;
414
432
  schemaHash: string;
415
433
  title: string;
@@ -480,6 +498,7 @@ interface GraphPage {
480
498
  backlinks: string[];
481
499
  schemaHash: string;
482
500
  sourceHashes: Record<string, string>;
501
+ sourceSemanticHashes: Record<string, string>;
483
502
  relatedPageIds: string[];
484
503
  relatedNodeIds: string[];
485
504
  relatedSourceIds: string[];
@@ -779,6 +798,7 @@ interface CompileState {
779
798
  projectConfigHash: string;
780
799
  analyses: Record<string, string>;
781
800
  sourceHashes: Record<string, string>;
801
+ sourceSemanticHashes: Record<string, string>;
782
802
  sourceProjects: Record<string, string | null>;
783
803
  outputHashes: Record<string, string>;
784
804
  insightHashes: Record<string, string>;
@@ -1202,6 +1222,7 @@ declare function uninstallGitHooks(rootDir: string): Promise<GitHookStatus>;
1202
1222
  declare function listTrackedRepoRoots(rootDir: string): Promise<string[]>;
1203
1223
  declare function syncTrackedRepos(rootDir: string, options?: IngestOptions, repoRoots?: string[]): Promise<RepoSyncResult>;
1204
1224
  declare function syncTrackedReposForWatch(rootDir: string, options?: IngestOptions, repoRoots?: string[]): Promise<WatchRepoSyncResult>;
1225
+ declare function ingestInputDetailed(rootDir: string, input: string, options?: IngestOptions): Promise<InputIngestResult>;
1205
1226
  declare function ingestInput(rootDir: string, input: string, options?: IngestOptions): Promise<SourceManifest>;
1206
1227
  declare function addInput(rootDir: string, input: string, options?: AddOptions): Promise<AddResult>;
1207
1228
  declare function ingestDirectory(rootDir: string, inputDir: string, options?: IngestOptions): Promise<DirectoryIngestResult>;
@@ -1292,11 +1313,15 @@ declare function bootstrapDemo(rootDir: string, input?: string): Promise<{
1292
1313
  compile?: CompileResult;
1293
1314
  }>;
1294
1315
 
1295
- declare function startGraphServer(rootDir: string, port?: number): Promise<{
1316
+ declare function startGraphServer(rootDir: string, port?: number, options?: {
1317
+ full?: boolean;
1318
+ }): Promise<{
1296
1319
  port: number;
1297
1320
  close: () => Promise<void>;
1298
1321
  }>;
1299
- declare function exportGraphHtml(rootDir: string, outputPath: string): Promise<string>;
1322
+ declare function exportGraphHtml(rootDir: string, outputPath: string, options?: {
1323
+ full?: boolean;
1324
+ }): Promise<string>;
1300
1325
 
1301
1326
  type WatchCycleResult = {
1302
1327
  watchedRepoRoots: string[];
@@ -1319,4 +1344,4 @@ declare function getWatchStatus(rootDir: string): Promise<WatchStatusResult>;
1319
1344
  declare function createWebSearchAdapter(id: string, config: WebSearchProviderConfig, rootDir: string): Promise<WebSearchAdapter>;
1320
1345
  declare function getWebSearchAdapterForTask(rootDir: string, task: "deepLintProvider"): Promise<WebSearchAdapter>;
1321
1346
 
1322
- export { type AddOptions, type AddResult, type AgentType, type AnalyzedTerm, type ApprovalChangeType, type ApprovalDetail, type ApprovalEntry, type ApprovalEntryDetail, type ApprovalEntryStatus, type ApprovalManifest, type ApprovalSummary, type BenchmarkArtifact, type BenchmarkOptions, type BenchmarkQuestionResult, type BenchmarkSummary, type CandidateRecord, type ChartDatum, type ChartSpec, type ClaimStatus, type CodeAnalysis, type CodeDiagnostic, type CodeImport, type CodeIndexArtifact, type CodeIndexEntry, type CodeLanguage, type CodeSymbol, type CodeSymbolKind, type CommandRoleExecutorConfig, type CompileOptions, type CompileResult, type CompileState, type DirectoryIngestResult, type DirectoryIngestSkip, type EmbeddingCacheArtifact, type EmbeddingCacheEntry, type EvidenceClass, type ExploreOptions, type ExploreResult, type ExploreStepResult, type ExtractionClaim, type ExtractionKind, type ExtractionTerm, type Freshness, type GenerationAttachment, type GenerationRequest, type GenerationResponse, type GitHookStatus, type GraphArtifact, type GraphEdge, type GraphExplainNeighbor, type GraphExplainResult, type GraphExportFormat, type GraphExportResult, type GraphHyperedge, type GraphNode, type GraphPage, type GraphPathResult, type GraphPushCounts, type GraphPushNeo4jOptions, type GraphPushResult, type GraphQueryMatch, type GraphQueryResult, type GraphReportArtifact, type ImageGenerationRequest, type ImageGenerationResponse, type ImageVisionExtraction, type InboxImportResult, type InboxImportSkip, type IngestOptions, type InitOptions, type InstallAgentOptions, type InstallAgentResult, type LintFinding, type LintOptions, type ManagedSourceAddOptions, type ManagedSourceAddResult, type ManagedSourceDeleteResult, type ManagedSourceKind, type ManagedSourceRecord, type ManagedSourceReloadOptions, type ManagedSourceReloadResult, type ManagedSourceStatus, type ManagedSourceSyncCounts, type ManagedSourcesArtifact, type Neo4jGraphSinkConfig, type OrchestrationConfig, type 
OrchestrationFinding, type OrchestrationProposal, type OrchestrationRole, type OrchestrationRoleConfig, type OrchestrationRoleResult, type OutputAsset, type OutputAssetRole, type OutputFormat, type OutputOrigin, type PageKind, type PageManager, type PageStatus, type PendingSemanticRefreshEntry, type Polarity, type ProviderAdapter, type ProviderCapability, type ProviderConfig, type ProviderRoleExecutorConfig, type ProviderType, type QueryOptions, type QueryResult, type RepoSyncResult, type ResolvedPaths, type ReviewActionResult, type RoleExecutorConfig, type SceneElement, type SceneSpec, type ScheduleController, type ScheduleJobConfig, type ScheduleStateRecord, type ScheduleTriggerConfig, type ScheduledCompileTask, type ScheduledExploreTask, type ScheduledLintTask, type ScheduledQueryTask, type ScheduledRunResult, type ScheduledTaskConfig, type SearchResult, type SourceAnalysis, type SourceAttachment, type SourceCaptureType, type SourceClaim, type SourceClass, type SourceExtractionArtifact, type SourceKind, type SourceManifest, type SourceRationale, type VaultConfig, type WatchController, type WatchOptions, type WatchRepoSyncResult, type WatchRunRecord, type WatchStatusResult, type WebSearchAdapter, type WebSearchProviderConfig, type WebSearchProviderType, type WebSearchResult, acceptApproval, addInput, addManagedSource, agentTypeSchema, archiveCandidate, assertProviderCapability, benchmarkVault, bootstrapDemo, compileVault, createMcpServer, createProvider, createWebSearchAdapter, defaultVaultConfig, defaultVaultSchema, deleteManagedSource, explainGraphVault, exploreVault, exportGraphFormat, exportGraphHtml, getGitHookStatus, getProviderForTask, getWatchStatus, getWebSearchAdapterForTask, getWorkspaceInfo, importInbox, ingestDirectory, ingestInput, initVault, initWorkspace, installAgent, installConfiguredAgents, installGitHooks, lintVault, listApprovals, listCandidates, listGodNodes, listGraphHyperedges, listManagedSourceRecords, listManifests, listPages, 
listSchedules, listTrackedRepoRoots, loadVaultConfig, loadVaultSchema, loadVaultSchemas, pathGraphVault, promoteCandidate, providerCapabilitySchema, providerTypeSchema, pushGraphNeo4j, queryGraphVault, queryVault, readApproval, readExtractedText, readGraphReport, readPage, rejectApproval, reloadManagedSources, resolvePaths, runSchedule, runWatchCycle, searchVault, serveSchedules, startGraphServer, startMcpServer, syncTrackedRepos, syncTrackedReposForWatch, uninstallGitHooks, watchVault, webSearchProviderTypeSchema };
1347
+ export { type AddOptions, type AddResult, type AgentType, type AnalyzedTerm, type ApprovalChangeType, type ApprovalDetail, type ApprovalEntry, type ApprovalEntryDetail, type ApprovalEntryStatus, type ApprovalManifest, type ApprovalSummary, type BenchmarkArtifact, type BenchmarkOptions, type BenchmarkQuestionResult, type BenchmarkSummary, type CandidateRecord, type ChartDatum, type ChartSpec, type ClaimStatus, type CodeAnalysis, type CodeDiagnostic, type CodeImport, type CodeIndexArtifact, type CodeIndexEntry, type CodeLanguage, type CodeSymbol, type CodeSymbolKind, type CommandRoleExecutorConfig, type CompileOptions, type CompileResult, type CompileState, type DirectoryIngestResult, type DirectoryIngestSkip, type EmbeddingCacheArtifact, type EmbeddingCacheEntry, type EvidenceClass, type ExploreOptions, type ExploreResult, type ExploreStepResult, type ExtractionClaim, type ExtractionKind, type ExtractionTerm, type Freshness, type GenerationAttachment, type GenerationRequest, type GenerationResponse, type GitHookStatus, type GraphArtifact, type GraphEdge, type GraphExplainNeighbor, type GraphExplainResult, type GraphExportFormat, type GraphExportResult, type GraphHyperedge, type GraphNode, type GraphPage, type GraphPathResult, type GraphPushCounts, type GraphPushNeo4jOptions, type GraphPushResult, type GraphQueryMatch, type GraphQueryResult, type GraphReportArtifact, type ImageGenerationRequest, type ImageGenerationResponse, type ImageVisionExtraction, type InboxImportResult, type InboxImportSkip, type IngestOptions, type InitOptions, type InputIngestResult, type InstallAgentOptions, type InstallAgentResult, type LintFinding, type LintOptions, type ManagedSourceAddOptions, type ManagedSourceAddResult, type ManagedSourceDeleteResult, type ManagedSourceKind, type ManagedSourceRecord, type ManagedSourceReloadOptions, type ManagedSourceReloadResult, type ManagedSourceStatus, type ManagedSourceSyncCounts, type ManagedSourcesArtifact, type Neo4jGraphSinkConfig, type 
OrchestrationConfig, type OrchestrationFinding, type OrchestrationProposal, type OrchestrationRole, type OrchestrationRoleConfig, type OrchestrationRoleResult, type OutputAsset, type OutputAssetRole, type OutputFormat, type OutputOrigin, type PageKind, type PageManager, type PageStatus, type PendingSemanticRefreshEntry, type Polarity, type ProviderAdapter, type ProviderCapability, type ProviderConfig, type ProviderRoleExecutorConfig, type ProviderType, type QueryOptions, type QueryResult, type RepoSyncResult, type ResolvedPaths, type ReviewActionResult, type RoleExecutorConfig, type SceneElement, type SceneSpec, type ScheduleController, type ScheduleJobConfig, type ScheduleStateRecord, type ScheduleTriggerConfig, type ScheduledCompileTask, type ScheduledExploreTask, type ScheduledLintTask, type ScheduledQueryTask, type ScheduledRunResult, type ScheduledTaskConfig, type SearchResult, type SourceAnalysis, type SourceAttachment, type SourceCaptureType, type SourceClaim, type SourceClass, type SourceExtractionArtifact, type SourceKind, type SourceManifest, type SourceRationale, type VaultConfig, type WatchController, type WatchOptions, type WatchRepoSyncResult, type WatchRunRecord, type WatchStatusResult, type WebSearchAdapter, type WebSearchProviderConfig, type WebSearchProviderType, type WebSearchResult, acceptApproval, addInput, addManagedSource, agentTypeSchema, archiveCandidate, assertProviderCapability, benchmarkVault, bootstrapDemo, compileVault, createMcpServer, createProvider, createWebSearchAdapter, defaultVaultConfig, defaultVaultSchema, deleteManagedSource, explainGraphVault, exploreVault, exportGraphFormat, exportGraphHtml, getGitHookStatus, getProviderForTask, getWatchStatus, getWebSearchAdapterForTask, getWorkspaceInfo, importInbox, ingestDirectory, ingestInput, ingestInputDetailed, initVault, initWorkspace, installAgent, installConfiguredAgents, installGitHooks, lintVault, listApprovals, listCandidates, listGodNodes, listGraphHyperedges, 
listManagedSourceRecords, listManifests, listPages, listSchedules, listTrackedRepoRoots, loadVaultConfig, loadVaultSchema, loadVaultSchemas, pathGraphVault, promoteCandidate, providerCapabilitySchema, providerTypeSchema, pushGraphNeo4j, queryGraphVault, queryVault, readApproval, readExtractedText, readGraphReport, readPage, rejectApproval, reloadManagedSources, resolvePaths, runSchedule, runWatchCycle, searchVault, serveSchedules, startGraphServer, startMcpServer, syncTrackedRepos, syncTrackedReposForWatch, uninstallGitHooks, watchVault, webSearchProviderTypeSchema };