@swarmvaultai/engine 0.1.21 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -225,7 +225,7 @@ Running the engine produces a local workspace with these main areas:
225
225
  - `wiki/projects/`: generated project rollups over canonical pages
226
226
  - `wiki/candidates/`: staged concept and entity pages awaiting confirmation on a later compile
227
227
  - `state/manifests/`: source manifests
228
- - `state/extracts/`: extracted text
228
+ - `state/extracts/`: extracted markdown plus JSON sidecars describing extractor kind, warnings, PDF page counts, and image-vision metadata
229
229
  - `state/analyses/`: model analysis output
230
230
  - `state/code-index.json`: repo-aware code module aliases and local resolution data
231
231
  - `state/benchmark.json`: latest benchmark/trust summary for the current vault
@@ -240,6 +240,7 @@ Running the engine produces a local workspace with these main areas:
240
240
  Saved outputs are indexed immediately into the graph page registry and search index, then linked back into compiled source, concept, and entity pages immediately through the lightweight artifact sync path. New concept and entity pages stage into `wiki/candidates/` first and promote to active pages on the next matching compile. Insight pages are indexed into search and page reads, but compile does not mutate them. Project-scoped pages receive `project_ids`, project tags, and layered root-plus-project schema hashes when all contributing sources resolve to the same configured project.
241
241
  Code sources also emit module, symbol, and parser-backed rationale nodes into `state/graph.json`, so local imports, exports, inheritance, same-module call edges, and rationale links are queryable through the same viewer and search pipeline.
242
242
  Ingest, inbox import, compile, query, lint, review, and candidate operations also append human-readable entries to `wiki/log.md`.
243
+ PDF sources now go through a local text-extraction pass before analysis, and image sources use the configured `visionProvider` for structured OCR/diagram extraction when a real multimodal provider is available. When image extraction is unavailable, SwarmVault records an explicit warning in the extraction sidecar and carries that warning forward into analysis instead of silently treating the source as empty.
243
244
 
244
245
  ## Notes
245
246
 
package/dist/index.d.ts CHANGED
@@ -208,6 +208,37 @@ interface SourceAttachment {
208
208
  mimeType: string;
209
209
  originalPath?: string;
210
210
  }
211
+ type ExtractionKind = "plain_text" | "html_readability" | "pdf_text" | "image_vision";
212
+ interface ExtractionTerm {
213
+ name: string;
214
+ description: string;
215
+ }
216
+ interface ExtractionClaim {
217
+ text: string;
218
+ confidence: number;
219
+ polarity: Polarity;
220
+ }
221
+ interface ImageVisionExtraction {
222
+ title?: string;
223
+ summary: string;
224
+ text: string;
225
+ concepts: ExtractionTerm[];
226
+ entities: ExtractionTerm[];
227
+ claims: ExtractionClaim[];
228
+ questions: string[];
229
+ }
230
+ interface SourceExtractionArtifact {
231
+ extractor: ExtractionKind;
232
+ sourceKind: SourceKind;
233
+ mimeType: string;
234
+ producedAt: string;
235
+ providerId?: string;
236
+ providerModel?: string;
237
+ warnings?: string[];
238
+ pageCount?: number;
239
+ metadata?: Record<string, string>;
240
+ vision?: ImageVisionExtraction;
241
+ }
211
242
  interface IngestOptions {
212
243
  includeAssets?: boolean;
213
244
  maxAssetSize?: number;
@@ -240,6 +271,8 @@ interface SourceManifest {
240
271
  url?: string;
241
272
  storedPath: string;
242
273
  extractedTextPath?: string;
274
+ extractedMetadataPath?: string;
275
+ extractionHash?: string;
243
276
  mimeType: string;
244
277
  contentHash: string;
245
278
  createdAt: string;
@@ -323,6 +356,7 @@ interface SourceAnalysis {
323
356
  analysisVersion: number;
324
357
  sourceId: string;
325
358
  sourceHash: string;
359
+ extractionHash?: string;
326
360
  schemaHash: string;
327
361
  title: string;
328
362
  summary: string;
@@ -991,4 +1025,4 @@ declare function getWatchStatus(rootDir: string): Promise<WatchStatusResult>;
991
1025
  declare function createWebSearchAdapter(id: string, config: WebSearchProviderConfig, rootDir: string): Promise<WebSearchAdapter>;
992
1026
  declare function getWebSearchAdapterForTask(rootDir: string, task: "deepLintProvider"): Promise<WebSearchAdapter>;
993
1027
 
994
- export { type AddOptions, type AddResult, type AgentType, type AnalyzedTerm, type ApprovalChangeType, type ApprovalDetail, type ApprovalEntry, type ApprovalEntryDetail, type ApprovalEntryStatus, type ApprovalManifest, type ApprovalSummary, type BenchmarkArtifact, type BenchmarkOptions, type BenchmarkQuestionResult, type CandidateRecord, type ChartDatum, type ChartSpec, type ClaimStatus, type CodeAnalysis, type CodeDiagnostic, type CodeImport, type CodeIndexArtifact, type CodeIndexEntry, type CodeLanguage, type CodeSymbol, type CodeSymbolKind, type CommandRoleExecutorConfig, type CompileOptions, type CompileResult, type CompileState, type DirectoryIngestResult, type DirectoryIngestSkip, type EvidenceClass, type ExploreOptions, type ExploreResult, type ExploreStepResult, type Freshness, type GenerationAttachment, type GenerationRequest, type GenerationResponse, type GitHookStatus, type GraphArtifact, type GraphEdge, type GraphExplainNeighbor, type GraphExplainResult, type GraphExportFormat, type GraphExportResult, type GraphNode, type GraphPage, type GraphPathResult, type GraphQueryMatch, type GraphQueryResult, type ImageGenerationRequest, type ImageGenerationResponse, type InboxImportResult, type InboxImportSkip, type IngestOptions, type InitOptions, type InstallAgentOptions, type LintFinding, type LintOptions, type OrchestrationConfig, type OrchestrationFinding, type OrchestrationProposal, type OrchestrationRole, type OrchestrationRoleConfig, type OrchestrationRoleResult, type OutputAsset, type OutputAssetRole, type OutputFormat, type OutputOrigin, type PageKind, type PageManager, type PageStatus, type PendingSemanticRefreshEntry, type Polarity, type ProviderAdapter, type ProviderCapability, type ProviderConfig, type ProviderRoleExecutorConfig, type ProviderType, type QueryOptions, type QueryResult, type RepoSyncResult, type ResolvedPaths, type ReviewActionResult, type RoleExecutorConfig, type SceneElement, type SceneSpec, type ScheduleController, type ScheduleJobConfig, type ScheduleStateRecord, type ScheduleTriggerConfig, type ScheduledCompileTask, type ScheduledExploreTask, type ScheduledLintTask, type ScheduledQueryTask, type ScheduledRunResult, type ScheduledTaskConfig, type SearchResult, type SourceAnalysis, type SourceAttachment, type SourceClaim, type SourceKind, type SourceManifest, type SourceRationale, type VaultConfig, type WatchController, type WatchOptions, type WatchRepoSyncResult, type WatchRunRecord, type WatchStatusResult, type WebSearchAdapter, type WebSearchProviderConfig, type WebSearchProviderType, type WebSearchResult, acceptApproval, addInput, agentTypeSchema, archiveCandidate, assertProviderCapability, benchmarkVault, bootstrapDemo, compileVault, createMcpServer, createProvider, createWebSearchAdapter, defaultVaultConfig, defaultVaultSchema, explainGraphVault, exploreVault, exportGraphFormat, exportGraphHtml, getGitHookStatus, getProviderForTask, getWatchStatus, getWebSearchAdapterForTask, getWorkspaceInfo, importInbox, ingestDirectory, ingestInput, initVault, initWorkspace, installAgent, installConfiguredAgents, installGitHooks, lintVault, listApprovals, listCandidates, listGodNodes, listManifests, listPages, listSchedules, listTrackedRepoRoots, loadVaultConfig, loadVaultSchema, loadVaultSchemas, pathGraphVault, promoteCandidate, providerCapabilitySchema, providerTypeSchema, queryGraphVault, queryVault, readApproval, readExtractedText, readPage, rejectApproval, resolvePaths, runSchedule, runWatchCycle, searchVault, serveSchedules, startGraphServer, startMcpServer, syncTrackedRepos, syncTrackedReposForWatch, uninstallGitHooks, watchVault, webSearchProviderTypeSchema };
1028
+ export { type AddOptions, type AddResult, type AgentType, type AnalyzedTerm, type ApprovalChangeType, type ApprovalDetail, type ApprovalEntry, type ApprovalEntryDetail, type ApprovalEntryStatus, type ApprovalManifest, type ApprovalSummary, type BenchmarkArtifact, type BenchmarkOptions, type BenchmarkQuestionResult, type CandidateRecord, type ChartDatum, type ChartSpec, type ClaimStatus, type CodeAnalysis, type CodeDiagnostic, type CodeImport, type CodeIndexArtifact, type CodeIndexEntry, type CodeLanguage, type CodeSymbol, type CodeSymbolKind, type CommandRoleExecutorConfig, type CompileOptions, type CompileResult, type CompileState, type DirectoryIngestResult, type DirectoryIngestSkip, type EvidenceClass, type ExploreOptions, type ExploreResult, type ExploreStepResult, type ExtractionClaim, type ExtractionKind, type ExtractionTerm, type Freshness, type GenerationAttachment, type GenerationRequest, type GenerationResponse, type GitHookStatus, type GraphArtifact, type GraphEdge, type GraphExplainNeighbor, type GraphExplainResult, type GraphExportFormat, type GraphExportResult, type GraphNode, type GraphPage, type GraphPathResult, type GraphQueryMatch, type GraphQueryResult, type ImageGenerationRequest, type ImageGenerationResponse, type ImageVisionExtraction, type InboxImportResult, type InboxImportSkip, type IngestOptions, type InitOptions, type InstallAgentOptions, type LintFinding, type LintOptions, type OrchestrationConfig, type OrchestrationFinding, type OrchestrationProposal, type OrchestrationRole, type OrchestrationRoleConfig, type OrchestrationRoleResult, type OutputAsset, type OutputAssetRole, type OutputFormat, type OutputOrigin, type PageKind, type PageManager, type PageStatus, type PendingSemanticRefreshEntry, type Polarity, type ProviderAdapter, type ProviderCapability, type ProviderConfig, type ProviderRoleExecutorConfig, type ProviderType, type QueryOptions, type QueryResult, type RepoSyncResult, type ResolvedPaths, type ReviewActionResult, type RoleExecutorConfig, type SceneElement, type SceneSpec, type ScheduleController, type ScheduleJobConfig, type ScheduleStateRecord, type ScheduleTriggerConfig, type ScheduledCompileTask, type ScheduledExploreTask, type ScheduledLintTask, type ScheduledQueryTask, type ScheduledRunResult, type ScheduledTaskConfig, type SearchResult, type SourceAnalysis, type SourceAttachment, type SourceClaim, type SourceExtractionArtifact, type SourceKind, type SourceManifest, type SourceRationale, type VaultConfig, type WatchController, type WatchOptions, type WatchRepoSyncResult, type WatchRunRecord, type WatchStatusResult, type WebSearchAdapter, type WebSearchProviderConfig, type WebSearchProviderType, type WebSearchResult, acceptApproval, addInput, agentTypeSchema, archiveCandidate, assertProviderCapability, benchmarkVault, bootstrapDemo, compileVault, createMcpServer, createProvider, createWebSearchAdapter, defaultVaultConfig, defaultVaultSchema, explainGraphVault, exploreVault, exportGraphFormat, exportGraphHtml, getGitHookStatus, getProviderForTask, getWatchStatus, getWebSearchAdapterForTask, getWorkspaceInfo, importInbox, ingestDirectory, ingestInput, initVault, initWorkspace, installAgent, installConfiguredAgents, installGitHooks, lintVault, listApprovals, listCandidates, listGodNodes, listManifests, listPages, listSchedules, listTrackedRepoRoots, loadVaultConfig, loadVaultSchema, loadVaultSchemas, pathGraphVault, promoteCandidate, providerCapabilitySchema, providerTypeSchema, queryGraphVault, queryVault, readApproval, readExtractedText, readPage, rejectApproval, resolvePaths, runSchedule, runWatchCycle, searchVault, serveSchedules, startGraphServer, startMcpServer, syncTrackedRepos, syncTrackedReposForWatch, uninstallGitHooks, watchVault, webSearchProviderTypeSchema };