@semiont/make-meaning 0.4.11 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,8 +12,9 @@ This package implements the actor model from [ARCHITECTURE.md](../../docs/ARCHIT
12
12
 
13
13
  - **Stower** (write) — the single write gateway to the Knowledge Base; handles all resource and annotation mutations and job lifecycle events
14
14
  - **Browser** (read) — handles all KB read queries: resources, annotations, events, annotation history, referenced-by lookups, entity type listing, and directory browse (merging filesystem listings with KB metadata)
15
- - **Gatherer** (context assembly) — assembles gathered context for annotations (`gather:requested`) and resources (`gather:resource-requested`)
15
+ - **Gatherer** (context assembly) — assembles gathered context for annotations (`gather:requested`) and resources (`gather:resource-requested`); searches vectors for semantically similar passages (adds `semanticContext` to `GatheredContext`)
16
16
  - **Matcher** (search/link) — context-driven candidate search with multi-source retrieval, composite structural scoring, and optional LLM semantic scoring
17
+ - **Smelter** (embed) — subscribes to resource/annotation events, chunks text, embeds via `@semiont/vectors`, emits `embedding:computed` events on the EventBus (persisted by Stower), and indexes the vectors into the vector store (Qdrant)
17
18
  - **CloneTokenManager** (yield) — manages clone token lifecycle for resource cloning
18
19
 
19
20
  All actors subscribe to the EventBus via RxJS pipelines. They expose only `initialize()` and `stop()` — no public business methods. Callers communicate with actors by putting events on the bus.
@@ -43,7 +44,7 @@ const makeMeaning = await startMakeMeaning(project, config, eventBus, logger);
43
44
 
44
45
  // Access components
45
46
  const { knowledgeSystem, jobQueue } = makeMeaning;
46
- const { kb, stower, browser, gatherer, matcher, cloneTokenManager } = knowledgeSystem;
47
+ const { kb, stower, browser, gatherer, matcher, smelter, cloneTokenManager } = knowledgeSystem;
47
48
 
48
49
  // Graceful shutdown
49
50
  await makeMeaning.stop();
@@ -51,11 +52,12 @@ await makeMeaning.stop();
51
52
 
52
53
  This single call initializes:
53
54
  - **KnowledgeSystem** — groups the Knowledge Base and its actors
54
- - **KnowledgeBase** — groups EventStore, ViewStorage, WorkingTreeStore, GraphDatabase, and GraphDBConsumer
55
+ - **KnowledgeBase** — groups EventStore, ViewStorage, WorkingTreeStore, GraphDatabase, GraphDBConsumer, and optionally VectorStore and Smelter
55
56
  - **Stower** — subscribes to write commands on EventBus
56
57
  - **Browser** — subscribes to all KB read queries and directory browse requests on EventBus
57
- - **Gatherer** — subscribes to annotation and resource gather requests on EventBus
58
+ - **Gatherer** — subscribes to annotation and resource gather requests on EventBus; searches vectors for semantically similar passages
58
59
  - **Matcher** — subscribes to candidate search requests on EventBus
60
+ - **Smelter** — subscribes to resource/annotation events, chunks text, embeds, and indexes into Qdrant
59
61
  - **CloneTokenManager** — subscribes to clone token operations on EventBus
60
62
  - **JobQueue** — background job processing queue + job status subscription
61
63
  - **6 annotation workers** — poll job queue for async AI tasks
@@ -98,12 +100,18 @@ graph TB
98
100
  BROWSER["Browser<br/>(read)"]
99
101
  GATHERER["Gatherer<br/>(context assembly)"]
100
102
  MATCHER["Matcher<br/>(search/link)"]
103
+ SMELTER["Smelter<br/>(embed)"]
101
104
  CTM["CloneTokenManager<br/>(clone)"]
102
105
  KB["Knowledge Base"]
106
+ VECTORS["Vector Store<br/>(Qdrant)"]
103
107
  STOWER -->|persist| KB
104
108
  BROWSER -->|query| KB
105
109
  GATHERER -->|query| KB
110
+ GATHERER -->|search| VECTORS
106
111
  MATCHER -->|query| KB
112
+ MATCHER -->|search| VECTORS
113
+ SMELTER -->|embed & index| VECTORS
114
+ SMELTER -->|read| KB
107
115
  CTM -->|query| KB
108
116
  end
109
117
 
@@ -111,12 +119,14 @@ graph TB
111
119
  BUS -->|"browse:resource-requested, browse:resources-requested<br/>browse:annotations-requested, browse:annotation-requested<br/>browse:events-requested, browse:annotation-history-requested<br/>browse:referenced-by-requested, browse:entity-types-requested<br/>browse:directory-requested"| BROWSER
112
120
  BUS -->|"gather:requested<br/>gather:resource-requested"| GATHERER
113
121
  BUS -->|"match:search-requested"| MATCHER
122
+ BUS -->|"yield:created, mark:created,<br/>mark:body-updated"| SMELTER
114
123
  BUS -->|"yield:clone-token-requested<br/>yield:clone-resource-requested<br/>yield:clone-create"| CTM
115
124
 
116
125
  STOWER -->|"yield:created, yield:updated, yield:moved<br/>mark:created, mark:deleted, mark:body-updated<br/>mark:entity-type-added, ..."| BUS
117
126
  BROWSER -->|"browse:resource-result, browse:resources-result<br/>browse:annotations-result, browse:annotation-result<br/>browse:events-result, browse:annotation-history-result<br/>browse:referenced-by-result, browse:entity-types-result<br/>browse:directory-result"| BUS
118
127
  GATHERER -->|"gather:complete, gather:failed<br/>gather:resource-complete, gather:resource-failed"| BUS
119
128
  MATCHER -->|"match:search-results, match:search-failed"| BUS
129
+ SMELTER -->|"embedding:computed,<br/>embedding:deleted"| BUS
120
130
  CTM -->|"yield:clone-token-generated<br/>yield:clone-resource-result<br/>yield:clone-created"| BUS
121
131
 
122
132
  classDef bus fill:#e8a838,stroke:#b07818,stroke-width:3px,color:#000,font-weight:bold
@@ -125,8 +135,10 @@ graph TB
125
135
  classDef caller fill:#4a90a4,stroke:#2c5f7a,stroke-width:2px,color:#fff
126
136
 
127
137
  class BUS bus
128
- class STOWER,BROWSER,GATHERER,MATCHER,CTM actor
138
+ classDef vectorstore fill:#6b8e9d,stroke:#4a6a7a,stroke-width:2px,color:#fff
139
+ class STOWER,BROWSER,GATHERER,MATCHER,SMELTER,CTM actor
129
140
  class KB kb
141
+ class VECTORS vectorstore
130
142
  class Routes,Workers,EBC caller
131
143
  ```
132
144
 
@@ -134,7 +146,7 @@ graph TB
134
146
 
135
147
  The **Knowledge System** binds the Knowledge Base to its actors. Nothing outside the Knowledge System reads or writes the Knowledge Base directly.
136
148
 
137
- The **Knowledge Base** is an inert store — it has no intelligence, no goals, no decisions. It groups five subsystems:
149
+ The **Knowledge Base** is an inert store — it has no intelligence, no goals, no decisions. It groups five core subsystems and two optional ones:
138
150
 
139
151
  | Store | Implementation | Purpose |
140
152
  |-------|---------------|---------|
@@ -143,12 +155,15 @@ The **Knowledge Base** is an inert store — it has no intelligence, no goals, n
143
155
  | **Content Store** | `WorkingTreeStore` | Working-tree files addressed by URI |
144
156
  | **Graph** | `GraphDatabase` | Eventually consistent relationship projection |
145
157
  | **Graph Consumer** | `GraphDBConsumer` | Event-to-graph synchronization pipeline |
158
+ | **Vectors** *(optional)* | `VectorStore` | Semantic vector index (Qdrant + memory) via `@semiont/vectors` |
159
+ | **Smelter** *(optional)* | `Smelter` | Embedding pipeline actor (chunk, embed, index) |
146
160
 
147
161
  ```typescript
148
162
  import { createKnowledgeBase } from '@semiont/make-meaning';
149
163
 
150
164
  const kb = await createKnowledgeBase(eventStore, project, graphDb, logger);
151
165
  // kb.eventStore, kb.views, kb.content, kb.graph, kb.graphConsumer
166
+ // kb.vectors (optional), kb.smelter (optional)
152
167
  ```
153
168
 
154
169
  ### EventBus Ownership
@@ -184,8 +199,9 @@ The EventBus is created by the backend (or script) and passed into `startMakeMea
184
199
 
185
200
  - `Stower` — Write gateway actor
186
201
  - `Browser` — Read actor (all KB queries, directory listings merged with KB metadata)
187
- - `Gatherer` — Context assembly actor (annotation and resource gather flows)
202
+ - `Gatherer` — Context assembly actor (annotation and resource gather flows; vector semantic search)
188
203
  - `Matcher` — Search/link actor (context-driven candidate search with structural + semantic scoring)
204
+ - `Smelter` — Embedding pipeline actor (chunk, embed, persist, index into vector store)
189
205
  - `CloneTokenManager` — Clone token lifecycle actor (yield domain)
190
206
 
191
207
  ### Operations
@@ -214,6 +230,7 @@ The EventBus is created by the backend (or script) and passed into `startMakeMea
214
230
  - **[@semiont/graph](../graph/)** — Graph database abstraction
215
231
  - **[@semiont/ontology](../ontology/)** — Schema definitions for tags
216
232
  - **[@semiont/inference](../inference/)** — AI primitives (generateText)
233
+ - **[@semiont/vectors](../vectors/)** — Vector store abstraction (Qdrant + memory) and embedding providers (Voyage, Ollama)
217
234
  - **[@semiont/jobs](../jobs/)** — Job queue and annotation workers
218
235
 
219
236
  ## Testing
package/dist/index.d.ts CHANGED
@@ -1,10 +1,11 @@
1
1
  import { JobQueue, ReferenceAnnotationWorker, GenerationWorker, HighlightAnnotationWorker, AssessmentAnnotationWorker, CommentAnnotationWorker, TagAnnotationWorker } from '@semiont/jobs';
2
2
  import { SemiontProject } from '@semiont/core/node';
3
- import { GraphServiceConfig, Logger, StoredEvent, ResourceId, components, EventBus, AnnotationId, UserId, CreationMethod, ResourceAnnotations, AnnotationCategory, GraphPath, GraphConnection } from '@semiont/core';
3
+ import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, Logger, StoredEvent, ResourceId, EventBus, components, AnnotationId, UserId, CreationMethod, ResourceAnnotations, AnnotationCategory, GraphPath, GraphConnection } from '@semiont/core';
4
4
  export { AssembledAnnotation, applyBodyOperations, assembleAnnotation } from '@semiont/core';
5
5
  import { EventStore, ViewStorage } from '@semiont/event-sourcing';
6
6
  import { WorkingTreeStore } from '@semiont/content';
7
7
  import { GraphDatabase } from '@semiont/graph';
8
+ import { VectorStore, EmbeddingProvider, ChunkingConfig } from '@semiont/vectors';
8
9
  import { InferenceClient } from '@semiont/inference';
9
10
  import { Writable, Readable } from 'node:stream';
10
11
 
@@ -44,6 +45,8 @@ interface WorkerInferenceConfig {
44
45
  interface MakeMeaningConfig {
45
46
  services: {
46
47
  graph?: GraphServiceConfig;
48
+ vectors?: VectorsServiceConfig;
49
+ embedding?: EmbeddingServiceConfig;
47
50
  };
48
51
  /** Per-actor inference config */
49
52
  actors?: ActorInferenceConfig;
@@ -143,17 +146,84 @@ declare class GraphDBConsumer {
143
146
  shutdown(): Promise<void>;
144
147
  }
145
148
 
149
+ /**
150
+ * Smelter Actor
151
+ *
152
+ * Takes raw content, refines it into embedding vectors, persists them as events,
153
+ * and indexes them into the vector store. Peer to the Graph Consumer.
154
+ *
155
+ * Pipeline:
156
+ * 1. Subscribe to resource and annotation events from the EventStore
157
+ * 2. Chunk resource text into overlapping passages
158
+ * 3. Embed each chunk via the configured EmbeddingProvider
159
+ * 4. Emit embedding:computed events on the EventBus (persisted by Stower)
160
+ * 5. Index vectors into the VectorStore (Qdrant) for fast similarity search
161
+ *
162
+ * Uses the same burst-buffer RxJS pipeline as GraphDBConsumer.
163
+ */
164
+
165
+ declare class Smelter {
166
+ private eventStore;
167
+ private eventBus;
168
+ private vectorStore;
169
+ private embeddingProvider;
170
+ private contentStore;
171
+ private static readonly SMELTER_RELEVANT_EVENTS;
172
+ private static readonly BURST_WINDOW_MS;
173
+ private static readonly MAX_BATCH_SIZE;
174
+ private static readonly IDLE_TIMEOUT_MS;
175
+ private _globalSubscription;
176
+ private eventSubject;
177
+ private pipelineSubscription;
178
+ private readonly logger;
179
+ private readonly chunkingConfig;
180
+ constructor(eventStore: EventStore, eventBus: EventBus, vectorStore: VectorStore, embeddingProvider: EmbeddingProvider, contentStore: WorkingTreeStore, logger: Logger, chunkingConfig?: ChunkingConfig);
181
+ initialize(): Promise<void>;
182
+ stop(): Promise<void>;
183
+ /**
184
+ * Rebuild the vector store from persisted embedding events in the event log.
185
+ * Reads all embedding.computed / embedding.deleted events and replays them.
186
+ * Bypasses the live pipeline — reads directly from the event store.
187
+ */
188
+ rebuildAll(): Promise<void>;
189
+ private processBatch;
190
+ /**
191
+ * Batch-optimized processing for consecutive events of the same type.
192
+ * Collects all texts across events, embeds in a single embedBatch() call,
193
+ * then distributes results back to their respective resources/annotations.
194
+ */
195
+ private applyBatchByType;
196
+ /**
197
+ * Batch-embed chunks from multiple resource.created events in a single
198
+ * embedBatch() call, then emit events and index per resource.
199
+ */
200
+ private batchResourceCreated;
201
+ /**
202
+ * Batch-embed exact texts from multiple annotation.added events in a
203
+ * single embedBatch() call, then emit events and index per annotation.
204
+ */
205
+ private batchAnnotationAdded;
206
+ private safeProcessEvent;
207
+ private processEvent;
208
+ private handleResourceCreated;
209
+ private handleResourceArchived;
210
+ private handleAnnotationAdded;
211
+ private handleAnnotationRemoved;
212
+ }
213
+
146
214
  /**
147
215
  * Knowledge Base
148
216
  *
149
217
  * The durable store that records what intelligent actors decide.
150
- * Groups the five KB subsystems from ARCHITECTURE.md:
218
+ * Groups the KB subsystems from ARCHITECTURE.md:
151
219
  *
152
220
  * - Event Log (immutable append-only) — via EventStore
153
221
  * - Materialized Views (fast single-doc queries) — via ViewStorage
154
222
  * - Content Store (working-tree files, URI-addressed) — via WorkingTreeStore
155
223
  * - Graph (eventually consistent relationship projection) — via GraphDatabase
156
224
  * - Graph Consumer (event-to-graph projection) — via GraphDBConsumer
225
+ * - Vectors (semantic search) — via VectorStore (optional)
226
+ * - Smelter (event-to-vector projection) — via Smelter (optional)
157
227
  *
158
228
  * Only actors inside the Knowledge System (such as the Browser, Gatherer, Matcher, and Smelter) read from these stores directly.
159
229
  */
@@ -164,9 +234,18 @@ interface KnowledgeBase {
164
234
  content: WorkingTreeStore;
165
235
  graph: GraphDatabase;
166
236
  graphConsumer: GraphDBConsumer;
237
+ vectors?: VectorStore;
238
+ smelter?: Smelter;
167
239
  projectionsDir: string;
168
240
  }
169
- declare function createKnowledgeBase(eventStore: EventStore, project: SemiontProject, graphDb: GraphDatabase, logger: Logger): Promise<KnowledgeBase>;
241
+ interface CreateKnowledgeBaseOptions {
242
+ vectorStore?: VectorStore;
243
+ embeddingProvider?: EmbeddingProvider;
244
+ eventBus?: EventBus;
245
+ chunkingConfig?: ChunkingConfig;
246
+ skipRebuild?: boolean;
247
+ }
248
+ declare function createKnowledgeBase(eventStore: EventStore, project: SemiontProject, graphDb: GraphDatabase, logger: Logger, options?: CreateKnowledgeBaseOptions): Promise<KnowledgeBase>;
170
249
 
171
250
  /**
172
251
  * Stower Actor
@@ -198,6 +277,8 @@ declare function createKnowledgeBase(eventStore: EventStore, project: SemiontPro
198
277
  * - job:report-progress → job.progress
199
278
  * - job:complete → job.completed
200
279
  * - job:fail → job.failed
280
+ * - embedding:computed → embedding.computed (from Smelter)
281
+ * - embedding:deleted → embedding.deleted (from Smelter)
201
282
  */
202
283
 
203
284
  type ResourceDescriptor$3 = components['schemas']['ResourceDescriptor'];
@@ -226,6 +307,8 @@ declare class Stower {
226
307
  private handleJobReportProgress;
227
308
  private handleJobComplete;
228
309
  private handleJobFail;
310
+ private handleEmbeddingComputed;
311
+ private handleEmbeddingDeleted;
229
312
  stop(): Promise<void>;
230
313
  }
231
314
 
@@ -251,9 +334,10 @@ declare class Gatherer {
251
334
  private kb;
252
335
  private eventBus;
253
336
  private inferenceClient;
337
+ private embeddingProvider?;
254
338
  private subscriptions;
255
339
  private readonly logger;
256
- constructor(kb: KnowledgeBase, eventBus: EventBus, inferenceClient: InferenceClient, logger: Logger);
340
+ constructor(kb: KnowledgeBase, eventBus: EventBus, inferenceClient: InferenceClient, logger: Logger, embeddingProvider?: EmbeddingProvider | undefined);
257
341
  initialize(): Promise<void>;
258
342
  private handleAnnotationGather;
259
343
  private handleResourceGather;
@@ -278,9 +362,10 @@ declare class Matcher {
278
362
  private kb;
279
363
  private eventBus;
280
364
  private inferenceClient;
365
+ private embeddingProvider?;
281
366
  private subscriptions;
282
367
  private readonly logger;
283
- constructor(kb: KnowledgeBase, eventBus: EventBus, logger: Logger, inferenceClient: InferenceClient);
368
+ constructor(kb: KnowledgeBase, eventBus: EventBus, logger: Logger, inferenceClient: InferenceClient, embeddingProvider?: EmbeddingProvider | undefined);
284
369
  initialize(): Promise<void>;
285
370
  private handleSearch;
286
371
  /**
@@ -308,6 +393,11 @@ declare class Matcher {
308
393
  * @returns Map of resourceId → score (0-1)
309
394
  */
310
395
  private inferenceSemanticScore;
396
+ /**
397
+ * Search vectors for semantically similar resources.
398
+ * Returns empty array if vectors or embedding provider are not configured.
399
+ */
400
+ private searchVectors;
311
401
  stop(): Promise<void>;
312
402
  }
313
403
 
@@ -427,7 +517,9 @@ type Workers = {
427
517
  comment: CommentAnnotationWorker;
428
518
  tag: TagAnnotationWorker;
429
519
  };
430
- declare function startMakeMeaning(project: SemiontProject, config: MakeMeaningConfig, eventBus: EventBus, logger: Logger): Promise<MakeMeaningService>;
520
+ declare function startMakeMeaning(project: SemiontProject, config: MakeMeaningConfig, eventBus: EventBus, logger: Logger, options?: {
521
+ skipRebuild?: boolean;
522
+ }): Promise<MakeMeaningService>;
431
523
 
432
524
  /**
433
525
  * Entity Types Bootstrap Service
@@ -825,7 +917,7 @@ declare class AnnotationContext {
825
917
  * @returns Rich context for LLM processing
826
918
  * @throws Error if annotation or resource not found
827
919
  */
828
- static buildLLMContext(annotationId: AnnotationId, resourceId: ResourceId, kb: KnowledgeBase, options?: BuildContextOptions, inferenceClient?: InferenceClient, logger?: Logger): Promise<AnnotationLLMContextResponse>;
920
+ static buildLLMContext(annotationId: AnnotationId, resourceId: ResourceId, kb: KnowledgeBase, options?: BuildContextOptions, inferenceClient?: InferenceClient, logger?: Logger, embeddingProvider?: EmbeddingProvider): Promise<AnnotationLLMContextResponse>;
829
921
  /**
830
922
  * Get resource annotations from view storage (fast path)
831
923
  * Throws if view missing
@@ -992,4 +1084,4 @@ declare function generateReferenceSuggestions(referenceTitle: string, client: In
992
1084
  declare const PACKAGE_NAME = "@semiont/make-meaning";
993
1085
  declare const VERSION = "0.1.0";
994
1086
 
995
- export { AnnotationContext, AnnotationOperations, BACKUP_FORMAT, type BackupContentReader, type BackupEventStoreReader, type BackupExporterOptions, type BackupImportResult, type BackupImporterOptions, type BackupManifestHeader, type BackupStreamSummary, Browser, type BuildContextOptions, CloneTokenManager, type ContentBlobResolver, type CreateAnnotationResult, type CreateResourceInput, type CreateResourceResult, FORMAT_VERSION, Gatherer, GraphContext, type GraphEdge, type GraphNode, type GraphRepresentation, type KnowledgeBase, type KnowledgeSystem, LLMContext, type LLMContextOptions, type LinkedDataContentReader, type LinkedDataExporterOptions, type LinkedDataImportResult, type LinkedDataImporterOptions, type LinkedDataViewReader, type ListResourcesFilters, type MakeMeaningConfig, type MakeMeaningService, Matcher, PACKAGE_NAME, type ReplayStats, ResourceContext, ResourceOperations, Stower, type UpdateAnnotationBodyResult, type UpdateResourceInput, VERSION, bootstrapEntityTypes, createKnowledgeBase, exportBackup, exportLinkedData, generateReferenceSuggestions, generateResourceSummary, importBackup, importLinkedData, isBackupManifest, readEntityTypesProjection, resetBootstrap, startMakeMeaning, stopKnowledgeSystem, validateManifestVersion };
1087
+ export { AnnotationContext, AnnotationOperations, BACKUP_FORMAT, type BackupContentReader, type BackupEventStoreReader, type BackupExporterOptions, type BackupImportResult, type BackupImporterOptions, type BackupManifestHeader, type BackupStreamSummary, Browser, type BuildContextOptions, CloneTokenManager, type ContentBlobResolver, type CreateAnnotationResult, type CreateResourceInput, type CreateResourceResult, FORMAT_VERSION, Gatherer, GraphContext, type GraphEdge, type GraphNode, type GraphRepresentation, type KnowledgeBase, type KnowledgeSystem, LLMContext, type LLMContextOptions, type LinkedDataContentReader, type LinkedDataExporterOptions, type LinkedDataImportResult, type LinkedDataImporterOptions, type LinkedDataViewReader, type ListResourcesFilters, type MakeMeaningConfig, type MakeMeaningService, Matcher, PACKAGE_NAME, type ReplayStats, ResourceContext, ResourceOperations, Smelter, Stower, type UpdateAnnotationBodyResult, type UpdateResourceInput, VERSION, bootstrapEntityTypes, createKnowledgeBase, exportBackup, exportLinkedData, generateReferenceSuggestions, generateResourceSummary, importBackup, importLinkedData, isBackupManifest, readEntityTypesProjection, resetBootstrap, startMakeMeaning, stopKnowledgeSystem, validateManifestVersion };