@semiont/make-meaning 0.4.17 → 0.4.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { JobQueue, ReferenceAnnotationWorker, GenerationWorker, HighlightAnnotationWorker, AssessmentAnnotationWorker, CommentAnnotationWorker, TagAnnotationWorker } from '@semiont/jobs';
2
2
  import { SemiontProject } from '@semiont/core/node';
3
- import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId, components, AnnotationId, UserId, CreationMethod, ResourceAnnotations, AnnotationCategory, GraphPath, GraphConnection } from '@semiont/core';
3
+ import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId, AnnotationId, components, UserId, CreationMethod, ResourceAnnotations, AnnotationCategory, GraphPath, GraphConnection } from '@semiont/core';
4
4
  export { AssembledAnnotation, applyBodyOperations, assembleAnnotation } from '@semiont/core';
5
5
  import { EventStore, ViewStorage } from '@semiont/event-sourcing';
6
6
  import { WorkingTreeStore } from '@semiont/content';
@@ -147,28 +147,92 @@ declare class GraphDBConsumer {
147
147
  shutdown(): Promise<void>;
148
148
  }
149
149
 
150
+ /**
151
+ * EmbeddingStore
152
+ *
153
+ * Durable file-based cache for pre-computed embedding vectors.
154
+ * Stored under .semiont/embeddings/ — committed to git alongside events,
155
+ * but overwritten in place rather than appended.
156
+ *
157
+ * File layout (same 4-hex Jump Consistent Hash sharding as events):
158
+ *
159
+ * .semiont/embeddings/{ab}/{cd}/{resourceId}.jsonl
160
+ * Line 0: { model, dimensions } ← model header
161
+ * Line N: { chunkIndex, text, embedding[] } ← one chunk per line
162
+ *
163
+ * .semiont/embeddings/{ab}/{cd}/{annotationId}.json
164
+ * { model, dimensions, resourceId, text, embedding[], motivation, entityTypes }
165
+ *
166
+ * rebuildAll() in Smelter reads these files and upserts into Qdrant without
167
+ * calling the embedding provider — unless the stored model doesn't match the
168
+ * configured provider, in which case the file is re-embedded and overwritten.
169
+ */
170
+
171
+ interface StoredChunk {
172
+ chunkIndex: number;
173
+ text: string;
174
+ embedding: number[];
175
+ }
176
+ interface ResourceEmbeddingFile {
177
+ model: string;
178
+ dimensions: number;
179
+ chunks: StoredChunk[];
180
+ }
181
+ interface AnnotationEmbeddingFile {
182
+ model: string;
183
+ dimensions: number;
184
+ resourceId: string;
185
+ text: string;
186
+ embedding: number[];
187
+ motivation: string;
188
+ entityTypes: string[];
189
+ }
190
+ declare class EmbeddingStore {
191
+ private readonly project;
192
+ constructor(project: SemiontProject);
193
+ private resourceFilePath;
194
+ private annotationFilePath;
195
+ writeResourceChunks(resourceId: ResourceId, model: string, dimensions: number, chunks: StoredChunk[]): Promise<void>;
196
+ readResourceEmbeddings(resourceId: ResourceId): Promise<ResourceEmbeddingFile | null>;
197
+ deleteResourceEmbeddings(resourceId: ResourceId): Promise<void>;
198
+ writeAnnotationEmbedding(annotationId: AnnotationId, resourceId: ResourceId, model: string, dimensions: number, text: string, embedding: number[], motivation: string, entityTypes: string[]): Promise<void>;
199
+ readAnnotationEmbedding(annotationId: AnnotationId): Promise<AnnotationEmbeddingFile | null>;
200
+ deleteAnnotationEmbedding(annotationId: AnnotationId): Promise<void>;
201
+ /**
202
+ * Scan embeddings directory and return all resource IDs (from *.jsonl files).
203
+ */
204
+ getAllResourceIds(): Promise<string[]>;
205
+ /**
206
+ * Scan embeddings directory and return all annotation IDs (from *.json files).
207
+ */
208
+ getAllAnnotationIds(): Promise<string[]>;
209
+ private scanIds;
210
+ }
211
+
150
212
  /**
151
213
  * Smelter Actor
152
214
  *
153
- * Takes raw content, refines it into embedding vectors, persists them as events,
154
- * and indexes them into the vector store. Peer to the Graph Consumer.
215
+ * Takes raw content, refines it into embedding vectors, persists them to the
216
+ * EmbeddingStore (.semiont/embeddings/), and indexes them into the VectorStore
217
+ * (Qdrant). Peer to the Graph Consumer.
155
218
  *
156
219
  * Pipeline:
157
220
  * 1. Subscribe to resource and annotation events from the EventStore
158
221
  * 2. Chunk resource text into overlapping passages
159
222
  * 3. Embed each chunk via the configured EmbeddingProvider
160
- * 4. Emit embedding:computed events on the EventBus (persisted by Stower)
223
+ * 4. Write vectors to EmbeddingStore (overwrite-in-place, git-durable)
161
224
  * 5. Index vectors into the VectorStore (Qdrant) for fast similarity search
162
225
  *
163
226
  * Uses the same burst-buffer RxJS pipeline as GraphDBConsumer.
164
227
  */
165
228
 
166
229
  declare class Smelter {
167
- private eventStore;
168
230
  private eventBus;
169
231
  private vectorStore;
170
232
  private embeddingProvider;
171
233
  private contentStore;
234
+ private embeddingStore;
235
+ private viewStorage;
172
236
  private static readonly SMELTER_RELEVANT_EVENTS;
173
237
  private static readonly BURST_WINDOW_MS;
174
238
  private static readonly MAX_BATCH_SIZE;
@@ -178,35 +242,47 @@ declare class Smelter {
178
242
  private pipelineSubscription;
179
243
  private readonly logger;
180
244
  private readonly chunkingConfig;
181
- constructor(eventStore: EventStore, eventBus: EventBus, vectorStore: VectorStore, embeddingProvider: EmbeddingProvider, contentStore: WorkingTreeStore, logger: Logger, chunkingConfig?: ChunkingConfig);
245
+ constructor(_eventStore: EventStore, eventBus: EventBus, vectorStore: VectorStore, embeddingProvider: EmbeddingProvider, contentStore: WorkingTreeStore, embeddingStore: EmbeddingStore, viewStorage: ViewStorage, logger: Logger, chunkingConfig?: ChunkingConfig);
182
246
  initialize(): Promise<void>;
183
247
  stop(): Promise<void>;
184
248
  /**
185
- * Rebuild the vector store from persisted embedding events in the event log.
186
- * Reads all embedding:computed / embedding:deleted events and replays them.
187
- * Bypasses the live pipeline — reads directly from the event store.
249
+ * Rebuild the vector store from the EmbeddingStore (.semiont/embeddings/).
250
+ *
251
+ * For each stored file, checks whether the model matches the configured
252
+ * provider. On mismatch, re-embeds from the stored text and overwrites the
253
+ * file before upserting into Qdrant. On match, loads the stored vectors
254
+ * directly — no embedding provider calls needed.
188
255
  */
189
256
  rebuildAll(): Promise<void>;
190
257
  private processBatch;
191
258
  /**
192
259
  * Batch-optimized processing for consecutive events of the same type.
193
- * Collects all texts across events, embeds in a single embedBatch() call,
194
- * then distributes results back to their respective resources/annotations.
195
260
  */
196
261
  private applyBatchByType;
197
262
  /**
198
- * Batch-embed chunks from multiple resource.created events in a single
199
- * embedBatch() call, then emit events and index per resource.
263
+ * Batch-embed chunks from multiple yield:created events in a single
264
+ * embedBatch() call, then write to EmbeddingStore and index per resource.
200
265
  */
201
266
  private batchResourceCreated;
202
267
  /**
203
- * Batch-embed exact texts from multiple annotation.added events in a
204
- * single embedBatch() call, then emit events and index per annotation.
268
+ * Batch-embed exact texts from multiple mark:added events in a single
269
+ * embedBatch() call, then write to EmbeddingStore and index per annotation.
205
270
  */
206
271
  private batchAnnotationAdded;
207
272
  private safeProcessEvent;
208
273
  private processEvent;
209
274
  private handleResourceCreated;
275
+ /**
276
+ * Re-embed a resource whose content has changed in-place.
277
+ *
278
+ * Used by yield:updated and yield:representation-added handlers. Reads the
279
+ * current storageUri from the materialized view (which is updated before the
280
+ * EventBus fires), deletes stale Qdrant vectors, and overwrites the
281
+ * EmbeddingStore file with fresh chunks.
282
+ */
283
+ private reembedResource;
284
+ private handleResourceUpdated;
285
+ private handleRepresentationAdded;
210
286
  private handleResourceArchived;
211
287
  private handleAnnotationAdded;
212
288
  private handleAnnotationRemoved;
@@ -277,8 +353,6 @@ declare function createKnowledgeBase(eventStore: EventStore, project: SemiontPro
277
353
  * - job:report-progress → job.progress
278
354
  * - job:complete → job.completed
279
355
  * - job:fail → job.failed
280
- * - embedding:compute → embedding.computed (from Smelter)
281
- * - embedding:delete → embedding.deleted (from Smelter)
282
356
  */
283
357
 
284
358
  type ResourceDescriptor$3 = components['schemas']['ResourceDescriptor'];
@@ -307,8 +381,6 @@ declare class Stower {
307
381
  private handleJobReportProgress;
308
382
  private handleJobComplete;
309
383
  private handleJobFail;
310
- private handleEmbeddingComputed;
311
- private handleEmbeddingDeleted;
312
384
  stop(): Promise<void>;
313
385
  }
314
386
 
@@ -581,8 +653,6 @@ interface BackupManifestHeader {
581
653
  interface BackupStreamSummary {
582
654
  stream: string;
583
655
  eventCount: number;
584
- firstChecksum: string;
585
- lastChecksum: string;
586
656
  }
587
657
  declare const LINKED_DATA_FORMAT: "semiont-linked-data";
588
658
  interface LinkedDataManifest {
@@ -687,7 +757,6 @@ interface BackupImporterOptions {
687
757
  interface BackupImportResult {
688
758
  manifest: BackupManifestHeader;
689
759
  stats: ReplayStats;
690
- hashChainValid: boolean;
691
760
  }
692
761
  /**
693
762
  * Import a backup archive by replaying events through the EventBus.
@@ -878,9 +947,17 @@ declare class ResourceContext {
878
947
  */
879
948
  static getResourceMetadata(resourceId: ResourceId, kb: KnowledgeBase): Promise<ResourceDescriptor$1 | null>;
880
949
  /**
881
- * List all resources by scanning view storage
950
+ * List resources, optionally filtered.
951
+ *
952
+ * When `search` is set, delegates to `kb.graph.searchResources`, which runs
953
+ * the name match in the graph engine instead of scanning every view in JS.
954
+ * The graph result is then narrowed by `archived` if requested.
955
+ *
956
+ * When `search` is unset, falls back to scanning all materialized views.
957
+ * (TODO: also push the listing path through the graph for large KBs.)
882
958
  */
883
959
  static listResources(filters: ListResourcesFilters | undefined, kb: KnowledgeBase): Promise<ResourceDescriptor$1[]>;
960
+ private static sortByDateDesc;
884
961
  /**
885
962
  * Add content previews to resources (for search results)
886
963
  * Retrieves and decodes the first 200 characters of each resource's primary representation