@semiont/make-meaning 0.4.18 → 0.4.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +91 -22
- package/dist/index.js +371 -204
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { JobQueue, ReferenceAnnotationWorker, GenerationWorker, HighlightAnnotationWorker, AssessmentAnnotationWorker, CommentAnnotationWorker, TagAnnotationWorker } from '@semiont/jobs';
|
|
2
2
|
import { SemiontProject } from '@semiont/core/node';
|
|
3
|
-
import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId,
|
|
3
|
+
import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId, AnnotationId, components, UserId, CreationMethod, ResourceAnnotations, AnnotationCategory, GraphPath, GraphConnection } from '@semiont/core';
|
|
4
4
|
export { AssembledAnnotation, applyBodyOperations, assembleAnnotation } from '@semiont/core';
|
|
5
5
|
import { EventStore, ViewStorage } from '@semiont/event-sourcing';
|
|
6
6
|
import { WorkingTreeStore } from '@semiont/content';
|
|
@@ -147,28 +147,92 @@ declare class GraphDBConsumer {
|
|
|
147
147
|
shutdown(): Promise<void>;
|
|
148
148
|
}
|
|
149
149
|
|
|
150
|
+
/**
|
|
151
|
+
* EmbeddingStore
|
|
152
|
+
*
|
|
153
|
+
* Durable file-based cache for pre-computed embedding vectors.
|
|
154
|
+
* Stored under .semiont/embeddings/ — committed to git alongside events,
|
|
155
|
+
* but overwritten in place rather than appended.
|
|
156
|
+
*
|
|
157
|
+
* File layout (same 4-hex Jump Consistent Hash sharding as events):
|
|
158
|
+
*
|
|
159
|
+
* .semiont/embeddings/{ab}/{cd}/{resourceId}.jsonl
|
|
160
|
+
* Line 0: { model, dimensions } ← model header
|
|
161
|
+
* Line N: { chunkIndex, text, embedding[] } ← one chunk per line
|
|
162
|
+
*
|
|
163
|
+
* .semiont/embeddings/{ab}/{cd}/{annotationId}.json
|
|
164
|
+
* { model, dimensions, resourceId, text, embedding[], motivation, entityTypes }
|
|
165
|
+
*
|
|
166
|
+
* rebuildAll() in Smelter reads these files and upserts into Qdrant without
|
|
167
|
+
* calling the embedding provider — unless the stored model doesn't match the
|
|
168
|
+
* configured provider, in which case the file is re-embedded and overwritten.
|
|
169
|
+
*/
|
|
170
|
+
|
|
171
|
+
interface StoredChunk {
|
|
172
|
+
chunkIndex: number;
|
|
173
|
+
text: string;
|
|
174
|
+
embedding: number[];
|
|
175
|
+
}
|
|
176
|
+
interface ResourceEmbeddingFile {
|
|
177
|
+
model: string;
|
|
178
|
+
dimensions: number;
|
|
179
|
+
chunks: StoredChunk[];
|
|
180
|
+
}
|
|
181
|
+
interface AnnotationEmbeddingFile {
|
|
182
|
+
model: string;
|
|
183
|
+
dimensions: number;
|
|
184
|
+
resourceId: string;
|
|
185
|
+
text: string;
|
|
186
|
+
embedding: number[];
|
|
187
|
+
motivation: string;
|
|
188
|
+
entityTypes: string[];
|
|
189
|
+
}
|
|
190
|
+
declare class EmbeddingStore {
|
|
191
|
+
private readonly project;
|
|
192
|
+
constructor(project: SemiontProject);
|
|
193
|
+
private resourceFilePath;
|
|
194
|
+
private annotationFilePath;
|
|
195
|
+
writeResourceChunks(resourceId: ResourceId, model: string, dimensions: number, chunks: StoredChunk[]): Promise<void>;
|
|
196
|
+
readResourceEmbeddings(resourceId: ResourceId): Promise<ResourceEmbeddingFile | null>;
|
|
197
|
+
deleteResourceEmbeddings(resourceId: ResourceId): Promise<void>;
|
|
198
|
+
writeAnnotationEmbedding(annotationId: AnnotationId, resourceId: ResourceId, model: string, dimensions: number, text: string, embedding: number[], motivation: string, entityTypes: string[]): Promise<void>;
|
|
199
|
+
readAnnotationEmbedding(annotationId: AnnotationId): Promise<AnnotationEmbeddingFile | null>;
|
|
200
|
+
deleteAnnotationEmbedding(annotationId: AnnotationId): Promise<void>;
|
|
201
|
+
/**
|
|
202
|
+
* Scan embeddings directory and return all resource IDs (from *.jsonl files).
|
|
203
|
+
*/
|
|
204
|
+
getAllResourceIds(): Promise<string[]>;
|
|
205
|
+
/**
|
|
206
|
+
* Scan embeddings directory and return all annotation IDs (from *.json files).
|
|
207
|
+
*/
|
|
208
|
+
getAllAnnotationIds(): Promise<string[]>;
|
|
209
|
+
private scanIds;
|
|
210
|
+
}
|
|
211
|
+
|
|
150
212
|
/**
|
|
151
213
|
* Smelter Actor
|
|
152
214
|
*
|
|
153
|
-
* Takes raw content, refines it into embedding vectors, persists them
|
|
154
|
-
* and indexes them into the
|
|
215
|
+
* Takes raw content, refines it into embedding vectors, persists them to the
|
|
216
|
+
* EmbeddingStore (.semiont/embeddings/), and indexes them into the VectorStore
|
|
217
|
+
* (Qdrant). Peer to the Graph Consumer.
|
|
155
218
|
*
|
|
156
219
|
* Pipeline:
|
|
157
220
|
* 1. Subscribe to resource and annotation events from the EventStore
|
|
158
221
|
* 2. Chunk resource text into overlapping passages
|
|
159
222
|
* 3. Embed each chunk via the configured EmbeddingProvider
|
|
160
|
-
* 4.
|
|
223
|
+
* 4. Write vectors to EmbeddingStore (overwrite-in-place, git-durable)
|
|
161
224
|
* 5. Index vectors into the VectorStore (Qdrant) for fast similarity search
|
|
162
225
|
*
|
|
163
226
|
* Uses the same burst-buffer RxJS pipeline as GraphDBConsumer.
|
|
164
227
|
*/
|
|
165
228
|
|
|
166
229
|
declare class Smelter {
|
|
167
|
-
private eventStore;
|
|
168
230
|
private eventBus;
|
|
169
231
|
private vectorStore;
|
|
170
232
|
private embeddingProvider;
|
|
171
233
|
private contentStore;
|
|
234
|
+
private embeddingStore;
|
|
235
|
+
private viewStorage;
|
|
172
236
|
private static readonly SMELTER_RELEVANT_EVENTS;
|
|
173
237
|
private static readonly BURST_WINDOW_MS;
|
|
174
238
|
private static readonly MAX_BATCH_SIZE;
|
|
@@ -178,35 +242,47 @@ declare class Smelter {
|
|
|
178
242
|
private pipelineSubscription;
|
|
179
243
|
private readonly logger;
|
|
180
244
|
private readonly chunkingConfig;
|
|
181
|
-
constructor(
|
|
245
|
+
constructor(_eventStore: EventStore, eventBus: EventBus, vectorStore: VectorStore, embeddingProvider: EmbeddingProvider, contentStore: WorkingTreeStore, embeddingStore: EmbeddingStore, viewStorage: ViewStorage, logger: Logger, chunkingConfig?: ChunkingConfig);
|
|
182
246
|
initialize(): Promise<void>;
|
|
183
247
|
stop(): Promise<void>;
|
|
184
248
|
/**
|
|
185
|
-
* Rebuild the vector store from
|
|
186
|
-
*
|
|
187
|
-
*
|
|
249
|
+
* Rebuild the vector store from the EmbeddingStore (.semiont/embeddings/).
|
|
250
|
+
*
|
|
251
|
+
* For each stored file, checks whether the model matches the configured
|
|
252
|
+
* provider. On mismatch, re-embeds from the stored text and overwrites the
|
|
253
|
+
* file before upserting into Qdrant. On match, loads the stored vectors
|
|
254
|
+
* directly — no embedding provider calls needed.
|
|
188
255
|
*/
|
|
189
256
|
rebuildAll(): Promise<void>;
|
|
190
257
|
private processBatch;
|
|
191
258
|
/**
|
|
192
259
|
* Batch-optimized processing for consecutive events of the same type.
|
|
193
|
-
* Collects all texts across events, embeds in a single embedBatch() call,
|
|
194
|
-
* then distributes results back to their respective resources/annotations.
|
|
195
260
|
*/
|
|
196
261
|
private applyBatchByType;
|
|
197
262
|
/**
|
|
198
|
-
* Batch-embed chunks from multiple
|
|
199
|
-
* embedBatch() call, then
|
|
263
|
+
* Batch-embed chunks from multiple yield:created events in a single
|
|
264
|
+
* embedBatch() call, then write to EmbeddingStore and index per resource.
|
|
200
265
|
*/
|
|
201
266
|
private batchResourceCreated;
|
|
202
267
|
/**
|
|
203
|
-
* Batch-embed exact texts from multiple
|
|
204
|
-
*
|
|
268
|
+
* Batch-embed exact texts from multiple mark:added events in a single
|
|
269
|
+
* embedBatch() call, then write to EmbeddingStore and index per annotation.
|
|
205
270
|
*/
|
|
206
271
|
private batchAnnotationAdded;
|
|
207
272
|
private safeProcessEvent;
|
|
208
273
|
private processEvent;
|
|
209
274
|
private handleResourceCreated;
|
|
275
|
+
/**
|
|
276
|
+
* Re-embed a resource whose content has changed in-place.
|
|
277
|
+
*
|
|
278
|
+
* Used by yield:updated and yield:representation-added handlers. Reads the
|
|
279
|
+
* current storageUri from the materialized view (which is updated before the
|
|
280
|
+
* EventBus fires), deletes stale Qdrant vectors, and overwrites the
|
|
281
|
+
* EmbeddingStore file with fresh chunks.
|
|
282
|
+
*/
|
|
283
|
+
private reembedResource;
|
|
284
|
+
private handleResourceUpdated;
|
|
285
|
+
private handleRepresentationAdded;
|
|
210
286
|
private handleResourceArchived;
|
|
211
287
|
private handleAnnotationAdded;
|
|
212
288
|
private handleAnnotationRemoved;
|
|
@@ -277,8 +353,6 @@ declare function createKnowledgeBase(eventStore: EventStore, project: SemiontPro
|
|
|
277
353
|
* - job:report-progress → job.progress
|
|
278
354
|
* - job:complete → job.completed
|
|
279
355
|
* - job:fail → job.failed
|
|
280
|
-
* - embedding:compute → embedding.computed (from Smelter)
|
|
281
|
-
* - embedding:delete → embedding.deleted (from Smelter)
|
|
282
356
|
*/
|
|
283
357
|
|
|
284
358
|
type ResourceDescriptor$3 = components['schemas']['ResourceDescriptor'];
|
|
@@ -307,8 +381,6 @@ declare class Stower {
|
|
|
307
381
|
private handleJobReportProgress;
|
|
308
382
|
private handleJobComplete;
|
|
309
383
|
private handleJobFail;
|
|
310
|
-
private handleEmbeddingComputed;
|
|
311
|
-
private handleEmbeddingDeleted;
|
|
312
384
|
stop(): Promise<void>;
|
|
313
385
|
}
|
|
314
386
|
|
|
@@ -581,8 +653,6 @@ interface BackupManifestHeader {
|
|
|
581
653
|
interface BackupStreamSummary {
|
|
582
654
|
stream: string;
|
|
583
655
|
eventCount: number;
|
|
584
|
-
firstChecksum: string;
|
|
585
|
-
lastChecksum: string;
|
|
586
656
|
}
|
|
587
657
|
declare const LINKED_DATA_FORMAT: "semiont-linked-data";
|
|
588
658
|
interface LinkedDataManifest {
|
|
@@ -687,7 +757,6 @@ interface BackupImporterOptions {
|
|
|
687
757
|
interface BackupImportResult {
|
|
688
758
|
manifest: BackupManifestHeader;
|
|
689
759
|
stats: ReplayStats;
|
|
690
|
-
hashChainValid: boolean;
|
|
691
760
|
}
|
|
692
761
|
/**
|
|
693
762
|
* Import a backup archive by replaying events through the EventBus.
|