@semiont/make-meaning 0.4.18 → 0.4.20
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
- package/dist/index.d.ts +119 -22
- package/dist/index.js +391 -225
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { JobQueue, ReferenceAnnotationWorker, GenerationWorker, HighlightAnnotationWorker, AssessmentAnnotationWorker, CommentAnnotationWorker, TagAnnotationWorker } from '@semiont/jobs';
|
|
2
2
|
import { SemiontProject } from '@semiont/core/node';
|
|
3
|
-
import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId,
|
|
3
|
+
import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId, AnnotationId, components, UserId, CreationMethod, ResourceAnnotations, AnnotationCategory, GraphPath, GraphConnection } from '@semiont/core';
|
|
4
4
|
export { AssembledAnnotation, applyBodyOperations, assembleAnnotation } from '@semiont/core';
|
|
5
5
|
import { EventStore, ViewStorage } from '@semiont/event-sourcing';
|
|
6
6
|
import { WorkingTreeStore } from '@semiont/content';
|
|
@@ -72,6 +72,15 @@ interface MakeMeaningConfig {
|
|
|
72
72
|
* BURST_WINDOW_MS = 50 — debounce window before flushing a batch
|
|
73
73
|
* MAX_BATCH_SIZE = 500 — force flush to bound memory
|
|
74
74
|
* IDLE_TIMEOUT_MS = 200 — silence before returning to passthrough
|
|
75
|
+
*
|
|
76
|
+
* ## Per-resource serialization
|
|
77
|
+
*
|
|
78
|
+
* `groupBy(resourceId) + concatMap(...)` is the stream-consumer flavor of
|
|
79
|
+
* per-resource serialization — the same invariant enforced by `Smelter`,
|
|
80
|
+
* `Gatherer`, and (in a different shape) `ViewManager`. See
|
|
81
|
+
* `packages/core/src/serialize-per-key.ts` for the shared primitive used
|
|
82
|
+
* by RPC-style services, and `.plans/PerResourceSerializer.md` for the
|
|
83
|
+
* broader design that would unify the two shapes.
|
|
75
84
|
*/
|
|
76
85
|
|
|
77
86
|
declare class GraphDBConsumer {
|
|
@@ -147,28 +156,102 @@ declare class GraphDBConsumer {
|
|
|
147
156
|
shutdown(): Promise<void>;
|
|
148
157
|
}
|
|
149
158
|
|
|
159
|
+
/**
|
|
160
|
+
* EmbeddingStore
|
|
161
|
+
*
|
|
162
|
+
* Durable file-based cache for pre-computed embedding vectors.
|
|
163
|
+
* Stored under .semiont/embeddings/ — committed to git alongside events,
|
|
164
|
+
* but overwritten in place rather than appended.
|
|
165
|
+
*
|
|
166
|
+
* File layout (same 4-hex Jump Consistent Hash sharding as events):
|
|
167
|
+
*
|
|
168
|
+
* .semiont/embeddings/{ab}/{cd}/{resourceId}.jsonl
|
|
169
|
+
* Line 0: { model, dimensions } ← model header
|
|
170
|
+
* Line N: { chunkIndex, text, embedding[] } ← one chunk per line
|
|
171
|
+
*
|
|
172
|
+
* .semiont/embeddings/{ab}/{cd}/{annotationId}.json
|
|
173
|
+
* { model, dimensions, resourceId, text, embedding[], motivation, entityTypes }
|
|
174
|
+
*
|
|
175
|
+
* rebuildAll() in Smelter reads these files and upserts into Qdrant without
|
|
176
|
+
* calling the embedding provider — unless the stored model doesn't match the
|
|
177
|
+
* configured provider, in which case the file is re-embedded and overwritten.
|
|
178
|
+
*/
|
|
179
|
+
|
|
180
|
+
interface StoredChunk {
|
|
181
|
+
chunkIndex: number;
|
|
182
|
+
text: string;
|
|
183
|
+
embedding: number[];
|
|
184
|
+
}
|
|
185
|
+
interface ResourceEmbeddingFile {
|
|
186
|
+
model: string;
|
|
187
|
+
dimensions: number;
|
|
188
|
+
chunks: StoredChunk[];
|
|
189
|
+
}
|
|
190
|
+
interface AnnotationEmbeddingFile {
|
|
191
|
+
model: string;
|
|
192
|
+
dimensions: number;
|
|
193
|
+
resourceId: string;
|
|
194
|
+
text: string;
|
|
195
|
+
embedding: number[];
|
|
196
|
+
motivation: string;
|
|
197
|
+
entityTypes: string[];
|
|
198
|
+
}
|
|
199
|
+
declare class EmbeddingStore {
|
|
200
|
+
private readonly project;
|
|
201
|
+
constructor(project: SemiontProject);
|
|
202
|
+
private resourceFilePath;
|
|
203
|
+
private annotationFilePath;
|
|
204
|
+
writeResourceChunks(resourceId: ResourceId, model: string, dimensions: number, chunks: StoredChunk[]): Promise<void>;
|
|
205
|
+
readResourceEmbeddings(resourceId: ResourceId): Promise<ResourceEmbeddingFile | null>;
|
|
206
|
+
deleteResourceEmbeddings(resourceId: ResourceId): Promise<void>;
|
|
207
|
+
writeAnnotationEmbedding(annotationId: AnnotationId, resourceId: ResourceId, model: string, dimensions: number, text: string, embedding: number[], motivation: string, entityTypes: string[]): Promise<void>;
|
|
208
|
+
readAnnotationEmbedding(annotationId: AnnotationId): Promise<AnnotationEmbeddingFile | null>;
|
|
209
|
+
deleteAnnotationEmbedding(annotationId: AnnotationId): Promise<void>;
|
|
210
|
+
/**
|
|
211
|
+
* Scan embeddings directory and return all resource IDs (from *.jsonl files).
|
|
212
|
+
*/
|
|
213
|
+
getAllResourceIds(): Promise<string[]>;
|
|
214
|
+
/**
|
|
215
|
+
* Scan embeddings directory and return all annotation IDs (from *.json files).
|
|
216
|
+
*/
|
|
217
|
+
getAllAnnotationIds(): Promise<string[]>;
|
|
218
|
+
private scanIds;
|
|
219
|
+
}
|
|
220
|
+
|
|
150
221
|
/**
|
|
151
222
|
* Smelter Actor
|
|
152
223
|
*
|
|
153
|
-
* Takes raw content, refines it into embedding vectors, persists them
|
|
154
|
-
* and indexes them into the
|
|
224
|
+
* Takes raw content, refines it into embedding vectors, persists them to the
|
|
225
|
+
* EmbeddingStore (.semiont/embeddings/), and indexes them into the VectorStore
|
|
226
|
+
* (Qdrant). Peer to the Graph Consumer.
|
|
155
227
|
*
|
|
156
228
|
* Pipeline:
|
|
157
229
|
* 1. Subscribe to resource and annotation events from the EventStore
|
|
158
230
|
* 2. Chunk resource text into overlapping passages
|
|
159
231
|
* 3. Embed each chunk via the configured EmbeddingProvider
|
|
160
|
-
* 4.
|
|
232
|
+
* 4. Write vectors to EmbeddingStore (overwrite-in-place, git-durable)
|
|
161
233
|
* 5. Index vectors into the VectorStore (Qdrant) for fast similarity search
|
|
162
234
|
*
|
|
163
235
|
* Uses the same burst-buffer RxJS pipeline as GraphDBConsumer.
|
|
236
|
+
*
|
|
237
|
+
* ## Per-resource serialization
|
|
238
|
+
*
|
|
239
|
+
* Smelter processes events strictly in order per resourceId via
|
|
240
|
+
* `groupBy(resourceId) + concatMap(...)`. This is the stream-consumer
|
|
241
|
+
* flavor of per-resource serialization — the same invariant enforced by
|
|
242
|
+
* `GraphDBConsumer`, `Gatherer`, and (in a different shape) `ViewManager`.
|
|
243
|
+
* See `packages/core/src/serialize-per-key.ts` for the shared primitive
|
|
244
|
+
* used by RPC-style services, and `.plans/PerResourceSerializer.md` for
|
|
245
|
+
* the broader design that would unify the two shapes.
|
|
164
246
|
*/
|
|
165
247
|
|
|
166
248
|
declare class Smelter {
|
|
167
|
-
private eventStore;
|
|
168
249
|
private eventBus;
|
|
169
250
|
private vectorStore;
|
|
170
251
|
private embeddingProvider;
|
|
171
252
|
private contentStore;
|
|
253
|
+
private embeddingStore;
|
|
254
|
+
private viewStorage;
|
|
172
255
|
private static readonly SMELTER_RELEVANT_EVENTS;
|
|
173
256
|
private static readonly BURST_WINDOW_MS;
|
|
174
257
|
private static readonly MAX_BATCH_SIZE;
|
|
@@ -178,35 +261,47 @@ declare class Smelter {
|
|
|
178
261
|
private pipelineSubscription;
|
|
179
262
|
private readonly logger;
|
|
180
263
|
private readonly chunkingConfig;
|
|
181
|
-
constructor(
|
|
264
|
+
constructor(_eventStore: EventStore, eventBus: EventBus, vectorStore: VectorStore, embeddingProvider: EmbeddingProvider, contentStore: WorkingTreeStore, embeddingStore: EmbeddingStore, viewStorage: ViewStorage, logger: Logger, chunkingConfig?: ChunkingConfig);
|
|
182
265
|
initialize(): Promise<void>;
|
|
183
266
|
stop(): Promise<void>;
|
|
184
267
|
/**
|
|
185
|
-
* Rebuild the vector store from
|
|
186
|
-
*
|
|
187
|
-
*
|
|
268
|
+
* Rebuild the vector store from the EmbeddingStore (.semiont/embeddings/).
|
|
269
|
+
*
|
|
270
|
+
* For each stored file, checks whether the model matches the configured
|
|
271
|
+
* provider. On mismatch, re-embeds from the stored text and overwrites the
|
|
272
|
+
* file before upserting into Qdrant. On match, loads the stored vectors
|
|
273
|
+
* directly — no embedding provider calls needed.
|
|
188
274
|
*/
|
|
189
275
|
rebuildAll(): Promise<void>;
|
|
190
276
|
private processBatch;
|
|
191
277
|
/**
|
|
192
278
|
* Batch-optimized processing for consecutive events of the same type.
|
|
193
|
-
* Collects all texts across events, embeds in a single embedBatch() call,
|
|
194
|
-
* then distributes results back to their respective resources/annotations.
|
|
195
279
|
*/
|
|
196
280
|
private applyBatchByType;
|
|
197
281
|
/**
|
|
198
|
-
* Batch-embed chunks from multiple
|
|
199
|
-
* embedBatch() call, then
|
|
282
|
+
* Batch-embed chunks from multiple yield:created events in a single
|
|
283
|
+
* embedBatch() call, then write to EmbeddingStore and index per resource.
|
|
200
284
|
*/
|
|
201
285
|
private batchResourceCreated;
|
|
202
286
|
/**
|
|
203
|
-
* Batch-embed exact texts from multiple
|
|
204
|
-
*
|
|
287
|
+
* Batch-embed exact texts from multiple mark:added events in a single
|
|
288
|
+
* embedBatch() call, then write to EmbeddingStore and index per annotation.
|
|
205
289
|
*/
|
|
206
290
|
private batchAnnotationAdded;
|
|
207
291
|
private safeProcessEvent;
|
|
208
292
|
private processEvent;
|
|
209
293
|
private handleResourceCreated;
|
|
294
|
+
/**
|
|
295
|
+
* Re-embed a resource whose content has changed in-place.
|
|
296
|
+
*
|
|
297
|
+
* Used by yield:updated and yield:representation-added handlers. Reads the
|
|
298
|
+
* current storageUri from the materialized view (which is updated before the
|
|
299
|
+
* EventBus fires), deletes stale Qdrant vectors, and overwrites the
|
|
300
|
+
* EmbeddingStore file with fresh chunks.
|
|
301
|
+
*/
|
|
302
|
+
private reembedResource;
|
|
303
|
+
private handleResourceUpdated;
|
|
304
|
+
private handleRepresentationAdded;
|
|
210
305
|
private handleResourceArchived;
|
|
211
306
|
private handleAnnotationAdded;
|
|
212
307
|
private handleAnnotationRemoved;
|
|
@@ -277,8 +372,6 @@ declare function createKnowledgeBase(eventStore: EventStore, project: SemiontPro
|
|
|
277
372
|
* - job:report-progress → job.progress
|
|
278
373
|
* - job:complete → job.completed
|
|
279
374
|
* - job:fail → job.failed
|
|
280
|
-
* - embedding:compute → embedding.computed (from Smelter)
|
|
281
|
-
* - embedding:delete → embedding.deleted (from Smelter)
|
|
282
375
|
*/
|
|
283
376
|
|
|
284
377
|
type ResourceDescriptor$3 = components['schemas']['ResourceDescriptor'];
|
|
@@ -307,8 +400,6 @@ declare class Stower {
|
|
|
307
400
|
private handleJobReportProgress;
|
|
308
401
|
private handleJobComplete;
|
|
309
402
|
private handleJobFail;
|
|
310
|
-
private handleEmbeddingComputed;
|
|
311
|
-
private handleEmbeddingDeleted;
|
|
312
403
|
stop(): Promise<void>;
|
|
313
404
|
}
|
|
314
405
|
|
|
@@ -328,6 +419,15 @@ declare class Stower {
|
|
|
328
419
|
* - gather:resource-requested — resource-level LLM context assembly
|
|
329
420
|
*
|
|
330
421
|
* RxJS pipeline uses groupBy(resourceId) + concatMap for per-resource isolation.
|
|
422
|
+
*
|
|
423
|
+
* ## Per-resource serialization
|
|
424
|
+
*
|
|
425
|
+
* `groupBy(resourceId) + concatMap(...)` is the stream-consumer flavor of
|
|
426
|
+
* per-resource serialization — the same invariant enforced by `Smelter`,
|
|
427
|
+
* `GraphDBConsumer`, and (in a different shape) `ViewManager`. See
|
|
428
|
+
* `packages/core/src/serialize-per-key.ts` for the shared primitive used
|
|
429
|
+
* by RPC-style services, and `.plans/PerResourceSerializer.md` for the
|
|
430
|
+
* broader design that would unify the two shapes.
|
|
331
431
|
*/
|
|
332
432
|
|
|
333
433
|
declare class Gatherer {
|
|
@@ -581,8 +681,6 @@ interface BackupManifestHeader {
|
|
|
581
681
|
interface BackupStreamSummary {
|
|
582
682
|
stream: string;
|
|
583
683
|
eventCount: number;
|
|
584
|
-
firstChecksum: string;
|
|
585
|
-
lastChecksum: string;
|
|
586
684
|
}
|
|
587
685
|
declare const LINKED_DATA_FORMAT: "semiont-linked-data";
|
|
588
686
|
interface LinkedDataManifest {
|
|
@@ -687,7 +785,6 @@ interface BackupImporterOptions {
|
|
|
687
785
|
interface BackupImportResult {
|
|
688
786
|
manifest: BackupManifestHeader;
|
|
689
787
|
stats: ReplayStats;
|
|
690
|
-
hashChainValid: boolean;
|
|
691
788
|
}
|
|
692
789
|
/**
|
|
693
790
|
* Import a backup archive by replaying events through the EventBus.
|