@semiont/make-meaning 0.4.20 → 0.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +25 -176
- package/dist/index.js +329 -1020
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import { JobQueue
|
|
1
|
+
import { JobQueue } from '@semiont/jobs';
|
|
2
2
|
import { SemiontProject } from '@semiont/core/node';
|
|
3
|
-
import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId,
|
|
3
|
+
import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId, components, AnnotationId, UserId, CreationMethod, ResourceAnnotations, AnnotationCategory, GraphPath, GraphConnection } from '@semiont/core';
|
|
4
4
|
export { AssembledAnnotation, applyBodyOperations, assembleAnnotation } from '@semiont/core';
|
|
5
5
|
import { EventStore, ViewStorage } from '@semiont/event-sourcing';
|
|
6
6
|
import { WorkingTreeStore } from '@semiont/content';
|
|
7
7
|
import { GraphDatabase } from '@semiont/graph';
|
|
8
|
-
import { VectorStore, EmbeddingProvider
|
|
8
|
+
import { VectorStore, EmbeddingProvider } from '@semiont/vectors';
|
|
9
9
|
import { InferenceClient } from '@semiont/inference';
|
|
10
10
|
import { Writable, Readable } from 'node:stream';
|
|
11
11
|
|
|
@@ -79,8 +79,7 @@ interface MakeMeaningConfig {
|
|
|
79
79
|
* per-resource serialization — the same invariant enforced by `Smelter`,
|
|
80
80
|
* `Gatherer`, and (in a different shape) `ViewManager`. See
|
|
81
81
|
* `packages/core/src/serialize-per-key.ts` for the shared primitive used
|
|
82
|
-
* by RPC-style services, and `.plans/PerResourceSerializer.md` for the
|
|
83
|
-
* broader design that would unify the two shapes.
|
|
82
|
+
* by RPC-style services.
|
|
84
83
|
*/
|
|
85
84
|
|
|
86
85
|
declare class GraphDBConsumer {
|
|
@@ -156,157 +155,6 @@ declare class GraphDBConsumer {
|
|
|
156
155
|
shutdown(): Promise<void>;
|
|
157
156
|
}
|
|
158
157
|
|
|
159
|
-
/**
|
|
160
|
-
* EmbeddingStore
|
|
161
|
-
*
|
|
162
|
-
* Durable file-based cache for pre-computed embedding vectors.
|
|
163
|
-
* Stored under .semiont/embeddings/ — committed to git alongside events,
|
|
164
|
-
* but overwritten in place rather than appended.
|
|
165
|
-
*
|
|
166
|
-
* File layout (same 4-hex Jump Consistent Hash sharding as events):
|
|
167
|
-
*
|
|
168
|
-
* .semiont/embeddings/{ab}/{cd}/{resourceId}.jsonl
|
|
169
|
-
* Line 0: { model, dimensions } ← model header
|
|
170
|
-
* Line N: { chunkIndex, text, embedding[] } ← one chunk per line
|
|
171
|
-
*
|
|
172
|
-
* .semiont/embeddings/{ab}/{cd}/{annotationId}.json
|
|
173
|
-
* { model, dimensions, resourceId, text, embedding[], motivation, entityTypes }
|
|
174
|
-
*
|
|
175
|
-
* rebuildAll() in Smelter reads these files and upserts into Qdrant without
|
|
176
|
-
* calling the embedding provider — unless the stored model doesn't match the
|
|
177
|
-
* configured provider, in which case the file is re-embedded and overwritten.
|
|
178
|
-
*/
|
|
179
|
-
|
|
180
|
-
interface StoredChunk {
|
|
181
|
-
chunkIndex: number;
|
|
182
|
-
text: string;
|
|
183
|
-
embedding: number[];
|
|
184
|
-
}
|
|
185
|
-
interface ResourceEmbeddingFile {
|
|
186
|
-
model: string;
|
|
187
|
-
dimensions: number;
|
|
188
|
-
chunks: StoredChunk[];
|
|
189
|
-
}
|
|
190
|
-
interface AnnotationEmbeddingFile {
|
|
191
|
-
model: string;
|
|
192
|
-
dimensions: number;
|
|
193
|
-
resourceId: string;
|
|
194
|
-
text: string;
|
|
195
|
-
embedding: number[];
|
|
196
|
-
motivation: string;
|
|
197
|
-
entityTypes: string[];
|
|
198
|
-
}
|
|
199
|
-
declare class EmbeddingStore {
|
|
200
|
-
private readonly project;
|
|
201
|
-
constructor(project: SemiontProject);
|
|
202
|
-
private resourceFilePath;
|
|
203
|
-
private annotationFilePath;
|
|
204
|
-
writeResourceChunks(resourceId: ResourceId, model: string, dimensions: number, chunks: StoredChunk[]): Promise<void>;
|
|
205
|
-
readResourceEmbeddings(resourceId: ResourceId): Promise<ResourceEmbeddingFile | null>;
|
|
206
|
-
deleteResourceEmbeddings(resourceId: ResourceId): Promise<void>;
|
|
207
|
-
writeAnnotationEmbedding(annotationId: AnnotationId, resourceId: ResourceId, model: string, dimensions: number, text: string, embedding: number[], motivation: string, entityTypes: string[]): Promise<void>;
|
|
208
|
-
readAnnotationEmbedding(annotationId: AnnotationId): Promise<AnnotationEmbeddingFile | null>;
|
|
209
|
-
deleteAnnotationEmbedding(annotationId: AnnotationId): Promise<void>;
|
|
210
|
-
/**
|
|
211
|
-
* Scan embeddings directory and return all resource IDs (from *.jsonl files).
|
|
212
|
-
*/
|
|
213
|
-
getAllResourceIds(): Promise<string[]>;
|
|
214
|
-
/**
|
|
215
|
-
* Scan embeddings directory and return all annotation IDs (from *.json files).
|
|
216
|
-
*/
|
|
217
|
-
getAllAnnotationIds(): Promise<string[]>;
|
|
218
|
-
private scanIds;
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
/**
|
|
222
|
-
* Smelter Actor
|
|
223
|
-
*
|
|
224
|
-
* Takes raw content, refines it into embedding vectors, persists them to the
|
|
225
|
-
* EmbeddingStore (.semiont/embeddings/), and indexes them into the VectorStore
|
|
226
|
-
* (Qdrant). Peer to the Graph Consumer.
|
|
227
|
-
*
|
|
228
|
-
* Pipeline:
|
|
229
|
-
* 1. Subscribe to resource and annotation events from the EventStore
|
|
230
|
-
* 2. Chunk resource text into overlapping passages
|
|
231
|
-
* 3. Embed each chunk via the configured EmbeddingProvider
|
|
232
|
-
* 4. Write vectors to EmbeddingStore (overwrite-in-place, git-durable)
|
|
233
|
-
* 5. Index vectors into the VectorStore (Qdrant) for fast similarity search
|
|
234
|
-
*
|
|
235
|
-
* Uses the same burst-buffer RxJS pipeline as GraphDBConsumer.
|
|
236
|
-
*
|
|
237
|
-
* ## Per-resource serialization
|
|
238
|
-
*
|
|
239
|
-
* Smelter processes events strictly in order per resourceId via
|
|
240
|
-
* `groupBy(resourceId) + concatMap(...)`. This is the stream-consumer
|
|
241
|
-
* flavor of per-resource serialization — the same invariant enforced by
|
|
242
|
-
* `GraphDBConsumer`, `Gatherer`, and (in a different shape) `ViewManager`.
|
|
243
|
-
* See `packages/core/src/serialize-per-key.ts` for the shared primitive
|
|
244
|
-
* used by RPC-style services, and `.plans/PerResourceSerializer.md` for
|
|
245
|
-
* the broader design that would unify the two shapes.
|
|
246
|
-
*/
|
|
247
|
-
|
|
248
|
-
declare class Smelter {
|
|
249
|
-
private eventBus;
|
|
250
|
-
private vectorStore;
|
|
251
|
-
private embeddingProvider;
|
|
252
|
-
private contentStore;
|
|
253
|
-
private embeddingStore;
|
|
254
|
-
private viewStorage;
|
|
255
|
-
private static readonly SMELTER_RELEVANT_EVENTS;
|
|
256
|
-
private static readonly BURST_WINDOW_MS;
|
|
257
|
-
private static readonly MAX_BATCH_SIZE;
|
|
258
|
-
private static readonly IDLE_TIMEOUT_MS;
|
|
259
|
-
private _globalSubscriptions;
|
|
260
|
-
private eventSubject;
|
|
261
|
-
private pipelineSubscription;
|
|
262
|
-
private readonly logger;
|
|
263
|
-
private readonly chunkingConfig;
|
|
264
|
-
constructor(_eventStore: EventStore, eventBus: EventBus, vectorStore: VectorStore, embeddingProvider: EmbeddingProvider, contentStore: WorkingTreeStore, embeddingStore: EmbeddingStore, viewStorage: ViewStorage, logger: Logger, chunkingConfig?: ChunkingConfig);
|
|
265
|
-
initialize(): Promise<void>;
|
|
266
|
-
stop(): Promise<void>;
|
|
267
|
-
/**
|
|
268
|
-
* Rebuild the vector store from the EmbeddingStore (.semiont/embeddings/).
|
|
269
|
-
*
|
|
270
|
-
* For each stored file, checks whether the model matches the configured
|
|
271
|
-
* provider. On mismatch, re-embeds from the stored text and overwrites the
|
|
272
|
-
* file before upserting into Qdrant. On match, loads the stored vectors
|
|
273
|
-
* directly — no embedding provider calls needed.
|
|
274
|
-
*/
|
|
275
|
-
rebuildAll(): Promise<void>;
|
|
276
|
-
private processBatch;
|
|
277
|
-
/**
|
|
278
|
-
* Batch-optimized processing for consecutive events of the same type.
|
|
279
|
-
*/
|
|
280
|
-
private applyBatchByType;
|
|
281
|
-
/**
|
|
282
|
-
* Batch-embed chunks from multiple yield:created events in a single
|
|
283
|
-
* embedBatch() call, then write to EmbeddingStore and index per resource.
|
|
284
|
-
*/
|
|
285
|
-
private batchResourceCreated;
|
|
286
|
-
/**
|
|
287
|
-
* Batch-embed exact texts from multiple mark:added events in a single
|
|
288
|
-
* embedBatch() call, then write to EmbeddingStore and index per annotation.
|
|
289
|
-
*/
|
|
290
|
-
private batchAnnotationAdded;
|
|
291
|
-
private safeProcessEvent;
|
|
292
|
-
private processEvent;
|
|
293
|
-
private handleResourceCreated;
|
|
294
|
-
/**
|
|
295
|
-
* Re-embed a resource whose content has changed in-place.
|
|
296
|
-
*
|
|
297
|
-
* Used by yield:updated and yield:representation-added handlers. Reads the
|
|
298
|
-
* current storageUri from the materialized view (which is updated before the
|
|
299
|
-
* EventBus fires), deletes stale Qdrant vectors, and overwrites the
|
|
300
|
-
* EmbeddingStore file with fresh chunks.
|
|
301
|
-
*/
|
|
302
|
-
private reembedResource;
|
|
303
|
-
private handleResourceUpdated;
|
|
304
|
-
private handleRepresentationAdded;
|
|
305
|
-
private handleResourceArchived;
|
|
306
|
-
private handleAnnotationAdded;
|
|
307
|
-
private handleAnnotationRemoved;
|
|
308
|
-
}
|
|
309
|
-
|
|
310
158
|
/**
|
|
311
159
|
* Knowledge Base
|
|
312
160
|
*
|
|
@@ -318,10 +166,11 @@ declare class Smelter {
|
|
|
318
166
|
* - Content Store (working-tree files, URI-addressed) — via WorkingTreeStore
|
|
319
167
|
* - Graph (eventually consistent relationship projection) — via GraphDatabase
|
|
320
168
|
* - Graph Consumer (event-to-graph projection) — via GraphDBConsumer
|
|
321
|
-
* - Vectors (semantic search) — via VectorStore (optional)
|
|
322
|
-
* - Smelter (event-to-vector projection) — via Smelter (optional)
|
|
169
|
+
* - Vectors (semantic search) — via VectorStore (optional, read-only)
|
|
323
170
|
*
|
|
324
|
-
* The
|
|
171
|
+
* The Smelter (event-to-vector projection) runs as an external actor
|
|
172
|
+
* via @semiont/jobs/smelter-main. It subscribes to domain events via
|
|
173
|
+
* the EventBus gateway, embeds content, and writes to Qdrant directly.
|
|
325
174
|
*/
|
|
326
175
|
|
|
327
176
|
interface KnowledgeBase {
|
|
@@ -331,13 +180,10 @@ interface KnowledgeBase {
|
|
|
331
180
|
graph: GraphDatabase;
|
|
332
181
|
graphConsumer: GraphDBConsumer;
|
|
333
182
|
vectors?: VectorStore;
|
|
334
|
-
smelter?: Smelter;
|
|
335
183
|
projectionsDir: string;
|
|
336
184
|
}
|
|
337
185
|
interface CreateKnowledgeBaseOptions {
|
|
338
186
|
vectorStore?: VectorStore;
|
|
339
|
-
embeddingProvider?: EmbeddingProvider;
|
|
340
|
-
chunkingConfig?: ChunkingConfig;
|
|
341
187
|
skipRebuild?: boolean;
|
|
342
188
|
}
|
|
343
189
|
declare function createKnowledgeBase(eventStore: EventStore, project: SemiontProject, graphDb: GraphDatabase, eventBus: EventBus, logger: Logger, options?: CreateKnowledgeBaseOptions): Promise<KnowledgeBase>;
|
|
@@ -369,9 +215,14 @@ declare function createKnowledgeBase(eventStore: EventStore, project: SemiontPro
|
|
|
369
215
|
* - mark:add-entity-type → entitytype.added → mark:entity-type-added / mark:entity-type-add-failed
|
|
370
216
|
* - mark:update-entity-types → entitytag.added / entitytag.removed
|
|
371
217
|
* - job:start → job.started
|
|
372
|
-
* - job:report-progress → job.progress
|
|
373
218
|
* - job:complete → job.completed
|
|
374
219
|
* - job:fail → job.failed
|
|
220
|
+
*
|
|
221
|
+
* Note: `job:report-progress` is intentionally NOT persisted. Progress
|
|
222
|
+
* events are ephemeral UI feedback and would clutter the event log
|
|
223
|
+
* (historical logs show ~3× as many progress entries as start+complete
|
|
224
|
+
* combined). UI consumers subscribe to the bus directly for live
|
|
225
|
+
* progress; the event log keeps only the durable lifecycle boundaries.
|
|
375
226
|
*/
|
|
376
227
|
|
|
377
228
|
type ResourceDescriptor$3 = components['schemas']['ResourceDescriptor'];
|
|
@@ -397,7 +248,6 @@ declare class Stower {
|
|
|
397
248
|
private handleAddEntityType;
|
|
398
249
|
private handleUpdateEntityTypes;
|
|
399
250
|
private handleJobStart;
|
|
400
|
-
private handleJobReportProgress;
|
|
401
251
|
private handleJobComplete;
|
|
402
252
|
private handleJobFail;
|
|
403
253
|
stop(): Promise<void>;
|
|
@@ -426,8 +276,7 @@ declare class Stower {
|
|
|
426
276
|
* per-resource serialization — the same invariant enforced by `Smelter`,
|
|
427
277
|
* `GraphDBConsumer`, and (in a different shape) `ViewManager`. See
|
|
428
278
|
* `packages/core/src/serialize-per-key.ts` for the shared primitive used
|
|
429
|
-
* by RPC-style services, and `.plans/PerResourceSerializer.md` for the
|
|
430
|
-
* broader design that would unify the two shapes.
|
|
279
|
+
* by RPC-style services.
|
|
431
280
|
*/
|
|
432
281
|
|
|
433
282
|
declare class Gatherer {
|
|
@@ -606,17 +455,8 @@ declare function stopKnowledgeSystem(ks: KnowledgeSystem): Promise<void>;
|
|
|
606
455
|
interface MakeMeaningService {
|
|
607
456
|
knowledgeSystem: KnowledgeSystem;
|
|
608
457
|
jobQueue: JobQueue;
|
|
609
|
-
workers: Workers;
|
|
610
458
|
stop: () => Promise<void>;
|
|
611
459
|
}
|
|
612
|
-
type Workers = {
|
|
613
|
-
detection: ReferenceAnnotationWorker;
|
|
614
|
-
generation: GenerationWorker;
|
|
615
|
-
highlight: HighlightAnnotationWorker;
|
|
616
|
-
assessment: AssessmentAnnotationWorker;
|
|
617
|
-
comment: CommentAnnotationWorker;
|
|
618
|
-
tag: TagAnnotationWorker;
|
|
619
|
-
};
|
|
620
460
|
declare function startMakeMeaning(project: SemiontProject, config: MakeMeaningConfig, eventBus: EventBus, logger: Logger, options?: {
|
|
621
461
|
skipRebuild?: boolean;
|
|
622
462
|
}): Promise<MakeMeaningService>;
|
|
@@ -892,6 +732,7 @@ declare function importLinkedData(archive: Readable, options: LinkedDataImporter
|
|
|
892
732
|
*/
|
|
893
733
|
|
|
894
734
|
type ContentFormat = components['schemas']['ContentFormat'];
|
|
735
|
+
type Agent$1 = components['schemas']['Agent'];
|
|
895
736
|
interface UpdateResourceInput {
|
|
896
737
|
resourceId: ResourceId;
|
|
897
738
|
userId: UserId;
|
|
@@ -909,6 +750,14 @@ interface CreateResourceInput {
|
|
|
909
750
|
language?: string;
|
|
910
751
|
entityTypes?: string[];
|
|
911
752
|
creationMethod?: CreationMethod;
|
|
753
|
+
/** Provenance for AI-generated resources: source resource + annotation. */
|
|
754
|
+
generatedFrom?: {
|
|
755
|
+
resourceId?: string;
|
|
756
|
+
annotationId?: string;
|
|
757
|
+
};
|
|
758
|
+
generationPrompt?: string;
|
|
759
|
+
generator?: Agent$1 | Agent$1[];
|
|
760
|
+
isDraft?: boolean;
|
|
912
761
|
}
|
|
913
762
|
declare class ResourceOperations {
|
|
914
763
|
/**
|
|
@@ -1199,4 +1048,4 @@ declare function generateReferenceSuggestions(referenceTitle: string, client: In
|
|
|
1199
1048
|
declare const PACKAGE_NAME = "@semiont/make-meaning";
|
|
1200
1049
|
declare const VERSION = "0.1.0";
|
|
1201
1050
|
|
|
1202
|
-
export { AnnotationContext, AnnotationOperations, BACKUP_FORMAT, type BackupContentReader, type BackupEventStoreReader, type BackupExporterOptions, type BackupImportResult, type BackupImporterOptions, type BackupManifestHeader, type BackupStreamSummary, Browser, type BuildContextOptions, CloneTokenManager, type ContentBlobResolver, type CreateAnnotationResult, type CreateResourceInput, type CreateResourceResult, FORMAT_VERSION, Gatherer, GraphContext, type GraphEdge, type GraphNode, type GraphRepresentation, type KnowledgeBase, type KnowledgeSystem, LLMContext, type LLMContextOptions, type LinkedDataContentReader, type LinkedDataExporterOptions, type LinkedDataImportResult, type LinkedDataImporterOptions, type LinkedDataViewReader, type ListResourcesFilters, type MakeMeaningConfig, type MakeMeaningService, Matcher, PACKAGE_NAME, type ReplayStats, ResourceContext, ResourceOperations,
|
|
1051
|
+
export { AnnotationContext, AnnotationOperations, BACKUP_FORMAT, type BackupContentReader, type BackupEventStoreReader, type BackupExporterOptions, type BackupImportResult, type BackupImporterOptions, type BackupManifestHeader, type BackupStreamSummary, Browser, type BuildContextOptions, CloneTokenManager, type ContentBlobResolver, type CreateAnnotationResult, type CreateResourceInput, type CreateResourceResult, FORMAT_VERSION, Gatherer, GraphContext, type GraphEdge, type GraphNode, type GraphRepresentation, type KnowledgeBase, type KnowledgeSystem, LLMContext, type LLMContextOptions, type LinkedDataContentReader, type LinkedDataExporterOptions, type LinkedDataImportResult, type LinkedDataImporterOptions, type LinkedDataViewReader, type ListResourcesFilters, type MakeMeaningConfig, type MakeMeaningService, Matcher, PACKAGE_NAME, type ReplayStats, ResourceContext, ResourceOperations, Stower, type UpdateAnnotationBodyResult, type UpdateResourceInput, VERSION, bootstrapEntityTypes, createKnowledgeBase, exportBackup, exportLinkedData, generateReferenceSuggestions, generateResourceSummary, importBackup, importLinkedData, isBackupManifest, readEntityTypesProjection, startMakeMeaning, stopKnowledgeSystem, validateManifestVersion };
|