@semiont/make-meaning 0.4.19 → 0.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +39 -162
- package/dist/index.js +342 -1034
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import { JobQueue
|
|
1
|
+
import { JobQueue } from '@semiont/jobs';
|
|
2
2
|
import { SemiontProject } from '@semiont/core/node';
|
|
3
|
-
import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId,
|
|
3
|
+
import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId, components, AnnotationId, UserId, CreationMethod, ResourceAnnotations, AnnotationCategory, GraphPath, GraphConnection } from '@semiont/core';
|
|
4
4
|
export { AssembledAnnotation, applyBodyOperations, assembleAnnotation } from '@semiont/core';
|
|
5
5
|
import { EventStore, ViewStorage } from '@semiont/event-sourcing';
|
|
6
6
|
import { WorkingTreeStore } from '@semiont/content';
|
|
7
7
|
import { GraphDatabase } from '@semiont/graph';
|
|
8
|
-
import { VectorStore, EmbeddingProvider
|
|
8
|
+
import { VectorStore, EmbeddingProvider } from '@semiont/vectors';
|
|
9
9
|
import { InferenceClient } from '@semiont/inference';
|
|
10
10
|
import { Writable, Readable } from 'node:stream';
|
|
11
11
|
|
|
@@ -72,6 +72,14 @@ interface MakeMeaningConfig {
|
|
|
72
72
|
* BURST_WINDOW_MS = 50 — debounce window before flushing a batch
|
|
73
73
|
* MAX_BATCH_SIZE = 500 — force flush to bound memory
|
|
74
74
|
* IDLE_TIMEOUT_MS = 200 — silence before returning to passthrough
|
|
75
|
+
*
|
|
76
|
+
* ## Per-resource serialization
|
|
77
|
+
*
|
|
78
|
+
* `groupBy(resourceId) + concatMap(...)` is the stream-consumer flavor of
|
|
79
|
+
* per-resource serialization — the same invariant enforced by `Smelter`,
|
|
80
|
+
* `Gatherer`, and (in a different shape) `ViewManager`. See
|
|
81
|
+
* `packages/core/src/serialize-per-key.ts` for the shared primitive used
|
|
82
|
+
* by RPC-style services.
|
|
75
83
|
*/
|
|
76
84
|
|
|
77
85
|
declare class GraphDBConsumer {
|
|
@@ -147,147 +155,6 @@ declare class GraphDBConsumer {
|
|
|
147
155
|
shutdown(): Promise<void>;
|
|
148
156
|
}
|
|
149
157
|
|
|
150
|
-
/**
|
|
151
|
-
* EmbeddingStore
|
|
152
|
-
*
|
|
153
|
-
* Durable file-based cache for pre-computed embedding vectors.
|
|
154
|
-
* Stored under .semiont/embeddings/ — committed to git alongside events,
|
|
155
|
-
* but overwritten in place rather than appended.
|
|
156
|
-
*
|
|
157
|
-
* File layout (same 4-hex Jump Consistent Hash sharding as events):
|
|
158
|
-
*
|
|
159
|
-
* .semiont/embeddings/{ab}/{cd}/{resourceId}.jsonl
|
|
160
|
-
* Line 0: { model, dimensions } ← model header
|
|
161
|
-
* Line N: { chunkIndex, text, embedding[] } ← one chunk per line
|
|
162
|
-
*
|
|
163
|
-
* .semiont/embeddings/{ab}/{cd}/{annotationId}.json
|
|
164
|
-
* { model, dimensions, resourceId, text, embedding[], motivation, entityTypes }
|
|
165
|
-
*
|
|
166
|
-
* rebuildAll() in Smelter reads these files and upserts into Qdrant without
|
|
167
|
-
* calling the embedding provider — unless the stored model doesn't match the
|
|
168
|
-
* configured provider, in which case the file is re-embedded and overwritten.
|
|
169
|
-
*/
|
|
170
|
-
|
|
171
|
-
interface StoredChunk {
|
|
172
|
-
chunkIndex: number;
|
|
173
|
-
text: string;
|
|
174
|
-
embedding: number[];
|
|
175
|
-
}
|
|
176
|
-
interface ResourceEmbeddingFile {
|
|
177
|
-
model: string;
|
|
178
|
-
dimensions: number;
|
|
179
|
-
chunks: StoredChunk[];
|
|
180
|
-
}
|
|
181
|
-
interface AnnotationEmbeddingFile {
|
|
182
|
-
model: string;
|
|
183
|
-
dimensions: number;
|
|
184
|
-
resourceId: string;
|
|
185
|
-
text: string;
|
|
186
|
-
embedding: number[];
|
|
187
|
-
motivation: string;
|
|
188
|
-
entityTypes: string[];
|
|
189
|
-
}
|
|
190
|
-
declare class EmbeddingStore {
|
|
191
|
-
private readonly project;
|
|
192
|
-
constructor(project: SemiontProject);
|
|
193
|
-
private resourceFilePath;
|
|
194
|
-
private annotationFilePath;
|
|
195
|
-
writeResourceChunks(resourceId: ResourceId, model: string, dimensions: number, chunks: StoredChunk[]): Promise<void>;
|
|
196
|
-
readResourceEmbeddings(resourceId: ResourceId): Promise<ResourceEmbeddingFile | null>;
|
|
197
|
-
deleteResourceEmbeddings(resourceId: ResourceId): Promise<void>;
|
|
198
|
-
writeAnnotationEmbedding(annotationId: AnnotationId, resourceId: ResourceId, model: string, dimensions: number, text: string, embedding: number[], motivation: string, entityTypes: string[]): Promise<void>;
|
|
199
|
-
readAnnotationEmbedding(annotationId: AnnotationId): Promise<AnnotationEmbeddingFile | null>;
|
|
200
|
-
deleteAnnotationEmbedding(annotationId: AnnotationId): Promise<void>;
|
|
201
|
-
/**
|
|
202
|
-
* Scan embeddings directory and return all resource IDs (from *.jsonl files).
|
|
203
|
-
*/
|
|
204
|
-
getAllResourceIds(): Promise<string[]>;
|
|
205
|
-
/**
|
|
206
|
-
* Scan embeddings directory and return all annotation IDs (from *.json files).
|
|
207
|
-
*/
|
|
208
|
-
getAllAnnotationIds(): Promise<string[]>;
|
|
209
|
-
private scanIds;
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
/**
|
|
213
|
-
* Smelter Actor
|
|
214
|
-
*
|
|
215
|
-
* Takes raw content, refines it into embedding vectors, persists them to the
|
|
216
|
-
* EmbeddingStore (.semiont/embeddings/), and indexes them into the VectorStore
|
|
217
|
-
* (Qdrant). Peer to the Graph Consumer.
|
|
218
|
-
*
|
|
219
|
-
* Pipeline:
|
|
220
|
-
* 1. Subscribe to resource and annotation events from the EventStore
|
|
221
|
-
* 2. Chunk resource text into overlapping passages
|
|
222
|
-
* 3. Embed each chunk via the configured EmbeddingProvider
|
|
223
|
-
* 4. Write vectors to EmbeddingStore (overwrite-in-place, git-durable)
|
|
224
|
-
* 5. Index vectors into the VectorStore (Qdrant) for fast similarity search
|
|
225
|
-
*
|
|
226
|
-
* Uses the same burst-buffer RxJS pipeline as GraphDBConsumer.
|
|
227
|
-
*/
|
|
228
|
-
|
|
229
|
-
declare class Smelter {
|
|
230
|
-
private eventBus;
|
|
231
|
-
private vectorStore;
|
|
232
|
-
private embeddingProvider;
|
|
233
|
-
private contentStore;
|
|
234
|
-
private embeddingStore;
|
|
235
|
-
private viewStorage;
|
|
236
|
-
private static readonly SMELTER_RELEVANT_EVENTS;
|
|
237
|
-
private static readonly BURST_WINDOW_MS;
|
|
238
|
-
private static readonly MAX_BATCH_SIZE;
|
|
239
|
-
private static readonly IDLE_TIMEOUT_MS;
|
|
240
|
-
private _globalSubscriptions;
|
|
241
|
-
private eventSubject;
|
|
242
|
-
private pipelineSubscription;
|
|
243
|
-
private readonly logger;
|
|
244
|
-
private readonly chunkingConfig;
|
|
245
|
-
constructor(_eventStore: EventStore, eventBus: EventBus, vectorStore: VectorStore, embeddingProvider: EmbeddingProvider, contentStore: WorkingTreeStore, embeddingStore: EmbeddingStore, viewStorage: ViewStorage, logger: Logger, chunkingConfig?: ChunkingConfig);
|
|
246
|
-
initialize(): Promise<void>;
|
|
247
|
-
stop(): Promise<void>;
|
|
248
|
-
/**
|
|
249
|
-
* Rebuild the vector store from the EmbeddingStore (.semiont/embeddings/).
|
|
250
|
-
*
|
|
251
|
-
* For each stored file, checks whether the model matches the configured
|
|
252
|
-
* provider. On mismatch, re-embeds from the stored text and overwrites the
|
|
253
|
-
* file before upserting into Qdrant. On match, loads the stored vectors
|
|
254
|
-
* directly — no embedding provider calls needed.
|
|
255
|
-
*/
|
|
256
|
-
rebuildAll(): Promise<void>;
|
|
257
|
-
private processBatch;
|
|
258
|
-
/**
|
|
259
|
-
* Batch-optimized processing for consecutive events of the same type.
|
|
260
|
-
*/
|
|
261
|
-
private applyBatchByType;
|
|
262
|
-
/**
|
|
263
|
-
* Batch-embed chunks from multiple yield:created events in a single
|
|
264
|
-
* embedBatch() call, then write to EmbeddingStore and index per resource.
|
|
265
|
-
*/
|
|
266
|
-
private batchResourceCreated;
|
|
267
|
-
/**
|
|
268
|
-
* Batch-embed exact texts from multiple mark:added events in a single
|
|
269
|
-
* embedBatch() call, then write to EmbeddingStore and index per annotation.
|
|
270
|
-
*/
|
|
271
|
-
private batchAnnotationAdded;
|
|
272
|
-
private safeProcessEvent;
|
|
273
|
-
private processEvent;
|
|
274
|
-
private handleResourceCreated;
|
|
275
|
-
/**
|
|
276
|
-
* Re-embed a resource whose content has changed in-place.
|
|
277
|
-
*
|
|
278
|
-
* Used by yield:updated and yield:representation-added handlers. Reads the
|
|
279
|
-
* current storageUri from the materialized view (which is updated before the
|
|
280
|
-
* EventBus fires), deletes stale Qdrant vectors, and overwrites the
|
|
281
|
-
* EmbeddingStore file with fresh chunks.
|
|
282
|
-
*/
|
|
283
|
-
private reembedResource;
|
|
284
|
-
private handleResourceUpdated;
|
|
285
|
-
private handleRepresentationAdded;
|
|
286
|
-
private handleResourceArchived;
|
|
287
|
-
private handleAnnotationAdded;
|
|
288
|
-
private handleAnnotationRemoved;
|
|
289
|
-
}
|
|
290
|
-
|
|
291
158
|
/**
|
|
292
159
|
* Knowledge Base
|
|
293
160
|
*
|
|
@@ -299,10 +166,11 @@ declare class Smelter {
|
|
|
299
166
|
* - Content Store (working-tree files, URI-addressed) — via WorkingTreeStore
|
|
300
167
|
* - Graph (eventually consistent relationship projection) — via GraphDatabase
|
|
301
168
|
* - Graph Consumer (event-to-graph projection) — via GraphDBConsumer
|
|
302
|
-
* - Vectors (semantic search) — via VectorStore (optional)
|
|
303
|
-
* - Smelter (event-to-vector projection) — via Smelter (optional)
|
|
169
|
+
* - Vectors (semantic search) — via VectorStore (optional, read-only)
|
|
304
170
|
*
|
|
305
|
-
* The
|
|
171
|
+
* The Smelter (event-to-vector projection) runs as an external actor
|
|
172
|
+
* via @semiont/jobs/smelter-main. It subscribes to domain events via
|
|
173
|
+
* the EventBus gateway, embeds content, and writes to Qdrant directly.
|
|
306
174
|
*/
|
|
307
175
|
|
|
308
176
|
interface KnowledgeBase {
|
|
@@ -312,13 +180,10 @@ interface KnowledgeBase {
|
|
|
312
180
|
graph: GraphDatabase;
|
|
313
181
|
graphConsumer: GraphDBConsumer;
|
|
314
182
|
vectors?: VectorStore;
|
|
315
|
-
smelter?: Smelter;
|
|
316
183
|
projectionsDir: string;
|
|
317
184
|
}
|
|
318
185
|
interface CreateKnowledgeBaseOptions {
|
|
319
186
|
vectorStore?: VectorStore;
|
|
320
|
-
embeddingProvider?: EmbeddingProvider;
|
|
321
|
-
chunkingConfig?: ChunkingConfig;
|
|
322
187
|
skipRebuild?: boolean;
|
|
323
188
|
}
|
|
324
189
|
declare function createKnowledgeBase(eventStore: EventStore, project: SemiontProject, graphDb: GraphDatabase, eventBus: EventBus, logger: Logger, options?: CreateKnowledgeBaseOptions): Promise<KnowledgeBase>;
|
|
@@ -350,9 +215,14 @@ declare function createKnowledgeBase(eventStore: EventStore, project: SemiontPro
|
|
|
350
215
|
* - mark:add-entity-type → entitytype.added → mark:entity-type-added / mark:entity-type-add-failed
|
|
351
216
|
* - mark:update-entity-types → entitytag.added / entitytag.removed
|
|
352
217
|
* - job:start → job.started
|
|
353
|
-
* - job:report-progress → job.progress
|
|
354
218
|
* - job:complete → job.completed
|
|
355
219
|
* - job:fail → job.failed
|
|
220
|
+
*
|
|
221
|
+
* Note: `job:report-progress` is intentionally NOT persisted. Progress
|
|
222
|
+
* events are ephemeral UI feedback and would clutter the event log
|
|
223
|
+
* (historical logs show ~3× as many progress entries as start+complete
|
|
224
|
+
* combined). UI consumers subscribe to the bus directly for live
|
|
225
|
+
* progress; the event log keeps only the durable lifecycle boundaries.
|
|
356
226
|
*/
|
|
357
227
|
|
|
358
228
|
type ResourceDescriptor$3 = components['schemas']['ResourceDescriptor'];
|
|
@@ -378,7 +248,6 @@ declare class Stower {
|
|
|
378
248
|
private handleAddEntityType;
|
|
379
249
|
private handleUpdateEntityTypes;
|
|
380
250
|
private handleJobStart;
|
|
381
|
-
private handleJobReportProgress;
|
|
382
251
|
private handleJobComplete;
|
|
383
252
|
private handleJobFail;
|
|
384
253
|
stop(): Promise<void>;
|
|
@@ -400,6 +269,14 @@ declare class Stower {
|
|
|
400
269
|
* - gather:resource-requested — resource-level LLM context assembly
|
|
401
270
|
*
|
|
402
271
|
* RxJS pipeline uses groupBy(resourceId) + concatMap for per-resource isolation.
|
|
272
|
+
*
|
|
273
|
+
* ## Per-resource serialization
|
|
274
|
+
*
|
|
275
|
+
* `groupBy(resourceId) + concatMap(...)` is the stream-consumer flavor of
|
|
276
|
+
* per-resource serialization — the same invariant enforced by `Smelter`,
|
|
277
|
+
* `GraphDBConsumer`, and (in a different shape) `ViewManager`. See
|
|
278
|
+
* `packages/core/src/serialize-per-key.ts` for the shared primitive used
|
|
279
|
+
* by RPC-style services.
|
|
403
280
|
*/
|
|
404
281
|
|
|
405
282
|
declare class Gatherer {
|
|
@@ -578,17 +455,8 @@ declare function stopKnowledgeSystem(ks: KnowledgeSystem): Promise<void>;
|
|
|
578
455
|
interface MakeMeaningService {
|
|
579
456
|
knowledgeSystem: KnowledgeSystem;
|
|
580
457
|
jobQueue: JobQueue;
|
|
581
|
-
workers: Workers;
|
|
582
458
|
stop: () => Promise<void>;
|
|
583
459
|
}
|
|
584
|
-
type Workers = {
|
|
585
|
-
detection: ReferenceAnnotationWorker;
|
|
586
|
-
generation: GenerationWorker;
|
|
587
|
-
highlight: HighlightAnnotationWorker;
|
|
588
|
-
assessment: AssessmentAnnotationWorker;
|
|
589
|
-
comment: CommentAnnotationWorker;
|
|
590
|
-
tag: TagAnnotationWorker;
|
|
591
|
-
};
|
|
592
460
|
declare function startMakeMeaning(project: SemiontProject, config: MakeMeaningConfig, eventBus: EventBus, logger: Logger, options?: {
|
|
593
461
|
skipRebuild?: boolean;
|
|
594
462
|
}): Promise<MakeMeaningService>;
|
|
@@ -864,6 +732,7 @@ declare function importLinkedData(archive: Readable, options: LinkedDataImporter
|
|
|
864
732
|
*/
|
|
865
733
|
|
|
866
734
|
type ContentFormat = components['schemas']['ContentFormat'];
|
|
735
|
+
type Agent$1 = components['schemas']['Agent'];
|
|
867
736
|
interface UpdateResourceInput {
|
|
868
737
|
resourceId: ResourceId;
|
|
869
738
|
userId: UserId;
|
|
@@ -881,6 +750,14 @@ interface CreateResourceInput {
|
|
|
881
750
|
language?: string;
|
|
882
751
|
entityTypes?: string[];
|
|
883
752
|
creationMethod?: CreationMethod;
|
|
753
|
+
/** Provenance for AI-generated resources: source resource + annotation. */
|
|
754
|
+
generatedFrom?: {
|
|
755
|
+
resourceId?: string;
|
|
756
|
+
annotationId?: string;
|
|
757
|
+
};
|
|
758
|
+
generationPrompt?: string;
|
|
759
|
+
generator?: Agent$1 | Agent$1[];
|
|
760
|
+
isDraft?: boolean;
|
|
884
761
|
}
|
|
885
762
|
declare class ResourceOperations {
|
|
886
763
|
/**
|
|
@@ -1171,4 +1048,4 @@ declare function generateReferenceSuggestions(referenceTitle: string, client: In
|
|
|
1171
1048
|
declare const PACKAGE_NAME = "@semiont/make-meaning";
|
|
1172
1049
|
declare const VERSION = "0.1.0";
|
|
1173
1050
|
|
|
1174
|
-
export { AnnotationContext, AnnotationOperations, BACKUP_FORMAT, type BackupContentReader, type BackupEventStoreReader, type BackupExporterOptions, type BackupImportResult, type BackupImporterOptions, type BackupManifestHeader, type BackupStreamSummary, Browser, type BuildContextOptions, CloneTokenManager, type ContentBlobResolver, type CreateAnnotationResult, type CreateResourceInput, type CreateResourceResult, FORMAT_VERSION, Gatherer, GraphContext, type GraphEdge, type GraphNode, type GraphRepresentation, type KnowledgeBase, type KnowledgeSystem, LLMContext, type LLMContextOptions, type LinkedDataContentReader, type LinkedDataExporterOptions, type LinkedDataImportResult, type LinkedDataImporterOptions, type LinkedDataViewReader, type ListResourcesFilters, type MakeMeaningConfig, type MakeMeaningService, Matcher, PACKAGE_NAME, type ReplayStats, ResourceContext, ResourceOperations,
|
|
1051
|
+
export { AnnotationContext, AnnotationOperations, BACKUP_FORMAT, type BackupContentReader, type BackupEventStoreReader, type BackupExporterOptions, type BackupImportResult, type BackupImporterOptions, type BackupManifestHeader, type BackupStreamSummary, Browser, type BuildContextOptions, CloneTokenManager, type ContentBlobResolver, type CreateAnnotationResult, type CreateResourceInput, type CreateResourceResult, FORMAT_VERSION, Gatherer, GraphContext, type GraphEdge, type GraphNode, type GraphRepresentation, type KnowledgeBase, type KnowledgeSystem, LLMContext, type LLMContextOptions, type LinkedDataContentReader, type LinkedDataExporterOptions, type LinkedDataImportResult, type LinkedDataImporterOptions, type LinkedDataViewReader, type ListResourcesFilters, type MakeMeaningConfig, type MakeMeaningService, Matcher, PACKAGE_NAME, type ReplayStats, ResourceContext, ResourceOperations, Stower, type UpdateAnnotationBodyResult, type UpdateResourceInput, VERSION, bootstrapEntityTypes, createKnowledgeBase, exportBackup, exportLinkedData, generateReferenceSuggestions, generateResourceSummary, importBackup, importLinkedData, isBackupManifest, readEntityTypesProjection, startMakeMeaning, stopKnowledgeSystem, validateManifestVersion };
|