@semiont/make-meaning 0.4.20 → 0.4.21

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
package/dist/index.d.ts CHANGED
@@ -1,11 +1,11 @@
1
- import { JobQueue, ReferenceAnnotationWorker, GenerationWorker, HighlightAnnotationWorker, AssessmentAnnotationWorker, CommentAnnotationWorker, TagAnnotationWorker } from '@semiont/jobs';
1
+ import { JobQueue } from '@semiont/jobs';
2
2
  import { SemiontProject } from '@semiont/core/node';
3
- import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId, AnnotationId, components, UserId, CreationMethod, ResourceAnnotations, AnnotationCategory, GraphPath, GraphConnection } from '@semiont/core';
3
+ import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId, components, AnnotationId, UserId, CreationMethod, ResourceAnnotations, AnnotationCategory, GraphPath, GraphConnection } from '@semiont/core';
4
4
  export { AssembledAnnotation, applyBodyOperations, assembleAnnotation } from '@semiont/core';
5
5
  import { EventStore, ViewStorage } from '@semiont/event-sourcing';
6
6
  import { WorkingTreeStore } from '@semiont/content';
7
7
  import { GraphDatabase } from '@semiont/graph';
8
- import { VectorStore, EmbeddingProvider, ChunkingConfig } from '@semiont/vectors';
8
+ import { VectorStore, EmbeddingProvider } from '@semiont/vectors';
9
9
  import { InferenceClient } from '@semiont/inference';
10
10
  import { Writable, Readable } from 'node:stream';
11
11
 
@@ -79,8 +79,7 @@ interface MakeMeaningConfig {
79
79
  * per-resource serialization — the same invariant enforced by `Smelter`,
80
80
  * `Gatherer`, and (in a different shape) `ViewManager`. See
81
81
  * `packages/core/src/serialize-per-key.ts` for the shared primitive used
82
- * by RPC-style services, and `.plans/PerResourceSerializer.md` for the
83
- * broader design that would unify the two shapes.
82
+ * by RPC-style services.
84
83
  */
85
84
 
86
85
  declare class GraphDBConsumer {
@@ -156,157 +155,6 @@ declare class GraphDBConsumer {
156
155
  shutdown(): Promise<void>;
157
156
  }
158
157
 
159
- /**
160
- * EmbeddingStore
161
- *
162
- * Durable file-based cache for pre-computed embedding vectors.
163
- * Stored under .semiont/embeddings/ — committed to git alongside events,
164
- * but overwritten in place rather than appended.
165
- *
166
- * File layout (same 4-hex Jump Consistent Hash sharding as events):
167
- *
168
- * .semiont/embeddings/{ab}/{cd}/{resourceId}.jsonl
169
- * Line 0: { model, dimensions } ← model header
170
- * Line N: { chunkIndex, text, embedding[] } ← one chunk per line
171
- *
172
- * .semiont/embeddings/{ab}/{cd}/{annotationId}.json
173
- * { model, dimensions, resourceId, text, embedding[], motivation, entityTypes }
174
- *
175
- * rebuildAll() in Smelter reads these files and upserts into Qdrant without
176
- * calling the embedding provider — unless the stored model doesn't match the
177
- * configured provider, in which case the file is re-embedded and overwritten.
178
- */
179
-
180
- interface StoredChunk {
181
- chunkIndex: number;
182
- text: string;
183
- embedding: number[];
184
- }
185
- interface ResourceEmbeddingFile {
186
- model: string;
187
- dimensions: number;
188
- chunks: StoredChunk[];
189
- }
190
- interface AnnotationEmbeddingFile {
191
- model: string;
192
- dimensions: number;
193
- resourceId: string;
194
- text: string;
195
- embedding: number[];
196
- motivation: string;
197
- entityTypes: string[];
198
- }
199
- declare class EmbeddingStore {
200
- private readonly project;
201
- constructor(project: SemiontProject);
202
- private resourceFilePath;
203
- private annotationFilePath;
204
- writeResourceChunks(resourceId: ResourceId, model: string, dimensions: number, chunks: StoredChunk[]): Promise<void>;
205
- readResourceEmbeddings(resourceId: ResourceId): Promise<ResourceEmbeddingFile | null>;
206
- deleteResourceEmbeddings(resourceId: ResourceId): Promise<void>;
207
- writeAnnotationEmbedding(annotationId: AnnotationId, resourceId: ResourceId, model: string, dimensions: number, text: string, embedding: number[], motivation: string, entityTypes: string[]): Promise<void>;
208
- readAnnotationEmbedding(annotationId: AnnotationId): Promise<AnnotationEmbeddingFile | null>;
209
- deleteAnnotationEmbedding(annotationId: AnnotationId): Promise<void>;
210
- /**
211
- * Scan embeddings directory and return all resource IDs (from *.jsonl files).
212
- */
213
- getAllResourceIds(): Promise<string[]>;
214
- /**
215
- * Scan embeddings directory and return all annotation IDs (from *.json files).
216
- */
217
- getAllAnnotationIds(): Promise<string[]>;
218
- private scanIds;
219
- }
220
-
221
- /**
222
- * Smelter Actor
223
- *
224
- * Takes raw content, refines it into embedding vectors, persists them to the
225
- * EmbeddingStore (.semiont/embeddings/), and indexes them into the VectorStore
226
- * (Qdrant). Peer to the Graph Consumer.
227
- *
228
- * Pipeline:
229
- * 1. Subscribe to resource and annotation events from the EventStore
230
- * 2. Chunk resource text into overlapping passages
231
- * 3. Embed each chunk via the configured EmbeddingProvider
232
- * 4. Write vectors to EmbeddingStore (overwrite-in-place, git-durable)
233
- * 5. Index vectors into the VectorStore (Qdrant) for fast similarity search
234
- *
235
- * Uses the same burst-buffer RxJS pipeline as GraphDBConsumer.
236
- *
237
- * ## Per-resource serialization
238
- *
239
- * Smelter processes events strictly in order per resourceId via
240
- * `groupBy(resourceId) + concatMap(...)`. This is the stream-consumer
241
- * flavor of per-resource serialization — the same invariant enforced by
242
- * `GraphDBConsumer`, `Gatherer`, and (in a different shape) `ViewManager`.
243
- * See `packages/core/src/serialize-per-key.ts` for the shared primitive
244
- * used by RPC-style services, and `.plans/PerResourceSerializer.md` for
245
- * the broader design that would unify the two shapes.
246
- */
247
-
248
- declare class Smelter {
249
- private eventBus;
250
- private vectorStore;
251
- private embeddingProvider;
252
- private contentStore;
253
- private embeddingStore;
254
- private viewStorage;
255
- private static readonly SMELTER_RELEVANT_EVENTS;
256
- private static readonly BURST_WINDOW_MS;
257
- private static readonly MAX_BATCH_SIZE;
258
- private static readonly IDLE_TIMEOUT_MS;
259
- private _globalSubscriptions;
260
- private eventSubject;
261
- private pipelineSubscription;
262
- private readonly logger;
263
- private readonly chunkingConfig;
264
- constructor(_eventStore: EventStore, eventBus: EventBus, vectorStore: VectorStore, embeddingProvider: EmbeddingProvider, contentStore: WorkingTreeStore, embeddingStore: EmbeddingStore, viewStorage: ViewStorage, logger: Logger, chunkingConfig?: ChunkingConfig);
265
- initialize(): Promise<void>;
266
- stop(): Promise<void>;
267
- /**
268
- * Rebuild the vector store from the EmbeddingStore (.semiont/embeddings/).
269
- *
270
- * For each stored file, checks whether the model matches the configured
271
- * provider. On mismatch, re-embeds from the stored text and overwrites the
272
- * file before upserting into Qdrant. On match, loads the stored vectors
273
- * directly — no embedding provider calls needed.
274
- */
275
- rebuildAll(): Promise<void>;
276
- private processBatch;
277
- /**
278
- * Batch-optimized processing for consecutive events of the same type.
279
- */
280
- private applyBatchByType;
281
- /**
282
- * Batch-embed chunks from multiple yield:created events in a single
283
- * embedBatch() call, then write to EmbeddingStore and index per resource.
284
- */
285
- private batchResourceCreated;
286
- /**
287
- * Batch-embed exact texts from multiple mark:added events in a single
288
- * embedBatch() call, then write to EmbeddingStore and index per annotation.
289
- */
290
- private batchAnnotationAdded;
291
- private safeProcessEvent;
292
- private processEvent;
293
- private handleResourceCreated;
294
- /**
295
- * Re-embed a resource whose content has changed in-place.
296
- *
297
- * Used by yield:updated and yield:representation-added handlers. Reads the
298
- * current storageUri from the materialized view (which is updated before the
299
- * EventBus fires), deletes stale Qdrant vectors, and overwrites the
300
- * EmbeddingStore file with fresh chunks.
301
- */
302
- private reembedResource;
303
- private handleResourceUpdated;
304
- private handleRepresentationAdded;
305
- private handleResourceArchived;
306
- private handleAnnotationAdded;
307
- private handleAnnotationRemoved;
308
- }
309
-
310
158
  /**
311
159
  * Knowledge Base
312
160
  *
@@ -318,10 +166,11 @@ declare class Smelter {
318
166
  * - Content Store (working-tree files, URI-addressed) — via WorkingTreeStore
319
167
  * - Graph (eventually consistent relationship projection) — via GraphDatabase
320
168
  * - Graph Consumer (event-to-graph projection) — via GraphDBConsumer
321
- * - Vectors (semantic search) — via VectorStore (optional)
322
- * - Smelter (event-to-vector projection) — via Smelter (optional)
169
+ * - Vectors (semantic search) — via VectorStore (optional, read-only)
323
170
  *
324
- * The Gatherer and Matcher are the only actors that read from these stores directly.
171
+ * The Smelter (event-to-vector projection) runs as an external actor
172
+ * via @semiont/jobs/smelter-main. It subscribes to domain events via
173
+ * the EventBus gateway, embeds content, and writes to Qdrant directly.
325
174
  */
326
175
 
327
176
  interface KnowledgeBase {
@@ -331,13 +180,10 @@ interface KnowledgeBase {
331
180
  graph: GraphDatabase;
332
181
  graphConsumer: GraphDBConsumer;
333
182
  vectors?: VectorStore;
334
- smelter?: Smelter;
335
183
  projectionsDir: string;
336
184
  }
337
185
  interface CreateKnowledgeBaseOptions {
338
186
  vectorStore?: VectorStore;
339
- embeddingProvider?: EmbeddingProvider;
340
- chunkingConfig?: ChunkingConfig;
341
187
  skipRebuild?: boolean;
342
188
  }
343
189
  declare function createKnowledgeBase(eventStore: EventStore, project: SemiontProject, graphDb: GraphDatabase, eventBus: EventBus, logger: Logger, options?: CreateKnowledgeBaseOptions): Promise<KnowledgeBase>;
@@ -369,9 +215,14 @@ declare function createKnowledgeBase(eventStore: EventStore, project: SemiontPro
369
215
  * - mark:add-entity-type → entitytype.added → mark:entity-type-added / mark:entity-type-add-failed
370
216
  * - mark:update-entity-types → entitytag.added / entitytag.removed
371
217
  * - job:start → job.started
372
- * - job:report-progress → job.progress
373
218
  * - job:complete → job.completed
374
219
  * - job:fail → job.failed
220
+ *
221
+ * Note: `job:report-progress` is intentionally NOT persisted. Progress
222
+ * events are ephemeral UI feedback and would clutter the event log
223
+ * (historical logs show ~3× as many progress entries as start+complete
224
+ * combined). UI consumers subscribe to the bus directly for live
225
+ * progress; the event log keeps only the durable lifecycle boundaries.
375
226
  */
376
227
 
377
228
  type ResourceDescriptor$3 = components['schemas']['ResourceDescriptor'];
@@ -397,7 +248,6 @@ declare class Stower {
397
248
  private handleAddEntityType;
398
249
  private handleUpdateEntityTypes;
399
250
  private handleJobStart;
400
- private handleJobReportProgress;
401
251
  private handleJobComplete;
402
252
  private handleJobFail;
403
253
  stop(): Promise<void>;
@@ -426,8 +276,7 @@ declare class Stower {
426
276
  * per-resource serialization — the same invariant enforced by `Smelter`,
427
277
  * `GraphDBConsumer`, and (in a different shape) `ViewManager`. See
428
278
  * `packages/core/src/serialize-per-key.ts` for the shared primitive used
429
- * by RPC-style services, and `.plans/PerResourceSerializer.md` for the
430
- * broader design that would unify the two shapes.
279
+ * by RPC-style services.
431
280
  */
432
281
 
433
282
  declare class Gatherer {
@@ -606,17 +455,8 @@ declare function stopKnowledgeSystem(ks: KnowledgeSystem): Promise<void>;
606
455
  interface MakeMeaningService {
607
456
  knowledgeSystem: KnowledgeSystem;
608
457
  jobQueue: JobQueue;
609
- workers: Workers;
610
458
  stop: () => Promise<void>;
611
459
  }
612
- type Workers = {
613
- detection: ReferenceAnnotationWorker;
614
- generation: GenerationWorker;
615
- highlight: HighlightAnnotationWorker;
616
- assessment: AssessmentAnnotationWorker;
617
- comment: CommentAnnotationWorker;
618
- tag: TagAnnotationWorker;
619
- };
620
460
  declare function startMakeMeaning(project: SemiontProject, config: MakeMeaningConfig, eventBus: EventBus, logger: Logger, options?: {
621
461
  skipRebuild?: boolean;
622
462
  }): Promise<MakeMeaningService>;
@@ -892,6 +732,7 @@ declare function importLinkedData(archive: Readable, options: LinkedDataImporter
892
732
  */
893
733
 
894
734
  type ContentFormat = components['schemas']['ContentFormat'];
735
+ type Agent$1 = components['schemas']['Agent'];
895
736
  interface UpdateResourceInput {
896
737
  resourceId: ResourceId;
897
738
  userId: UserId;
@@ -909,6 +750,14 @@ interface CreateResourceInput {
909
750
  language?: string;
910
751
  entityTypes?: string[];
911
752
  creationMethod?: CreationMethod;
753
+ /** Provenance for AI-generated resources: source resource + annotation. */
754
+ generatedFrom?: {
755
+ resourceId?: string;
756
+ annotationId?: string;
757
+ };
758
+ generationPrompt?: string;
759
+ generator?: Agent$1 | Agent$1[];
760
+ isDraft?: boolean;
912
761
  }
913
762
  declare class ResourceOperations {
914
763
  /**
@@ -1199,4 +1048,4 @@ declare function generateReferenceSuggestions(referenceTitle: string, client: In
1199
1048
  declare const PACKAGE_NAME = "@semiont/make-meaning";
1200
1049
  declare const VERSION = "0.1.0";
1201
1050
 
1202
- export { AnnotationContext, AnnotationOperations, BACKUP_FORMAT, type BackupContentReader, type BackupEventStoreReader, type BackupExporterOptions, type BackupImportResult, type BackupImporterOptions, type BackupManifestHeader, type BackupStreamSummary, Browser, type BuildContextOptions, CloneTokenManager, type ContentBlobResolver, type CreateAnnotationResult, type CreateResourceInput, type CreateResourceResult, FORMAT_VERSION, Gatherer, GraphContext, type GraphEdge, type GraphNode, type GraphRepresentation, type KnowledgeBase, type KnowledgeSystem, LLMContext, type LLMContextOptions, type LinkedDataContentReader, type LinkedDataExporterOptions, type LinkedDataImportResult, type LinkedDataImporterOptions, type LinkedDataViewReader, type ListResourcesFilters, type MakeMeaningConfig, type MakeMeaningService, Matcher, PACKAGE_NAME, type ReplayStats, ResourceContext, ResourceOperations, Smelter, Stower, type UpdateAnnotationBodyResult, type UpdateResourceInput, VERSION, bootstrapEntityTypes, createKnowledgeBase, exportBackup, exportLinkedData, generateReferenceSuggestions, generateResourceSummary, importBackup, importLinkedData, isBackupManifest, readEntityTypesProjection, startMakeMeaning, stopKnowledgeSystem, validateManifestVersion };
1051
+ export { AnnotationContext, AnnotationOperations, BACKUP_FORMAT, type BackupContentReader, type BackupEventStoreReader, type BackupExporterOptions, type BackupImportResult, type BackupImporterOptions, type BackupManifestHeader, type BackupStreamSummary, Browser, type BuildContextOptions, CloneTokenManager, type ContentBlobResolver, type CreateAnnotationResult, type CreateResourceInput, type CreateResourceResult, FORMAT_VERSION, Gatherer, GraphContext, type GraphEdge, type GraphNode, type GraphRepresentation, type KnowledgeBase, type KnowledgeSystem, LLMContext, type LLMContextOptions, type LinkedDataContentReader, type LinkedDataExporterOptions, type LinkedDataImportResult, type LinkedDataImporterOptions, type LinkedDataViewReader, type ListResourcesFilters, type MakeMeaningConfig, type MakeMeaningService, Matcher, PACKAGE_NAME, type ReplayStats, ResourceContext, ResourceOperations, Stower, type UpdateAnnotationBodyResult, type UpdateResourceInput, VERSION, bootstrapEntityTypes, createKnowledgeBase, exportBackup, exportLinkedData, generateReferenceSuggestions, generateResourceSummary, importBackup, importLinkedData, isBackupManifest, readEntityTypesProjection, startMakeMeaning, stopKnowledgeSystem, validateManifestVersion };