@semiont/make-meaning 0.4.19 → 0.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,11 +1,11 @@
1
- import { JobQueue, ReferenceAnnotationWorker, GenerationWorker, HighlightAnnotationWorker, AssessmentAnnotationWorker, CommentAnnotationWorker, TagAnnotationWorker } from '@semiont/jobs';
1
+ import { JobQueue } from '@semiont/jobs';
2
2
  import { SemiontProject } from '@semiont/core/node';
3
- import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId, AnnotationId, components, UserId, CreationMethod, ResourceAnnotations, AnnotationCategory, GraphPath, GraphConnection } from '@semiont/core';
3
+ import { GraphServiceConfig, VectorsServiceConfig, EmbeddingServiceConfig, EventBus, Logger, StoredEvent, ResourceId, components, AnnotationId, UserId, CreationMethod, ResourceAnnotations, AnnotationCategory, GraphPath, GraphConnection } from '@semiont/core';
4
4
  export { AssembledAnnotation, applyBodyOperations, assembleAnnotation } from '@semiont/core';
5
5
  import { EventStore, ViewStorage } from '@semiont/event-sourcing';
6
6
  import { WorkingTreeStore } from '@semiont/content';
7
7
  import { GraphDatabase } from '@semiont/graph';
8
- import { VectorStore, EmbeddingProvider, ChunkingConfig } from '@semiont/vectors';
8
+ import { VectorStore, EmbeddingProvider } from '@semiont/vectors';
9
9
  import { InferenceClient } from '@semiont/inference';
10
10
  import { Writable, Readable } from 'node:stream';
11
11
 
@@ -72,6 +72,14 @@ interface MakeMeaningConfig {
72
72
  * BURST_WINDOW_MS = 50 — debounce window before flushing a batch
73
73
  * MAX_BATCH_SIZE = 500 — force flush to bound memory
74
74
  * IDLE_TIMEOUT_MS = 200 — silence before returning to passthrough
75
+ *
76
+ * ## Per-resource serialization
77
+ *
78
+ * `groupBy(resourceId) + concatMap(...)` is the stream-consumer flavor of
79
+ * per-resource serialization — the same invariant enforced by `Smelter`,
80
+ * `Gatherer`, and (in a different shape) `ViewManager`. See
81
+ * `packages/core/src/serialize-per-key.ts` for the shared primitive used
82
+ * by RPC-style services.
75
83
  */
76
84
 
77
85
  declare class GraphDBConsumer {
@@ -147,147 +155,6 @@ declare class GraphDBConsumer {
147
155
  shutdown(): Promise<void>;
148
156
  }
149
157
 
150
- /**
151
- * EmbeddingStore
152
- *
153
- * Durable file-based cache for pre-computed embedding vectors.
154
- * Stored under .semiont/embeddings/ — committed to git alongside events,
155
- * but overwritten in place rather than appended.
156
- *
157
- * File layout (same 4-hex Jump Consistent Hash sharding as events):
158
- *
159
- * .semiont/embeddings/{ab}/{cd}/{resourceId}.jsonl
160
- * Line 0: { model, dimensions } ← model header
161
- * Line N: { chunkIndex, text, embedding[] } ← one chunk per line
162
- *
163
- * .semiont/embeddings/{ab}/{cd}/{annotationId}.json
164
- * { model, dimensions, resourceId, text, embedding[], motivation, entityTypes }
165
- *
166
- * rebuildAll() in Smelter reads these files and upserts into Qdrant without
167
- * calling the embedding provider — unless the stored model doesn't match the
168
- * configured provider, in which case the file is re-embedded and overwritten.
169
- */
170
-
171
- interface StoredChunk {
172
- chunkIndex: number;
173
- text: string;
174
- embedding: number[];
175
- }
176
- interface ResourceEmbeddingFile {
177
- model: string;
178
- dimensions: number;
179
- chunks: StoredChunk[];
180
- }
181
- interface AnnotationEmbeddingFile {
182
- model: string;
183
- dimensions: number;
184
- resourceId: string;
185
- text: string;
186
- embedding: number[];
187
- motivation: string;
188
- entityTypes: string[];
189
- }
190
- declare class EmbeddingStore {
191
- private readonly project;
192
- constructor(project: SemiontProject);
193
- private resourceFilePath;
194
- private annotationFilePath;
195
- writeResourceChunks(resourceId: ResourceId, model: string, dimensions: number, chunks: StoredChunk[]): Promise<void>;
196
- readResourceEmbeddings(resourceId: ResourceId): Promise<ResourceEmbeddingFile | null>;
197
- deleteResourceEmbeddings(resourceId: ResourceId): Promise<void>;
198
- writeAnnotationEmbedding(annotationId: AnnotationId, resourceId: ResourceId, model: string, dimensions: number, text: string, embedding: number[], motivation: string, entityTypes: string[]): Promise<void>;
199
- readAnnotationEmbedding(annotationId: AnnotationId): Promise<AnnotationEmbeddingFile | null>;
200
- deleteAnnotationEmbedding(annotationId: AnnotationId): Promise<void>;
201
- /**
202
- * Scan embeddings directory and return all resource IDs (from *.jsonl files).
203
- */
204
- getAllResourceIds(): Promise<string[]>;
205
- /**
206
- * Scan embeddings directory and return all annotation IDs (from *.json files).
207
- */
208
- getAllAnnotationIds(): Promise<string[]>;
209
- private scanIds;
210
- }
211
-
212
- /**
213
- * Smelter Actor
214
- *
215
- * Takes raw content, refines it into embedding vectors, persists them to the
216
- * EmbeddingStore (.semiont/embeddings/), and indexes them into the VectorStore
217
- * (Qdrant). Peer to the Graph Consumer.
218
- *
219
- * Pipeline:
220
- * 1. Subscribe to resource and annotation events from the EventStore
221
- * 2. Chunk resource text into overlapping passages
222
- * 3. Embed each chunk via the configured EmbeddingProvider
223
- * 4. Write vectors to EmbeddingStore (overwrite-in-place, git-durable)
224
- * 5. Index vectors into the VectorStore (Qdrant) for fast similarity search
225
- *
226
- * Uses the same burst-buffer RxJS pipeline as GraphDBConsumer.
227
- */
228
-
229
- declare class Smelter {
230
- private eventBus;
231
- private vectorStore;
232
- private embeddingProvider;
233
- private contentStore;
234
- private embeddingStore;
235
- private viewStorage;
236
- private static readonly SMELTER_RELEVANT_EVENTS;
237
- private static readonly BURST_WINDOW_MS;
238
- private static readonly MAX_BATCH_SIZE;
239
- private static readonly IDLE_TIMEOUT_MS;
240
- private _globalSubscriptions;
241
- private eventSubject;
242
- private pipelineSubscription;
243
- private readonly logger;
244
- private readonly chunkingConfig;
245
- constructor(_eventStore: EventStore, eventBus: EventBus, vectorStore: VectorStore, embeddingProvider: EmbeddingProvider, contentStore: WorkingTreeStore, embeddingStore: EmbeddingStore, viewStorage: ViewStorage, logger: Logger, chunkingConfig?: ChunkingConfig);
246
- initialize(): Promise<void>;
247
- stop(): Promise<void>;
248
- /**
249
- * Rebuild the vector store from the EmbeddingStore (.semiont/embeddings/).
250
- *
251
- * For each stored file, checks whether the model matches the configured
252
- * provider. On mismatch, re-embeds from the stored text and overwrites the
253
- * file before upserting into Qdrant. On match, loads the stored vectors
254
- * directly — no embedding provider calls needed.
255
- */
256
- rebuildAll(): Promise<void>;
257
- private processBatch;
258
- /**
259
- * Batch-optimized processing for consecutive events of the same type.
260
- */
261
- private applyBatchByType;
262
- /**
263
- * Batch-embed chunks from multiple yield:created events in a single
264
- * embedBatch() call, then write to EmbeddingStore and index per resource.
265
- */
266
- private batchResourceCreated;
267
- /**
268
- * Batch-embed exact texts from multiple mark:added events in a single
269
- * embedBatch() call, then write to EmbeddingStore and index per annotation.
270
- */
271
- private batchAnnotationAdded;
272
- private safeProcessEvent;
273
- private processEvent;
274
- private handleResourceCreated;
275
- /**
276
- * Re-embed a resource whose content has changed in-place.
277
- *
278
- * Used by yield:updated and yield:representation-added handlers. Reads the
279
- * current storageUri from the materialized view (which is updated before the
280
- * EventBus fires), deletes stale Qdrant vectors, and overwrites the
281
- * EmbeddingStore file with fresh chunks.
282
- */
283
- private reembedResource;
284
- private handleResourceUpdated;
285
- private handleRepresentationAdded;
286
- private handleResourceArchived;
287
- private handleAnnotationAdded;
288
- private handleAnnotationRemoved;
289
- }
290
-
291
158
  /**
292
159
  * Knowledge Base
293
160
  *
@@ -299,10 +166,11 @@ declare class Smelter {
299
166
  * - Content Store (working-tree files, URI-addressed) — via WorkingTreeStore
300
167
  * - Graph (eventually consistent relationship projection) — via GraphDatabase
301
168
  * - Graph Consumer (event-to-graph projection) — via GraphDBConsumer
302
- * - Vectors (semantic search) — via VectorStore (optional)
303
- * - Smelter (event-to-vector projection) — via Smelter (optional)
169
+ * - Vectors (semantic search) — via VectorStore (optional, read-only)
304
170
  *
305
- * The Gatherer and Matcher are the only actors that read from these stores directly.
171
+ * The Smelter (event-to-vector projection) runs as an external actor
172
+ * via @semiont/jobs/smelter-main. It subscribes to domain events via
173
+ * the EventBus gateway, embeds content, and writes to Qdrant directly.
306
174
  */
307
175
 
308
176
  interface KnowledgeBase {
@@ -312,13 +180,10 @@ interface KnowledgeBase {
312
180
  graph: GraphDatabase;
313
181
  graphConsumer: GraphDBConsumer;
314
182
  vectors?: VectorStore;
315
- smelter?: Smelter;
316
183
  projectionsDir: string;
317
184
  }
318
185
  interface CreateKnowledgeBaseOptions {
319
186
  vectorStore?: VectorStore;
320
- embeddingProvider?: EmbeddingProvider;
321
- chunkingConfig?: ChunkingConfig;
322
187
  skipRebuild?: boolean;
323
188
  }
324
189
  declare function createKnowledgeBase(eventStore: EventStore, project: SemiontProject, graphDb: GraphDatabase, eventBus: EventBus, logger: Logger, options?: CreateKnowledgeBaseOptions): Promise<KnowledgeBase>;
@@ -350,9 +215,14 @@ declare function createKnowledgeBase(eventStore: EventStore, project: SemiontPro
350
215
  * - mark:add-entity-type → entitytype.added → mark:entity-type-added / mark:entity-type-add-failed
351
216
  * - mark:update-entity-types → entitytag.added / entitytag.removed
352
217
  * - job:start → job.started
353
- * - job:report-progress → job.progress
354
218
  * - job:complete → job.completed
355
219
  * - job:fail → job.failed
220
+ *
221
+ * Note: `job:report-progress` is intentionally NOT persisted. Progress
222
+ * events are ephemeral UI feedback and would clutter the event log
223
+ * (historical logs show ~3× as many progress entries as start+complete
224
+ * combined). UI consumers subscribe to the bus directly for live
225
+ * progress; the event log keeps only the durable lifecycle boundaries.
356
226
  */
357
227
 
358
228
  type ResourceDescriptor$3 = components['schemas']['ResourceDescriptor'];
@@ -378,7 +248,6 @@ declare class Stower {
378
248
  private handleAddEntityType;
379
249
  private handleUpdateEntityTypes;
380
250
  private handleJobStart;
381
- private handleJobReportProgress;
382
251
  private handleJobComplete;
383
252
  private handleJobFail;
384
253
  stop(): Promise<void>;
@@ -400,6 +269,14 @@ declare class Stower {
400
269
  * - gather:resource-requested — resource-level LLM context assembly
401
270
  *
402
271
  * RxJS pipeline uses groupBy(resourceId) + concatMap for per-resource isolation.
272
+ *
273
+ * ## Per-resource serialization
274
+ *
275
+ * `groupBy(resourceId) + concatMap(...)` is the stream-consumer flavor of
276
+ * per-resource serialization — the same invariant enforced by `Smelter`,
277
+ * `GraphDBConsumer`, and (in a different shape) `ViewManager`. See
278
+ * `packages/core/src/serialize-per-key.ts` for the shared primitive used
279
+ * by RPC-style services.
403
280
  */
404
281
 
405
282
  declare class Gatherer {
@@ -578,17 +455,8 @@ declare function stopKnowledgeSystem(ks: KnowledgeSystem): Promise<void>;
578
455
  interface MakeMeaningService {
579
456
  knowledgeSystem: KnowledgeSystem;
580
457
  jobQueue: JobQueue;
581
- workers: Workers;
582
458
  stop: () => Promise<void>;
583
459
  }
584
- type Workers = {
585
- detection: ReferenceAnnotationWorker;
586
- generation: GenerationWorker;
587
- highlight: HighlightAnnotationWorker;
588
- assessment: AssessmentAnnotationWorker;
589
- comment: CommentAnnotationWorker;
590
- tag: TagAnnotationWorker;
591
- };
592
460
  declare function startMakeMeaning(project: SemiontProject, config: MakeMeaningConfig, eventBus: EventBus, logger: Logger, options?: {
593
461
  skipRebuild?: boolean;
594
462
  }): Promise<MakeMeaningService>;
@@ -864,6 +732,7 @@ declare function importLinkedData(archive: Readable, options: LinkedDataImporter
864
732
  */
865
733
 
866
734
  type ContentFormat = components['schemas']['ContentFormat'];
735
+ type Agent$1 = components['schemas']['Agent'];
867
736
  interface UpdateResourceInput {
868
737
  resourceId: ResourceId;
869
738
  userId: UserId;
@@ -881,6 +750,14 @@ interface CreateResourceInput {
881
750
  language?: string;
882
751
  entityTypes?: string[];
883
752
  creationMethod?: CreationMethod;
753
+ /** Provenance for AI-generated resources: source resource + annotation. */
754
+ generatedFrom?: {
755
+ resourceId?: string;
756
+ annotationId?: string;
757
+ };
758
+ generationPrompt?: string;
759
+ generator?: Agent$1 | Agent$1[];
760
+ isDraft?: boolean;
884
761
  }
885
762
  declare class ResourceOperations {
886
763
  /**
@@ -1171,4 +1048,4 @@ declare function generateReferenceSuggestions(referenceTitle: string, client: In
1171
1048
  declare const PACKAGE_NAME = "@semiont/make-meaning";
1172
1049
  declare const VERSION = "0.1.0";
1173
1050
 
1174
- export { AnnotationContext, AnnotationOperations, BACKUP_FORMAT, type BackupContentReader, type BackupEventStoreReader, type BackupExporterOptions, type BackupImportResult, type BackupImporterOptions, type BackupManifestHeader, type BackupStreamSummary, Browser, type BuildContextOptions, CloneTokenManager, type ContentBlobResolver, type CreateAnnotationResult, type CreateResourceInput, type CreateResourceResult, FORMAT_VERSION, Gatherer, GraphContext, type GraphEdge, type GraphNode, type GraphRepresentation, type KnowledgeBase, type KnowledgeSystem, LLMContext, type LLMContextOptions, type LinkedDataContentReader, type LinkedDataExporterOptions, type LinkedDataImportResult, type LinkedDataImporterOptions, type LinkedDataViewReader, type ListResourcesFilters, type MakeMeaningConfig, type MakeMeaningService, Matcher, PACKAGE_NAME, type ReplayStats, ResourceContext, ResourceOperations, Smelter, Stower, type UpdateAnnotationBodyResult, type UpdateResourceInput, VERSION, bootstrapEntityTypes, createKnowledgeBase, exportBackup, exportLinkedData, generateReferenceSuggestions, generateResourceSummary, importBackup, importLinkedData, isBackupManifest, readEntityTypesProjection, startMakeMeaning, stopKnowledgeSystem, validateManifestVersion };
1051
+ export { AnnotationContext, AnnotationOperations, BACKUP_FORMAT, type BackupContentReader, type BackupEventStoreReader, type BackupExporterOptions, type BackupImportResult, type BackupImporterOptions, type BackupManifestHeader, type BackupStreamSummary, Browser, type BuildContextOptions, CloneTokenManager, type ContentBlobResolver, type CreateAnnotationResult, type CreateResourceInput, type CreateResourceResult, FORMAT_VERSION, Gatherer, GraphContext, type GraphEdge, type GraphNode, type GraphRepresentation, type KnowledgeBase, type KnowledgeSystem, LLMContext, type LLMContextOptions, type LinkedDataContentReader, type LinkedDataExporterOptions, type LinkedDataImportResult, type LinkedDataImporterOptions, type LinkedDataViewReader, type ListResourcesFilters, type MakeMeaningConfig, type MakeMeaningService, Matcher, PACKAGE_NAME, type ReplayStats, ResourceContext, ResourceOperations, Stower, type UpdateAnnotationBodyResult, type UpdateResourceInput, VERSION, bootstrapEntityTypes, createKnowledgeBase, exportBackup, exportLinkedData, generateReferenceSuggestions, generateResourceSummary, importBackup, importLinkedData, isBackupManifest, readEntityTypesProjection, startMakeMeaning, stopKnowledgeSystem, validateManifestVersion };