@semiont/event-sourcing 0.4.14 → 0.4.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -6,410 +6,212 @@
6
6
  [![npm downloads](https://img.shields.io/npm/dm/@semiont/event-sourcing.svg)](https://www.npmjs.com/package/@semiont/event-sourcing)
7
7
  [![License](https://img.shields.io/npm/l/@semiont/event-sourcing.svg)](https://github.com/The-AI-Alliance/semiont/blob/main/LICENSE)
8
8
 
9
- Event sourcing infrastructure for [Semiont](https://github.com/The-AI-Alliance/semiont) - provides event persistence, pub/sub, and materialized views for building event-driven applications.
9
+ Event sourcing infrastructure for the Semiont knowledge platform. Provides the persistence layer for the append-only event log, materialized views, and event-driven projections.
10
10
 
11
- ## What is Event Sourcing?
11
+ ## Architecture
12
12
 
13
- Event sourcing is a pattern where state changes are stored as a sequence of immutable events. Instead of storing current state, you store the history of events that led to the current state.
13
+ ```
14
+ appendEvent(event, options?)
15
+ 1. Persist to EventLog (JSONL files)
16
+ 2. Materialize views (resource descriptors, entity types)
17
+ 3. Publish StoredEvent to Core EventBus typed channels
14
18
 
15
- **Benefits:**
16
- - **Complete audit trail** - Every change is recorded with timestamp and user
17
- - **Time travel** - Rebuild state at any point in history
18
- - **Event replay** - Reprocess events to rebuild views or fix bugs
19
- - **Microservices-ready** - Events enable distributed systems to stay in sync
19
+ options.correlationId threads a command correlation id into event metadata,
20
+ enabling clients to match command-result events back to the POST that
21
+ initiated them. See docs/architecture/STREAMS.md.
22
+ ```
20
23
 
21
- ## Installation
24
+ The **EventStore** is the single write path. It coordinates three concerns:
22
25
 
23
- ```bash
24
- npm install @semiont/event-sourcing
25
- ```
26
+ - **EventLog** — Append-only persistence to sharded JSONL files under `.semiont/events/`. This is the source of truth.
27
+ - **ViewManager** — Materializes resource views and system projections from events. Supports both incremental updates on every append and a full `rebuildAll(eventLog)` for startup recovery.
28
+ - **Core EventBus** (`@semiont/core`) — Publishes `StoredEvent` to typed channels after persistence.
26
29
 
27
- **Prerequisites:**
28
- - Node.js >= 20.18.1
29
- - `@semiont/core` and `@semiont/api-client` (peer dependencies)
30
+ Event publishing uses the Core EventBus from `@semiont/core`. There is no internal pub/sub system — all subscribers (GraphDBConsumer, Smelter, SSE routes) subscribe directly to typed channels on the Core EventBus.
30
31
 
31
- ## Architecture Context
32
+ The materialized views directory is **ephemeral by design** — see the [ViewManager / ViewMaterializer](#viewmanager--viewmaterializer) section for the rebuild model and how it relates to the graph and vector consumers.
32
33
 
33
- **Infrastructure Ownership**: In production applications, the event store is **created and managed by [@semiont/make-meaning](../make-meaning/)'s `startMakeMeaning()` function**, which serves as the single orchestration point for all infrastructure components (EventStore, GraphDB, RepStore, InferenceClient, JobQueue, Workers).
34
+ ## Installation
34
35
 
35
- The quick start example below shows direct instantiation for **testing, CLI tools, or event replay scripts**. For backend integration, access the event store through the `makeMeaning` context object.
36
+ ```bash
37
+ npm install @semiont/event-sourcing
38
+ ```
36
39
 
37
40
  ## Quick Start
38
41
 
39
42
  ```typescript
40
- import {
41
- EventStore,
42
- FilesystemViewStorage,
43
- type IdentifierConfig,
44
- } from '@semiont/event-sourcing';
45
- import { resourceId, userId } from '@semiont/core';
46
-
47
- // 1. Create event store
48
- const eventStore = new EventStore(
49
- {
50
- basePath: './data',
51
- dataDir: './data/events',
52
- enableSharding: true,
53
- maxEventsPerFile: 10000,
54
- },
55
- new FilesystemViewStorage('./data'),
56
- { baseUrl: 'http://localhost:4000' }
57
- );
58
-
59
- // 2. Append events
60
- const event = await eventStore.appendEvent({
61
- type: 'resource.created',
62
- resourceId: resourceId('doc-abc123'),
63
- userId: userId('user@example.com'),
43
+ import { createEventStore } from '@semiont/event-sourcing';
44
+ import { SemiontProject } from '@semiont/core/node';
45
+ import { EventBus, resourceId, userId, CREATION_METHODS } from '@semiont/core';
46
+
47
+ const project = new SemiontProject('/path/to/project');
48
+ const eventBus = new EventBus();
49
+ const eventStore = createEventStore(project, eventBus, logger);
50
+
51
+ // Append an event — persists, materializes views, publishes to EventBus
52
+ const stored = await eventStore.appendEvent({
53
+ type: 'yield:created',
54
+ resourceId: resourceId('doc-123'),
55
+ userId: userId('did:web:example.com:users:alice'),
56
+ version: 1,
64
57
  payload: {
65
58
  name: 'My Document',
66
- format: 'text/plain',
67
- contentChecksum: 'sha256:...',
68
- entityTypes: [],
59
+ format: 'text/markdown',
60
+ contentChecksum: 'sha256:abc...',
61
+ creationMethod: CREATION_METHODS.API,
69
62
  },
70
63
  });
71
64
 
72
- // 3. Subscribe to events
73
- eventStore.bus.subscribe(
74
- resourceId('doc-abc123'),
75
- async (storedEvent) => {
76
- console.log('Event received:', storedEvent.event.type);
77
- }
78
- );
79
-
80
- // 4. Query events
81
- const events = await eventStore.log.queryEvents(
82
- resourceId('doc-abc123'),
83
- { eventTypes: ['resource.created', 'annotation.added'] }
84
- );
85
- ```
86
-
87
- ## Architecture
88
-
89
- The event-sourcing package follows a layered architecture with clear separation of concerns:
90
-
91
- ```
92
- ┌─────────────────────────────────────────┐
93
- │ EventStore │ ← Orchestration
94
- │ (coordinates log, bus, views) │
95
- └─────────────────────────────────────────┘
96
- │ │ │
97
- ┌────┘ ┌────┘ └────┐
98
- ▼ ▼ ▼
99
- ┌────────┐ ┌──────────┐ ┌──────────────┐
100
- │EventLog│ │ EventBus │ │ ViewManager │
101
- │(persist) │ (pub/sub)│ │ (materialize)│
102
- └────────┘ └──────────┘ └──────────────┘
103
- │ │ │
104
- ▼ ▼ ▼
105
- ┌──────────┐ ┌──────────────┐ ┌─────────────┐
106
- │EventStorage EventSubscriptions ViewStorage │
107
- │(JSONL files) (in-memory) (JSON files) │
108
- └──────────┘ └──────────────┘ └─────────────┘
65
+ // stored.event — the ResourceEvent
66
+ // stored.metadata — { sequenceNumber, prevEventHash, checksum }
109
67
  ```
110
68
 
111
- **Key Components:**
69
+ ## Components
112
70
 
113
- - **EventStore** - Orchestration layer that coordinates event operations
114
- - **EventLog** - Append-only event persistence with JSONL storage
115
- - **EventBus** - Pub/sub notifications for real-time event processing
116
- - **ViewManager** - Materialized view updates from event streams
117
- - **EventStorage** - Filesystem storage with sharding for scalability
118
- - **ViewStorage** - Materialized view persistence (current state)
119
-
120
- ## Core Concepts
121
-
122
- ### Events
71
+ ### EventStore
123
72
 
124
- Events are immutable records of state changes:
73
+ Orchestration layer. `appendEvent()` is the only write method — it coordinates persistence, view materialization, and event publishing in sequence.
125
74
 
126
75
  ```typescript
127
- import type { ResourceEvent, StoredEvent } from '@semiont/core';
76
+ import { createEventStore } from '@semiont/event-sourcing';
128
77
 
129
- // Event to append (before storage)
130
- const event: Omit<ResourceEvent, 'id' | 'timestamp'> = {
131
- type: 'resource.created',
132
- resourceId: resourceId('doc-123'),
133
- userId: userId('user@example.com'),
134
- payload: { /* event-specific data */ },
135
- };
136
-
137
- // Stored event (after persistence)
138
- const stored: StoredEvent = {
139
- event: {
140
- id: eventId('evt-456'),
141
- timestamp: '2024-01-01T00:00:00Z',
142
- ...event,
143
- },
144
- metadata: {
145
- sequenceNumber: 1,
146
- checksum: 'sha256:...',
147
- version: '1.0',
148
- },
149
- };
78
+ const eventStore = createEventStore(project, eventBus, logger);
150
79
  ```
151
80
 
152
- ### Event Types
153
-
154
- Semiont uses a hierarchical event type system:
155
-
156
- - `resource.created` - New resource created
157
- - `resource.cloned` - Resource cloned from another
158
- - `resource.archived` / `resource.unarchived` - Archive status changed
159
- - `annotation.added` / `annotation.deleted` - Annotations modified
160
- - `annotation.body.updated` - Annotation body changed
161
- - `entitytag.added` / `entitytag.removed` - Entity type tags modified
162
- - `entitytype.added` - New entity type registered (system-level)
81
+ The `coreEventBus` parameter (passed as `eventBus` in the example above) is required. After persistence, `appendEvent` publishes the full `StoredEvent` to:
82
+ - The global typed channel (e.g., `eventBus.get('mark:added')`)
83
+ - The resource-scoped typed channel (e.g., `eventBus.scope(resourceId).get('mark:added')`)
163
84
 
164
- ### Materialized Views
85
+ ### EventLog
165
86
 
166
- Views are projections of event streams into queryable state:
87
+ Append-only event persistence to sharded JSONL files. Each resource gets its own event stream under `.semiont/events/<shard>/<resourceId>.jsonl`. System events go to `__system__.jsonl`.
167
88
 
168
89
  ```typescript
169
- import type { ResourceView } from '@semiont/event-sourcing';
170
-
171
- // A view contains both metadata and annotations
172
- const view: ResourceView = {
173
- resource: {
174
- '@id': 'http://localhost:4000/resources/doc-123',
175
- name: 'My Document',
176
- representations: [/* ... */],
177
- entityTypes: ['Person', 'Organization'],
178
- },
179
- annotations: {
180
- annotations: [/* ... */],
181
- },
182
- };
183
- ```
184
-
185
- Views are automatically updated when events are appended.
186
-
187
- ## Documentation
188
-
189
- 📚 **[Event Store Guide](./docs/EventStore.md)** - EventStore API and orchestration
190
-
191
- 📖 **[Event Log Guide](./docs/EventLog.md)** - Event persistence and storage
192
-
193
- 🔔 **[Event Bus Guide](./docs/EventBus.md)** - Pub/sub and subscriptions
194
-
195
- 🔍 **[Views Guide](./docs/Views.md)** - Materialized views and projections
90
+ // Append (used internally by EventStore)
91
+ const stored = await eventStore.log.append(event, resourceId);
196
92
 
197
- ⚙️ **[Configuration Guide](./docs/Configuration.md)** - Setup and options
93
+ // Read all events for a resource
94
+ const events = await eventStore.log.getEvents(resourceId);
198
95
 
199
- ## Key Features
200
-
201
- - **Type-safe** - Full TypeScript support with branded types from `@semiont/core`
202
- - **Filesystem-based** - No external database required (JSONL for events, JSON for views)
203
- - **Sharded storage** - Automatic sharding for scalability (65,536 shards using Jump Consistent Hash)
204
- - **Real-time** - Pub/sub subscriptions for live event processing
205
- - **Event replay** - Rebuild views from event history at any time
206
- - **Framework-agnostic** - Pure TypeScript, no web framework dependencies
207
-
208
- ## Use Cases
209
-
210
- ✅ **CLI tools** - Build offline tools that use event sourcing without the full backend
211
-
212
- ✅ **Worker processes** - Separate microservices that process events independently
213
-
214
- ✅ **Testing** - Isolated event stores for unit/integration tests
215
-
216
- ✅ **Analytics** - Process event streams for metrics and insights
217
-
218
- ✅ **Audit systems** - Complete history of all changes with provenance
96
+ // List all resource IDs
97
+ const ids = await eventStore.log.getAllResourceIds();
98
+ ```
219
99
 
220
- **Not for frontend** - Use `@semiont/react-ui` hooks for frontend applications
100
+ ### EventQuery
221
101
 
222
- ## API Overview
223
-
224
- ### EventStore
102
+ Read-only query interface with filtering support.
225
103
 
226
104
  ```typescript
227
- const store = new EventStore(storageConfig, viewStorage, identifierConfig);
105
+ import { EventQuery } from '@semiont/event-sourcing';
228
106
 
229
- // Append event (coordinates persistence → view → notification)
230
- const stored = await store.appendEvent(event);
107
+ const query = new EventQuery(eventStore.log.storage);
231
108
 
232
- // Access components
233
- store.log // EventLog - persistence
234
- store.bus // EventBus - pub/sub
235
- store.views // ViewManager - views
236
- ```
109
+ // Get all events for a resource
110
+ const events = await query.getResourceEvents(resourceId);
237
111
 
238
- ### EventLog
239
-
240
- ```typescript
241
- // Append event to log
242
- const stored = await eventLog.append(event, resourceId);
243
-
244
- // Get all events for resource
245
- const events = await eventLog.getEvents(resourceId);
246
-
247
- // Query with filter
248
- const filtered = await eventLog.queryEvents(resourceId, {
249
- eventTypes: ['annotation.added'],
250
- fromSequence: 10,
112
+ // Query with filters
113
+ const filtered = await query.queryEvents({
114
+ resourceId,
115
+ eventTypes: ['mark:added', 'mark:removed'],
116
+ limit: 50,
251
117
  });
252
118
  ```
253
119
 
254
- ### EventBus
120
+ ### ViewManager / ViewMaterializer
255
121
 
256
- ```typescript
257
- // Subscribe to resource events
258
- const sub = eventBus.subscribe(resourceId, async (event) => {
259
- console.log('Event:', event.event.type);
260
- });
122
+ Materializes JSON views from events. Resource views are projected to `<stateDir>/resources/<shard>/<resourceId>.json`. System views (entity types) are projected to `<stateDir>/projections/__system__/`. The storage-uri index lives at `<stateDir>/projections/storage-uri-index.json`.
261
123
 
262
- // Subscribe to all system events
263
- const globalSub = eventBus.subscribeGlobal(async (event) => {
264
- console.log('System event:', event.event.type);
265
- });
124
+ The materializer processes events through a large switch statement that builds up resource descriptors, annotation collections, and system state. There are two paths into it:
266
125
 
267
- // Unsubscribe
268
- sub.unsubscribe();
269
- ```
126
+ **Live append path** — every `EventStore.appendEvent()` call materializes the event incrementally:
127
+ - Resource events → `views.materializeResource(rid, event, getAllEvents)` → updates the resource view file and the storage-uri index.
128
+ - System events (currently `mark:entity-type-added`) → `views.materializeSystem(eventType, payload)` → updates `entitytypes.json`.
270
129
 
271
- ### ViewManager
130
+ **Startup rebuild path** — `views.rebuildAll(eventLog)` walks the entire event log once at process start and writes every view from scratch. Idempotent: existing view files are overwritten. This is the recovery mechanism for the materialized layer.
272
131
 
273
132
  ```typescript
274
- // Materialize resource view from events
275
- await viewManager.materializeResource(
276
- resourceId,
277
- event,
278
- () => eventLog.getEvents(resourceId)
279
- );
280
-
281
- // Get materialized view
282
- const view = await viewStorage.get(resourceId);
133
+ // Called once during knowledge-base construction, before any HTTP request
134
+ await eventStore.views.rebuildAll(eventStore.log);
283
135
  ```
284
136
 
285
- ## Storage Format
286
-
287
- ### Events (JSONL)
288
-
289
- Events are stored in append-only JSONL files with sharding:
290
-
291
- ```
292
- data/
293
- events/
294
- ab/ # Shard level 1 (256 directories)
295
- cd/ # Shard level 2 (256 subdirectories)
296
- doc-abc123.jsonl # Event log for resource
297
- ```
137
+ The two paths use the same materialization primitives, so replaying events 1..N via `rebuildAll` produces the same final state as the live path walking events 1..N over time.
298
138
 
299
- Each line in the JSONL file is a complete `StoredEvent`:
139
+ #### Why startup rebuild exists
300
140
 
301
- ```json
302
- {"event":{"id":"evt-1","type":"resource.created","timestamp":"2024-01-01T00:00:00Z","resourceId":"doc-abc123","userId":"user@example.com","payload":{}},"metadata":{"sequenceNumber":1,"checksum":"sha256:...","version":"1.0"}}
303
- ```
141
+ The materialized views directory (`stateDir`) is **ephemeral by design** — it's safe to wipe (container recreation, `semiont destroy`, dev cleanup), and the event log under `.semiont/events/` is the single source of truth. `rebuildAll` is what makes "ephemeral" safe: any time `stateDir` goes empty, the next process start repopulates it from the event log.
304
142
 
305
- ### Views (JSON)
143
+ This makes the views layer the third leg of a symmetric pattern: the three derived read models (graph, vectors, materialized views) each have exactly one explicit rebuild method called from one place at startup:
306
144
 
307
- Materialized views are stored as JSON files with the same sharding:
145
+ | Derived store | Rebuild method | Owned by |
146
+ |---|---|---|
147
+ | Graph (Neo4j) | `GraphDBConsumer.rebuildAll()` | `@semiont/make-meaning` |
148
+ | Vectors (Qdrant) | `Smelter.rebuildAll()` | `@semiont/make-meaning` |
149
+ | Materialized views | `ViewManager.rebuildAll(eventLog)` | `@semiont/event-sourcing` |
308
150
 
309
- ```
310
- data/
311
- projections/
312
- resources/
313
- ab/
314
- cd/
315
- doc-abc123.json # Materialized view
316
- ```
151
+ All three are called from `createKnowledgeBase` before the HTTP server begins accepting requests, so by the time any client can hit the API, all three derived stores are caught up to the event log.
317
152
 
318
- ## Performance
153
+ `rebuildAll` accepts any object satisfying the `RebuildEventSource` structural type (`getEvents(rid)` + `getAllResourceIds()`); the concrete `EventLog` satisfies it without an explicit conformance declaration.
319
154
 
320
- - **Sharding** - 65,536 shards using Jump Consistent Hash prevents filesystem bottlenecks
321
- - **Append-only** - JSONL writes are fast (no updates, only appends)
322
- - **In-memory subscriptions** - Pub/sub has zero I/O overhead
323
- - **Lazy view materialization** - Views only built on demand or when events occur
155
+ ### EventValidator
324
156
 
325
- ## Error Handling
157
+ Verifies event chain integrity using cryptographic checksums.
326
158
 
327
159
  ```typescript
328
- try {
329
- await eventStore.appendEvent(event);
330
- } catch (error) {
331
- if (error.code === 'ENOENT') {
332
- // Storage directory doesn't exist
333
- }
334
- throw error;
335
- }
336
- ```
337
-
338
- ## Testing
160
+ import { EventValidator } from '@semiont/event-sourcing';
161
+
162
+ const validator = new EventValidator();
163
+ const result = validator.validateChain(events);
164
+ // { valid: boolean, errors: string[] }
165
+ ```
166
+
167
+ ### Storage
168
+
169
+ - **EventStorage** — Low-level JSONL file I/O with sharding (jump-consistent hash)
170
+ - **FilesystemViewStorage** — JSON view persistence implementing the `ViewStorage` interface
171
+ - **Storage URI Index** — Maps `file://` URIs to resource IDs for filesystem-based resources
172
+
173
+ ## Event Types
174
+
175
+ All persisted events use flow verb names (see `ResourceEvent` in `@semiont/core`):
176
+
177
+ | Event Type | Flow | Description |
178
+ |---|---|---|
179
+ | `yield:created` | Yield | Resource created |
180
+ | `yield:updated` | Yield | Resource content updated |
181
+ | `yield:moved` | Yield | Resource file moved |
182
+ | `yield:representation-added` | Yield | Multi-format representation added |
183
+ | `mark:added` | Mark | Annotation created |
184
+ | `mark:removed` | Mark | Annotation deleted |
185
+ | `mark:body-updated` | Mark | Annotation body modified |
186
+ | `mark:archived` | Mark | Resource archived |
187
+ | `mark:unarchived` | Mark | Resource unarchived |
188
+ | `mark:entity-tag-added` | Mark | Entity type tag added to resource |
189
+ | `mark:entity-tag-removed` | Mark | Entity type tag removed from resource |
190
+ | `mark:entity-type-added` | Mark | New entity type added (system-level) |
191
+ | `job:started` | Job | Background job started |
192
+ | `job:progress` | Job | Background job progress update |
193
+ | `job:completed` | Job | Background job completed |
194
+ | `job:failed` | Job | Background job failed |
195
+ | `embedding:computed` | Embedding | Vector embedding computed |
196
+ | `embedding:deleted` | Embedding | Vector embedding deleted |
197
+
198
+ ## Exports
339
199
 
340
200
  ```typescript
341
- import { EventStore, FilesystemViewStorage } from '@semiont/event-sourcing';
342
- import { describe, it, beforeEach } from 'vitest';
343
-
344
- describe('Event sourcing', () => {
345
- let eventStore: EventStore;
346
-
347
- beforeEach(() => {
348
- eventStore = new EventStore(
349
- { basePath: './test-data', dataDir: './test-data', enableSharding: false },
350
- new FilesystemViewStorage('./test-data'),
351
- { baseUrl: 'http://localhost:4000' }
352
- );
353
- });
354
-
355
- it('should append and retrieve events', async () => {
356
- const event = await eventStore.appendEvent({
357
- type: 'resource.created',
358
- resourceId: resourceId('test-1'),
359
- userId: userId('test@example.com'),
360
- payload: {},
361
- });
362
-
363
- const events = await eventStore.log.getEvents(resourceId('test-1'));
364
- expect(events).toHaveLength(1);
365
- });
366
- });
367
- ```
368
-
369
- ## Examples
201
+ // Core
202
+ export { EventStore, createEventStore, EventLog, ViewManager };
370
203
 
371
- ### Building a CLI Tool
204
+ // Storage
205
+ export { EventStorage, FilesystemViewStorage, type ViewStorage, type ResourceView };
206
+ export { getShardPath, sha256, jumpConsistentHash };
207
+ export { resolveStorageUri, writeStorageUriEntry, removeStorageUriEntry };
372
208
 
373
- ```typescript
374
- import { EventStore, FilesystemViewStorage } from '@semiont/event-sourcing';
375
- import { resourceId, userId } from '@semiont/core';
376
-
377
- async function rebuildViews(basePath: string) {
378
- const store = new EventStore(
379
- { basePath, dataDir: basePath, enableSharding: true },
380
- new FilesystemViewStorage(basePath),
381
- { baseUrl: 'http://localhost:4000' }
382
- );
383
-
384
- const resourceIds = await store.log.getAllResourceIds();
385
- console.log(`Rebuilding ${resourceIds.length} resources...`);
386
-
387
- for (const id of resourceIds) {
388
- const events = await store.log.getEvents(id);
389
- console.log(`Resource ${id}: ${events.length} events`);
390
- // Views are automatically materialized by ViewManager
391
- }
392
- }
393
- ```
209
+ // Query & Validation
210
+ export { EventQuery, EventValidator };
394
211
 
395
- ### Event Processing Worker
212
+ // Views
213
+ export { ViewMaterializer };
396
214
 
397
- ```typescript
398
- async function startWorker() {
399
- const store = new EventStore(/* config */);
400
-
401
- // Subscribe to all annotation events
402
- store.bus.subscribeGlobal(async (event) => {
403
- if (event.event.type === 'annotation.added') {
404
- console.log('Processing annotation:', event.event.payload);
405
- // Custom processing logic here
406
- }
407
- });
408
-
409
- console.log('Worker started, listening for events...');
410
- }
215
+ // Utilities
216
+ export { generateAnnotationId };
411
217
  ```
412
-
413
- ## License
414
-
415
- Apache-2.0