@agentlip/hub 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,255 @@
1
+ /**
2
+ * Derived job staleness guard for @agentlip/hub
3
+ *
4
+ * Implements bd-16d.4.7 (staleness guard) per AGENTLIP_PLAN.md §4.6.
5
+ *
6
+ * Core requirement: before committing derived outputs (enrichments/attachments),
7
+ * verify the message hasn't changed. Use same transaction for check+commit.
8
+ *
9
+ * Staleness detection:
10
+ * - Discard if content_raw changed (message was edited)
11
+ * - Discard if version changed (handles ABA: edit back to original)
12
+ * - Discard if deleted_at IS NOT NULL (message was tombstoned)
13
+ * - Discard if message no longer exists
14
+ *
15
+ * Pattern:
16
+ * 1. Job starts, captures MessageSnapshot (id, content_raw, version)
17
+ * 2. Job processes content (enrichment, extraction, etc.)
18
+ * 3. Job commits via withMessageStalenessGuard:
19
+ * - Re-read message state in SAME transaction
20
+ * - Verify snapshot still matches
21
+ * - If stale: discard, return {ok: false, reason: '...'}
22
+ * - If fresh: call fn(currentMessage) to commit derived rows/events
23
+ *
24
+ * Usage Example (plugin or derived job):
25
+ * ```typescript
26
+ * import { getMessageById } from "@agentlip/kernel";
27
+ * import { withMessageStalenessGuard, captureSnapshot } from "@agentlip/hub/src/derivedStaleness";
28
+ *
29
+ * // 1. Read message and capture snapshot
30
+ * const message = getMessageById(db, messageId);
31
+ * if (!message) return;
32
+ * const snapshot = captureSnapshot(message);
33
+ *
34
+ * // 2. Process content (may take seconds; message could change during this)
35
+ * const enrichments = await analyzeContent(message.content_raw);
36
+ *
37
+ * // 3. Commit with staleness protection
38
+ * const result = withMessageStalenessGuard(db, snapshot, (current) => {
39
+ * // Safe: message verified unchanged in this transaction
40
+ * const enrichmentId = insertEnrichment(db, {
41
+ * messageId: current.id,
42
+ * kind: "sentiment",
43
+ * data: enrichments,
44
+ * });
45
+ *
46
+ * const eventId = insertEvent({
47
+ * db,
48
+ * name: "message.enriched",
49
+ * scopes: { channel_id: current.channel_id, topic_id: current.topic_id },
50
+ * entity: { type: "enrichment", id: enrichmentId },
51
+ * data: { enrichment_id: enrichmentId },
52
+ * });
53
+ *
54
+ * return { enrichmentId, eventId };
55
+ * });
56
+ *
57
+ * if (!result.ok) {
58
+ * console.warn(`Discarded stale enrichment: ${result.reason} - ${result.detail}`);
59
+ * return;
60
+ * }
61
+ *
62
+ * console.log(`Committed enrichment: ${result.value.enrichmentId}`);
63
+ * ```
64
+ */
65
+
66
+ import type { Database } from "bun:sqlite";
67
+
68
+ // ─────────────────────────────────────────────────────────────────────────────
69
+ // Types
70
+ // ─────────────────────────────────────────────────────────────────────────────
71
+
72
/**
 * Minimal record of a message taken when a derived job begins working on it.
 *
 * The staleness guard compares this record against the live row to catch:
 * - edits (content_raw differs)
 * - ABA edits, where content was changed and then restored (version differs)
 * - tombstone deletes
 */
export interface MessageSnapshot {
  /** ID of the message being processed */
  messageId: string;

  /** content_raw as it was when the job started */
  contentRaw: string;

  /** version as it was when the job started */
  version: number;
}
90
+
91
/**
 * Message row as re-read while verifying staleness.
 *
 * channel_id and topic_id are carried along so the commit callback can emit
 * events with the right scopes.
 */
export interface CurrentMessageState {
  id: string;
  topic_id: string;
  channel_id: string;
  sender: string;
  content_raw: string;
  version: number;
  created_at: string;
  edited_at: string | null;
  deleted_at: string | null;
  deleted_by: string | null;
}
108
+
109
/**
 * Outcome of a staleness check that passed.
 */
export interface StalenessCheckSuccess<T> {
  ok: true;
  /** Whatever the commit callback returned */
  value: T;
}

/**
 * Outcome of a staleness check that rejected the commit.
 */
export interface StalenessCheckFailure {
  ok: false;
  /** Machine-readable cause of the rejection */
  reason: "STALE_CONTENT" | "STALE_VERSION" | "DELETED" | "MISSING";
  /** Human-readable explanation */
  detail: string;
}

/** Discriminated on `ok`: either the callback's value or a rejection reason. */
export type StalenessCheckResult<T> = StalenessCheckSuccess<T> | StalenessCheckFailure;
130
+
131
+ // ─────────────────────────────────────────────────────────────────────────────
132
+ // Staleness Guard Implementation
133
+ // ─────────────────────────────────────────────────────────────────────────────
134
+
135
/**
 * Commit derived outputs for a message only if the message is unchanged since
 * `snapshot` was captured.
 *
 * The message row is re-read and compared inside the same `db.transaction`
 * callback that invokes `fn`, so the check and the derived writes form one
 * unit (per §4.6):
 * 1. Re-read the message row by `snapshot.messageId`
 * 2. Reject when the row is missing, `deleted_at` is set, `version` differs
 *    (prevents ABA), or `content_raw` differs
 * 3. Otherwise call `fn(current)` and return its value
 *
 * When a check fails, `fn` is never invoked, so no derived rows are written.
 *
 * This function is synchronous and does not await `fn`'s return value. Do all
 * database writes synchronously inside `fn`; anything `fn` defers to a later
 * async step runs after this call has already returned.
 *
 * Usage:
 * ```ts
 * const snapshot = { messageId, contentRaw, version };
 * const result = withMessageStalenessGuard(db, snapshot, (current) => {
 *   // Safe to commit: message hasn't changed
 *   const enrichmentId = insertEnrichment(db, {...});
 *   const eventId = insertEvent({ db, ... });
 *   return { enrichmentId, eventId };
 * });
 *
 * if (!result.ok) {
 *   console.log(`Discarded stale output: ${result.reason}`);
 *   return;
 * }
 * ```
 *
 * @param db - Database instance
 * @param snapshot - Snapshot captured when the job started
 * @param fn - Synchronous callback that commits derived outputs; receives the
 *   freshly re-read message row
 * @returns `{ ok: true, value }` with fn's return value, or
 *   `{ ok: false, reason, detail }` when the message is stale
 * @throws Error when `snapshot` is malformed (empty/non-string messageId,
 *   non-string contentRaw, or a version that is not a finite number >= 1).
 *   Errors thrown by `fn` or by the database propagate to the caller
 *   (NOTE(review): bun:sqlite's transaction wrapper is expected to roll back
 *   in that case — confirm against its documentation).
 */
export function withMessageStalenessGuard<T>(
  db: Database,
  snapshot: MessageSnapshot,
  fn: (current: CurrentMessageState) => T
): StalenessCheckResult<T> {
  // Validate snapshot before touching the database
  if (!snapshot.messageId || typeof snapshot.messageId !== "string") {
    throw new Error("Invalid snapshot: messageId must be a non-empty string");
  }
  if (typeof snapshot.contentRaw !== "string") {
    throw new Error("Invalid snapshot: contentRaw must be a string");
  }
  // Number.isFinite rejects NaN and Infinity, which `version < 1` alone lets
  // through (NaN < 1 is false) and which could never match a stored version.
  if (
    typeof snapshot.version !== "number" ||
    !Number.isFinite(snapshot.version) ||
    snapshot.version < 1
  ) {
    throw new Error("Invalid snapshot: version must be a positive number");
  }

  // Check and commit share one transaction callback
  const guardedCommit = db.transaction((): StalenessCheckResult<T> => {
    // Re-read message state
    const current = db
      .query<CurrentMessageState, [string]>(
        `SELECT id, topic_id, channel_id, sender, content_raw, version,
                created_at, edited_at, deleted_at, deleted_by
         FROM messages
         WHERE id = ?`
      )
      .get(snapshot.messageId);

    // Check 1: message still exists
    if (!current) {
      return {
        ok: false,
        reason: "MISSING",
        detail: `Message ${snapshot.messageId} no longer exists`,
      };
    }

    // Check 2: message not tombstoned
    if (current.deleted_at !== null) {
      return {
        ok: false,
        reason: "DELETED",
        detail: `Message ${snapshot.messageId} was deleted at ${current.deleted_at}`,
      };
    }

    // Check 3: version unchanged (prevents ABA)
    if (current.version !== snapshot.version) {
      return {
        ok: false,
        reason: "STALE_VERSION",
        detail: `Message ${snapshot.messageId} version changed: ${snapshot.version} → ${current.version}`,
      };
    }

    // Check 4: content unchanged
    if (current.content_raw !== snapshot.contentRaw) {
      return {
        ok: false,
        reason: "STALE_CONTENT",
        detail: `Message ${snapshot.messageId} content changed`,
      };
    }

    // All checks passed: message is fresh, safe to commit
    return { ok: true, value: fn(current) };
  });

  return guardedCommit();
}
239
+
240
/**
 * Build the staleness-guard snapshot for a message.
 *
 * Picks out just the three fields the guard compares later. Call it at the
 * point a job starts processing the message.
 *
 * @param message - Full message row as returned by queries
 * @returns Snapshot holding the message's id, content_raw and version
 */
export function captureSnapshot(message: CurrentMessageState): MessageSnapshot {
  const { id: messageId, content_raw: contentRaw, version } = message;
  return { messageId, contentRaw, version };
}
@@ -0,0 +1,374 @@
1
+ /**
2
+ * Extractor plugin derived pipeline for @agentlip/hub
3
+ *
4
+ * Implements bd-16d.4.6: execute extractor plugins on new messages,
5
+ * insert topic_attachments with idempotency, emit topic.attachment_added events.
6
+ *
7
+ * Flow:
8
+ * 1. Read message and capture snapshot (for staleness guard)
9
+ * 2. Run each enabled extractor plugin via runPlugin<Attachment[]>
10
+ * 3. For each attachment:
11
+ * - Validate shape (kind non-empty string, value_json is object)
12
+ * - Enforce 16KB size limit on serialized value_json
13
+ * - Compute dedupe_key (attachment.dedupe_key ?? JSON.stringify(value_json))
14
+ * - Insert with idempotency (dedupe by topic_id, kind, key, dedupe_key)
15
+ * - Emit topic.attachment_added only on new inserts (not on dedupe)
16
+ * 4. Wrap in staleness-guarded transaction (discard if message changed/deleted)
17
+ *
18
+ * Security:
19
+ * - Runs plugins in isolated Workers with timeout (via runPlugin)
20
+ * - Enforces attachment size limits (16KB per attachment metadata)
21
+ * - Validates attachment structure before insertion
22
+ * - Staleness guard prevents committing results from stale content
23
+ */
24
+
25
+ import type { Database } from "bun:sqlite";
26
+ import { validatePluginModulePath, type WorkspaceConfig } from "./config";
27
+ import {
28
+ runPlugin,
29
+ type Attachment,
30
+ type ExtractInput,
31
+ type PluginResult,
32
+ } from "./pluginRuntime";
33
+ import { withMessageStalenessGuard, captureSnapshot } from "./derivedStaleness";
34
+ import { getMessageById, findAttachmentByDedupeKey, insertEvent } from "@agentlip/kernel";
35
+ import { SIZE_LIMITS } from "./bodyParser";
36
+
37
+ // ─────────────────────────────────────────────────────────────────────────────
38
+ // Types
39
+ // ─────────────────────────────────────────────────────────────────────────────
40
+
41
/** Inputs for a single extractor run over one message. */
export interface RunExtractorPluginsForMessageOptions {
  /** Database the message is read from and attachments/events are written to */
  db: Database;
  /** Workspace root used when validating plugin module paths */
  workspaceRoot: string;
  /** Workspace configuration supplying the plugin list and plugin defaults */
  workspaceConfig: WorkspaceConfig;
  /** ID of the message to run extractors on */
  messageId: string;
  /** Optional hook to publish newly-created event IDs (for WS fanout) */
  onEventIds?: (eventIds: number[]) => void;
}
49
+
50
/** Counters and event IDs reported by one extractor run. */
export interface ExtractorRunResult {
  /** How many plugins were executed in total */
  pluginsExecuted: number;
  /** How many attachments were newly inserted (duplicates not counted) */
  attachmentsInserted: number;
  /** How many attachments were skipped because their dedupe_key already existed */
  attachmentsDeduplicated: number;
  /** How many plugins failed */
  pluginsFailed: number;
  /** IDs of the topic.attachment_added events that were emitted */
  eventIds: number[];
}
62
+
63
+ // ─────────────────────────────────────────────────────────────────────────────
64
+ // Main Entry Point
65
+ // ─────────────────────────────────────────────────────────────────────────────
66
+
67
/**
 * Run extractor plugins for a message and insert attachments with idempotency.
 *
 * Steps:
 * 1. Read the message and capture a snapshot for the staleness guard
 * 2. Run each enabled extractor plugin (sequentially) and collect its output
 * 3. Inside one staleness-guarded transaction, for every collected attachment:
 *    - skip it if its shape is invalid or value_json cannot be serialized
 *    - skip it if the serialized value_json exceeds SIZE_LIMITS.ATTACHMENT
 *      (measured in UTF-8 bytes)
 *    - skip it as a duplicate if an attachment with the same
 *      (topic_id, kind, key, dedupe_key) already exists
 *    - otherwise insert it and emit a topic.attachment_added event
 * 4. After a successful commit, hand the new event IDs to `onEventIds`
 *
 * A failing plugin (missing/invalid module path, failed run, or non-array
 * output) is counted in `pluginsFailed` and does not stop the others.
 *
 * @param options - Extractor run options
 * @returns Run statistics and event IDs; `null` when the message is missing or
 *   tombstoned, when the staleness guard rejects the commit, or when the
 *   commit throws
 */
export async function runExtractorPluginsForMessage(
  options: RunExtractorPluginsForMessageOptions
): Promise<ExtractorRunResult | null> {
  const { db, workspaceRoot, workspaceConfig, messageId, onEventIds } = options;

  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  // Step 1: Read message and capture snapshot
  const message = getMessageById(db, messageId);
  if (!message) {
    return null; // Message doesn't exist
  }
  if (message.deleted_at) {
    return null; // Message is tombstoned, skip processing
  }

  const snapshot = captureSnapshot(message);

  // Step 2: Find enabled extractor plugins
  const extractorPlugins =
    workspaceConfig.plugins?.filter(
      (p) => p.type === "extractor" && p.enabled
    ) ?? [];

  if (extractorPlugins.length === 0) {
    return {
      pluginsExecuted: 0,
      attachmentsInserted: 0,
      attachmentsDeduplicated: 0,
      pluginsFailed: 0,
      eventIds: [],
    };
  }

  // Step 3: Run each extractor plugin
  const allAttachments: Array<{
    attachment: Attachment;
    pluginName: string;
  }> = [];
  let pluginsExecuted = 0;
  let pluginsFailed = 0;

  for (const plugin of extractorPlugins) {
    pluginsExecuted++;

    const input: ExtractInput = {
      message: {
        id: message.id,
        content_raw: message.content_raw,
        sender: message.sender,
        topic_id: message.topic_id,
        channel_id: message.channel_id,
        created_at: message.created_at,
      },
      config: plugin.config ?? {},
    };

    // Built-in plugins are not implemented in v1 (require explicit module)
    if (!plugin.module) {
      pluginsFailed++;
      console.warn(
        `[extractorDerived] Plugin '${plugin.name}' has no module path, skipping`
      );
      continue;
    }

    // Resolve plugin module path
    let modulePath: string;
    try {
      modulePath = validatePluginModulePath(plugin.module, workspaceRoot);
    } catch (err: unknown) {
      pluginsFailed++;
      console.warn(
        `[extractorDerived] Plugin '${plugin.name}' module path validation failed: ${describeError(err)}`
      );
      continue;
    }

    // Get timeout from pluginDefaults
    const timeoutMs = workspaceConfig.pluginDefaults?.timeout;

    const result: PluginResult<Attachment[]> = await runPlugin<Attachment[]>({
      type: "extractor",
      modulePath,
      input,
      timeoutMs,
      pluginName: plugin.name,
    });

    if (!result.ok) {
      pluginsFailed++;
      console.warn(
        `[extractorDerived] Plugin ${plugin.name} failed: ${result.error} (code: ${result.code})`
      );
      continue;
    }

    // Plugin output is untrusted: iterating a non-array would throw and
    // reject the whole run, so count it as a plugin failure instead.
    if (!Array.isArray(result.data)) {
      pluginsFailed++;
      console.warn(
        `[extractorDerived] Plugin ${plugin.name} returned a non-array result, skipping`
      );
      continue;
    }

    // Collect attachments
    for (const attachment of result.data) {
      allAttachments.push({ attachment, pluginName: plugin.name });
    }
  }

  // Hoisted so the encoder is created once, not per attachment.
  const utf8 = new TextEncoder();

  // Step 4: Insert attachments with staleness guard.
  // Wrapped in a helper so the guard result keeps its inferred type while a
  // thrown commit error still maps to `null`.
  const commitGuarded = () => {
    try {
      return withMessageStalenessGuard(
        db,
        snapshot,
        (currentMessage): ExtractorRunResult => {
          let attachmentsInserted = 0;
          let attachmentsDeduplicated = 0;
          const eventIds: number[] = [];

          for (const { attachment, pluginName } of allAttachments) {
            // Validate attachment shape
            const validationError = validateAttachment(attachment);
            if (validationError) {
              console.warn(
                `[extractorDerived] Invalid attachment from ${pluginName}: ${validationError}`
              );
              continue;
            }

            // Serialize once; the string is reused for the size check, the
            // default dedupe key and the stored column. JSON.stringify throws
            // on values such as BigInt — skip that attachment rather than
            // aborting the transaction for every plugin.
            let serializedValue: string;
            try {
              serializedValue = JSON.stringify(attachment.value_json);
            } catch (err: unknown) {
              console.warn(
                `[extractorDerived] Attachment from ${pluginName} has non-serializable value_json: ${describeError(err)}`
              );
              continue;
            }
            if (typeof serializedValue !== "string") {
              console.warn(
                `[extractorDerived] Attachment from ${pluginName} has non-serializable value_json: no JSON output`
              );
              continue;
            }

            // Enforce the size limit in UTF-8 bytes. String.length counts
            // UTF-16 code units and under-reports multi-byte content.
            const serializedBytes = utf8.encode(serializedValue).length;
            if (serializedBytes > SIZE_LIMITS.ATTACHMENT) {
              console.warn(
                `[extractorDerived] Attachment from ${pluginName} exceeds size limit: ${serializedBytes} bytes (max ${SIZE_LIMITS.ATTACHMENT})`
              );
              continue;
            }

            // Compute dedupe_key (empty strings are invalid per schema)
            const dedupeKey =
              attachment.dedupe_key && attachment.dedupe_key.trim().length > 0
                ? attachment.dedupe_key
                : serializedValue;

            // Check for existing attachment (idempotency)
            const existing = findAttachmentByDedupeKey(
              db,
              currentMessage.topic_id,
              attachment.kind,
              attachment.key ?? null,
              dedupeKey
            );

            if (existing) {
              attachmentsDeduplicated++;
              continue; // Skip duplicate, no event emitted
            }

            // Insert new attachment
            const attachmentId = generateId("att");
            const now = new Date().toISOString();

            db.run(
              `
              INSERT INTO topic_attachments (
                id, topic_id, kind, key, value_json, dedupe_key, source_message_id, created_at
              ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
              `,
              [
                attachmentId,
                currentMessage.topic_id,
                attachment.kind,
                attachment.key ?? null,
                serializedValue,
                dedupeKey,
                currentMessage.id,
                now,
              ]
            );

            attachmentsInserted++;

            // Emit topic.attachment_added event
            const eventId = insertEvent({
              db,
              name: "topic.attachment_added",
              scopes: {
                channel_id: currentMessage.channel_id,
                topic_id: currentMessage.topic_id,
              },
              entity: {
                type: "attachment",
                id: attachmentId,
              },
              data: {
                attachment: {
                  id: attachmentId,
                  topic_id: currentMessage.topic_id,
                  kind: attachment.kind,
                  key: attachment.key ?? null,
                  value_json: attachment.value_json,
                  dedupe_key: dedupeKey,
                  source_message_id: currentMessage.id,
                  created_at: now,
                },
              },
            });

            eventIds.push(eventId);
          }

          return {
            pluginsExecuted,
            attachmentsInserted,
            attachmentsDeduplicated,
            pluginsFailed,
            eventIds,
          };
        }
      );
    } catch (err: unknown) {
      console.warn(
        `[extractorDerived] Failed to commit derived attachments: ${describeError(err)}`
      );
      return null;
    }
  };

  const guardResult = commitGuarded();
  if (guardResult === null) {
    return null;
  }

  // Handle staleness
  if (!guardResult.ok) {
    console.warn(
      `[extractorDerived] Message ${messageId} is stale: ${guardResult.reason} - ${guardResult.detail}`
    );
    return null;
  }

  // Publish event IDs for WS fanout
  if (onEventIds && guardResult.value.eventIds.length > 0) {
    onEventIds(guardResult.value.eventIds);
  }

  return guardResult.value;
}
312
+
313
+ // ─────────────────────────────────────────────────────────────────────────────
314
+ // Helpers
315
+ // ─────────────────────────────────────────────────────────────────────────────
316
+
317
/**
 * Validate attachment structure.
 *
 * Requirements:
 * - attachment itself is a non-null object
 * - kind is a non-empty (after trimming) string
 * - value_json is an object that is neither null nor an array
 * - key is optional; when present it must be a string
 * - dedupe_key is optional; when present it must be a non-blank string
 *
 * Never throws: plugin output is untrusted, and the caller runs this inside
 * its commit transaction, where an exception would discard every attachment.
 *
 * @param attachment - Attachment to validate
 * @returns Error message if invalid, null if valid
 */
function validateAttachment(attachment: Attachment): string | null {
  // A null/undefined element would throw on the property reads below.
  if (attachment === null || typeof attachment !== "object") {
    return "attachment must be an object";
  }

  if (typeof attachment.kind !== "string" || attachment.kind.trim().length === 0) {
    return "attachment.kind must be a non-empty string";
  }

  if (
    attachment.value_json === null ||
    attachment.value_json === undefined ||
    typeof attachment.value_json !== "object" ||
    Array.isArray(attachment.value_json)
  ) {
    return "attachment.value_json must be a plain object";
  }

  if (attachment.key !== undefined && typeof attachment.key !== "string") {
    return "attachment.key must be a string or undefined";
  }

  if (attachment.dedupe_key !== undefined) {
    if (typeof attachment.dedupe_key !== "string") {
      return "attachment.dedupe_key must be a string or undefined";
    }
    if (attachment.dedupe_key.trim().length === 0) {
      return "attachment.dedupe_key must be a non-empty string if provided";
    }
  }

  return null;
}
361
+
362
/**
 * Generate unique ID with prefix.
 *
 * Format: {prefix}_{timestamp36}{random8}
 * - timestamp36: Date.now() in base 36
 * - random8: exactly 8 base-36 characters derived from Math.random()
 *
 * The random part is not cryptographically secure; it only reduces the chance
 * of two IDs colliding within the same millisecond.
 *
 * @param prefix - ID prefix (e.g. "att")
 * @returns Generated ID
 */
function generateId(prefix: string): string {
  // toString(36) may yield fewer than 8 fractional digits (0.5 -> "0.i"), so
  // pad to keep the suffix at the documented fixed width.
  const randomPart = Math.random().toString(36).slice(2, 10).padEnd(8, "0");
  const timestamp = Date.now().toString(36);
  return `${prefix}_${timestamp}${randomPart}`;
}