claude-memory-layer 1.0.11 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/AGENTS.md +60 -0
  2. package/README.md +166 -2
  3. package/bootstrap-kb/decisions/decisions.md +244 -0
  4. package/bootstrap-kb/glossary/glossary.md +46 -0
  5. package/bootstrap-kb/modules/.claude-plugin.md +22 -0
  6. package/bootstrap-kb/modules/agents.md.md +15 -0
  7. package/bootstrap-kb/modules/claude.md.md +15 -0
  8. package/bootstrap-kb/modules/context.md.md +15 -0
  9. package/bootstrap-kb/modules/docs.md +18 -0
  10. package/bootstrap-kb/modules/handoff.md.md +15 -0
  11. package/bootstrap-kb/modules/package-lock.json.md +15 -0
  12. package/bootstrap-kb/modules/package.json.md +15 -0
  13. package/bootstrap-kb/modules/plan.md.md +15 -0
  14. package/bootstrap-kb/modules/readme.md.md +15 -0
  15. package/bootstrap-kb/modules/scripts.md +26 -0
  16. package/bootstrap-kb/modules/spec.md.md +15 -0
  17. package/bootstrap-kb/modules/specs.md +20 -0
  18. package/bootstrap-kb/modules/src.md +51 -0
  19. package/bootstrap-kb/modules/tests.md +42 -0
  20. package/bootstrap-kb/modules/tsconfig.json.md +15 -0
  21. package/bootstrap-kb/modules/vitest.config.ts.md +15 -0
  22. package/bootstrap-kb/overview/overview.md +40 -0
  23. package/bootstrap-kb/sources/manifest.json +950 -0
  24. package/bootstrap-kb/sources/manifest.md +227 -0
  25. package/bootstrap-kb/timeline/timeline.md +57 -0
  26. package/d.sh +3 -0
  27. package/deploy.sh +3 -0
  28. package/dist/cli/index.js +2389 -286
  29. package/dist/cli/index.js.map +4 -4
  30. package/dist/core/index.js +1017 -132
  31. package/dist/core/index.js.map +4 -4
  32. package/dist/hooks/post-tool-use.js +1347 -202
  33. package/dist/hooks/post-tool-use.js.map +4 -4
  34. package/dist/hooks/session-end.js +1339 -194
  35. package/dist/hooks/session-end.js.map +4 -4
  36. package/dist/hooks/session-start.js +1343 -198
  37. package/dist/hooks/session-start.js.map +4 -4
  38. package/dist/hooks/stop.js +1351 -206
  39. package/dist/hooks/stop.js.map +4 -4
  40. package/dist/hooks/user-prompt-submit.js +1347 -202
  41. package/dist/hooks/user-prompt-submit.js.map +4 -4
  42. package/dist/server/api/index.js +1436 -211
  43. package/dist/server/api/index.js.map +4 -4
  44. package/dist/server/index.js +1445 -220
  45. package/dist/server/index.js.map +4 -4
  46. package/dist/services/memory-service.js +1345 -199
  47. package/dist/services/memory-service.js.map +4 -4
  48. package/dist/ui/app.js +69 -2
  49. package/dist/ui/index.html +8 -0
  50. package/docs/MCP_MEMORY_SERVICE_COMPARATIVE_REVIEW.md +271 -0
  51. package/docs/MEMU_ADOPTION.md +40 -0
  52. package/memory/.claude-plugin/commands/2026-02-25.md +263 -0
  53. package/memory/_index.md +405 -0
  54. package/memory/default/uncategorized/2026-02-25.md +4839 -0
  55. package/memory/specs/20260207-dashboard-upgrade/2026-02-25.md +142 -0
  56. package/memory/specs/citations-system/2026-02-25.md +1121 -0
  57. package/memory/specs/endless-mode/2026-02-25.md +1392 -0
  58. package/memory/specs/entity-edge-model/2026-02-25.md +1263 -0
  59. package/memory/specs/evidence-aligner-v2/2026-02-25.md +1028 -0
  60. package/memory/specs/mcp-desktop-integration/2026-02-25.md +1334 -0
  61. package/memory/specs/post-tool-use-hook/2026-02-25.md +1164 -0
  62. package/memory/specs/private-tags/2026-02-25.md +1057 -0
  63. package/memory/specs/progressive-disclosure/2026-02-25.md +1436 -0
  64. package/memory/specs/task-entity-system/2026-02-25.md +924 -0
  65. package/memory/specs/vector-outbox-v2/2026-02-25.md +1510 -0
  66. package/memory/specs/web-viewer-ui/2026-02-25.md +1709 -0
  67. package/package.json +2 -1
  68. package/scripts/build.ts +6 -0
  69. package/src/cli/index.ts +281 -2
  70. package/src/core/consolidated-store.ts +63 -1
  71. package/src/core/consolidation-worker.ts +115 -6
  72. package/src/core/event-store.ts +14 -0
  73. package/src/core/index.ts +1 -0
  74. package/src/core/ingest-interceptor.ts +80 -0
  75. package/src/core/markdown-mirror.ts +70 -0
  76. package/src/core/md-mirror.ts +92 -0
  77. package/src/core/mongo-sync-config.ts +165 -0
  78. package/src/core/mongo-sync-worker.ts +381 -0
  79. package/src/core/retriever.ts +540 -150
  80. package/src/core/sqlite-event-store.ts +350 -1
  81. package/src/core/tag-taxonomy.ts +51 -0
  82. package/src/core/types.ts +28 -0
  83. package/src/server/api/health.ts +53 -0
  84. package/src/server/api/index.ts +3 -1
  85. package/src/server/api/stats.ts +46 -1
  86. package/src/services/bootstrap-organizer.ts +443 -0
  87. package/src/services/codex-session-history-importer.ts +474 -0
  88. package/src/services/memory-service.ts +373 -68
  89. package/src/ui/app.js +69 -2
  90. package/src/ui/index.html +8 -0
  91. package/tests/bootstrap-organizer.test.ts +111 -0
  92. package/tests/consolidation-worker.test.ts +75 -0
  93. package/tests/ingest-interceptor.test.ts +38 -0
  94. package/tests/markdown-mirror.test.ts +85 -0
  95. package/tests/md-mirror.test.ts +50 -0
  96. package/tests/retriever-fallback-chain.test.ts +223 -0
  97. package/tests/retriever-strategy-scope.test.ts +97 -0
  98. package/tests/retriever.memu-adoption.test.ts +122 -0
  99. package/tests/sqlite-event-store-replication.test.ts +92 -0
@@ -0,0 +1,1510 @@
1
+
2
+ ## 2026-02-25T12:31:26.442Z | f506a245-208e-42b1-a52a-016f6ecb94b4
3
+ - type: session_summary
4
+ - session: import:organized
5
+ # Vector Outbox V2 Context
6
+
7
+ > **Version**: 2.0.0
8
+ > **Created**: 2026-01-31
9
+
10
+ ## 1. 배경
11
+
12
+ ### 1.1 분산 시스템의 정합성 문제
13
+
14
+ 두 개의 서로 다른 저장소(DuckDB, LanceDB)에 데이터를 저장할 때:
15
+
16
+ ```
17
+ Application
18
+
19
+ ├──▶ DuckDB (entries 저장) ✓
20
+
21
+ └──▶ LanceDB (벡터 저장) ✗ (실패)
22
+
23
+ 결과: DuckDB에는 있지만 LanceDB에는 없는 불일치 상태
24
+ ```
25
+
26
+ **문제 시나리오**:
27
+ 1. Entry가 저장되었지만 임베딩이 실패
28
+ 2. 재시작 시 어떤 entry가 임베딩되지 않았는지 알 수 없음
29
+ 3. 검색 결과에서 해당 entry가 누락
30
+
31
+ ### 1.2 Transactional Outbox Pattern
32
+
33
+ 마이크로서비스 아키텍처에서 검증된 패턴:
34
+
35
+ ```
36
+ ┌─────────────────────────────────────┐
37
+ │ DuckDB Transaction │
38
+ │ │
39
+ │ 1. INSERT INTO entries (...) │
40
+ │ 2. INSERT INTO vector_outbox (...) │
41
+ │ │
42
+ │ COMMIT │
43
+ └─────────────────────────────────────┘
44
+
45
+ │ (비동기)
46
+
47
+ ┌─────────────────────────────────────┐
48
+ │ Vector Worker │
49
+ │ │
50
+ │ 1. SELECT * FROM vector_outbox │
51
+ │ WHERE status = 'pending' │
52
+ │ 2. Generate embedding │
53
+ │ 3. Upsert to LanceDB │
54
+ │ 4. UPDATE status = 'done' │
55
+ └─────────────────────────────────────┘
56
+ ```
57
+
58
+ **장점**:
59
+ - 원자성 보장 (DuckDB 트랜잭션 내)
60
+ - 실패 시 재시도 가능
61
+ - 상태 추적 가능
62
+
63
+ ## 2. Memo.txt 참고 사항
64
+
65
+ ### 2.1 핵심 원칙 (섹션 2.6)
66
+
67
+ > **6. Vector store 정합성**
68
+ > - DuckDB에 먼저 기록 → outbox → 단일 writer가 LanceDB에 upsert → DuckDB 상태 업데이트
69
+
70
+ ### 2.2 스키마 (섹션 4.4)
71
+
72
+ ```sql
73
+ CREATE TABLE vector_outbox (
74
+ job_id VARCHAR PRIMARY KEY,
75
+ item_kind VARCHAR NOT NULL, -- entry|task_title
76
+ item_id VARCHAR NOT NULL,
77
+ embedding_version VARCHAR NOT NULL,
78
+ status VARCHAR NOT NULL, -- pending|done|failed
79
+ error VARCHAR,
80
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
81
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
82
+ UNIQUE(item_kind, item_id, embedding_version)
83
+ );
84
+ ```
85
+
86
+ ### 2.3 Worker 지시 (섹션 8)
87
+
88
+ **Outbox enqueue**:
89
+ - entry materialized 시: `item_kind='entry'`
90
+ - task_created/task_title_changed 시: `item_kind='task_title'`
91
+
92
+ **Single writer worker**:
93
+ - pending 배치 처리
94
+ - LanceDB **idempotent upsert**
95
+ - 같은 id는 중복 row가 쌓이지 않게
96
+
97
+ **reconcile()**:
98
+ - pending 재처리
99
+ - failed는 재시도 정책
100
+
101
+ ## 3. Idris2 영감 적용
102
+
103
+ ### 3.1 상태 머신 타입 안전성
104
+
105
+ **Idris2 개념**:
106
+ ```idris
107
+ -- 상태 전이가 타입 수준에서 정의됨
108
+ data OutboxState = Pending | Processing | Done | Failed
109
+
110
+ data Transition : OutboxState -> OutboxState -> Type where
111
+ Claim : Transition Pending Processing
112
+ Complete : Transition Processing Done
113
+ Fail : Transition Processing Failed
114
+ Retry : Transition Failed Pending
115
+ ```
116
+
117
+ **TypeScript 적용**:
118
+ ```typescript
119
+ // 유효한 전이만 타입으로 정의
120
+ type ValidTransition =
121
+ | { from: 'pending'; to: 'processing' }
122
+ | { from: 'processing'; to: 'done' }
123
+ | { from: 'processing'; to: 'failed' }
124
+ | { from: 'failed'; to: 'pending' };
125
+
126
+ // 런타임 검증
127
+ function assertValidTransition(from: OutboxStatus, to: OutboxStatus): void {
128
+ const valid: ValidTransition[] = [...];
129
+ if (!valid.some(t => t.from === from && t.to === to)) {
130
+ throw new Error(`Invalid transition: ${from} → ${to}`);
131
+ }
132
+ }
133
+ ```
134
+
135
+ ### 3.2 Idempotency 증명
136
+
137
+ **Idris2 개념**:
138
+ ```idris
139
+ -- 멱등성: f를 두 번 적용해도 한 번 적용한 결과와 같음
140
+ idempotent : (f : a -> a) -> Type
141
+ idempotent f = (x : a) -> f (f x) = f x
142
+ ```
143
+
144
+ **TypeScript 적용**:
145
+ ```typescript
146
+ // UNIQUE 제약으로 idempotency 보장
147
+ const UNIQUE_CONSTRAINT = `
148
+ UNIQUE(item_kind, item_id, embedding_version)
149
+ `;
150
+
151
+ // enqueue는 여러 번 호출해도 같은 결과
152
+ async function enqueue(input: EnqueueInput): Promise<EnqueueResult> {
153
+ await db.run(`
154
+ INSERT INTO vector_outbox (...)
155
+ ON CONFLICT (item_kind, item_id, embedding_version)
156
+ DO NOTHING
157
+ `, [...]);
158
+ // 결과는 항상 동일: 하나의 job만 존재
159
+ }
160
+ ```
161
+
162
+ ### 3.3 벡터 upsert 증명
163
+
164
+ ```typescript
165
+ // upsert 후 조건: 정확히 하나의 레코드만 존재
166
+ interface UpsertInvariant {
167
+ // Pre: record with id=X may or may not exist
168
+ // Post: exactly one record with id=X exists, with latest values
169
+ }
170
+
171
+ async function upsert(record: VectorRecord): Promise<void> {
172
+ // Delete existing (if any)
173
+ await table.delete(`id = '${record.id}'`);
174
+ // Insert new
175
+ await table.add([record]);
176
+ // Invariant: exactly one record with id exists
177
+ }
178
+ ```
179
+
180
+ ## 4. 기존 코드와의 관계
181
+
182
+ ### 4.1 현재 vector-worker.ts
183
+
184
+ ```typescript
185
+ // 현재 구현 (src/core/vector-worker.ts)
186
+ export class VectorWorker {
187
+ private embedder: Embedder;
188
+ private vectorStore: VectorStore;
189
+ private db: Database;
190
+ }
191
+
192
+ export interface OutboxItem {
193
+ id: string;
194
+ eventId: string;
195
+ content: string;
196
+ status: 'pending' | 'processing' | 'done' | 'failed';
197
+ retryCount: number;
198
+ createdAt: Date;
199
+ errorMessage?: string;
200
+ }
201
+ ```
202
+
203
+ ### 4.2 V2 확장 포인트
204
+
205
+ | 기존 | V2 확장 |
206
+ |-----|---------|
207
+ | eventId 기반 | item_kind + item_id |
208
+ | 단순 status | embedding_version 추가 |
209
+ | 재시도 없음 | reconcile + retry policy |
210
+ | 중복 가능 | UNIQUE + upsert |
211
+
212
+ ### 4.3 현재 types.ts
213
+
214
+ ```typescript
215
+ // 현재 OutboxItem 정의
216
+ export interface OutboxItem {
217
+ id: string;
218
+ eventId: string;
219
+ content: string;
220
+ status: 'pending' | 'processing' | 'done' | 'failed';
221
+ retryCount: number;
222
+ createdAt: Date;
223
+ errorMessage?: string;
224
+ }
225
+ ```
226
+
227
+ V2에서 OutboxJob으로 확장.
228
+
229
+ ## 5. 설계 결정 사항
230
+
231
+ ### 5.1 왜 embedding_version을 포함하는가?
232
+
233
+ **시나리오**:
234
+ 1. 모델 v1으로 entry A 임베딩
235
+ 2. 모델 v2로 업그레이드
236
+ 3. entry A를 v2로 재임베딩 필요
237
+
238
+ **해결**:
239
+ ```sql
240
+ -- v1과 v2 job이 별도로 존재 가능
241
+ UNIQUE(item_kind, item_id, embedding_version)
242
+
243
+ -- v1 job
244
+ (job_1, 'entry', 'e1', 'minilm-v1', 'done')
245
+
246
+ -- v2 job
247
+ (job_2, 'entry', 'e1', 'minilm-v2', 'pending')
248
+ ```
249
+
250
+ ### 5.2 왜 단일 writer인가?
251
+
252
+ **문제**: 여러 프로세스가 동시에 LanceDB에 쓰면
253
+ - 동일 id에 대해 race condition
254
+ - delete와 add 사이에 다른 write 끼어들 수 있음
255
+ - LanceDB 자체 락 메커니즘 미약
256
+
257
+ **해결**:
258
+ - 애플리케이션 레벨에서 단일 writer 보장
259
+ - 파일 락 또는 DB 락 사용
260
+
261
+ ### 5.3 왜 delete + add인가?
262
+
263
+ LanceDB가 true upsert를 지원하지 않기 때문:
264
+
265
+ ```typescript
266
+ // 방법 1: update 시도 (LanceDB 제한적 지원)
267
+ await table.update({ id: 'e1' }, { vector: newVector }); // 불완전
268
+
269
+ // 방법 2: delete + add (권장)
270
+ await table.delete(`id = 'e1'`);
271
+ await table.add([{ id: 'e1', vector: newVector, ... }]); // 확실
272
+ ```
273
+
274
+ ## 6. Reconcile 전략
275
+
276
+ ### 6.1 Failed Job 재시도
277
+
278
+ ```typescript
279
+ const RETRY_POLICY = {
280
+ maxRetries: 3,
281
+ backoffMs: [1000, 5000, 30000] // 1초, 5초, 30초
282
+ };
283
+
284
+ async function shouldRetry(job: OutboxJob): Promise<boolean> {
285
+ return job.retryCount < RETRY_POLICY.maxRetries;
286
+ }
287
+
288
+ async function reconcileFailed(): Promise<number> {
289
+ return db.run(`
290
+ UPDATE vector_outbox
291
+ SET status = 'pending',
292
+ retry_count = retry_count + 1,
293
+ updated_at = CURRENT_TIMESTAMP
294
+ WHERE status = 'failed'
295
+ AND retry_count < ?
296
+ `, [RETRY_POLICY.maxRetries]);
297
+ }
298
+ ```
299
+
300
+ ### 6.2 Stuck Job 복구
301
+
302
+ Processing 상태에서 worker가 죽으면:
303
+
304
+ ```typescript
305
+ const STUCK_THRESHOLD_MS = 5 * 60 * 1000; // 5분
306
+
307
+ async function recoverStuck(): Promise<number> {
308
+ const threshold = new Date(Date.now() - STUCK_THRESHOLD_MS);
309
+ return db.run(`
310
+ UPDATE vector_outbox
311
+ SET status = 'pending',
312
+ updated_at = CURRENT_TIMESTAMP
313
+ WHERE status = 'processing'
314
+ AND updated_at < ?
315
+ `, [threshold]);
316
+ }
317
+ ```
318
+
319
+ ### 6.3 Done Job 정리
320
+
321
+ 저장 공간 관리:
322
+
323
+ ```typescript
324
+ async function cleanupDone(olderThanDays: number = 7): Promise<number> {
325
+ return db.run(`
326
+ DELETE FROM vector_outbox
327
+ WHERE status = 'done'
328
+ AND updated_at < datetime('now', '-${olderThanDays} days')
329
+ `);
330
+ }
331
+ ```
332
+
333
+ ## 7. 모니터링 및 알림
334
+
335
+ ### 7.1 메트릭
336
+
337
+ ```typescript
338
+ interface OutboxMetrics {
339
+ pendingCount: number;
340
+ processingCount: number;
341
+ doneCount: number;
342
+ failedCount: number;
343
+ avgProcessingTimeMs: number;
344
+ oldestPendingAge: number; // 가장 오래된 pending job의 나이 (ms)
345
+ }
346
+ ```
347
+
348
+ ### 7.2 알림 조건
349
+
350
+ ```typescript
351
+ const ALERT_THRESHOLDS = {
352
+ failedCount: 10, // failed가 10개 이상
353
+ pendingAge: 60 * 60 * 1000 // pending이 1시간 이상 된 경우
354
+ };
355
+
356
+ async function checkAlerts(): Promise<Alert[]> {
357
+ const metrics = await getMetrics();
358
+ const alerts: Alert[] = [];
359
+
360
+ if (metrics.failedCount >= ALERT_THRESHOLDS.failedCount) {
361
+ alerts.push({
362
+ level: 'warning',
363
+ message: `${metrics.failedCount} vector jobs failed`
364
+ });
365
+ }
366
+
367
+ if (metrics.oldestPendingAge >= ALERT_THRESHOLDS.pendingAge) {
368
+ alerts.push({
369
+ level: 'warning',
370
+ message: `Oldest pending job is ${metrics.oldestPendingAge / 1000}s old`
371
+ });
372
+ }
373
+
374
+ return alerts;
375
+ }
376
+ ```
377
+
378
+ ## 8. 에러 처리
379
+
380
+ ### 8.1 임베딩 실패
381
+
382
+ ```typescript
383
+ try {
384
+ const embedding = await embedder.embed(content);
385
+ } catch (error) {
386
+ if (error.message.includes('rate limit')) {
387
+ // 재시도 가능
388
+ throw new RetryableError('Rate limited', { retryAfterMs: 60000 });
389
+ } else if (error.message.includes('too long')) {
390
+ // 영구 실패
391
+ throw new PermanentError('Content too long for embedding');
392
+ }
393
+ throw error;
394
+ }
395
+ ```
396
+
397
+ ### 8.2 LanceDB 실패
398
+
399
+ ```typescript
400
+ try {
401
+ await vectorStore.upsert(record);
402
+ } catch (error) {
403
+ if (error.message.includes('disk full')) {
404
+ throw new PermanentError('Disk full');
405
+ } else if (error.message.includes('connection')) {
406
+ throw new RetryableError('Connection failed');
407
+ }
408
+ throw error;
409
+ }
410
+ ```
411
+
412
+ ### 8.3 콘텐츠 없음
413
+
414
+ ```typescript
415
+ const content = await getContent(job.itemKind, job.itemId);
416
+ if (!content) {
417
+ // Entry가 삭제되었거나 존재하지 않음
418
+ // Job을 done으로 마크하고 건너뜀
419
+ await markDone(job.jobId, { skipped: true, reason: 'content_not_found' });
420
+ return;
421
+ }
422
+ ```
423
+
424
+ ## 9. 성능 고려사항
425
+
426
+ ### 9.1 배치 처리
427
+
428
+ ```typescript
429
+ const BATCH_SIZE = 50;
430
+
431
+ // 한 번에 여러 job claim
432
+ const jobs = await claimJobs(BATCH_SIZE);
433
+
434
+ // 임베딩도 배치로
435
+ const contents = jobs.map(j => j.content);
436
+ const embeddings = await embedder.embedBatch(contents);
437
+
438
+ // LanceDB 배치 insert
439
+ await vectorStore.addBatch(jobs.map((j, i) => ({
440
+ id: j.itemId,
441
+ vector: embeddings[i],
442
+ ...
443
+ })));
444
+ ```
445
+
446
+ ### 9.2 병렬 처리 (주의)
447
+
448
+ ```typescript
449
+ // 단일 writer 내에서 병렬 처리
450
+ // LanceDB 쓰기는 순차적으로, 임베딩은 병렬로
451
+
452
+ const jobs = await claimJobs(BATCH_SIZE);
453
+
454
+ // 임베딩 병렬 생성
455
+ const embeddings = await Promise.all(
456
+ jobs.map(async (job) => {
457
+ const content = await getContent(job.itemKind, job.itemId);
458
+ return embedder.embed(content);
459
+ })
460
+ );
461
+
462
+ // LanceDB 순차 쓰기
463
+ for (let i = 0; i < jobs.length; i++) {
464
+ await vectorStore.upsert({ id: jobs[i].itemId, vector: embeddings[i], ... });
465
+ await markDone(jobs[i].jobId);
466
+ }
467
+ ```
468
+
469
+ ## 10. 참고 자료
470
+
471
+ - **Memo.txt**: 섹션 8 - Vector Outbox + LanceDB writer 구현 지시
472
+ - **현재 구현**: `src/core/vector-worker.ts`
473
+ - **Transactional Outbox**: 마이크로서비스 패턴
474
+ - **AXIOMMIND**: Principle 6 - 벡터 정합성
475
+
476
+ ## 2026-02-25T12:31:26.450Z | becbd566-a1b0-419c-93f8-7ce2a18f4ffd
477
+ - type: session_summary
478
+ - session: import:organized
479
+ # Vector Outbox V2 Implementation Plan
480
+
481
+ > **Version**: 2.0.0
482
+ > **Status**: Draft
483
+ > **Created**: 2026-01-31
484
+
485
+ ## Phase 1: 스키마 및 타입 (P0)
486
+
487
+ ### 1.1 타입 정의
488
+
489
+ **파일**: `src/core/types.ts` 수정
490
+
491
+ ```typescript
492
+ // 추가할 타입들
493
+ export const ItemKindSchema = z.enum(['entry', 'task_title', 'session_summary']);
494
+ export type ItemKind = z.infer<typeof ItemKindSchema>;
495
+
496
+ export const OutboxStatusSchema = z.enum(['pending', 'processing', 'done', 'failed']);
497
+ export type OutboxStatus = z.infer<typeof OutboxStatusSchema>;
498
+
499
+ export const OutboxJobSchema = z.object({
500
+ jobId: z.string(),
501
+ itemKind: ItemKindSchema,
502
+ itemId: z.string(),
503
+ embeddingVersion: z.string(),
504
+ status: OutboxStatusSchema,
505
+ retryCount: z.number().int().nonnegative(),
506
+ error: z.string().optional(),
507
+ createdAt: z.date(),
508
+ updatedAt: z.date()
509
+ });
510
+ export type OutboxJob = z.infer<typeof OutboxJobSchema>;
511
+ ```
512
+
513
+ **작업 항목**:
514
+ - [ ] ItemKind 스키마 추가
515
+ - [ ] OutboxStatus 스키마 추가
516
+ - [ ] OutboxJob 스키마 추가
517
+ - [ ] EnqueueResult, ProcessResult 타입 추가
518
+
519
+ ### 1.2 DB 스키마
520
+
521
+ **파일**: 마이그레이션 스크립트
522
+
523
+ ```sql
524
+ -- 기존 outbox 테이블이 있다면 백업 후 마이그레이션
525
+ -- 신규 vector_outbox 테이블
526
+
527
+ CREATE TABLE vector_outbox (
528
+ job_id VARCHAR PRIMARY KEY,
529
+ item_kind VARCHAR NOT NULL,
530
+ item_id VARCHAR NOT NULL,
531
+ embedding_version VARCHAR NOT NULL,
532
+ status VARCHAR NOT NULL DEFAULT 'pending',
533
+ retry_count INTEGER DEFAULT 0,
534
+ error VARCHAR,
535
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
536
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
537
+ UNIQUE(item_kind, item_id, embedding_version)
538
+ );
539
+
540
+ CREATE INDEX idx_outbox_status ON vector_outbox(status);
541
+ CREATE INDEX idx_outbox_created ON vector_outbox(created_at);
542
+ ```
543
+
544
+ **작업 항목**:
545
+ - [ ] vector_outbox 테이블 DDL
546
+ - [ ] 인덱스 생성
547
+ - [ ] UNIQUE 제약 추가
548
+ - [ ] 기존 데이터 마이그레이션 스크립트 (필요시)
549
+
550
+ ## Phase 2: VectorOutbox 클래스 (P0)
551
+
552
+ ### 2.1 Enqueue 구현
553
+
554
+ **파일**: `src/core/vector-outbox.ts` (신규)
555
+
556
+ ```typescript
557
+ export class VectorOutbox {
558
+ constructor(private db: Database);
559
+
560
+ async enqueue(input: OutboxEnqueueInput): Promise<EnqueueResult> {
561
+ const jobId = uuidv4();
562
+
563
+ try {
564
+ await this.db.run(`
565
+ INSERT INTO vector_outbox
566
+ (job_id, item_kind, item_id, embedding_version, status)
567
+ VALUES (?, ?, ?, ?, 'pending')
568
+ ON CONFLICT (item_kind, item_id, embedding_version)
569
+ DO NOTHING
570
+ `, [jobId, input.itemKind, input.itemId, input.embeddingVersion]);
571
+
572
+ // 삽입 성공 여부 확인
573
+ const inserted = await this.db.query(`
574
+ SELECT job_id FROM vector_outbox
575
+ WHERE item_kind = ? AND item_id = ? AND embedding_version = ?
576
+ `, [input.itemKind, input.itemId, input.embeddingVersion]);
577
+
578
+ const isNew = inserted[0]?.job_id === jobId;
579
+ return { success: true, jobId: inserted[0].job_id, isNew };
580
+
581
+ } catch (error) {
582
+ return { success: false, error: error.message };
583
+ }
584
+ }
585
+ }
586
+ ```
587
+
588
+ **작업 항목**:
589
+ - [ ] enqueue() 메서드 (중복 처리 포함)
590
+ - [ ] enqueueBatch() 배치 메서드
591
+ - [ ] ON CONFLICT DO NOTHING 처리
592
+
593
+ ### 2.2 조회 메서드
594
+
595
+ ```typescript
596
+ async getPendingJobs(limit: number = 100): Promise<OutboxJob[]> {
597
+ return this.db.query(`
598
+ SELECT * FROM vector_outbox
599
+ WHERE status = 'pending'
600
+ ORDER BY created_at ASC
601
+ LIMIT ?
602
+ `, [limit]);
603
+ }
604
+
605
+ async getJobById(jobId: string): Promise<OutboxJob | null> {
606
+ const rows = await this.db.query(`
607
+ SELECT * FROM vector_outbox WHERE job_id = ?
608
+ `, [jobId]);
609
+ return rows[0] || null;
610
+ }
611
+
612
+ async getMetrics(): Promise<OutboxMetrics> {
613
+ // 상태별 통계 조회
614
+ }
615
+ ```
616
+
617
+ **작업 항목**:
618
+ - [ ] getPendingJobs() 메서드
619
+ - [ ] getJobById() 메서드
620
+ - [ ] getMetrics() 통계 메서드
621
+
622
+ ## Phase 3: VectorWorker V2 (P0)
623
+
624
+ ### 3.1 Worker 기본 구현
625
+
626
+ **파일**: `src/core/vector-worker.ts` 수정
627
+
628
+ ```typescript
629
+ export class VectorWorkerV2 {
630
+ constructor(
631
+ private outbox: VectorOutbox,
632
+ private embedder: Embedder,
633
+ private vectorStore: VectorStore,
634
+ private db: Database
635
+ );
636
+
637
+ // Job claiming with lock
638
+ async claimJobs(limit: number): Promise<OutboxJob[]> {
639
+ // 트랜잭션으로 pending → processing 변경
640
+ return this.db.transaction(async (tx) => {
641
+ const jobs = await tx.query(`
642
+ SELECT * FROM vector_outbox
643
+ WHERE status = 'pending'
644
+ ORDER BY created_at ASC
645
+ LIMIT ?
646
+ FOR UPDATE -- DuckDB에서 지원 시
647
+ `, [limit]);
648
+
649
+ if (jobs.length === 0) return [];
650
+
651
+ const jobIds = jobs.map(j => j.job_id);
652
+ await tx.run(`
653
+ UPDATE vector_outbox
654
+ SET status = 'processing', updated_at = CURRENT_TIMESTAMP
655
+ WHERE job_id IN (${jobIds.map(() => '?').join(',')})
656
+ `, jobIds);
657
+
658
+ return jobs.map(j => ({ ...j, status: 'processing' as const }));
659
+ });
660
+ }
661
+ }
662
+ ```
663
+
664
+ **작업 항목**:
665
+ - [ ] claimJobs() 락 처리
666
+ - [ ] markDone() 메서드
667
+ - [ ] markFailed() 메서드
668
+
669
+ ### 3.2 Process 루프
670
+
671
+ ```typescript
672
+ async processAll(): Promise<ProcessResult> {
673
+ const result: ProcessResult = {
674
+ processed: 0,
675
+ succeeded: 0,
676
+ failed: 0,
677
+ errors: []
678
+ };
679
+
680
+ const BATCH_SIZE = 50;
681
+
682
+ while (true) {
683
+ const jobs = await this.claimJobs(BATCH_SIZE);
684
+ if (jobs.length === 0) break;
685
+
686
+ for (const job of jobs) {
687
+ try {
688
+ await this.processJob(job);
689
+ await this.markDone(job.jobId);
690
+ result.succeeded++;
691
+ } catch (error) {
692
+ await this.markFailed(job.jobId, error.message);
693
+ result.failed++;
694
+ result.errors.push({ jobId: job.jobId, error: error.message });
695
+ }
696
+ result.processed++;
697
+ }
698
+ }
699
+
700
+ return result;
701
+ }
702
+
703
+ private async processJob(job: OutboxJob): Promise<void> {
704
+ // 1. 콘텐츠 조회
705
+ const content = await this.getContent(job.itemKind, job.itemId);
706
+ if (!content) {
707
+ throw new Error(`Content not found: ${job.itemKind}:${job.itemId}`);
708
+ }
709
+
710
+ // 2. 임베딩 생성
711
+ const embedding = await this.embedder.embed(content);
712
+
713
+ // 3. LanceDB upsert
714
+ await this.vectorStore.upsert({
715
+ id: `${job.itemKind}:${job.itemId}`,
716
+ vector: embedding,
717
+ content,
718
+ metadata: {
719
+ itemKind: job.itemKind,
720
+ embeddingVersion: job.embeddingVersion,
721
+ indexedAt: new Date().toISOString()
722
+ }
723
+ });
724
+ }
725
+ ```
726
+
727
+ **작업 항목**:
728
+ - [ ] processAll() 메인 루프
729
+ - [ ] processJob() 단일 job 처리
730
+ - [ ] getContent() 콘텐츠 조회 (itemKind별)
731
+
732
+ ### 3.3 Reconcile
733
+
734
+ ```typescript
735
+ async reconcileFailed(maxRetries: number = 3): Promise<number> {
736
+ const result = await this.db.run(`
737
+ UPDATE vector_outbox
738
+ SET status = 'pending',
739
+ retry_count = retry_count + 1,
740
+ updated_at = CURRENT_TIMESTAMP
741
+ WHERE status = 'failed'
742
+ AND retry_count < ?
743
+ `, [maxRetries]);
744
+
745
+ return result.changes;
746
+ }
747
+
748
+ async recoverStuck(stuckThresholdMs: number = 5 * 60 * 1000): Promise<number> {
749
+ const threshold = new Date(Date.now() - stuckThresholdMs);
750
+ const result = await this.db.run(`
751
+ UPDATE vector_outbox
752
+ SET status = 'pending',
753
+ updated_at = CURRENT_TIMESTAMP
754
+ WHERE status = 'processing'
755
+ AND updated_at < ?
756
+ `, [threshold.toISOString()]);
757
+
758
+ return result.changes;
759
+ }
760
+ ```
761
+
762
+ **작업 항목**:
763
+ - [ ] reconcileFailed() 재시도 메서드
764
+ - [ ] recoverStuck() stuck 복구 메서드
765
+ - [ ] cleanupDone() 정리 메서드
766
+
767
+ ## Phase 4: VectorStore Upsert (P0)
768
+
769
+ ### 4.1 Upsert 메서드
770
+
771
+ **파일**: `src/core/vector-store.ts` 수정
772
+
773
+ ```typescript
774
+ // 기존 add() 메서드 외에 upsert() 추가
775
+ async upsert(record: VectorRecord): Promise<void> {
776
+ const table = await this.getOrCreateTable();
777
+
778
+ // 기존 레코드 확인
779
+ const existing = await table
780
+ .search([0]) // dummy
781
+ .filter(`id = '${record.id}'`)
782
+ .limit(1)
783
+ .toArray();
784
+
785
+ if (existing.length > 0) {
786
+ // 삭제 후 재삽입 (LanceDB upsert 패턴)
787
+ await table.delete(`id = '${record.id}'`);
788
+ }
789
+
790
+ await table.add([{
791
+ id: record.id,
792
+ vector: record.vector,
793
+ content: record.content,
794
+ timestamp: record.timestamp,
795
+ ...record.metadata
796
+ }]);
797
+ }
798
+ ```
799
+
800
+ **작업 항목**:
801
+ - [ ] upsert() 메서드 구현
802
+ - [ ] delete + add 패턴 적용
803
+ - [ ] 기존 add()와 구분
804
+
805
+ ### 4.2 테이블 버전 관리
806
+
807
+ ```typescript
808
+ // 임베딩 모델 버전별 테이블
809
+ private getTableName(itemKind: ItemKind, version: string): string {
810
+ const versionSlug = version.replace(/[^a-z0-9]/gi, '_').toLowerCase();
811
+ return `${itemKind}_vectors_${versionSlug}`;
812
+ }
813
+
814
+ async getOrCreateTable(itemKind: ItemKind, version: string): Promise<Table> {
815
+ const tableName = this.getTableName(itemKind, version);
816
+
817
+ try {
818
+ return await this.db.openTable(tableName);
819
+ } catch {
820
+ // 테이블 생성
821
+ return await this.db.createTable(tableName, [
822
+ { id: '', vector: [], content: '', timestamp: '' }
823
+ ]);
824
+ }
825
+ }
826
+ ```
827
+
828
+ **작업 항목**:
829
+ - [ ] getTableName() 버전별 테이블명
830
+ - [ ] getOrCreateTable() lazy 생성
831
+
832
+ ## Phase 5: 통합 및 트리거 (P0)
833
+
834
+ ### 5.1 Entry 저장 시 자동 enqueue
835
+
836
+ **파일**: Graduation 또는 Entry 저장 로직
837
+
838
+ ```typescript
839
+ async function materializeEntry(entry: Entry): Promise<void> {
840
+ // 1. Entry 저장
841
+ await entryRepo.create(entry);
842
+
843
+ // 2. Outbox에 자동 enqueue
844
+ await outbox.enqueue({
845
+ itemKind: 'entry',
846
+ itemId: entry.entry_id,
847
+ embeddingVersion: config.embedding.version
848
+ });
849
+ }
850
+ ```
851
+
852
+ **작업 항목**:
853
+ - [ ] Entry 저장 후 outbox.enqueue() 호출
854
+ - [ ] Task 생성 시 task_title enqueue
855
+ - [ ] Session 종료 시 session_summary enqueue
856
+
857
+ ### 5.2 단일 Writer 보장
858
+
859
+ **파일**: `src/core/worker-lock.ts` (신규)
860
+
861
+ ```typescript
862
+ export class WorkerLock {
863
+ private lockFile: string;
864
+
865
+ constructor(lockPath: string = '/tmp/vector-worker.lock');
866
+
867
+ acquire(): boolean {
868
+ if (fs.existsSync(this.lockFile)) {
869
+ const pid = parseInt(fs.readFileSync(this.lockFile, 'utf8'));
870
+ if (this.isProcessRunning(pid)) {
871
+ return false; // 다른 worker 실행 중
872
+ }
873
+ // stale lock file 제거
874
+ fs.unlinkSync(this.lockFile);
875
+ }
876
+ fs.writeFileSync(this.lockFile, process.pid.toString());
877
+ return true;
878
+ }
879
+
880
+ release(): void {
881
+ if (fs.existsSync(this.lockFile)) {
882
+ fs.unlinkSync(this.lockFile);
883
+ }
884
+ }
885
+ }
886
+ ```
887
+
888
+ **작업 항목**:
889
+ - [ ] WorkerLock 클래스
890
+ - [ ] acquire/release 메서드
891
+ - [ ] stale lock 처리
892
+
893
+ ## Phase 6: CLI 및 모니터링 (P1)
894
+
895
+ ### 6.1 CLI 커맨드
896
+
897
+ **파일**: `src/cli/index.ts` 수정
898
+
899
+ ```typescript
900
+ // 벡터 워커 실행
901
+ program
902
+ .command('vector-worker')
903
+ .description('Process pending vector jobs')
904
+ .option('--once', 'Process once and exit')
905
+ .option('--reconcile', 'Also reconcile failed jobs')
906
+ .action(async (options) => {
907
+ const lock = new WorkerLock();
908
+ if (!lock.acquire()) {
909
+ console.log('Another worker is running');
910
+ process.exit(0);
911
+ }
912
+
913
+ try {
914
+ if (options.reconcile) {
915
+ const recovered = await worker.reconcileFailed(3);
916
+ console.log(`Recovered ${recovered} failed jobs`);
917
+ }
918
+
919
+ const result = await worker.processAll();
920
+ console.log(`Processed: ${result.processed}, Succeeded: ${result.succeeded}, Failed: ${result.failed}`);
921
+
922
+ if (!options.once) {
923
+ // 주기적 실행
924
+ setInterval(async () => {
925
+ await worker.processAll();
926
+ }, 10000);
927
+ }
928
+ } finally {
929
+ lock.release();
930
+ }
931
+ });
932
+ ```
933
+
934
+ **작업 항목**:
935
+ - [ ] `cli vector-worker` 커맨드
936
+ - [ ] `cli vector-status` 통계 조회
937
+ - [ ] `cli vector-reconcile` 수동 복구
938
+
939
+ ### 6.2 상태 조회
940
+
941
+ ```typescript
942
+ program
943
+ .command('vector-status')
944
+ .description('Show vector outbox status')
945
+ .action(async () => {
946
+ const metrics = await outbox.getMetrics();
947
+ console.log('Vector Outbox Status:');
948
+ console.log(` Pending: ${metrics.pendingCount}`);
949
+ console.log(` Processing: ${metrics.processingCount}`);
950
+ console.log(` Done: ${metrics.doneCount}`);
951
+ console.log(` Failed: ${metrics.failedCount}`);
952
+ console.log(` Last sync: ${metrics.lastProcessedAt || 'Never'}`);
953
+ });
954
+ ```
955
+
956
+ **작업 항목**:
957
+ - [ ] 상태 조회 커맨드
958
+ - [ ] 테이블 형식 출력
959
+
960
+ ## 파일 목록
961
+
962
+ ### 신규 파일
963
+ ```
964
+ src/core/vector-outbox.ts # Outbox 관리
965
+ src/core/worker-lock.ts # 단일 worker 락
966
+ ```
967
+
968
+ ### 수정 파일
969
+ ```
970
+ src/core/types.ts # 타입 추가
971
+ src/core/vector-worker.ts # V2 로직 추가
972
+ src/core/vector-store.ts # upsert 추가
973
+ src/cli/index.ts # CLI 커맨드 추가
974
+ ```
975
+
976
+ ## 테스트
977
+
978
+ ### 필수 테스트 케이스
979
+
980
+ 1. **Idempotent Enqueue**
981
+ ```typescript
982
+ await outbox.enqueue({ itemKind: 'entry', itemId: 'e1', embeddingVersion: 'v1' });
983
+ await outbox.enqueue({ itemKind: 'entry', itemId: 'e1', embeddingVersion: 'v1' });
984
+ // 두 번째는 isNew: false
985
+ const count = await db.query('SELECT COUNT(*) AS count FROM vector_outbox');
986
+ expect(count[0].count).toBe(1);
987
+ ```
988
+
989
+ 2. **Process All**
990
+ ```typescript
991
+ await outbox.enqueue({ itemKind: 'entry', itemId: 'e1', ... });
992
+ await outbox.enqueue({ itemKind: 'entry', itemId: 'e2', ... });
993
+ const result = await worker.processAll();
994
+ expect(result.processed).toBe(2);
995
+ expect(result.succeeded).toBe(2);
996
+ ```
997
+
998
+ 3. **Reconcile Failed**
999
+ ```typescript
1000
+ // 실패한 job 생성
1001
+ await db.run(`
1002
+ INSERT INTO vector_outbox (job_id, item_kind, item_id, embedding_version, status, retry_count)
1003
+ VALUES ('j1', 'entry', 'e1', 'v1', 'failed', 1)
1004
+ `);
1005
+ const recovered = await worker.reconcileFailed(3);
1006
+ expect(recovered).toBe(1);
1007
+ // status가 pending으로 변경됨
1008
+ ```
1009
+
1010
+ 4. **LanceDB Upsert**
1011
+ ```typescript
1012
+ await vectorStore.upsert({ id: 'e1', vector: [0.1, 0.2], ... });
1013
+ await vectorStore.upsert({ id: 'e1', vector: [0.3, 0.4], ... });
1014
+ // 중복 없이 최신 벡터만 존재
1015
+ const results = await vectorStore.search([0.3, 0.4], 10);
1016
+ expect(results.filter(r => r.id === 'e1').length).toBe(1);
1017
+ ```
1018
+
1019
+ 5. **Single Writer Lock**
1020
+ ```typescript
1021
+ const lock1 = new WorkerLock();
1022
+ const lock2 = new WorkerLock();
1023
+ expect(lock1.acquire()).toBe(true);
1024
+ expect(lock2.acquire()).toBe(false);
1025
+ lock1.release();
1026
+ expect(lock2.acquire()).toBe(true);
1027
+ ```
1028
+
1029
+ ## 마일스톤
1030
+
1031
+ | 단계 | 완료 기준 |
1032
+ |------|----------|
1033
+ | M1 | 타입 정의 + vector_outbox 테이블 |
1034
+ | M2 | VectorOutbox enqueue/getPending |
1035
+ | M3 | VectorWorkerV2 claimJobs/markDone |
1036
+ | M4 | VectorStore upsert |
1037
+ | M5 | processAll() 전체 루프 |
1038
+ | M6 | reconcile + recoverStuck |
1039
+ | M7 | CLI 커맨드 |
1040
+ | M8 | 테스트 통과 |
1041
+
1042
+ ## 2026-02-25T12:31:26.457Z | 69fdda54-43d8-4a4f-a869-20ab2f32b2ab
1043
+ - type: session_summary
1044
+ - session: import:organized
1045
+ # Vector Outbox V2 Specification
1046
+
1047
+ > **Version**: 2.0.0
1048
+ > **Status**: Draft
1049
+ > **Created**: 2026-01-31
1050
+
1051
+ ## 1. 개요
1052
+
1053
+ ### 1.1 문제 정의
1054
+
1055
+ DuckDB와 LanceDB 간 데이터 정합성 문제:
1056
+
1057
+ 1. **원자성 부재**: DuckDB 쓰기와 LanceDB 쓰기가 별도 트랜잭션
1058
+ 2. **중복 벡터**: 재처리 시 같은 벡터가 중복 저장될 수 있음
1059
+ 3. **일관성 부재**: DuckDB에는 있지만 LanceDB에는 없는 상태 발생
1060
+ 4. **동시성**: 여러 프로세스가 동시에 벡터 저장 시 충돌
1061
+
1062
+ ### 1.2 해결 방향
1063
+
1064
+ **Transactional Outbox Pattern**:
1065
+ 1. DuckDB에 먼저 기록 (vector_outbox 테이블)
1066
+ 2. 단일 writer가 outbox를 처리하여 LanceDB에 upsert
1067
+ 3. 성공 시 outbox 상태 업데이트
1068
+
1069
+ ## 2. 핵심 개념
1070
+
1071
+ ### 2.1 Outbox 패턴 흐름
1072
+
1073
+ ```
1074
+ Application DuckDB LanceDB
1075
+ │ │ │
1076
+ │ 1. Entry 저장 │ │
1077
+ ├────────────────────────▶│ │
1078
+ │ │ │
1079
+ │ 2. Outbox job 생성 │ │
1080
+ ├────────────────────────▶│ │
1081
+ │ │ │
1082
+ │ (트랜잭션 커밋) │ │
1083
+ │ │ │
1084
+ ┌─────┴─────┐
1085
+ │ Worker │
1086
+ └─────┬─────┘
1087
+ │ │
1088
+ │ 3. pending job 조회 │
1089
+ │◀────────────────────────│
1090
+ │ │
1091
+ │ 4. 임베딩 생성 │
1092
+ │ │
1093
+ │ 5. LanceDB upsert │
1094
+ │─────────────────────────▶│
1095
+ │ │
1096
+ │ 6. status='done' 업데이트 │
1097
+ │────────────────────────▶│
1098
+ ```
1099
+
1100
+ ### 2.2 Job 상태 머신
1101
+
1102
+ ```
1103
+ ┌─────────┐ Worker 픽업 ┌────────────┐
1104
+ │ pending │ ──────────────────▶ │ processing │
1105
+ └─────────┘ └──────┬─────┘
1106
+ │ │
1107
+ │ 재시도 (reconcile) │
1108
+ │ │
1109
+ │ ┌───────────────────────┼───────────────────────┐
1110
+ │ │ │ │
1111
+ │ ▼ ▼ ▼
1112
+ │ ┌─────────┐ ┌──────────┐ ┌─────────┐
1113
+ └───▶│ pending │ │ done │ │ failed │
1114
+ └─────────┘ └──────────┘ └─────────┘
1115
+
1116
+ │ retry < max
1117
+
1118
+ ┌─────────┐
1119
+ │ pending │
1120
+ └─────────┘
1121
+ ```
1122
+
1123
+ ### 2.3 Item 종류
1124
+
1125
+ ```typescript
1126
+ type ItemKind =
1127
+ | 'entry' // entries 테이블의 content 임베딩
1128
+ | 'task_title' // entities 테이블의 task 제목 임베딩
1129
+ | 'session_summary' // 세션 요약 임베딩
1130
+ ;
1131
+ ```
1132
+
1133
+ ## 3. DB 스키마
1134
+
1135
+ ### 3.1 vector_outbox 테이블
1136
+
1137
+ ```sql
1138
+ CREATE TABLE vector_outbox (
1139
+ job_id VARCHAR PRIMARY KEY,
1140
+ item_kind VARCHAR NOT NULL, -- entry|task_title|session_summary
1141
+ item_id VARCHAR NOT NULL,
1142
+ embedding_version VARCHAR NOT NULL, -- e.g., 'v1.0.0', 'minilm-v2'
1143
+ status VARCHAR NOT NULL, -- pending|processing|done|failed
1144
+ retry_count INTEGER DEFAULT 0,
1145
+ error VARCHAR,
1146
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1147
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1148
+
1149
+ -- 중복 방지: 같은 아이템+버전은 하나만
1150
+ UNIQUE(item_kind, item_id, embedding_version)
1151
+ );
1152
+
1153
+ CREATE INDEX idx_outbox_status ON vector_outbox(status);
1154
+ CREATE INDEX idx_outbox_created ON vector_outbox(created_at);
1155
+ ```
1156
+
1157
+ ### 3.2 vector_store_meta 테이블 (선택)
1158
+
1159
+ ```sql
1160
+ -- LanceDB 테이블 메타데이터 추적
1161
+ CREATE TABLE vector_store_meta (
1162
+ table_name VARCHAR PRIMARY KEY, -- e.g., 'entry_vectors_v1'
1163
+ embedding_version VARCHAR NOT NULL,
1164
+ item_count INTEGER DEFAULT 0,
1165
+ last_sync_at TIMESTAMP,
1166
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
1167
+ );
1168
+ ```
1169
+
1170
+ ## 4. Outbox API
1171
+
1172
+ ### 4.1 Enqueue
1173
+
1174
+ ```typescript
1175
+ interface OutboxEnqueueInput {
1176
+ itemKind: ItemKind;
1177
+ itemId: string;
1178
+ embeddingVersion: string;
1179
+ }
1180
+
1181
+ interface VectorOutbox {
1182
+ // Job 추가 (중복 시 무시)
1183
+ enqueue(input: OutboxEnqueueInput): Promise<EnqueueResult>;
1184
+
1185
+ // 배치 추가
1186
+ enqueueBatch(inputs: OutboxEnqueueInput[]): Promise<EnqueueResult[]>;
1187
+ }
1188
+
1189
+ type EnqueueResult =
1190
+ | { success: true; jobId: string; isNew: true }
1191
+ | { success: true; jobId: string; isNew: false } // 이미 존재
1192
+ | { success: false; error: string };
1193
+ ```
1194
+
1195
+ ### 4.2 Process (Worker)
1196
+
1197
+ ```typescript
1198
+ interface VectorWorker {
1199
+ // pending job 가져오기 (락)
1200
+ claimJobs(limit: number): Promise<OutboxJob[]>;
1201
+
1202
+ // 처리 완료 표시
1203
+ markDone(jobId: string): Promise<void>;
1204
+
1205
+ // 실패 표시
1206
+ markFailed(jobId: string, error: string): Promise<void>;
1207
+
1208
+ // 전체 처리 루프
1209
+ processAll(): Promise<ProcessResult>;
1210
+ }
1211
+
1212
+ interface ProcessResult {
1213
+ processed: number;
1214
+ succeeded: number;
1215
+ failed: number;
1216
+ errors: { jobId: string; error: string }[];
1217
+ }
1218
+ ```
1219
+
1220
+ ### 4.3 Reconcile
1221
+
1222
+ ```typescript
1223
+ interface VectorOutbox {
1224
+ // 재시도 가능한 failed job을 pending으로 변경
1225
+ reconcileFailed(maxRetries: number): Promise<number>;
1226
+
1227
+ // processing 상태로 오래 멈춘 job 복구
1228
+ recoverStuck(stuckThresholdMs: number): Promise<number>;
1229
+
1230
+ // 완료된 job 정리
1231
+ cleanupDone(olderThanDays: number): Promise<number>;
1232
+ }
1233
+ ```
1234
+
1235
+ ## 5. 단일 Writer 패턴
1236
+
1237
+ ### 5.1 왜 단일 Writer인가?
1238
+
1239
+ **문제**: 여러 프로세스가 동시에 LanceDB에 쓰면
1240
+ - 충돌 발생 가능
1241
+ - 중복 벡터 생성
1242
+ - 트랜잭션 보장 어려움
1243
+
1244
+ **해결**: 하나의 worker만 LanceDB에 쓰기
1245
+
1246
+ ```typescript
1247
+ // 단일 writer 보장 방법들
1248
+
1249
+ // 방법 1: 프로세스 수준 락
1250
+ const lockFile = '/tmp/vector-worker.lock';
1251
+ if (fs.existsSync(lockFile)) {
1252
+ console.log('Another worker is running');
1253
+ process.exit(0);
1254
+ }
1255
+ fs.writeFileSync(lockFile, process.pid.toString());
1256
+
1257
+ // 방법 2: DB 수준 락
1258
+ await db.run(`
1259
+ INSERT INTO worker_locks (worker_name, locked_at)
1260
+ VALUES ('vector_worker', CURRENT_TIMESTAMP)
1261
+ ON CONFLICT (worker_name)
1262
+ DO UPDATE SET locked_at = CURRENT_TIMESTAMP
1263
+ WHERE locked_at < CURRENT_TIMESTAMP - INTERVAL '5 minutes'
1264
+ `);
1265
+ ```
1266
+
1267
+ ### 5.2 Worker 구현
1268
+
1269
+ ```typescript
1270
+ class VectorWorkerImpl implements VectorWorker {
1271
+ private embedder: Embedder;
1272
+ private vectorStore: VectorStore;
1273
+ private db: Database;
1274
+
1275
+ async processAll(): Promise<ProcessResult> {
1276
+ const result: ProcessResult = { processed: 0, succeeded: 0, failed: 0, errors: [] };
1277
+
1278
+ while (true) {
1279
+ const jobs = await this.claimJobs(BATCH_SIZE);
1280
+ if (jobs.length === 0) break;
1281
+
1282
+ for (const job of jobs) {
1283
+ try {
1284
+ await this.processJob(job);
1285
+ await this.markDone(job.jobId);
1286
+ result.succeeded++;
1287
+ } catch (error) {
1288
+ await this.markFailed(job.jobId, error.message);
1289
+ result.failed++;
1290
+ result.errors.push({ jobId: job.jobId, error: error.message });
1291
+ }
1292
+ result.processed++;
1293
+ }
1294
+ }
1295
+
1296
+ return result;
1297
+ }
1298
+
1299
+ private async processJob(job: OutboxJob): Promise<void> {
1300
+ // 1. 원본 콘텐츠 조회
1301
+ const content = await this.getContent(job.itemKind, job.itemId);
1302
+
1303
+ // 2. 임베딩 생성
1304
+ const embedding = await this.embedder.embed(content);
1305
+
1306
+ // 3. LanceDB upsert (idempotent)
1307
+ await this.vectorStore.upsert({
1308
+ id: job.itemId,
1309
+ vector: embedding,
1310
+ metadata: {
1311
+ itemKind: job.itemKind,
1312
+ embeddingVersion: job.embeddingVersion
1313
+ }
1314
+ });
1315
+ }
1316
+ }
1317
+ ```
1318
+
1319
+ ## 6. LanceDB Upsert 전략
1320
+
1321
+ ### 6.1 Idempotent Upsert
1322
+
1323
+ ```typescript
1324
+ async function upsertVector(record: VectorRecord): Promise<void> {
1325
+ const table = await this.getTable(record.itemKind, record.embeddingVersion);
1326
+
1327
+ // LanceDB에서 기존 레코드 확인
1328
+ const existing = await table
1329
+ .search([0]) // dummy search for filter
1330
+ .filter(`id = '${record.id}'`)
1331
+ .limit(1)
1332
+ .toArray();
1333
+
1334
+ if (existing.length > 0) {
1335
+ // 업데이트: 삭제 후 재삽입
1336
+ await table.delete(`id = '${record.id}'`);
1337
+ }
1338
+
1339
+ // 삽입
1340
+ await table.add([{
1341
+ id: record.id,
1342
+ vector: record.vector,
1343
+ content: record.content,
1344
+ timestamp: record.timestamp,
1345
+ metadata: record.metadata
1346
+ }]);
1347
+ }
1348
+ ```
1349
+
1350
+ ### 6.2 테이블 버전 관리
1351
+
1352
+ ```typescript
1353
+ // 임베딩 모델 버전별 테이블 분리
1354
+ function getTableName(itemKind: ItemKind, embeddingVersion: string): string {
1355
+ // entry_vectors_minilm_v2
1356
+ // task_title_vectors_minilm_v2
1357
+ const versionSlug = embeddingVersion.replace(/[^a-z0-9]/gi, '_');
1358
+ return `${itemKind}_vectors_${versionSlug}`;
1359
+ }
1360
+ ```
1361
+
1362
+ ## 7. Idris2 영감 적용
1363
+
1364
+ ### 7.1 상태 전이 타입 안전성
1365
+
1366
+ ```typescript
1367
+ // 타입 레벨에서 유효한 상태 전이만 허용
1368
+ type ValidTransition =
1369
+ | { from: 'pending'; to: 'processing' }
1370
+ | { from: 'processing'; to: 'done' }
1371
+ | { from: 'processing'; to: 'failed' }
1372
+ | { from: 'failed'; to: 'pending' }; // retry
1373
+
1374
+ function transition(job: OutboxJob, to: OutboxStatus): OutboxJob {
1375
+ const valid: ValidTransition[] = [
1376
+ { from: 'pending', to: 'processing' },
1377
+ { from: 'processing', to: 'done' },
1378
+ { from: 'processing', to: 'failed' },
1379
+ { from: 'failed', to: 'pending' }
1380
+ ];
1381
+
1382
+ const isValid = valid.some(t => t.from === job.status && t.to === to);
1383
+ if (!isValid) {
1384
+ throw new InvalidTransitionError(`Cannot transition from ${job.status} to ${to}`);
1385
+ }
1386
+
1387
+ return { ...job, status: to, updatedAt: new Date() };
1388
+ }
1389
+ ```
1390
+
1391
+ ### 7.2 Idempotency 불변식
1392
+
1393
+ ```typescript
1394
+ // Zod로 idempotency 검증
1395
+ const OutboxJobSchema = z.object({
1396
+ itemKind: ItemKindSchema,
1397
+ itemId: z.string(),
1398
+ embeddingVersion: z.string()
1399
+ }).refine(
1400
+ async (job) => {
1401
+ // 같은 조합은 하나만 존재해야 함
1402
+ const existing = await db.query(`
1403
+ SELECT COUNT(*) as cnt FROM vector_outbox
1404
+ WHERE item_kind = ? AND item_id = ? AND embedding_version = ?
1405
+ `, [job.itemKind, job.itemId, job.embeddingVersion]);
1406
+ return existing[0].cnt <= 1;
1407
+ },
1408
+ { message: 'Duplicate outbox job' }
1409
+ );
1410
+ ```
1411
+
1412
+ ## 8. 기존 코드와의 관계
1413
+
1414
+ ### 8.1 현재 vector-worker.ts
1415
+
1416
+ ```typescript
1417
+ // 현재 구현 (src/core/vector-worker.ts)
1418
+ export class VectorWorker {
1419
+ // OutboxItem 타입 사용
1420
+ // status: 'pending' | 'processing' | 'done' | 'failed'
1421
+ }
1422
+ ```
1423
+
1424
+ ### 8.2 V2 확장 포인트
1425
+
1426
+ | 기존 | V2 확장 |
1427
+ |-----|---------|
1428
+ | OutboxItem 타입 | itemKind, embeddingVersion 추가 |
1429
+ | 단순 배치 처리 | claiming with lock |
1430
+ | 재시도 없음 | reconcile + retry policy |
1431
+ | 중복 가능 | UNIQUE 제약 + upsert |
1432
+
1433
+ ### 8.3 현재 vector-store.ts
1434
+
1435
+ ```typescript
1436
+ // 현재 구현 (src/core/vector-store.ts)
1437
+ export class VectorStore {
1438
+ async add(record: VectorRecord): Promise<void>;
1439
+ async search(query: number[], limit: number): Promise<SearchResult[]>;
1440
+ }
1441
+ ```
1442
+
1443
+ V2에서 `upsert` 메서드 추가 필요.
1444
+
1445
+ ## 9. 운영 고려사항
1446
+
1447
+ ### 9.1 모니터링
1448
+
1449
+ ```typescript
1450
+ // 메트릭 수집
1451
+ interface OutboxMetrics {
1452
+ pendingCount: number;
1453
+ processingCount: number;
1454
+ failedCount: number;
1455
+ doneCount: number;
1456
+ avgProcessingTimeMs: number;
1457
+ lastProcessedAt: Date | null;
1458
+ }
1459
+
1460
+ async function getMetrics(): Promise<OutboxMetrics> {
1461
+ return db.query(`
1462
+ SELECT
1463
+ SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pendingCount,
1464
+ SUM(CASE WHEN status = 'processing' THEN 1 ELSE 0 END) as processingCount,
1465
+ SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failedCount,
1466
+ SUM(CASE WHEN status = 'done' THEN 1 ELSE 0 END) as doneCount,
1467
+ MAX(updated_at) as lastProcessedAt
1468
+ FROM vector_outbox
1469
+ `);
1470
+ }
1471
+ ```
1472
+
1473
+ ### 9.2 알림
1474
+
1475
+ ```typescript
1476
+ // failed job이 임계값 초과 시 알림
1477
+ const FAILED_THRESHOLD = 10;
1478
+
1479
+ async function checkAndAlert(): Promise<void> {
1480
+ const metrics = await getMetrics();
1481
+ if (metrics.failedCount >= FAILED_THRESHOLD) {
1482
+ console.error(`[ALERT] ${metrics.failedCount} failed vector jobs`);
1483
+ // 추가 알림 로직
1484
+ }
1485
+ }
1486
+ ```
1487
+
1488
+ ### 9.3 정리 작업
1489
+
1490
+ ```typescript
1491
+ // 정기적으로 완료된 job 정리
1492
+ async function cleanup(): Promise<void> {
1493
+ const deleted = await db.run(`
1494
+ DELETE FROM vector_outbox
1495
+ WHERE status = 'done'
1496
+ AND updated_at < CURRENT_TIMESTAMP - INTERVAL '7 days'
1497
+ `);
1498
+ console.log(`Cleaned up ${deleted.changes} done jobs`);
1499
+ }
1500
+ ```
1501
+
1502
+ ## 10. 성공 기준
1503
+
1504
+ - [ ] vector_outbox 테이블 생성 및 UNIQUE 제약
1505
+ - [ ] enqueue가 중복을 무시하고 idempotent하게 동작
1506
+ - [ ] 단일 worker가 pending job을 처리
1507
+ - [ ] LanceDB upsert로 중복 벡터 방지
1508
+ - [ ] failed job 재시도 (reconcile) 동작
1509
+ - [ ] processing 상태 stuck 복구 동작
1510
+ - [ ] 기존 vector-worker.ts와 호환 유지