claude-memory-layer 1.0.10 → 1.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +60 -0
- package/README.md +166 -2
- package/bootstrap-kb/decisions/decisions.md +244 -0
- package/bootstrap-kb/glossary/glossary.md +46 -0
- package/bootstrap-kb/modules/.claude-plugin.md +22 -0
- package/bootstrap-kb/modules/agents.md.md +15 -0
- package/bootstrap-kb/modules/claude.md.md +15 -0
- package/bootstrap-kb/modules/context.md.md +15 -0
- package/bootstrap-kb/modules/docs.md +18 -0
- package/bootstrap-kb/modules/handoff.md.md +15 -0
- package/bootstrap-kb/modules/package-lock.json.md +15 -0
- package/bootstrap-kb/modules/package.json.md +15 -0
- package/bootstrap-kb/modules/plan.md.md +15 -0
- package/bootstrap-kb/modules/readme.md.md +15 -0
- package/bootstrap-kb/modules/scripts.md +26 -0
- package/bootstrap-kb/modules/spec.md.md +15 -0
- package/bootstrap-kb/modules/specs.md +20 -0
- package/bootstrap-kb/modules/src.md +51 -0
- package/bootstrap-kb/modules/tests.md +42 -0
- package/bootstrap-kb/modules/tsconfig.json.md +15 -0
- package/bootstrap-kb/modules/vitest.config.ts.md +15 -0
- package/bootstrap-kb/overview/overview.md +40 -0
- package/bootstrap-kb/sources/manifest.json +950 -0
- package/bootstrap-kb/sources/manifest.md +227 -0
- package/bootstrap-kb/timeline/timeline.md +57 -0
- package/d.sh +3 -0
- package/deploy.sh +3 -0
- package/dist/cli/index.js +3577 -389
- package/dist/cli/index.js.map +4 -4
- package/dist/core/index.js +1383 -138
- package/dist/core/index.js.map +4 -4
- package/dist/hooks/post-tool-use.js +1917 -214
- package/dist/hooks/post-tool-use.js.map +4 -4
- package/dist/hooks/session-end.js +1813 -231
- package/dist/hooks/session-end.js.map +4 -4
- package/dist/hooks/session-start.js +1802 -205
- package/dist/hooks/session-start.js.map +4 -4
- package/dist/hooks/stop.js +1909 -248
- package/dist/hooks/stop.js.map +4 -4
- package/dist/hooks/user-prompt-submit.js +1861 -206
- package/dist/hooks/user-prompt-submit.js.map +4 -4
- package/dist/server/api/index.js +2341 -217
- package/dist/server/api/index.js.map +4 -4
- package/dist/server/index.js +2350 -226
- package/dist/server/index.js.map +4 -4
- package/dist/services/memory-service.js +1805 -206
- package/dist/services/memory-service.js.map +4 -4
- package/dist/ui/app.js +1447 -55
- package/dist/ui/index.html +318 -147
- package/dist/ui/style.css +892 -0
- package/docs/MCP_MEMORY_SERVICE_COMPARATIVE_REVIEW.md +271 -0
- package/docs/MEMU_ADOPTION.md +40 -0
- package/docs/OPERATIONS.md +18 -0
- package/memory/.claude-plugin/commands/2026-02-25.md +263 -0
- package/memory/_index.md +405 -0
- package/memory/default/uncategorized/2026-02-25.md +4839 -0
- package/memory/specs/20260207-dashboard-upgrade/2026-02-25.md +142 -0
- package/memory/specs/citations-system/2026-02-25.md +1121 -0
- package/memory/specs/endless-mode/2026-02-25.md +1392 -0
- package/memory/specs/entity-edge-model/2026-02-25.md +1263 -0
- package/memory/specs/evidence-aligner-v2/2026-02-25.md +1028 -0
- package/memory/specs/mcp-desktop-integration/2026-02-25.md +1334 -0
- package/memory/specs/post-tool-use-hook/2026-02-25.md +1164 -0
- package/memory/specs/private-tags/2026-02-25.md +1057 -0
- package/memory/specs/progressive-disclosure/2026-02-25.md +1436 -0
- package/memory/specs/task-entity-system/2026-02-25.md +924 -0
- package/memory/specs/vector-outbox-v2/2026-02-25.md +1510 -0
- package/memory/specs/web-viewer-ui/2026-02-25.md +1709 -0
- package/package.json +9 -2
- package/scripts/build.ts +6 -0
- package/scripts/fix-sync-gap.js +32 -0
- package/scripts/heartbeat-memory-orchestrator.sh +28 -0
- package/scripts/report-sync-gap.js +26 -0
- package/scripts/review-queue-auto-resolve.js +21 -0
- package/scripts/sync-gap-auto-heal.sh +17 -0
- package/specs/20260207-dashboard-upgrade/context.md +38 -0
- package/specs/20260207-dashboard-upgrade/spec.md +96 -0
- package/src/cli/index.ts +391 -60
- package/src/core/consolidated-store.ts +63 -1
- package/src/core/consolidation-worker.ts +115 -6
- package/src/core/event-store.ts +14 -0
- package/src/core/index.ts +1 -0
- package/src/core/ingest-interceptor.ts +80 -0
- package/src/core/markdown-mirror.ts +70 -0
- package/src/core/md-mirror.ts +92 -0
- package/src/core/mongo-sync-config.ts +165 -0
- package/src/core/mongo-sync-worker.ts +381 -0
- package/src/core/retriever.ts +540 -150
- package/src/core/sqlite-event-store.ts +794 -7
- package/src/core/sqlite-wrapper.ts +8 -0
- package/src/core/tag-taxonomy.ts +51 -0
- package/src/core/turn-state.ts +159 -0
- package/src/core/types.ts +51 -8
- package/src/core/vector-store.ts +21 -3
- package/src/hooks/post-tool-use.ts +68 -23
- package/src/hooks/session-end.ts +8 -3
- package/src/hooks/stop.ts +96 -25
- package/src/hooks/user-prompt-submit.ts +44 -5
- package/src/server/api/chat.ts +244 -0
- package/src/server/api/citations.ts +3 -3
- package/src/server/api/events.ts +30 -5
- package/src/server/api/health.ts +53 -0
- package/src/server/api/index.ts +9 -1
- package/src/server/api/projects.ts +74 -0
- package/src/server/api/search.ts +3 -3
- package/src/server/api/sessions.ts +3 -3
- package/src/server/api/stats.ts +89 -8
- package/src/server/api/turns.ts +143 -0
- package/src/server/api/utils.ts +46 -0
- package/src/services/bootstrap-organizer.ts +443 -0
- package/src/services/codex-session-history-importer.ts +474 -0
- package/src/services/memory-service.ts +508 -71
- package/src/services/session-history-importer.ts +215 -51
- package/src/ui/app.js +1447 -55
- package/src/ui/index.html +318 -147
- package/src/ui/style.css +892 -0
- package/tests/bootstrap-organizer.test.ts +111 -0
- package/tests/consolidation-worker.test.ts +75 -0
- package/tests/ingest-interceptor.test.ts +38 -0
- package/tests/markdown-mirror.test.ts +85 -0
- package/tests/md-mirror.test.ts +50 -0
- package/tests/retriever-fallback-chain.test.ts +223 -0
- package/tests/retriever-strategy-scope.test.ts +97 -0
- package/tests/retriever.memu-adoption.test.ts +122 -0
- package/tests/sqlite-event-store-replication.test.ts +92 -0
- package/.claude/settings.local.json +0 -27
- package/.claude-memory/test.sqlite +0 -0
- package/.history/package_20260201112328.json +0 -45
- package/.history/package_20260201113602.json +0 -45
- package/.history/package_20260201113713.json +0 -45
- package/.history/package_20260201114110.json +0 -45
- package/.history/package_20260201114632.json +0 -46
- package/.history/package_20260201133143.json +0 -45
- package/.history/package_20260201134319.json +0 -45
- package/.history/package_20260201134326.json +0 -45
- package/.history/package_20260201134334.json +0 -45
- package/.history/package_20260201134912.json +0 -45
- package/.history/package_20260201142928.json +0 -46
- package/.history/package_20260201192048.json +0 -47
- package/.history/package_20260202114053.json +0 -49
- package/.history/package_20260202121115.json +0 -49
- package/test_access.js +0 -49
|
@@ -0,0 +1,1510 @@
|
|
|
1
|
+
|
|
2
|
+
## 2026-02-25T12:31:26.442Z | f506a245-208e-42b1-a52a-016f6ecb94b4
|
|
3
|
+
- type: session_summary
|
|
4
|
+
- session: import:organized
|
|
5
|
+
# Vector Outbox V2 Context
|
|
6
|
+
|
|
7
|
+
> **Version**: 2.0.0
|
|
8
|
+
> **Created**: 2026-01-31
|
|
9
|
+
|
|
10
|
+
## 1. 배경
|
|
11
|
+
|
|
12
|
+
### 1.1 분산 시스템의 정합성 문제
|
|
13
|
+
|
|
14
|
+
두 개의 서로 다른 저장소(DuckDB, LanceDB)에 데이터를 저장할 때:
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
Application
|
|
18
|
+
│
|
|
19
|
+
├──▶ DuckDB (entries 저장) ✓
|
|
20
|
+
│
|
|
21
|
+
└──▶ LanceDB (벡터 저장) ✗ (실패)
|
|
22
|
+
|
|
23
|
+
결과: DuckDB에는 있지만 LanceDB에는 없는 불일치 상태
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
**문제 시나리오**:
|
|
27
|
+
1. Entry가 저장되었지만 임베딩이 실패
|
|
28
|
+
2. 재시작 시 어떤 entry가 임베딩되지 않았는지 알 수 없음
|
|
29
|
+
3. 검색 결과에서 해당 entry가 누락
|
|
30
|
+
|
|
31
|
+
### 1.2 Transactional Outbox Pattern
|
|
32
|
+
|
|
33
|
+
마이크로서비스 아키텍처에서 검증된 패턴:
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
┌─────────────────────────────────────┐
|
|
37
|
+
│ DuckDB Transaction │
|
|
38
|
+
│ │
|
|
39
|
+
│ 1. INSERT INTO entries (...) │
|
|
40
|
+
│ 2. INSERT INTO vector_outbox (...) │
|
|
41
|
+
│ │
|
|
42
|
+
│ COMMIT │
|
|
43
|
+
└─────────────────────────────────────┘
|
|
44
|
+
│
|
|
45
|
+
│ (비동기)
|
|
46
|
+
▼
|
|
47
|
+
┌─────────────────────────────────────┐
|
|
48
|
+
│ Vector Worker │
|
|
49
|
+
│ │
|
|
50
|
+
│ 1. SELECT * FROM vector_outbox │
|
|
51
|
+
│ WHERE status = 'pending' │
|
|
52
|
+
│ 2. Generate embedding │
|
|
53
|
+
│ 3. Upsert to LanceDB │
|
|
54
|
+
│ 4. UPDATE status = 'done' │
|
|
55
|
+
└─────────────────────────────────────┘
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**장점**:
|
|
59
|
+
- 원자성 보장 (DuckDB 트랜잭션 내)
|
|
60
|
+
- 실패 시 재시도 가능
|
|
61
|
+
- 상태 추적 가능
|
|
62
|
+
|
|
63
|
+
## 2. Memo.txt 참고 사항
|
|
64
|
+
|
|
65
|
+
### 2.1 핵심 원칙 (섹션 2.6)
|
|
66
|
+
|
|
67
|
+
> **6. Vector store 정합성**
|
|
68
|
+
> - DuckDB에 먼저 기록 → outbox → 단일 writer가 LanceDB에 upsert → DuckDB 상태 업데이트
|
|
69
|
+
|
|
70
|
+
### 2.2 스키마 (섹션 4.4)
|
|
71
|
+
|
|
72
|
+
```sql
|
|
73
|
+
CREATE TABLE vector_outbox (
|
|
74
|
+
job_id VARCHAR PRIMARY KEY,
|
|
75
|
+
item_kind VARCHAR NOT NULL, -- entry|task_title
|
|
76
|
+
item_id VARCHAR NOT NULL,
|
|
77
|
+
embedding_version VARCHAR NOT NULL,
|
|
78
|
+
status VARCHAR NOT NULL, -- pending|done|failed
|
|
79
|
+
error VARCHAR,
|
|
80
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
81
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
82
|
+
UNIQUE(item_kind, item_id, embedding_version)
|
|
83
|
+
);
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### 2.3 Worker 지시 (섹션 8)
|
|
87
|
+
|
|
88
|
+
**Outbox enqueue**:
|
|
89
|
+
- entry materialized 시: `item_kind='entry'`
|
|
90
|
+
- task_created/task_title_changed 시: `item_kind='task_title'`
|
|
91
|
+
|
|
92
|
+
**Single writer worker**:
|
|
93
|
+
- pending 배치 처리
|
|
94
|
+
- LanceDB **idempotent upsert**
|
|
95
|
+
- 같은 id는 중복 row가 쌓이지 않게
|
|
96
|
+
|
|
97
|
+
**reconcile()**:
|
|
98
|
+
- pending 재처리
|
|
99
|
+
- failed는 재시도 정책에 따라 재처리
|
|
100
|
+
|
|
101
|
+
## 3. Idris2 영감 적용
|
|
102
|
+
|
|
103
|
+
### 3.1 상태 머신 타입 안전성
|
|
104
|
+
|
|
105
|
+
**Idris2 개념**:
|
|
106
|
+
```idris
|
|
107
|
+
-- 상태 전이가 타입 수준에서 정의됨
|
|
108
|
+
data OutboxState = Pending | Processing | Done | Failed
|
|
109
|
+
|
|
110
|
+
data Transition : OutboxState -> OutboxState -> Type where
|
|
111
|
+
Claim : Transition Pending Processing
|
|
112
|
+
Complete : Transition Processing Done
|
|
113
|
+
Fail : Transition Processing Failed
|
|
114
|
+
Retry : Transition Failed Pending
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
**TypeScript 적용**:
|
|
118
|
+
```typescript
|
|
119
|
+
// 유효한 전이만 타입으로 정의
|
|
120
|
+
type ValidTransition =
|
|
121
|
+
| { from: 'pending'; to: 'processing' }
|
|
122
|
+
| { from: 'processing'; to: 'done' }
|
|
123
|
+
| { from: 'processing'; to: 'failed' }
|
|
124
|
+
| { from: 'failed'; to: 'pending' };
|
|
125
|
+
|
|
126
|
+
// 런타임 검증
|
|
127
|
+
function assertValidTransition(from: OutboxStatus, to: OutboxStatus): void {
|
|
128
|
+
const valid: ValidTransition[] = [...];
|
|
129
|
+
if (!valid.some(t => t.from === from && t.to === to)) {
|
|
130
|
+
throw new Error(`Invalid transition: ${from} → ${to}`);
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### 3.2 Idempotency 증명
|
|
136
|
+
|
|
137
|
+
**Idris2 개념**:
|
|
138
|
+
```idris
|
|
139
|
+
-- 여러 번 적용해도 한 번 적용한 것과 같은 결과
|
|
140
|
+
idempotent : (f : a -> a) -> Type
|
|
141
|
+
idempotent f = (x : a) -> f (f x) = f x
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
**TypeScript 적용**:
|
|
145
|
+
```typescript
|
|
146
|
+
// UNIQUE 제약으로 idempotency 보장
|
|
147
|
+
const UNIQUE_CONSTRAINT = `
|
|
148
|
+
UNIQUE(item_kind, item_id, embedding_version)
|
|
149
|
+
`;
|
|
150
|
+
|
|
151
|
+
// enqueue는 여러 번 호출해도 같은 결과
|
|
152
|
+
async function enqueue(input: EnqueueInput): Promise<EnqueueResult> {
|
|
153
|
+
await db.run(`
|
|
154
|
+
INSERT INTO vector_outbox (...)
|
|
155
|
+
ON CONFLICT (item_kind, item_id, embedding_version)
|
|
156
|
+
DO NOTHING
|
|
157
|
+
`, [...]);
|
|
158
|
+
// 결과는 항상 동일: 하나의 job만 존재
|
|
159
|
+
}
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### 3.3 벡터 upsert 증명
|
|
163
|
+
|
|
164
|
+
```typescript
|
|
165
|
+
// upsert 후 조건: 정확히 하나의 레코드만 존재
|
|
166
|
+
interface UpsertInvariant {
|
|
167
|
+
// Pre: record with id=X may or may not exist
|
|
168
|
+
// Post: exactly one record with id=X exists, with latest values
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
async function upsert(record: VectorRecord): Promise<void> {
|
|
172
|
+
// Delete existing (if any)
|
|
173
|
+
await table.delete(`id = '${record.id}'`);
|
|
174
|
+
// Insert new
|
|
175
|
+
await table.add([record]);
|
|
176
|
+
// Invariant: exactly one record with id exists
|
|
177
|
+
}
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## 4. 기존 코드와의 관계
|
|
181
|
+
|
|
182
|
+
### 4.1 현재 vector-worker.ts
|
|
183
|
+
|
|
184
|
+
```typescript
|
|
185
|
+
// 현재 구현 (src/core/vector-worker.ts)
|
|
186
|
+
export class VectorWorker {
|
|
187
|
+
private embedder: Embedder;
|
|
188
|
+
private vectorStore: VectorStore;
|
|
189
|
+
private db: Database;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
export interface OutboxItem {
|
|
193
|
+
id: string;
|
|
194
|
+
eventId: string;
|
|
195
|
+
content: string;
|
|
196
|
+
status: 'pending' | 'processing' | 'done' | 'failed';
|
|
197
|
+
retryCount: number;
|
|
198
|
+
createdAt: Date;
|
|
199
|
+
errorMessage?: string;
|
|
200
|
+
}
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### 4.2 V2 확장 포인트
|
|
204
|
+
|
|
205
|
+
| 기존 | V2 확장 |
|
|
206
|
+
|-----|---------|
|
|
207
|
+
| eventId 기반 | item_kind + item_id |
|
|
208
|
+
| 단순 status | embedding_version 추가 |
|
|
209
|
+
| 재시도 없음 | reconcile + retry policy |
|
|
210
|
+
| 중복 가능 | UNIQUE + upsert |
|
|
211
|
+
|
|
212
|
+
### 4.3 현재 types.ts
|
|
213
|
+
|
|
214
|
+
```typescript
|
|
215
|
+
// 현재 OutboxItem 정의
|
|
216
|
+
export interface OutboxItem {
|
|
217
|
+
id: string;
|
|
218
|
+
eventId: string;
|
|
219
|
+
content: string;
|
|
220
|
+
status: 'pending' | 'processing' | 'done' | 'failed';
|
|
221
|
+
retryCount: number;
|
|
222
|
+
createdAt: Date;
|
|
223
|
+
errorMessage?: string;
|
|
224
|
+
}
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
V2에서 OutboxJob으로 확장.
|
|
228
|
+
|
|
229
|
+
## 5. 설계 결정 사항
|
|
230
|
+
|
|
231
|
+
### 5.1 왜 embedding_version을 포함하는가?
|
|
232
|
+
|
|
233
|
+
**시나리오**:
|
|
234
|
+
1. 모델 v1으로 entry A 임베딩
|
|
235
|
+
2. 모델 v2로 업그레이드
|
|
236
|
+
3. entry A를 v2로 재임베딩 필요
|
|
237
|
+
|
|
238
|
+
**해결**:
|
|
239
|
+
```sql
|
|
240
|
+
-- v1과 v2 job이 별도로 존재 가능
|
|
241
|
+
UNIQUE(item_kind, item_id, embedding_version)
|
|
242
|
+
|
|
243
|
+
-- v1 job
|
|
244
|
+
(job_1, 'entry', 'e1', 'minilm-v1', 'done')
|
|
245
|
+
|
|
246
|
+
-- v2 job
|
|
247
|
+
(job_2, 'entry', 'e1', 'minilm-v2', 'pending')
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### 5.2 왜 단일 writer인가?
|
|
251
|
+
|
|
252
|
+
**문제**: 여러 프로세스가 동시에 LanceDB에 쓰면
|
|
253
|
+
- 동일 id에 대해 race condition
|
|
254
|
+
- delete와 add 사이에 다른 write가 끼어들 수 있음
|
|
255
|
+
- LanceDB 자체 락 메커니즘 미약
|
|
256
|
+
|
|
257
|
+
**해결**:
|
|
258
|
+
- 애플리케이션 레벨에서 단일 writer 보장
|
|
259
|
+
- 파일 락 또는 DB 락 사용
|
|
260
|
+
|
|
261
|
+
### 5.3 왜 delete + add인가?
|
|
262
|
+
|
|
263
|
+
LanceDB가 true upsert를 지원하지 않기 때문:
|
|
264
|
+
|
|
265
|
+
```typescript
|
|
266
|
+
// 방법 1: update 시도 (LanceDB 제한적 지원)
|
|
267
|
+
await table.update({ id: 'e1' }, { vector: newVector }); // 불완전
|
|
268
|
+
|
|
269
|
+
// 방법 2: delete + add (권장)
|
|
270
|
+
await table.delete(`id = 'e1'`);
|
|
271
|
+
await table.add([{ id: 'e1', vector: newVector, ... }]); // 확실
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## 6. Reconcile 전략
|
|
275
|
+
|
|
276
|
+
### 6.1 Failed Job 재시도
|
|
277
|
+
|
|
278
|
+
```typescript
|
|
279
|
+
const RETRY_POLICY = {
|
|
280
|
+
maxRetries: 3,
|
|
281
|
+
backoffMs: [1000, 5000, 30000] // 1초, 5초, 30초
|
|
282
|
+
};
|
|
283
|
+
|
|
284
|
+
async function shouldRetry(job: OutboxJob): Promise<boolean> {
|
|
285
|
+
return job.retryCount < RETRY_POLICY.maxRetries;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
async function reconcileFailed(): Promise<number> {
|
|
289
|
+
return db.run(`
|
|
290
|
+
UPDATE vector_outbox
|
|
291
|
+
SET status = 'pending',
|
|
292
|
+
retry_count = retry_count + 1,
|
|
293
|
+
updated_at = CURRENT_TIMESTAMP
|
|
294
|
+
WHERE status = 'failed'
|
|
295
|
+
AND retry_count < ?
|
|
296
|
+
`, [RETRY_POLICY.maxRetries]);
|
|
297
|
+
}
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
### 6.2 Stuck Job 복구
|
|
301
|
+
|
|
302
|
+
Processing 상태에서 worker가 죽으면:
|
|
303
|
+
|
|
304
|
+
```typescript
|
|
305
|
+
const STUCK_THRESHOLD_MS = 5 * 60 * 1000; // 5분
|
|
306
|
+
|
|
307
|
+
async function recoverStuck(): Promise<number> {
|
|
308
|
+
const threshold = new Date(Date.now() - STUCK_THRESHOLD_MS);
|
|
309
|
+
return db.run(`
|
|
310
|
+
UPDATE vector_outbox
|
|
311
|
+
SET status = 'pending',
|
|
312
|
+
updated_at = CURRENT_TIMESTAMP
|
|
313
|
+
WHERE status = 'processing'
|
|
314
|
+
AND updated_at < ?
|
|
315
|
+
`, [threshold]);
|
|
316
|
+
}
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
### 6.3 Done Job 정리
|
|
320
|
+
|
|
321
|
+
저장 공간 관리:
|
|
322
|
+
|
|
323
|
+
```typescript
|
|
324
|
+
async function cleanupDone(olderThanDays: number = 7): Promise<number> {
|
|
325
|
+
return db.run(`
|
|
326
|
+
DELETE FROM vector_outbox
|
|
327
|
+
WHERE status = 'done'
|
|
328
|
+
AND updated_at < datetime('now', '-${olderThanDays} days')
|
|
329
|
+
`);
|
|
330
|
+
}
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
## 7. 모니터링 및 알림
|
|
334
|
+
|
|
335
|
+
### 7.1 메트릭
|
|
336
|
+
|
|
337
|
+
```typescript
|
|
338
|
+
interface OutboxMetrics {
|
|
339
|
+
pendingCount: number;
|
|
340
|
+
processingCount: number;
|
|
341
|
+
doneCount: number;
|
|
342
|
+
failedCount: number;
|
|
343
|
+
avgProcessingTimeMs: number;
|
|
344
|
+
oldestPendingAge: number; // 가장 오래된 pending job의 나이 (ms)
|
|
345
|
+
}
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
### 7.2 알림 조건
|
|
349
|
+
|
|
350
|
+
```typescript
|
|
351
|
+
const ALERT_THRESHOLDS = {
|
|
352
|
+
failedCount: 10, // failed가 10개 이상
|
|
353
|
+
pendingAge: 60 * 60 * 1000 // pending이 1시간 이상 된 경우
|
|
354
|
+
};
|
|
355
|
+
|
|
356
|
+
async function checkAlerts(): Promise<Alert[]> {
|
|
357
|
+
const metrics = await getMetrics();
|
|
358
|
+
const alerts: Alert[] = [];
|
|
359
|
+
|
|
360
|
+
if (metrics.failedCount >= ALERT_THRESHOLDS.failedCount) {
|
|
361
|
+
alerts.push({
|
|
362
|
+
level: 'warning',
|
|
363
|
+
message: `${metrics.failedCount} vector jobs failed`
|
|
364
|
+
});
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
if (metrics.oldestPendingAge >= ALERT_THRESHOLDS.pendingAge) {
|
|
368
|
+
alerts.push({
|
|
369
|
+
level: 'warning',
|
|
370
|
+
message: `Oldest pending job is ${metrics.oldestPendingAge / 1000}s old`
|
|
371
|
+
});
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
return alerts;
|
|
375
|
+
}
|
|
376
|
+
```
|
|
377
|
+
|
|
378
|
+
## 8. 에러 처리
|
|
379
|
+
|
|
380
|
+
### 8.1 임베딩 실패
|
|
381
|
+
|
|
382
|
+
```typescript
|
|
383
|
+
try {
|
|
384
|
+
const embedding = await embedder.embed(content);
|
|
385
|
+
} catch (error) {
|
|
386
|
+
if (error.message.includes('rate limit')) {
|
|
387
|
+
// 재시도 가능
|
|
388
|
+
throw new RetryableError('Rate limited', { retryAfterMs: 60000 });
|
|
389
|
+
} else if (error.message.includes('too long')) {
|
|
390
|
+
// 영구 실패
|
|
391
|
+
throw new PermanentError('Content too long for embedding');
|
|
392
|
+
}
|
|
393
|
+
throw error;
|
|
394
|
+
}
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
### 8.2 LanceDB 실패
|
|
398
|
+
|
|
399
|
+
```typescript
|
|
400
|
+
try {
|
|
401
|
+
await vectorStore.upsert(record);
|
|
402
|
+
} catch (error) {
|
|
403
|
+
if (error.message.includes('disk full')) {
|
|
404
|
+
throw new PermanentError('Disk full');
|
|
405
|
+
} else if (error.message.includes('connection')) {
|
|
406
|
+
throw new RetryableError('Connection failed');
|
|
407
|
+
}
|
|
408
|
+
throw error;
|
|
409
|
+
}
|
|
410
|
+
```
|
|
411
|
+
|
|
412
|
+
### 8.3 콘텐츠 없음
|
|
413
|
+
|
|
414
|
+
```typescript
|
|
415
|
+
const content = await getContent(job.itemKind, job.itemId);
|
|
416
|
+
if (!content) {
|
|
417
|
+
// Entry가 삭제되었거나 존재하지 않음
|
|
418
|
+
// Job을 done으로 마크하고 건너뜀
|
|
419
|
+
await markDone(job.jobId, { skipped: true, reason: 'content_not_found' });
|
|
420
|
+
return;
|
|
421
|
+
}
|
|
422
|
+
```
|
|
423
|
+
|
|
424
|
+
## 9. 성능 고려사항
|
|
425
|
+
|
|
426
|
+
### 9.1 배치 처리
|
|
427
|
+
|
|
428
|
+
```typescript
|
|
429
|
+
const BATCH_SIZE = 50;
|
|
430
|
+
|
|
431
|
+
// 한 번에 여러 job claim
|
|
432
|
+
const jobs = await claimJobs(BATCH_SIZE);
|
|
433
|
+
|
|
434
|
+
// 임베딩도 배치로
|
|
435
|
+
const contents = jobs.map(j => j.content);
|
|
436
|
+
const embeddings = await embedder.embedBatch(contents);
|
|
437
|
+
|
|
438
|
+
// LanceDB 배치 insert
|
|
439
|
+
await vectorStore.addBatch(jobs.map((j, i) => ({
|
|
440
|
+
id: j.itemId,
|
|
441
|
+
vector: embeddings[i],
|
|
442
|
+
...
|
|
443
|
+
})));
|
|
444
|
+
```
|
|
445
|
+
|
|
446
|
+
### 9.2 병렬 처리 (주의)
|
|
447
|
+
|
|
448
|
+
```typescript
|
|
449
|
+
// 단일 writer 내에서 병렬 처리
|
|
450
|
+
// LanceDB 쓰기는 순차적으로, 임베딩은 병렬로
|
|
451
|
+
|
|
452
|
+
const jobs = await claimJobs(BATCH_SIZE);
|
|
453
|
+
|
|
454
|
+
// 임베딩 병렬 생성
|
|
455
|
+
const embeddings = await Promise.all(
|
|
456
|
+
jobs.map(async (job) => {
|
|
457
|
+
const content = await getContent(job.itemKind, job.itemId);
|
|
458
|
+
return embedder.embed(content);
|
|
459
|
+
})
|
|
460
|
+
);
|
|
461
|
+
|
|
462
|
+
// LanceDB 순차 쓰기
|
|
463
|
+
for (let i = 0; i < jobs.length; i++) {
|
|
464
|
+
await vectorStore.upsert({ id: jobs[i].itemId, vector: embeddings[i], ... });
|
|
465
|
+
await markDone(jobs[i].jobId);
|
|
466
|
+
}
|
|
467
|
+
```
|
|
468
|
+
|
|
469
|
+
## 10. 참고 자료
|
|
470
|
+
|
|
471
|
+
- **Memo.txt**: 섹션 8 - Vector Outbox + LanceDB writer 구현 지시
|
|
472
|
+
- **현재 구현**: `src/core/vector-worker.ts`
|
|
473
|
+
- **Transactional Outbox**: 마이크로서비스 패턴
|
|
474
|
+
- **AXIOMMIND**: Principle 6 - 벡터 정합성
|
|
475
|
+
|
|
476
|
+
## 2026-02-25T12:31:26.450Z | becbd566-a1b0-419c-93f8-7ce2a18f4ffd
|
|
477
|
+
- type: session_summary
|
|
478
|
+
- session: import:organized
|
|
479
|
+
# Vector Outbox V2 Implementation Plan
|
|
480
|
+
|
|
481
|
+
> **Version**: 2.0.0
|
|
482
|
+
> **Status**: Draft
|
|
483
|
+
> **Created**: 2026-01-31
|
|
484
|
+
|
|
485
|
+
## Phase 1: 스키마 및 타입 (P0)
|
|
486
|
+
|
|
487
|
+
### 1.1 타입 정의
|
|
488
|
+
|
|
489
|
+
**파일**: `src/core/types.ts` 수정
|
|
490
|
+
|
|
491
|
+
```typescript
|
|
492
|
+
// 추가할 타입들
|
|
493
|
+
export const ItemKindSchema = z.enum(['entry', 'task_title', 'session_summary']);
|
|
494
|
+
export type ItemKind = z.infer<typeof ItemKindSchema>;
|
|
495
|
+
|
|
496
|
+
export const OutboxStatusSchema = z.enum(['pending', 'processing', 'done', 'failed']);
|
|
497
|
+
export type OutboxStatus = z.infer<typeof OutboxStatusSchema>;
|
|
498
|
+
|
|
499
|
+
export const OutboxJobSchema = z.object({
|
|
500
|
+
jobId: z.string(),
|
|
501
|
+
itemKind: ItemKindSchema,
|
|
502
|
+
itemId: z.string(),
|
|
503
|
+
embeddingVersion: z.string(),
|
|
504
|
+
status: OutboxStatusSchema,
|
|
505
|
+
retryCount: z.number().int().nonnegative(),
|
|
506
|
+
error: z.string().optional(),
|
|
507
|
+
createdAt: z.date(),
|
|
508
|
+
updatedAt: z.date()
|
|
509
|
+
});
|
|
510
|
+
export type OutboxJob = z.infer<typeof OutboxJobSchema>;
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
**작업 항목**:
|
|
514
|
+
- [ ] ItemKind 스키마 추가
|
|
515
|
+
- [ ] OutboxStatus 스키마 추가
|
|
516
|
+
- [ ] OutboxJob 스키마 추가
|
|
517
|
+
- [ ] EnqueueResult, ProcessResult 타입 추가
|
|
518
|
+
|
|
519
|
+
### 1.2 DB 스키마
|
|
520
|
+
|
|
521
|
+
**파일**: 마이그레이션 스크립트
|
|
522
|
+
|
|
523
|
+
```sql
|
|
524
|
+
-- 기존 outbox 테이블이 있다면 백업 후 마이그레이션
|
|
525
|
+
-- 신규 vector_outbox 테이블
|
|
526
|
+
|
|
527
|
+
CREATE TABLE vector_outbox (
|
|
528
|
+
job_id VARCHAR PRIMARY KEY,
|
|
529
|
+
item_kind VARCHAR NOT NULL,
|
|
530
|
+
item_id VARCHAR NOT NULL,
|
|
531
|
+
embedding_version VARCHAR NOT NULL,
|
|
532
|
+
status VARCHAR NOT NULL DEFAULT 'pending',
|
|
533
|
+
retry_count INTEGER DEFAULT 0,
|
|
534
|
+
error VARCHAR,
|
|
535
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
536
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
537
|
+
UNIQUE(item_kind, item_id, embedding_version)
|
|
538
|
+
);
|
|
539
|
+
|
|
540
|
+
CREATE INDEX idx_outbox_status ON vector_outbox(status);
|
|
541
|
+
CREATE INDEX idx_outbox_created ON vector_outbox(created_at);
|
|
542
|
+
```
|
|
543
|
+
|
|
544
|
+
**작업 항목**:
|
|
545
|
+
- [ ] vector_outbox 테이블 DDL
|
|
546
|
+
- [ ] 인덱스 생성
|
|
547
|
+
- [ ] UNIQUE 제약 추가
|
|
548
|
+
- [ ] 기존 데이터 마이그레이션 스크립트 (필요시)
|
|
549
|
+
|
|
550
|
+
## Phase 2: VectorOutbox 클래스 (P0)
|
|
551
|
+
|
|
552
|
+
### 2.1 Enqueue 구현
|
|
553
|
+
|
|
554
|
+
**파일**: `src/core/vector-outbox.ts` (신규)
|
|
555
|
+
|
|
556
|
+
```typescript
|
|
557
|
+
export class VectorOutbox {
|
|
558
|
+
constructor(private db: Database);
|
|
559
|
+
|
|
560
|
+
async enqueue(input: OutboxEnqueueInput): Promise<EnqueueResult> {
|
|
561
|
+
const jobId = uuidv4();
|
|
562
|
+
|
|
563
|
+
try {
|
|
564
|
+
await this.db.run(`
|
|
565
|
+
INSERT INTO vector_outbox
|
|
566
|
+
(job_id, item_kind, item_id, embedding_version, status)
|
|
567
|
+
VALUES (?, ?, ?, ?, 'pending')
|
|
568
|
+
ON CONFLICT (item_kind, item_id, embedding_version)
|
|
569
|
+
DO NOTHING
|
|
570
|
+
`, [jobId, input.itemKind, input.itemId, input.embeddingVersion]);
|
|
571
|
+
|
|
572
|
+
// 삽입 성공 여부 확인
|
|
573
|
+
const inserted = await this.db.query(`
|
|
574
|
+
SELECT job_id FROM vector_outbox
|
|
575
|
+
WHERE item_kind = ? AND item_id = ? AND embedding_version = ?
|
|
576
|
+
`, [input.itemKind, input.itemId, input.embeddingVersion]);
|
|
577
|
+
|
|
578
|
+
const isNew = inserted[0]?.job_id === jobId;
|
|
579
|
+
return { success: true, jobId: inserted[0].job_id, isNew };
|
|
580
|
+
|
|
581
|
+
} catch (error) {
|
|
582
|
+
return { success: false, error: error.message };
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
```
|
|
587
|
+
|
|
588
|
+
**작업 항목**:
|
|
589
|
+
- [ ] enqueue() 메서드 (중복 처리 포함)
|
|
590
|
+
- [ ] enqueueBatch() 배치 메서드
|
|
591
|
+
- [ ] ON CONFLICT DO NOTHING 처리
|
|
592
|
+
|
|
593
|
+
### 2.2 조회 메서드
|
|
594
|
+
|
|
595
|
+
```typescript
|
|
596
|
+
async getPendingJobs(limit: number = 100): Promise<OutboxJob[]> {
|
|
597
|
+
return this.db.query(`
|
|
598
|
+
SELECT * FROM vector_outbox
|
|
599
|
+
WHERE status = 'pending'
|
|
600
|
+
ORDER BY created_at ASC
|
|
601
|
+
LIMIT ?
|
|
602
|
+
`, [limit]);
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
async getJobById(jobId: string): Promise<OutboxJob | null> {
|
|
606
|
+
const rows = await this.db.query(`
|
|
607
|
+
SELECT * FROM vector_outbox WHERE job_id = ?
|
|
608
|
+
`, [jobId]);
|
|
609
|
+
return rows[0] || null;
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
async getMetrics(): Promise<OutboxMetrics> {
|
|
613
|
+
// 상태별 통계 조회
|
|
614
|
+
}
|
|
615
|
+
```
|
|
616
|
+
|
|
617
|
+
**작업 항목**:
|
|
618
|
+
- [ ] getPendingJobs() 메서드
|
|
619
|
+
- [ ] getJobById() 메서드
|
|
620
|
+
- [ ] getMetrics() 통계 메서드
|
|
621
|
+
|
|
622
|
+
## Phase 3: VectorWorker V2 (P0)
|
|
623
|
+
|
|
624
|
+
### 3.1 Worker 기본 구현
|
|
625
|
+
|
|
626
|
+
**파일**: `src/core/vector-worker.ts` 수정
|
|
627
|
+
|
|
628
|
+
```typescript
|
|
629
|
+
export class VectorWorkerV2 {
|
|
630
|
+
constructor(
|
|
631
|
+
private outbox: VectorOutbox,
|
|
632
|
+
private embedder: Embedder,
|
|
633
|
+
private vectorStore: VectorStore,
|
|
634
|
+
private db: Database
|
|
635
|
+
);
|
|
636
|
+
|
|
637
|
+
// Job claiming with lock
|
|
638
|
+
async claimJobs(limit: number): Promise<OutboxJob[]> {
|
|
639
|
+
// 트랜잭션으로 pending → processing 변경
|
|
640
|
+
return this.db.transaction(async (tx) => {
|
|
641
|
+
const jobs = await tx.query(`
|
|
642
|
+
SELECT * FROM vector_outbox
|
|
643
|
+
WHERE status = 'pending'
|
|
644
|
+
ORDER BY created_at ASC
|
|
645
|
+
LIMIT ?
|
|
646
|
+
FOR UPDATE -- DuckDB에서 지원 시
|
|
647
|
+
`, [limit]);
|
|
648
|
+
|
|
649
|
+
if (jobs.length === 0) return [];
|
|
650
|
+
|
|
651
|
+
const jobIds = jobs.map(j => j.job_id);
|
|
652
|
+
await tx.run(`
|
|
653
|
+
UPDATE vector_outbox
|
|
654
|
+
SET status = 'processing', updated_at = CURRENT_TIMESTAMP
|
|
655
|
+
WHERE job_id IN (${jobIds.map(() => '?').join(',')})
|
|
656
|
+
`, jobIds);
|
|
657
|
+
|
|
658
|
+
return jobs.map(j => ({ ...j, status: 'processing' as const }));
|
|
659
|
+
});
|
|
660
|
+
}
|
|
661
|
+
}
|
|
662
|
+
```
|
|
663
|
+
|
|
664
|
+
**작업 항목**:
|
|
665
|
+
- [ ] claimJobs() 락 처리
|
|
666
|
+
- [ ] markDone() 메서드
|
|
667
|
+
- [ ] markFailed() 메서드
|
|
668
|
+
|
|
669
|
+
### 3.2 Process 루프
|
|
670
|
+
|
|
671
|
+
```typescript
|
|
672
|
+
async processAll(): Promise<ProcessResult> {
|
|
673
|
+
const result: ProcessResult = {
|
|
674
|
+
processed: 0,
|
|
675
|
+
succeeded: 0,
|
|
676
|
+
failed: 0,
|
|
677
|
+
errors: []
|
|
678
|
+
};
|
|
679
|
+
|
|
680
|
+
const BATCH_SIZE = 50;
|
|
681
|
+
|
|
682
|
+
while (true) {
|
|
683
|
+
const jobs = await this.claimJobs(BATCH_SIZE);
|
|
684
|
+
if (jobs.length === 0) break;
|
|
685
|
+
|
|
686
|
+
for (const job of jobs) {
|
|
687
|
+
try {
|
|
688
|
+
await this.processJob(job);
|
|
689
|
+
await this.markDone(job.jobId);
|
|
690
|
+
result.succeeded++;
|
|
691
|
+
} catch (error) {
|
|
692
|
+
await this.markFailed(job.jobId, error.message);
|
|
693
|
+
result.failed++;
|
|
694
|
+
result.errors.push({ jobId: job.jobId, error: error.message });
|
|
695
|
+
}
|
|
696
|
+
result.processed++;
|
|
697
|
+
}
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
return result;
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
private async processJob(job: OutboxJob): Promise<void> {
|
|
704
|
+
// 1. 콘텐츠 조회
|
|
705
|
+
const content = await this.getContent(job.itemKind, job.itemId);
|
|
706
|
+
if (!content) {
|
|
707
|
+
throw new Error(`Content not found: ${job.itemKind}:${job.itemId}`);
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
// 2. 임베딩 생성
|
|
711
|
+
const embedding = await this.embedder.embed(content);
|
|
712
|
+
|
|
713
|
+
// 3. LanceDB upsert
|
|
714
|
+
await this.vectorStore.upsert({
|
|
715
|
+
id: `${job.itemKind}:${job.itemId}`,
|
|
716
|
+
vector: embedding,
|
|
717
|
+
content,
|
|
718
|
+
metadata: {
|
|
719
|
+
itemKind: job.itemKind,
|
|
720
|
+
embeddingVersion: job.embeddingVersion,
|
|
721
|
+
indexedAt: new Date().toISOString()
|
|
722
|
+
}
|
|
723
|
+
});
|
|
724
|
+
}
|
|
725
|
+
```
|
|
726
|
+
|
|
727
|
+
**작업 항목**:
|
|
728
|
+
- [ ] processAll() 메인 루프
|
|
729
|
+
- [ ] processJob() 단일 job 처리
|
|
730
|
+
- [ ] getContent() 콘텐츠 조회 (itemKind별)
|
|
731
|
+
|
|
732
|
+
### 3.3 Reconcile
|
|
733
|
+
|
|
734
|
+
```typescript
|
|
735
|
+
async reconcileFailed(maxRetries: number = 3): Promise<number> {
|
|
736
|
+
const result = await this.db.run(`
|
|
737
|
+
UPDATE vector_outbox
|
|
738
|
+
SET status = 'pending',
|
|
739
|
+
retry_count = retry_count + 1,
|
|
740
|
+
updated_at = CURRENT_TIMESTAMP
|
|
741
|
+
WHERE status = 'failed'
|
|
742
|
+
AND retry_count < ?
|
|
743
|
+
`, [maxRetries]);
|
|
744
|
+
|
|
745
|
+
return result.changes;
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
async recoverStuck(stuckThresholdMs: number = 5 * 60 * 1000): Promise<number> {
|
|
749
|
+
const threshold = new Date(Date.now() - stuckThresholdMs);
|
|
750
|
+
const result = await this.db.run(`
|
|
751
|
+
UPDATE vector_outbox
|
|
752
|
+
SET status = 'pending',
|
|
753
|
+
updated_at = CURRENT_TIMESTAMP
|
|
754
|
+
WHERE status = 'processing'
|
|
755
|
+
AND updated_at < ?
|
|
756
|
+
`, [threshold.toISOString()]);
|
|
757
|
+
|
|
758
|
+
return result.changes;
|
|
759
|
+
}
|
|
760
|
+
```
|
|
761
|
+
|
|
762
|
+
**작업 항목**:
|
|
763
|
+
- [ ] reconcileFailed() 재시도 메서드
|
|
764
|
+
- [ ] recoverStuck() stuck 복구 메서드
|
|
765
|
+
- [ ] cleanupDone() 정리 메서드
|
|
766
|
+
|
|
767
|
+
## Phase 4: VectorStore Upsert (P0)
|
|
768
|
+
|
|
769
|
+
### 4.1 Upsert 메서드
|
|
770
|
+
|
|
771
|
+
**파일**: `src/core/vector-store.ts` 수정
|
|
772
|
+
|
|
773
|
+
```typescript
|
|
774
|
+
// 기존 add() 메서드 외에 upsert() 추가
|
|
775
|
+
async upsert(record: VectorRecord): Promise<void> {
|
|
776
|
+
const table = await this.getOrCreateTable();
|
|
777
|
+
|
|
778
|
+
// 기존 레코드 확인
|
|
779
|
+
const existing = await table
|
|
780
|
+
.search([0]) // dummy
|
|
781
|
+
.filter(`id = '${record.id}'`)
|
|
782
|
+
.limit(1)
|
|
783
|
+
.toArray();
|
|
784
|
+
|
|
785
|
+
if (existing.length > 0) {
|
|
786
|
+
// 삭제 후 재삽입 (LanceDB upsert 패턴)
|
|
787
|
+
await table.delete(`id = '${record.id}'`);
|
|
788
|
+
}
|
|
789
|
+
|
|
790
|
+
await table.add([{
|
|
791
|
+
id: record.id,
|
|
792
|
+
vector: record.vector,
|
|
793
|
+
content: record.content,
|
|
794
|
+
timestamp: record.timestamp,
|
|
795
|
+
...record.metadata
|
|
796
|
+
}]);
|
|
797
|
+
}
|
|
798
|
+
```
|
|
799
|
+
|
|
800
|
+
**작업 항목**:
|
|
801
|
+
- [ ] upsert() 메서드 구현
|
|
802
|
+
- [ ] delete + add 패턴 적용
|
|
803
|
+
- [ ] 기존 add()와 구분
|
|
804
|
+
|
|
805
|
+
### 4.2 테이블 버전 관리
|
|
806
|
+
|
|
807
|
+
```typescript
|
|
808
|
+
// 임베딩 모델 버전별 테이블
|
|
809
|
+
private getTableName(itemKind: ItemKind, version: string): string {
|
|
810
|
+
const versionSlug = version.replace(/[^a-z0-9]/gi, '_').toLowerCase();
|
|
811
|
+
return `${itemKind}_vectors_${versionSlug}`;
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
async getOrCreateTable(itemKind: ItemKind, version: string): Promise<Table> {
|
|
815
|
+
const tableName = this.getTableName(itemKind, version);
|
|
816
|
+
|
|
817
|
+
try {
|
|
818
|
+
return await this.db.openTable(tableName);
|
|
819
|
+
} catch {
|
|
820
|
+
// 테이블 생성
|
|
821
|
+
return await this.db.createTable(tableName, [
|
|
822
|
+
{ id: '', vector: [], content: '', timestamp: '' }
|
|
823
|
+
]);
|
|
824
|
+
}
|
|
825
|
+
}
|
|
826
|
+
```
|
|
827
|
+
|
|
828
|
+
**작업 항목**:
|
|
829
|
+
- [ ] getTableName() 버전별 테이블명
|
|
830
|
+
- [ ] getOrCreateTable() lazy 생성
|
|
831
|
+
|
|
832
|
+
## Phase 5: 통합 및 트리거 (P0)
|
|
833
|
+
|
|
834
|
+
### 5.1 Entry 저장 시 자동 enqueue
|
|
835
|
+
|
|
836
|
+
**파일**: Graduation 또는 Entry 저장 로직
|
|
837
|
+
|
|
838
|
+
```typescript
|
|
839
|
+
async function materializeEntry(entry: Entry): Promise<void> {
|
|
840
|
+
// 1. Entry 저장
|
|
841
|
+
await entryRepo.create(entry);
|
|
842
|
+
|
|
843
|
+
// 2. Outbox에 자동 enqueue
|
|
844
|
+
await outbox.enqueue({
|
|
845
|
+
itemKind: 'entry',
|
|
846
|
+
itemId: entry.entry_id,
|
|
847
|
+
embeddingVersion: config.embedding.version
|
|
848
|
+
});
|
|
849
|
+
}
|
|
850
|
+
```
|
|
851
|
+
|
|
852
|
+
**작업 항목**:
|
|
853
|
+
- [ ] Entry 저장 후 outbox.enqueue() 호출
|
|
854
|
+
- [ ] Task 생성 시 task_title enqueue
|
|
855
|
+
- [ ] Session 종료 시 session_summary enqueue
|
|
856
|
+
|
|
857
|
+
### 5.2 단일 Writer 보장
|
|
858
|
+
|
|
859
|
+
**파일**: `src/core/worker-lock.ts` (신규)
|
|
860
|
+
|
|
861
|
+
```typescript
|
|
862
|
+
export class WorkerLock {
|
|
863
|
+
private lockFile: string;
|
|
864
|
+
|
|
865
|
+
constructor(lockPath: string = '/tmp/vector-worker.lock') { this.lockFile = lockPath; }
|
|
866
|
+
|
|
867
|
+
acquire(): boolean {
|
|
868
|
+
if (fs.existsSync(this.lockFile)) {
|
|
869
|
+
const pid = parseInt(fs.readFileSync(this.lockFile, 'utf8'));
|
|
870
|
+
if (this.isProcessRunning(pid)) {
|
|
871
|
+
return false; // 다른 worker 실행 중
|
|
872
|
+
}
|
|
873
|
+
// stale lock file 제거
|
|
874
|
+
fs.unlinkSync(this.lockFile);
|
|
875
|
+
}
|
|
876
|
+
fs.writeFileSync(this.lockFile, process.pid.toString());
|
|
877
|
+
return true;
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
release(): void {
|
|
881
|
+
if (fs.existsSync(this.lockFile)) {
|
|
882
|
+
fs.unlinkSync(this.lockFile);
|
|
883
|
+
}
|
|
884
|
+
}
|
|
885
|
+
}
|
|
886
|
+
```
|
|
887
|
+
|
|
888
|
+
**작업 항목**:
|
|
889
|
+
- [ ] WorkerLock 클래스
|
|
890
|
+
- [ ] acquire/release 메서드
|
|
891
|
+
- [ ] stale lock 처리
|
|
892
|
+
|
|
893
|
+
## Phase 6: CLI 및 모니터링 (P1)
|
|
894
|
+
|
|
895
|
+
### 6.1 CLI 커맨드
|
|
896
|
+
|
|
897
|
+
**파일**: `src/cli/index.ts` 수정
|
|
898
|
+
|
|
899
|
+
```typescript
|
|
900
|
+
// 벡터 워커 실행
|
|
901
|
+
program
|
|
902
|
+
.command('vector-worker')
|
|
903
|
+
.description('Process pending vector jobs')
|
|
904
|
+
.option('--once', 'Process once and exit')
|
|
905
|
+
.option('--reconcile', 'Also reconcile failed jobs')
|
|
906
|
+
.action(async (options) => {
|
|
907
|
+
const lock = new WorkerLock();
|
|
908
|
+
if (!lock.acquire()) {
|
|
909
|
+
console.log('Another worker is running');
|
|
910
|
+
process.exit(0);
|
|
911
|
+
}
|
|
912
|
+
|
|
913
|
+
try {
|
|
914
|
+
if (options.reconcile) {
|
|
915
|
+
const recovered = await worker.reconcileFailed(3);
|
|
916
|
+
console.log(`Recovered ${recovered} failed jobs`);
|
|
917
|
+
}
|
|
918
|
+
|
|
919
|
+
const result = await worker.processAll();
|
|
920
|
+
console.log(`Processed: ${result.processed}, Succeeded: ${result.succeeded}, Failed: ${result.failed}`);
|
|
921
|
+
|
|
922
|
+
if (!options.once) {
|
|
923
|
+
// 주기적 실행
|
|
924
|
+
setInterval(async () => {
|
|
925
|
+
await worker.processAll();
|
|
926
|
+
}, 10000);
|
|
927
|
+
}
|
|
928
|
+
} finally {
|
|
929
|
+
lock.release();
|
|
930
|
+
}
|
|
931
|
+
});
|
|
932
|
+
```
|
|
933
|
+
|
|
934
|
+
**작업 항목**:
|
|
935
|
+
- [ ] `cli vector-worker` 커맨드
|
|
936
|
+
- [ ] `cli vector-status` 통계 조회
|
|
937
|
+
- [ ] `cli vector-reconcile` 수동 복구
|
|
938
|
+
|
|
939
|
+
### 6.2 상태 조회
|
|
940
|
+
|
|
941
|
+
```typescript
|
|
942
|
+
program
|
|
943
|
+
.command('vector-status')
|
|
944
|
+
.description('Show vector outbox status')
|
|
945
|
+
.action(async () => {
|
|
946
|
+
const metrics = await outbox.getMetrics();
|
|
947
|
+
console.log('Vector Outbox Status:');
|
|
948
|
+
console.log(` Pending: ${metrics.pendingCount}`);
|
|
949
|
+
console.log(` Processing: ${metrics.processingCount}`);
|
|
950
|
+
console.log(` Done: ${metrics.doneCount}`);
|
|
951
|
+
console.log(` Failed: ${metrics.failedCount}`);
|
|
952
|
+
console.log(` Last sync: ${metrics.lastProcessedAt || 'Never'}`);
|
|
953
|
+
});
|
|
954
|
+
```
|
|
955
|
+
|
|
956
|
+
**작업 항목**:
|
|
957
|
+
- [ ] 상태 조회 커맨드
|
|
958
|
+
- [ ] 테이블 형식 출력
|
|
959
|
+
|
|
960
|
+
## 파일 목록
|
|
961
|
+
|
|
962
|
+
### 신규 파일
|
|
963
|
+
```
|
|
964
|
+
src/core/vector-outbox.ts # Outbox 관리
|
|
965
|
+
src/core/worker-lock.ts # 단일 worker 락
|
|
966
|
+
```
|
|
967
|
+
|
|
968
|
+
### 수정 파일
|
|
969
|
+
```
|
|
970
|
+
src/core/types.ts # 타입 추가
|
|
971
|
+
src/core/vector-worker.ts # V2 로직 추가
|
|
972
|
+
src/core/vector-store.ts # upsert 추가
|
|
973
|
+
src/cli/index.ts # CLI 커맨드 추가
|
|
974
|
+
```
|
|
975
|
+
|
|
976
|
+
## 테스트
|
|
977
|
+
|
|
978
|
+
### 필수 테스트 케이스
|
|
979
|
+
|
|
980
|
+
1. **Idempotent Enqueue**
|
|
981
|
+
```typescript
|
|
982
|
+
await outbox.enqueue({ itemKind: 'entry', itemId: 'e1', embeddingVersion: 'v1' });
|
|
983
|
+
await outbox.enqueue({ itemKind: 'entry', itemId: 'e1', embeddingVersion: 'v1' });
|
|
984
|
+
// 두 번째는 isNew: false
|
|
985
|
+
const count = await db.query('SELECT COUNT(*) AS count FROM vector_outbox');
|
|
986
|
+
expect(count[0].count).toBe(1);
|
|
987
|
+
```
|
|
988
|
+
|
|
989
|
+
2. **Process All**
|
|
990
|
+
```typescript
|
|
991
|
+
await outbox.enqueue({ itemKind: 'entry', itemId: 'e1', ... });
|
|
992
|
+
await outbox.enqueue({ itemKind: 'entry', itemId: 'e2', ... });
|
|
993
|
+
const result = await worker.processAll();
|
|
994
|
+
expect(result.processed).toBe(2);
|
|
995
|
+
expect(result.succeeded).toBe(2);
|
|
996
|
+
```
|
|
997
|
+
|
|
998
|
+
3. **Reconcile Failed**
|
|
999
|
+
```typescript
|
|
1000
|
+
// 실패한 job 생성
|
|
1001
|
+
await db.run(`
|
|
1002
|
+
INSERT INTO vector_outbox (job_id, item_kind, item_id, embedding_version, status, retry_count)
|
|
1003
|
+
VALUES ('j1', 'entry', 'e1', 'v1', 'failed', 1)
|
|
1004
|
+
`);
|
|
1005
|
+
const recovered = await worker.reconcileFailed(3);
|
|
1006
|
+
expect(recovered).toBe(1);
|
|
1007
|
+
// status가 pending으로 변경됨
|
|
1008
|
+
```
|
|
1009
|
+
|
|
1010
|
+
4. **LanceDB Upsert**
|
|
1011
|
+
```typescript
|
|
1012
|
+
await vectorStore.upsert({ id: 'e1', vector: [0.1, 0.2], ... });
|
|
1013
|
+
await vectorStore.upsert({ id: 'e1', vector: [0.3, 0.4], ... });
|
|
1014
|
+
// 중복 없이 최신 벡터만 존재
|
|
1015
|
+
const results = await vectorStore.search([0.3, 0.4], 10);
|
|
1016
|
+
expect(results.filter(r => r.id === 'e1').length).toBe(1);
|
|
1017
|
+
```
|
|
1018
|
+
|
|
1019
|
+
5. **Single Writer Lock**
|
|
1020
|
+
```typescript
|
|
1021
|
+
const lock1 = new WorkerLock();
|
|
1022
|
+
const lock2 = new WorkerLock();
|
|
1023
|
+
expect(lock1.acquire()).toBe(true);
|
|
1024
|
+
expect(lock2.acquire()).toBe(false);
|
|
1025
|
+
lock1.release();
|
|
1026
|
+
expect(lock2.acquire()).toBe(true);
|
|
1027
|
+
```
|
|
1028
|
+
|
|
1029
|
+
## 마일스톤
|
|
1030
|
+
|
|
1031
|
+
| 단계 | 완료 기준 |
|
|
1032
|
+
|------|----------|
|
|
1033
|
+
| M1 | 타입 정의 + vector_outbox 테이블 |
|
|
1034
|
+
| M2 | VectorOutbox enqueue/getPending |
|
|
1035
|
+
| M3 | VectorWorkerV2 claimJobs/markDone |
|
|
1036
|
+
| M4 | VectorStore upsert |
|
|
1037
|
+
| M5 | processAll() 전체 루프 |
|
|
1038
|
+
| M6 | reconcile + recoverStuck |
|
|
1039
|
+
| M7 | CLI 커맨드 |
|
|
1040
|
+
| M8 | 테스트 통과 |
|
|
1041
|
+
|
|
1042
|
+
## 2026-02-25T12:31:26.457Z | 69fdda54-43d8-4a4f-a869-20ab2f32b2ab
|
|
1043
|
+
- type: session_summary
|
|
1044
|
+
- session: import:organized
|
|
1045
|
+
# Vector Outbox V2 Specification
|
|
1046
|
+
|
|
1047
|
+
> **Version**: 2.0.0
|
|
1048
|
+
> **Status**: Draft
|
|
1049
|
+
> **Created**: 2026-01-31
|
|
1050
|
+
|
|
1051
|
+
## 1. 개요
|
|
1052
|
+
|
|
1053
|
+
### 1.1 문제 정의
|
|
1054
|
+
|
|
1055
|
+
DuckDB와 LanceDB 간 데이터 정합성 문제:
|
|
1056
|
+
|
|
1057
|
+
1. **원자성 부재**: DuckDB 쓰기와 LanceDB 쓰기가 별도 트랜잭션
|
|
1058
|
+
2. **중복 벡터**: 재처리 시 같은 벡터가 중복 저장될 수 있음
|
|
1059
|
+
3. **일관성 부재**: DuckDB에는 있지만 LanceDB에는 없는 상태 발생
|
|
1060
|
+
4. **동시성**: 여러 프로세스가 동시에 벡터 저장 시 충돌
|
|
1061
|
+
|
|
1062
|
+
### 1.2 해결 방향
|
|
1063
|
+
|
|
1064
|
+
**Transactional Outbox Pattern**:
|
|
1065
|
+
1. DuckDB에 먼저 기록 (vector_outbox 테이블)
|
|
1066
|
+
2. 단일 writer가 outbox를 처리하여 LanceDB에 upsert
|
|
1067
|
+
3. 성공 시 outbox 상태 업데이트
|
|
1068
|
+
|
|
1069
|
+
## 2. 핵심 개념
|
|
1070
|
+
|
|
1071
|
+
### 2.1 Outbox 패턴 흐름
|
|
1072
|
+
|
|
1073
|
+
```
|
|
1074
|
+
Application DuckDB LanceDB
|
|
1075
|
+
│ │ │
|
|
1076
|
+
│ 1. Entry 저장 │ │
|
|
1077
|
+
├────────────────────────▶│ │
|
|
1078
|
+
│ │ │
|
|
1079
|
+
│ 2. Outbox job 생성 │ │
|
|
1080
|
+
├────────────────────────▶│ │
|
|
1081
|
+
│ │ │
|
|
1082
|
+
│ (트랜잭션 커밋) │ │
|
|
1083
|
+
│ │ │
|
|
1084
|
+
┌─────┴─────┐
|
|
1085
|
+
│ Worker │
|
|
1086
|
+
└─────┬─────┘
|
|
1087
|
+
│ │
|
|
1088
|
+
│ 3. pending job 조회 │
|
|
1089
|
+
│◀────────────────────────│
|
|
1090
|
+
│ │
|
|
1091
|
+
│ 4. 임베딩 생성 │
|
|
1092
|
+
│ │
|
|
1093
|
+
│ 5. LanceDB upsert │
|
|
1094
|
+
│─────────────────────────▶│
|
|
1095
|
+
│ │
|
|
1096
|
+
│ 6. status='done' 업데이트 │
|
|
1097
|
+
│────────────────────────▶│
|
|
1098
|
+
```
|
|
1099
|
+
|
|
1100
|
+
### 2.2 Job 상태 머신
|
|
1101
|
+
|
|
1102
|
+
```
|
|
1103
|
+
┌─────────┐ Worker 픽업 ┌────────────┐
|
|
1104
|
+
│ pending │ ──────────────────▶ │ processing │
|
|
1105
|
+
└─────────┘ └──────┬─────┘
|
|
1106
|
+
│ │
|
|
1107
|
+
│ 재시도 (reconcile) │
|
|
1108
|
+
│ │
|
|
1109
|
+
│ ┌───────────────────────┼───────────────────────┐
|
|
1110
|
+
│ │ │ │
|
|
1111
|
+
│ ▼ ▼ ▼
|
|
1112
|
+
│ ┌─────────┐ ┌──────────┐ ┌─────────┐
|
|
1113
|
+
└───▶│ pending │ │ done │ │ failed │
|
|
1114
|
+
└─────────┘ └──────────┘ └─────────┘
|
|
1115
|
+
│
|
|
1116
|
+
│ retry < max
|
|
1117
|
+
▼
|
|
1118
|
+
┌─────────┐
|
|
1119
|
+
│ pending │
|
|
1120
|
+
└─────────┘
|
|
1121
|
+
```
|
|
1122
|
+
|
|
1123
|
+
### 2.3 Item 종류
|
|
1124
|
+
|
|
1125
|
+
```typescript
|
|
1126
|
+
type ItemKind =
|
|
1127
|
+
| 'entry' // entries 테이블의 content 임베딩
|
|
1128
|
+
| 'task_title' // entities 테이블의 task 제목 임베딩
|
|
1129
|
+
| 'session_summary' // 세션 요약 임베딩
|
|
1130
|
+
;
|
|
1131
|
+
```
|
|
1132
|
+
|
|
1133
|
+
## 3. DB 스키마
|
|
1134
|
+
|
|
1135
|
+
### 3.1 vector_outbox 테이블
|
|
1136
|
+
|
|
1137
|
+
```sql
|
|
1138
|
+
CREATE TABLE vector_outbox (
|
|
1139
|
+
job_id VARCHAR PRIMARY KEY,
|
|
1140
|
+
item_kind VARCHAR NOT NULL, -- entry|task_title|session_summary
|
|
1141
|
+
item_id VARCHAR NOT NULL,
|
|
1142
|
+
embedding_version VARCHAR NOT NULL, -- e.g., 'v1.0.0', 'minilm-v2'
|
|
1143
|
+
status VARCHAR NOT NULL, -- pending|processing|done|failed
|
|
1144
|
+
retry_count INTEGER DEFAULT 0,
|
|
1145
|
+
error VARCHAR,
|
|
1146
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
1147
|
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
1148
|
+
|
|
1149
|
+
-- 중복 방지: 같은 아이템+버전은 하나만
|
|
1150
|
+
UNIQUE(item_kind, item_id, embedding_version)
|
|
1151
|
+
);
|
|
1152
|
+
|
|
1153
|
+
CREATE INDEX idx_outbox_status ON vector_outbox(status);
|
|
1154
|
+
CREATE INDEX idx_outbox_created ON vector_outbox(created_at);
|
|
1155
|
+
```
|
|
1156
|
+
|
|
1157
|
+
### 3.2 vector_store_meta 테이블 (선택)
|
|
1158
|
+
|
|
1159
|
+
```sql
|
|
1160
|
+
-- LanceDB 테이블 메타데이터 추적
|
|
1161
|
+
CREATE TABLE vector_store_meta (
|
|
1162
|
+
table_name VARCHAR PRIMARY KEY, -- e.g., 'entry_vectors_v1'
|
|
1163
|
+
embedding_version VARCHAR NOT NULL,
|
|
1164
|
+
item_count INTEGER DEFAULT 0,
|
|
1165
|
+
last_sync_at TIMESTAMP,
|
|
1166
|
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
1167
|
+
);
|
|
1168
|
+
```
|
|
1169
|
+
|
|
1170
|
+
## 4. Outbox API
|
|
1171
|
+
|
|
1172
|
+
### 4.1 Enqueue
|
|
1173
|
+
|
|
1174
|
+
```typescript
|
|
1175
|
+
interface OutboxEnqueueInput {
|
|
1176
|
+
itemKind: ItemKind;
|
|
1177
|
+
itemId: string;
|
|
1178
|
+
embeddingVersion: string;
|
|
1179
|
+
}
|
|
1180
|
+
|
|
1181
|
+
interface VectorOutbox {
|
|
1182
|
+
// Job 추가 (중복 시 무시)
|
|
1183
|
+
enqueue(input: OutboxEnqueueInput): Promise<EnqueueResult>;
|
|
1184
|
+
|
|
1185
|
+
// 배치 추가
|
|
1186
|
+
enqueueBatch(inputs: OutboxEnqueueInput[]): Promise<EnqueueResult[]>;
|
|
1187
|
+
}
|
|
1188
|
+
|
|
1189
|
+
type EnqueueResult =
|
|
1190
|
+
| { success: true; jobId: string; isNew: true }
|
|
1191
|
+
| { success: true; jobId: string; isNew: false } // 이미 존재
|
|
1192
|
+
| { success: false; error: string };
|
|
1193
|
+
```
|
|
1194
|
+
|
|
1195
|
+
### 4.2 Process (Worker)
|
|
1196
|
+
|
|
1197
|
+
```typescript
|
|
1198
|
+
interface VectorWorker {
|
|
1199
|
+
// pending job 가져오기 (락)
|
|
1200
|
+
claimJobs(limit: number): Promise<OutboxJob[]>;
|
|
1201
|
+
|
|
1202
|
+
// 처리 완료 표시
|
|
1203
|
+
markDone(jobId: string): Promise<void>;
|
|
1204
|
+
|
|
1205
|
+
// 실패 표시
|
|
1206
|
+
markFailed(jobId: string, error: string): Promise<void>;
|
|
1207
|
+
|
|
1208
|
+
// 전체 처리 루프
|
|
1209
|
+
processAll(): Promise<ProcessResult>;
|
|
1210
|
+
}
|
|
1211
|
+
|
|
1212
|
+
interface ProcessResult {
|
|
1213
|
+
processed: number;
|
|
1214
|
+
succeeded: number;
|
|
1215
|
+
failed: number;
|
|
1216
|
+
errors: { jobId: string; error: string }[];
|
|
1217
|
+
}
|
|
1218
|
+
```
|
|
1219
|
+
|
|
1220
|
+
### 4.3 Reconcile
|
|
1221
|
+
|
|
1222
|
+
```typescript
|
|
1223
|
+
interface VectorOutbox {
|
|
1224
|
+
// 재시도 가능한 failed job을 pending으로 변경
|
|
1225
|
+
reconcileFailed(maxRetries: number): Promise<number>;
|
|
1226
|
+
|
|
1227
|
+
// processing 상태로 오래 멈춘 job 복구
|
|
1228
|
+
recoverStuck(stuckThresholdMs: number): Promise<number>;
|
|
1229
|
+
|
|
1230
|
+
// 완료된 job 정리
|
|
1231
|
+
cleanupDone(olderThanDays: number): Promise<number>;
|
|
1232
|
+
}
|
|
1233
|
+
```
|
|
1234
|
+
|
|
1235
|
+
## 5. 단일 Writer 패턴
|
|
1236
|
+
|
|
1237
|
+
### 5.1 왜 단일 Writer인가?
|
|
1238
|
+
|
|
1239
|
+
**문제**: 여러 프로세스가 동시에 LanceDB에 쓰면
|
|
1240
|
+
- 충돌 발생 가능
|
|
1241
|
+
- 중복 벡터 생성
|
|
1242
|
+
- 트랜잭션 보장 어려움
|
|
1243
|
+
|
|
1244
|
+
**해결**: 하나의 worker만 LanceDB에 쓰기
|
|
1245
|
+
|
|
1246
|
+
```typescript
|
|
1247
|
+
// 단일 writer 보장 방법들
|
|
1248
|
+
|
|
1249
|
+
// 방법 1: 프로세스 수준 락
|
|
1250
|
+
const lockFile = '/tmp/vector-worker.lock';
|
|
1251
|
+
if (fs.existsSync(lockFile)) {
|
|
1252
|
+
console.log('Another worker is running');
|
|
1253
|
+
process.exit(0);
|
|
1254
|
+
}
|
|
1255
|
+
fs.writeFileSync(lockFile, process.pid.toString());
|
|
1256
|
+
|
|
1257
|
+
// 방법 2: DB 수준 락
|
|
1258
|
+
await db.run(`
|
|
1259
|
+
INSERT INTO worker_locks (worker_name, locked_at)
|
|
1260
|
+
VALUES ('vector_worker', CURRENT_TIMESTAMP)
|
|
1261
|
+
ON CONFLICT (worker_name)
|
|
1262
|
+
DO UPDATE SET locked_at = CURRENT_TIMESTAMP
|
|
1263
|
+
WHERE locked_at < CURRENT_TIMESTAMP - INTERVAL '5 minutes'
|
|
1264
|
+
`);
|
|
1265
|
+
```
|
|
1266
|
+
|
|
1267
|
+
### 5.2 Worker 구현
|
|
1268
|
+
|
|
1269
|
+
```typescript
|
|
1270
|
+
class VectorWorkerImpl implements VectorWorker {
|
|
1271
|
+
private embedder: Embedder;
|
|
1272
|
+
private vectorStore: VectorStore;
|
|
1273
|
+
private db: Database;
|
|
1274
|
+
|
|
1275
|
+
async processAll(): Promise<ProcessResult> {
|
|
1276
|
+
const result: ProcessResult = { processed: 0, succeeded: 0, failed: 0, errors: [] };
|
|
1277
|
+
|
|
1278
|
+
while (true) {
|
|
1279
|
+
const jobs = await this.claimJobs(BATCH_SIZE);
|
|
1280
|
+
if (jobs.length === 0) break;
|
|
1281
|
+
|
|
1282
|
+
for (const job of jobs) {
|
|
1283
|
+
try {
|
|
1284
|
+
await this.processJob(job);
|
|
1285
|
+
await this.markDone(job.jobId);
|
|
1286
|
+
result.succeeded++;
|
|
1287
|
+
} catch (error) {
|
|
1288
|
+
await this.markFailed(job.jobId, error.message);
|
|
1289
|
+
result.failed++;
|
|
1290
|
+
result.errors.push({ jobId: job.jobId, error: error.message });
|
|
1291
|
+
}
|
|
1292
|
+
result.processed++;
|
|
1293
|
+
}
|
|
1294
|
+
}
|
|
1295
|
+
|
|
1296
|
+
return result;
|
|
1297
|
+
}
|
|
1298
|
+
|
|
1299
|
+
private async processJob(job: OutboxJob): Promise<void> {
|
|
1300
|
+
// 1. 원본 콘텐츠 조회
|
|
1301
|
+
const content = await this.getContent(job.itemKind, job.itemId);
|
|
1302
|
+
|
|
1303
|
+
// 2. 임베딩 생성
|
|
1304
|
+
const embedding = await this.embedder.embed(content);
|
|
1305
|
+
|
|
1306
|
+
// 3. LanceDB upsert (idempotent)
|
|
1307
|
+
await this.vectorStore.upsert({
|
|
1308
|
+
id: job.itemId,
|
|
1309
|
+
vector: embedding,
|
|
1310
|
+
metadata: {
|
|
1311
|
+
itemKind: job.itemKind,
|
|
1312
|
+
embeddingVersion: job.embeddingVersion
|
|
1313
|
+
}
|
|
1314
|
+
});
|
|
1315
|
+
}
|
|
1316
|
+
}
|
|
1317
|
+
```
|
|
1318
|
+
|
|
1319
|
+
## 6. LanceDB Upsert 전략
|
|
1320
|
+
|
|
1321
|
+
### 6.1 Idempotent Upsert
|
|
1322
|
+
|
|
1323
|
+
```typescript
|
|
1324
|
+
async function upsertVector(record: VectorRecord): Promise<void> {
|
|
1325
|
+
const table = await this.getTable(record.itemKind, record.embeddingVersion);
|
|
1326
|
+
|
|
1327
|
+
// LanceDB에서 기존 레코드 확인
|
|
1328
|
+
const existing = await table
|
|
1329
|
+
.search([0]) // dummy search for filter
|
|
1330
|
+
.filter(`id = '${record.id}'`)
|
|
1331
|
+
.limit(1)
|
|
1332
|
+
.toArray();
|
|
1333
|
+
|
|
1334
|
+
if (existing.length > 0) {
|
|
1335
|
+
// 업데이트: 삭제 후 재삽입
|
|
1336
|
+
await table.delete(`id = '${record.id}'`);
|
|
1337
|
+
}
|
|
1338
|
+
|
|
1339
|
+
// 삽입
|
|
1340
|
+
await table.add([{
|
|
1341
|
+
id: record.id,
|
|
1342
|
+
vector: record.vector,
|
|
1343
|
+
content: record.content,
|
|
1344
|
+
timestamp: record.timestamp,
|
|
1345
|
+
metadata: record.metadata
|
|
1346
|
+
}]);
|
|
1347
|
+
}
|
|
1348
|
+
```
|
|
1349
|
+
|
|
1350
|
+
### 6.2 테이블 버전 관리
|
|
1351
|
+
|
|
1352
|
+
```typescript
|
|
1353
|
+
// 임베딩 모델 버전별 테이블 분리
|
|
1354
|
+
function getTableName(itemKind: ItemKind, embeddingVersion: string): string {
|
|
1355
|
+
// entry_vectors_minilm_v2
|
|
1356
|
+
// task_title_vectors_minilm_v2
|
|
1357
|
+
const versionSlug = embeddingVersion.replace(/[^a-z0-9]/gi, '_');
|
|
1358
|
+
return `${itemKind}_vectors_${versionSlug}`;
|
|
1359
|
+
}
|
|
1360
|
+
```
|
|
1361
|
+
|
|
1362
|
+
## 7. Idris2 영감 적용
|
|
1363
|
+
|
|
1364
|
+
### 7.1 상태 전이 타입 안전성
|
|
1365
|
+
|
|
1366
|
+
```typescript
|
|
1367
|
+
// 타입 레벨에서 유효한 상태 전이만 허용
|
|
1368
|
+
type ValidTransition =
|
|
1369
|
+
| { from: 'pending'; to: 'processing' }
|
|
1370
|
+
| { from: 'processing'; to: 'done' }
|
|
1371
|
+
| { from: 'processing'; to: 'failed' }
|
|
1372
|
+
| { from: 'failed'; to: 'pending' }; // retry
|
|
1373
|
+
|
|
1374
|
+
function transition(job: OutboxJob, to: OutboxStatus): OutboxJob {
|
|
1375
|
+
const valid: ValidTransition[] = [
|
|
1376
|
+
{ from: 'pending', to: 'processing' },
|
|
1377
|
+
{ from: 'processing', to: 'done' },
|
|
1378
|
+
{ from: 'processing', to: 'failed' },
|
|
1379
|
+
{ from: 'failed', to: 'pending' }
|
|
1380
|
+
];
|
|
1381
|
+
|
|
1382
|
+
const isValid = valid.some(t => t.from === job.status && t.to === to);
|
|
1383
|
+
if (!isValid) {
|
|
1384
|
+
throw new InvalidTransitionError(`Cannot transition from ${job.status} to ${to}`);
|
|
1385
|
+
}
|
|
1386
|
+
|
|
1387
|
+
return { ...job, status: to, updatedAt: new Date() };
|
|
1388
|
+
}
|
|
1389
|
+
```
|
|
1390
|
+
|
|
1391
|
+
### 7.2 Idempotency 불변식
|
|
1392
|
+
|
|
1393
|
+
```typescript
|
|
1394
|
+
// Zod로 idempotency 검증
|
|
1395
|
+
const OutboxJobSchema = z.object({
|
|
1396
|
+
itemKind: ItemKindSchema,
|
|
1397
|
+
itemId: z.string(),
|
|
1398
|
+
embeddingVersion: z.string()
|
|
1399
|
+
}).refine(
|
|
1400
|
+
async (job) => {
|
|
1401
|
+
// 같은 조합은 하나만 존재해야 함
|
|
1402
|
+
const existing = await db.query(`
|
|
1403
|
+
SELECT COUNT(*) as cnt FROM vector_outbox
|
|
1404
|
+
WHERE item_kind = ? AND item_id = ? AND embedding_version = ?
|
|
1405
|
+
`, [job.itemKind, job.itemId, job.embeddingVersion]);
|
|
1406
|
+
return existing[0].cnt <= 1;
|
|
1407
|
+
},
|
|
1408
|
+
{ message: 'Duplicate outbox job' }
|
|
1409
|
+
);
|
|
1410
|
+
```
|
|
1411
|
+
|
|
1412
|
+
## 8. 기존 코드와의 관계
|
|
1413
|
+
|
|
1414
|
+
### 8.1 현재 vector-worker.ts
|
|
1415
|
+
|
|
1416
|
+
```typescript
|
|
1417
|
+
// 현재 구현 (src/core/vector-worker.ts)
|
|
1418
|
+
export class VectorWorker {
|
|
1419
|
+
// OutboxItem 타입 사용
|
|
1420
|
+
// status: 'pending' | 'processing' | 'done' | 'failed'
|
|
1421
|
+
}
|
|
1422
|
+
```
|
|
1423
|
+
|
|
1424
|
+
### 8.2 V2 확장 포인트
|
|
1425
|
+
|
|
1426
|
+
| 기존 | V2 확장 |
|
|
1427
|
+
|-----|---------|
|
|
1428
|
+
| OutboxItem 타입 | itemKind, embeddingVersion 추가 |
|
|
1429
|
+
| 단순 배치 처리 | claiming with lock |
|
|
1430
|
+
| 재시도 없음 | reconcile + retry policy |
|
|
1431
|
+
| 중복 가능 | UNIQUE 제약 + upsert |
|
|
1432
|
+
|
|
1433
|
+
### 8.3 현재 vector-store.ts
|
|
1434
|
+
|
|
1435
|
+
```typescript
|
|
1436
|
+
// 현재 구현 (src/core/vector-store.ts)
|
|
1437
|
+
export class VectorStore {
|
|
1438
|
+
async add(record: VectorRecord): Promise<void>;
|
|
1439
|
+
async search(query: number[], limit: number): Promise<SearchResult[]>;
|
|
1440
|
+
}
|
|
1441
|
+
```
|
|
1442
|
+
|
|
1443
|
+
V2에서 `upsert` 메서드 추가 필요.
|
|
1444
|
+
|
|
1445
|
+
## 9. 운영 고려사항
|
|
1446
|
+
|
|
1447
|
+
### 9.1 모니터링
|
|
1448
|
+
|
|
1449
|
+
```typescript
|
|
1450
|
+
// 메트릭 수집
|
|
1451
|
+
interface OutboxMetrics {
|
|
1452
|
+
pendingCount: number;
|
|
1453
|
+
processingCount: number;
|
|
1454
|
+
failedCount: number;
|
|
1455
|
+
doneCount: number;
|
|
1456
|
+
avgProcessingTimeMs: number;
|
|
1457
|
+
lastProcessedAt: Date | null;
|
|
1458
|
+
}
|
|
1459
|
+
|
|
1460
|
+
async function getMetrics(): Promise<OutboxMetrics> {
|
|
1461
|
+
return db.query(`
|
|
1462
|
+
SELECT
|
|
1463
|
+
SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) as pendingCount,
|
|
1464
|
+
SUM(CASE WHEN status = 'processing' THEN 1 ELSE 0 END) as processingCount,
|
|
1465
|
+
SUM(CASE WHEN status = 'failed' THEN 1 ELSE 0 END) as failedCount,
|
|
1466
|
+
SUM(CASE WHEN status = 'done' THEN 1 ELSE 0 END) as doneCount,
|
|
1467
|
+
MAX(updated_at) as lastProcessedAt
|
|
1468
|
+
FROM vector_outbox
|
|
1469
|
+
`);
|
|
1470
|
+
}
|
|
1471
|
+
```
|
|
1472
|
+
|
|
1473
|
+
### 9.2 알림
|
|
1474
|
+
|
|
1475
|
+
```typescript
|
|
1476
|
+
// failed job이 임계값 초과 시 알림
|
|
1477
|
+
const FAILED_THRESHOLD = 10;
|
|
1478
|
+
|
|
1479
|
+
async function checkAndAlert(): Promise<void> {
|
|
1480
|
+
const metrics = await getMetrics();
|
|
1481
|
+
if (metrics.failedCount >= FAILED_THRESHOLD) {
|
|
1482
|
+
console.error(`[ALERT] ${metrics.failedCount} failed vector jobs`);
|
|
1483
|
+
// 추가 알림 로직
|
|
1484
|
+
}
|
|
1485
|
+
}
|
|
1486
|
+
```
|
|
1487
|
+
|
|
1488
|
+
### 9.3 정리 작업
|
|
1489
|
+
|
|
1490
|
+
```typescript
|
|
1491
|
+
// 정기적으로 완료된 job 정리
|
|
1492
|
+
async function cleanup(): Promise<void> {
|
|
1493
|
+
const deleted = await db.run(`
|
|
1494
|
+
DELETE FROM vector_outbox
|
|
1495
|
+
WHERE status = 'done'
|
|
1496
|
+
AND updated_at < CURRENT_TIMESTAMP - INTERVAL '7 days'
|
|
1497
|
+
`);
|
|
1498
|
+
console.log(`Cleaned up ${deleted.changes} done jobs`);
|
|
1499
|
+
}
|
|
1500
|
+
```
|
|
1501
|
+
|
|
1502
|
+
## 10. 성공 기준
|
|
1503
|
+
|
|
1504
|
+
- [ ] vector_outbox 테이블 생성 및 UNIQUE 제약
|
|
1505
|
+
- [ ] enqueue가 중복을 무시하고 idempotent하게 동작
|
|
1506
|
+
- [ ] 단일 worker가 pending job을 처리
|
|
1507
|
+
- [ ] LanceDB upsert로 중복 벡터 방지
|
|
1508
|
+
- [ ] failed job 재시도 (reconcile) 동작
|
|
1509
|
+
- [ ] processing 상태 stuck 복구 동작
|
|
1510
|
+
- [ ] 기존 vector-worker.ts와 호환 유지
|