engrm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mcp.json +9 -0
- package/AUTH-DESIGN.md +436 -0
- package/BRIEF.md +197 -0
- package/CLAUDE.md +44 -0
- package/COMPETITIVE.md +174 -0
- package/CONTEXT-OPTIMIZATION.md +305 -0
- package/INFRASTRUCTURE.md +252 -0
- package/LICENSE +105 -0
- package/MARKET.md +230 -0
- package/PLAN.md +278 -0
- package/README.md +121 -0
- package/SENTINEL.md +293 -0
- package/SERVER-API-PLAN.md +553 -0
- package/SPEC.md +843 -0
- package/SWOT.md +148 -0
- package/SYNC-ARCHITECTURE.md +294 -0
- package/VIBE-CODER-STRATEGY.md +250 -0
- package/bun.lock +375 -0
- package/hooks/post-tool-use.ts +144 -0
- package/hooks/session-start.ts +64 -0
- package/hooks/stop.ts +131 -0
- package/mem-page.html +1305 -0
- package/package.json +30 -0
- package/src/capture/dedup.test.ts +103 -0
- package/src/capture/dedup.ts +76 -0
- package/src/capture/extractor.test.ts +245 -0
- package/src/capture/extractor.ts +330 -0
- package/src/capture/quality.test.ts +168 -0
- package/src/capture/quality.ts +104 -0
- package/src/capture/retrospective.test.ts +115 -0
- package/src/capture/retrospective.ts +121 -0
- package/src/capture/scanner.test.ts +131 -0
- package/src/capture/scanner.ts +100 -0
- package/src/capture/scrubber.test.ts +144 -0
- package/src/capture/scrubber.ts +181 -0
- package/src/cli.ts +517 -0
- package/src/config.ts +238 -0
- package/src/context/inject.test.ts +940 -0
- package/src/context/inject.ts +382 -0
- package/src/embeddings/backfill.ts +50 -0
- package/src/embeddings/embedder.test.ts +76 -0
- package/src/embeddings/embedder.ts +139 -0
- package/src/lifecycle/aging.test.ts +103 -0
- package/src/lifecycle/aging.ts +36 -0
- package/src/lifecycle/compaction.test.ts +264 -0
- package/src/lifecycle/compaction.ts +190 -0
- package/src/lifecycle/purge.test.ts +100 -0
- package/src/lifecycle/purge.ts +37 -0
- package/src/lifecycle/scheduler.test.ts +120 -0
- package/src/lifecycle/scheduler.ts +101 -0
- package/src/provisioning/browser-auth.ts +172 -0
- package/src/provisioning/provision.test.ts +198 -0
- package/src/provisioning/provision.ts +94 -0
- package/src/register.test.ts +167 -0
- package/src/register.ts +178 -0
- package/src/server.ts +436 -0
- package/src/storage/migrations.test.ts +244 -0
- package/src/storage/migrations.ts +261 -0
- package/src/storage/outbox.test.ts +229 -0
- package/src/storage/outbox.ts +131 -0
- package/src/storage/projects.test.ts +137 -0
- package/src/storage/projects.ts +184 -0
- package/src/storage/sqlite.test.ts +798 -0
- package/src/storage/sqlite.ts +934 -0
- package/src/storage/vec.test.ts +198 -0
- package/src/sync/auth.test.ts +76 -0
- package/src/sync/auth.ts +68 -0
- package/src/sync/client.ts +183 -0
- package/src/sync/engine.test.ts +94 -0
- package/src/sync/engine.ts +127 -0
- package/src/sync/pull.test.ts +279 -0
- package/src/sync/pull.ts +170 -0
- package/src/sync/push.test.ts +117 -0
- package/src/sync/push.ts +230 -0
- package/src/tools/get.ts +34 -0
- package/src/tools/pin.ts +47 -0
- package/src/tools/save.test.ts +301 -0
- package/src/tools/save.ts +231 -0
- package/src/tools/search.test.ts +69 -0
- package/src/tools/search.ts +181 -0
- package/src/tools/timeline.ts +64 -0
- package/tsconfig.json +22 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sync engine orchestrator.
|
|
3
|
+
*
|
|
4
|
+
* Manages push/pull timers and startup backfill.
|
|
5
|
+
* If sync is not configured (no API key), the engine is a no-op.
|
|
6
|
+
*
|
|
7
|
+
* Timers:
|
|
8
|
+
* - Push: every config.sync.interval_seconds (default 30s)
|
|
9
|
+
* - Pull: every 60s
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { MemDatabase } from "../storage/sqlite.js";
|
|
13
|
+
import type { Config } from "../config.js";
|
|
14
|
+
import { VectorClient } from "./client.js";
|
|
15
|
+
import { pushOutbox } from "./push.js";
|
|
16
|
+
import { pullFromVector } from "./pull.js";
|
|
17
|
+
|
|
18
|
+
const DEFAULT_PULL_INTERVAL = 60_000; // 60 seconds
|
|
19
|
+
|
|
20
|
+
export class SyncEngine {
|
|
21
|
+
private client: VectorClient | null = null;
|
|
22
|
+
private pushTimer: ReturnType<typeof setInterval> | null = null;
|
|
23
|
+
private pullTimer: ReturnType<typeof setInterval> | null = null;
|
|
24
|
+
private _pushing = false;
|
|
25
|
+
private _pulling = false;
|
|
26
|
+
private _running = false;
|
|
27
|
+
|
|
28
|
+
constructor(
|
|
29
|
+
private readonly db: MemDatabase,
|
|
30
|
+
private readonly config: Config
|
|
31
|
+
) {
|
|
32
|
+
if (VectorClient.isConfigured(config)) {
|
|
33
|
+
try {
|
|
34
|
+
this.client = new VectorClient(config);
|
|
35
|
+
} catch {
|
|
36
|
+
// Configuration invalid — stay in offline mode
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Start the sync engine.
|
|
43
|
+
* Runs startup backfill, then sets up push/pull timers.
|
|
44
|
+
*/
|
|
45
|
+
start(): void {
|
|
46
|
+
if (!this.client || !this.config.sync.enabled) {
|
|
47
|
+
this._running = false;
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
this._running = true;
|
|
52
|
+
|
|
53
|
+
// Startup backfill: push any pending outbox items
|
|
54
|
+
this.pushNow().catch(() => {});
|
|
55
|
+
|
|
56
|
+
// Push timer
|
|
57
|
+
const pushInterval = this.config.sync.interval_seconds * 1000;
|
|
58
|
+
this.pushTimer = setInterval(() => {
|
|
59
|
+
this.pushNow().catch(() => {});
|
|
60
|
+
}, pushInterval);
|
|
61
|
+
|
|
62
|
+
// Pull timer
|
|
63
|
+
this.pullTimer = setInterval(() => {
|
|
64
|
+
this.pullNow().catch(() => {});
|
|
65
|
+
}, DEFAULT_PULL_INTERVAL);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* Stop all timers and clean up.
|
|
70
|
+
*/
|
|
71
|
+
stop(): void {
|
|
72
|
+
if (this.pushTimer) {
|
|
73
|
+
clearInterval(this.pushTimer);
|
|
74
|
+
this.pushTimer = null;
|
|
75
|
+
}
|
|
76
|
+
if (this.pullTimer) {
|
|
77
|
+
clearInterval(this.pullTimer);
|
|
78
|
+
this.pullTimer = null;
|
|
79
|
+
}
|
|
80
|
+
this._running = false;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* Force an immediate push of pending outbox items.
|
|
85
|
+
*/
|
|
86
|
+
async pushNow(): Promise<void> {
|
|
87
|
+
if (!this.client || this._pushing) return;
|
|
88
|
+
this._pushing = true;
|
|
89
|
+
try {
|
|
90
|
+
await pushOutbox(
|
|
91
|
+
this.db,
|
|
92
|
+
this.client,
|
|
93
|
+
this.config,
|
|
94
|
+
this.config.sync.batch_size
|
|
95
|
+
);
|
|
96
|
+
} finally {
|
|
97
|
+
this._pushing = false;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
/**
|
|
102
|
+
* Force an immediate pull from Candengo Vector.
|
|
103
|
+
*/
|
|
104
|
+
async pullNow(): Promise<void> {
|
|
105
|
+
if (!this.client || this._pulling) return;
|
|
106
|
+
this._pulling = true;
|
|
107
|
+
try {
|
|
108
|
+
await pullFromVector(this.db, this.client, this.config);
|
|
109
|
+
} finally {
|
|
110
|
+
this._pulling = false;
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
/**
|
|
115
|
+
* Is the sync engine running?
|
|
116
|
+
*/
|
|
117
|
+
isRunning(): boolean {
|
|
118
|
+
return this._running;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* Is the client configured for remote sync?
|
|
123
|
+
*/
|
|
124
|
+
isConfigured(): boolean {
|
|
125
|
+
return this.client !== null;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
import { describe, expect, test, beforeEach, afterEach } from "bun:test";
|
|
2
|
+
import { mkdtempSync, rmSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import { MemDatabase } from "../storage/sqlite.js";
|
|
6
|
+
import type { Config } from "../config.js";
|
|
7
|
+
import { pullFromVector } from "./pull.js";
|
|
8
|
+
import type {
|
|
9
|
+
VectorClient,
|
|
10
|
+
VectorChangeFeedResponse,
|
|
11
|
+
VectorSearchResult,
|
|
12
|
+
} from "./client.js";
|
|
13
|
+
|
|
14
|
+
let db: MemDatabase;
|
|
15
|
+
let tmpDir: string;
|
|
16
|
+
|
|
17
|
+
function makeConfig(overrides: Partial<Config> = {}): Config {
|
|
18
|
+
return {
|
|
19
|
+
candengo_url: "https://candengo.com",
|
|
20
|
+
candengo_api_key: "cvk_test123",
|
|
21
|
+
site_id: "test-site",
|
|
22
|
+
namespace: "dev-memory",
|
|
23
|
+
user_id: "david",
|
|
24
|
+
user_email: "",
|
|
25
|
+
device_id: "laptop-abc",
|
|
26
|
+
teams: [],
|
|
27
|
+
sync: { enabled: true, interval_seconds: 30, batch_size: 50 },
|
|
28
|
+
search: { default_limit: 10, local_boost: 1.2, scope: "all" },
|
|
29
|
+
scrubbing: {
|
|
30
|
+
enabled: true,
|
|
31
|
+
custom_patterns: [],
|
|
32
|
+
default_sensitivity: "shared",
|
|
33
|
+
},
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
function makeChange(
|
|
38
|
+
id: number,
|
|
39
|
+
overrides: Partial<VectorSearchResult> = {}
|
|
40
|
+
): VectorSearchResult {
|
|
41
|
+
return {
|
|
42
|
+
source_id:
|
|
43
|
+
overrides.source_id ?? `other-user-other-device-obs-${id}`,
|
|
44
|
+
content: overrides.content ?? `Title ${id}\n\nNarrative for ${id}`,
|
|
45
|
+
score: 1.0,
|
|
46
|
+
metadata: overrides.metadata ?? {
|
|
47
|
+
project_canonical: "github.com/test/repo",
|
|
48
|
+
project_name: "repo",
|
|
49
|
+
type: "discovery",
|
|
50
|
+
title: `Title ${id}`,
|
|
51
|
+
user_id: "other-user",
|
|
52
|
+
device_id: "other-device",
|
|
53
|
+
agent: "claude-code",
|
|
54
|
+
quality: 0.7,
|
|
55
|
+
},
|
|
56
|
+
};
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
function mockClient(
|
|
60
|
+
pages: VectorChangeFeedResponse[]
|
|
61
|
+
): VectorClient {
|
|
62
|
+
let callCount = 0;
|
|
63
|
+
return {
|
|
64
|
+
pullChanges: async (
|
|
65
|
+
_cursor?: string,
|
|
66
|
+
_limit?: number
|
|
67
|
+
): Promise<VectorChangeFeedResponse> => {
|
|
68
|
+
const page = pages[callCount] ?? {
|
|
69
|
+
changes: [],
|
|
70
|
+
cursor: "end",
|
|
71
|
+
has_more: false,
|
|
72
|
+
};
|
|
73
|
+
callCount++;
|
|
74
|
+
return page;
|
|
75
|
+
},
|
|
76
|
+
} as unknown as VectorClient;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// Each test gets a fresh temp directory and a fresh SQLite database so
// pulls never see state from a previous test.
beforeEach(() => {
  tmpDir = mkdtempSync(join(tmpdir(), "engrm-pull-test-"));
  db = new MemDatabase(join(tmpDir, "test.db"));
});

// Close the DB handle before deleting its backing directory.
afterEach(() => {
  db.close();
  rmSync(tmpDir, { recursive: true, force: true });
});
|
88
|
+
|
|
89
|
+
describe("pullFromVector", () => {
  // Happy path: one page, every change from a foreign device merges.
  test("merges single page of changes", async () => {
    const client = mockClient([
      {
        changes: [makeChange(1), makeChange(2)],
        cursor: "cursor-1",
        has_more: false,
      },
    ]);

    const result = await pullFromVector(db, client, makeConfig());
    expect(result.received).toBe(2);
    expect(result.merged).toBe(2);
    expect(result.skipped).toBe(0);
  });

  // has_more=true must trigger a follow-up request for the next page.
  test("loops on has_more", async () => {
    const client = mockClient([
      {
        changes: [makeChange(1), makeChange(2)],
        cursor: "cursor-1",
        has_more: true,
      },
      {
        changes: [makeChange(3)],
        cursor: "cursor-2",
        has_more: false,
      },
    ]);

    const result = await pullFromVector(db, client, makeConfig());
    expect(result.received).toBe(3);
    expect(result.merged).toBe(3);
  });

  // Pagination continues across more than two pages until has_more=false.
  test("loops through multiple pages", async () => {
    const client = mockClient([
      {
        changes: [makeChange(1)],
        cursor: "c1",
        has_more: true,
      },
      {
        changes: [makeChange(2)],
        cursor: "c2",
        has_more: true,
      },
      {
        changes: [makeChange(3)],
        cursor: "c3",
        has_more: false,
      },
    ]);

    const result = await pullFromVector(db, client, makeConfig());
    expect(result.received).toBe(3);
    expect(result.merged).toBe(3);
  });

  // A server that always reports has_more must not loop forever:
  // the pull stops after MAX_PAGES pages.
  test("stops at MAX_PAGES safety limit", async () => {
    // Create 25 pages (MAX_PAGES = 20)
    const pages: VectorChangeFeedResponse[] = Array.from(
      { length: 25 },
      (_, i) => ({
        changes: [makeChange(i + 1)],
        cursor: `cursor-${i + 1}`,
        has_more: true,
      })
    );

    const client = mockClient(pages);
    const result = await pullFromVector(db, client, makeConfig());
    // Should stop at 20 pages
    expect(result.received).toBe(20);
    expect(result.merged).toBe(20);
  });

  // A change whose source_id encodes this config's own user/device
  // (david/laptop-abc) is already local and must be skipped, not re-merged.
  test("skips observations from own device", async () => {
    const ownChange: VectorSearchResult = {
      source_id: "david-laptop-abc-obs-1",
      content: "Title\n\nNarrative",
      score: 1.0,
      metadata: {
        project_canonical: "github.com/test/repo",
        type: "discovery",
        title: "Title",
        user_id: "david",
        device_id: "laptop-abc",
        agent: "claude-code",
        quality: 0.7,
      },
    };

    const client = mockClient([
      {
        changes: [ownChange, makeChange(2)],
        cursor: "c1",
        has_more: false,
      },
    ]);

    const result = await pullFromVector(db, client, makeConfig());
    expect(result.received).toBe(2);
    expect(result.merged).toBe(1);
    expect(result.skipped).toBe(1);
  });

  // Re-delivering a change with a remote_source_id that was already
  // imported must dedupe rather than insert a second copy.
  test("skips duplicate remote_source_id", async () => {
    const client = mockClient([
      {
        changes: [makeChange(1)],
        cursor: "c1",
        has_more: false,
      },
    ]);

    // Pull once
    await pullFromVector(db, client, makeConfig());

    // Pull same change again
    const client2 = mockClient([
      {
        changes: [makeChange(1)],
        cursor: "c2",
        has_more: false,
      },
    ]);

    const result = await pullFromVector(db, client2, makeConfig());
    expect(result.merged).toBe(0);
    expect(result.skipped).toBe(1);
  });

  // The feed cursor is persisted per page; after the run, sync state
  // holds the cursor of the LAST page so a restart resumes correctly.
  test("saves cursor after each page", async () => {
    const client = mockClient([
      {
        changes: [makeChange(1)],
        cursor: "cursor-page1",
        has_more: true,
      },
      {
        changes: [makeChange(2)],
        cursor: "cursor-page2",
        has_more: false,
      },
    ]);

    await pullFromVector(db, client, makeConfig());
    expect(db.getSyncState("pull_cursor")).toBe("cursor-page2");
  });

  // An empty feed page produces zero counts and no errors.
  test("handles empty response", async () => {
    const client = mockClient([
      {
        changes: [],
        cursor: "",
        has_more: false,
      },
    ]);

    const result = await pullFromVector(db, client, makeConfig());
    expect(result.received).toBe(0);
    expect(result.merged).toBe(0);
  });

  // Without project_canonical metadata the change cannot be placed in a
  // local project, so it is counted as skipped.
  test("skips changes without project_canonical", async () => {
    const noProject: VectorSearchResult = {
      source_id: "other-other-obs-1",
      content: "Title\n\nNarrative",
      score: 1.0,
      metadata: {
        type: "discovery",
        title: "Title",
        user_id: "other",
        device_id: "other",
      },
    };

    const client = mockClient([
      {
        changes: [noProject],
        cursor: "c1",
        has_more: false,
      },
    ]);

    const result = await pullFromVector(db, client, makeConfig());
    expect(result.skipped).toBe(1);
    expect(result.merged).toBe(0);
  });
});
|
package/src/sync/pull.ts
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pull engine: fetch observations from Candengo Vector change feed
|
|
3
|
+
* and merge into local SQLite.
|
|
4
|
+
*
|
|
5
|
+
* Uses server-side cursors to track position in the change feed.
|
|
6
|
+
* Skips observations from the current device (already local).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { MemDatabase } from "../storage/sqlite.js";
|
|
10
|
+
import type { Config } from "../config.js";
|
|
11
|
+
import { VectorClient, type VectorSearchResult } from "./client.js";
|
|
12
|
+
import { parseSourceId } from "./auth.js";
|
|
13
|
+
import { composeEmbeddingText, embedText } from "../embeddings/embedder.js";
|
|
14
|
+
|
|
15
|
+
const PULL_CURSOR_KEY = "pull_cursor";
|
|
16
|
+
|
|
17
|
+
export interface PullResult {
|
|
18
|
+
received: number;
|
|
19
|
+
merged: number;
|
|
20
|
+
skipped: number;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/** Safety limit to prevent infinite loops if server keeps returning has_more. */
|
|
24
|
+
const MAX_PAGES = 20;
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Pull changes from Candengo Vector and merge into local SQLite.
|
|
28
|
+
* Loops on has_more until all pages are consumed (up to MAX_PAGES).
|
|
29
|
+
*/
|
|
30
|
+
export async function pullFromVector(
|
|
31
|
+
db: MemDatabase,
|
|
32
|
+
client: VectorClient,
|
|
33
|
+
config: Config,
|
|
34
|
+
limit: number = 50
|
|
35
|
+
): Promise<PullResult> {
|
|
36
|
+
let cursor = db.getSyncState(PULL_CURSOR_KEY) ?? undefined;
|
|
37
|
+
let totalReceived = 0;
|
|
38
|
+
let totalMerged = 0;
|
|
39
|
+
let totalSkipped = 0;
|
|
40
|
+
|
|
41
|
+
for (let page = 0; page < MAX_PAGES; page++) {
|
|
42
|
+
const response = await client.pullChanges(cursor, limit);
|
|
43
|
+
const { merged, skipped } = mergeChanges(db, config, response.changes);
|
|
44
|
+
|
|
45
|
+
totalReceived += response.changes.length;
|
|
46
|
+
totalMerged += merged;
|
|
47
|
+
totalSkipped += skipped;
|
|
48
|
+
|
|
49
|
+
// Update cursor after each page so progress is saved even if we crash
|
|
50
|
+
if (response.cursor) {
|
|
51
|
+
db.setSyncState(PULL_CURSOR_KEY, response.cursor);
|
|
52
|
+
cursor = response.cursor;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
if (!response.has_more || response.changes.length === 0) break;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
return { received: totalReceived, merged: totalMerged, skipped: totalSkipped };
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Merge a batch of changes into local SQLite. Returns merged/skipped counts.
|
|
63
|
+
* Embedding is intentionally synchronous-per-change to avoid overwhelming the model.
|
|
64
|
+
*/
|
|
65
|
+
function mergeChanges(
|
|
66
|
+
db: MemDatabase,
|
|
67
|
+
config: Config,
|
|
68
|
+
changes: VectorSearchResult[]
|
|
69
|
+
): { merged: number; skipped: number } {
|
|
70
|
+
let merged = 0;
|
|
71
|
+
let skipped = 0;
|
|
72
|
+
|
|
73
|
+
for (const change of changes) {
|
|
74
|
+
const parsed = parseSourceId(change.source_id);
|
|
75
|
+
|
|
76
|
+
// Skip observations from own device
|
|
77
|
+
if (parsed && parsed.deviceId === config.device_id) {
|
|
78
|
+
skipped++;
|
|
79
|
+
continue;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// Check if already imported (by remote_source_id)
|
|
83
|
+
const existing = db.db
|
|
84
|
+
.query<{ id: number }, [string]>(
|
|
85
|
+
"SELECT id FROM observations WHERE remote_source_id = ?"
|
|
86
|
+
)
|
|
87
|
+
.get(change.source_id);
|
|
88
|
+
|
|
89
|
+
if (existing) {
|
|
90
|
+
skipped++;
|
|
91
|
+
continue;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// Find or create the project
|
|
95
|
+
const projectCanonical =
|
|
96
|
+
(change.metadata?.project_canonical as string) ?? null;
|
|
97
|
+
if (!projectCanonical) {
|
|
98
|
+
skipped++;
|
|
99
|
+
continue;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
let project = db.getProjectByCanonicalId(projectCanonical);
|
|
103
|
+
if (!project) {
|
|
104
|
+
project = db.upsertProject({
|
|
105
|
+
canonical_id: projectCanonical,
|
|
106
|
+
name:
|
|
107
|
+
(change.metadata?.project_name as string) ??
|
|
108
|
+
projectCanonical.split("/").pop() ??
|
|
109
|
+
"unknown",
|
|
110
|
+
});
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Insert the observation
|
|
114
|
+
const obs = db.insertObservation({
|
|
115
|
+
session_id: (change.metadata?.session_id as string) ?? null,
|
|
116
|
+
project_id: project.id,
|
|
117
|
+
type: (change.metadata?.type as string) ?? "discovery",
|
|
118
|
+
title: (change.metadata?.title as string) ?? change.content.split("\n")[0] ?? "Untitled",
|
|
119
|
+
narrative: extractNarrative(change.content),
|
|
120
|
+
facts: change.metadata?.facts
|
|
121
|
+
? JSON.stringify(change.metadata.facts)
|
|
122
|
+
: null,
|
|
123
|
+
concepts: change.metadata?.concepts
|
|
124
|
+
? JSON.stringify(change.metadata.concepts)
|
|
125
|
+
: null,
|
|
126
|
+
quality: (change.metadata?.quality as number) ?? 0.5,
|
|
127
|
+
lifecycle: "active",
|
|
128
|
+
sensitivity: "shared",
|
|
129
|
+
user_id: (change.metadata?.user_id as string) ?? "unknown",
|
|
130
|
+
device_id: (change.metadata?.device_id as string) ?? "unknown",
|
|
131
|
+
agent: (change.metadata?.agent as string) ?? "unknown",
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
// Mark with remote source ID for dedup
|
|
135
|
+
db.db
|
|
136
|
+
.query("UPDATE observations SET remote_source_id = ? WHERE id = ?")
|
|
137
|
+
.run(change.source_id, obs.id);
|
|
138
|
+
|
|
139
|
+
// Embed for local vector search (fire-and-forget — don't block pull loop)
|
|
140
|
+
if (db.vecAvailable) {
|
|
141
|
+
embedAndInsert(db, obs).catch(() => {});
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
merged++;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
return { merged, skipped };
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Embed an observation and insert into vec_observations.
|
|
152
|
+
*/
|
|
153
|
+
async function embedAndInsert(
|
|
154
|
+
db: MemDatabase,
|
|
155
|
+
obs: { id: number; title: string; narrative: string | null; facts: string | null; concepts: string | null }
|
|
156
|
+
): Promise<void> {
|
|
157
|
+
const text = composeEmbeddingText(obs);
|
|
158
|
+
const embedding = await embedText(text);
|
|
159
|
+
if (embedding) db.vecInsert(obs.id, embedding);
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
/**
|
|
163
|
+
* Extract narrative from Vector content (everything after the title line).
|
|
164
|
+
*/
|
|
165
|
+
function extractNarrative(content: string): string | null {
|
|
166
|
+
const lines = content.split("\n");
|
|
167
|
+
if (lines.length <= 1) return null;
|
|
168
|
+
const narrative = lines.slice(1).join("\n").trim();
|
|
169
|
+
return narrative.length > 0 ? narrative : null;
|
|
170
|
+
}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { describe, expect, test, beforeEach, afterEach } from "bun:test";
|
|
2
|
+
import { mkdtempSync, rmSync } from "node:fs";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import { MemDatabase } from "../storage/sqlite.js";
|
|
6
|
+
import type { Config } from "../config.js";
|
|
7
|
+
import { buildVectorDocument } from "./push.js";
|
|
8
|
+
|
|
9
|
+
let db: MemDatabase;
|
|
10
|
+
let tmpDir: string;
|
|
11
|
+
let projectId: number;
|
|
12
|
+
|
|
13
|
+
function makeConfig(): Config {
|
|
14
|
+
return {
|
|
15
|
+
candengo_url: "https://candengo.com",
|
|
16
|
+
candengo_api_key: "cvk_test123",
|
|
17
|
+
site_id: "test-site",
|
|
18
|
+
namespace: "dev-memory",
|
|
19
|
+
user_id: "david",
|
|
20
|
+
device_id: "laptop-abc",
|
|
21
|
+
user_email: "",
|
|
22
|
+
teams: [],
|
|
23
|
+
sync: { enabled: true, interval_seconds: 30, batch_size: 50 },
|
|
24
|
+
search: { default_limit: 10, local_boost: 1.2, scope: "all" },
|
|
25
|
+
scrubbing: { enabled: true, custom_patterns: [], default_sensitivity: "shared" },
|
|
26
|
+
};
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
// Fresh temp dir + DB per test, plus one project every test can attach
// observations to (its id is captured in projectId).
beforeEach(() => {
  tmpDir = mkdtempSync(join(tmpdir(), "candengo-push-test-"));
  db = new MemDatabase(join(tmpDir, "test.db"));
  const project = db.upsertProject({
    canonical_id: "github.com/test/repo",
    name: "repo",
  });
  projectId = project.id;
});

// Close the DB handle before deleting its backing directory.
afterEach(() => {
  db.close();
  rmSync(tmpDir, { recursive: true, force: true });
});
|
|
43
|
+
|
|
44
|
+
describe("buildVectorDocument", () => {
  // Fully-populated observation: document carries site/namespace from
  // config, a source_id of the form <user>-<device>-obs-<id>, content
  // composed of title + narrative + "- fact" bullet lines, and metadata
  // copied from the observation/project.
  test("produces correct structure", () => {
    const obs = db.insertObservation({
      project_id: projectId,
      type: "bugfix",
      title: "Fixed auth bug",
      narrative: "The auth was broken due to a type mismatch",
      facts: '["fact1", "fact2"]',
      concepts: '["auth", "debugging"]',
      quality: 0.8,
      user_id: "david",
      device_id: "laptop-abc",
      agent: "claude-code",
    });

    const doc = buildVectorDocument(obs, makeConfig(), {
      canonical_id: "github.com/test/repo",
      name: "repo",
    });

    expect(doc.site_id).toBe("test-site");
    expect(doc.namespace).toBe("dev-memory");
    expect(doc.source_type).toBe("bugfix");
    expect(doc.source_id).toBe(`david-laptop-abc-obs-${obs.id}`);
    expect(doc.content).toContain("Fixed auth bug");
    expect(doc.content).toContain("The auth was broken");
    expect(doc.content).toContain("- fact1");
    expect(doc.content).toContain("- fact2");
    expect(doc.metadata.project_canonical).toBe("github.com/test/repo");
    expect(doc.metadata.quality).toBe(0.8);
    expect(doc.metadata.user_id).toBe("david");
  });

  // Minimal observation: content degrades to exactly the title when
  // narrative and facts are absent.
  test("handles observation with no narrative or facts", () => {
    const obs = db.insertObservation({
      project_id: projectId,
      type: "change",
      title: "Simple change",
      quality: 0.3,
      user_id: "david",
      device_id: "laptop-abc",
    });

    const doc = buildVectorDocument(obs, makeConfig(), {
      canonical_id: "github.com/test/repo",
      name: "repo",
    });

    expect(doc.content).toBe("Simple change");
    expect(doc.metadata.title).toBe("Simple change");
  });

  // files_modified is stored locally as a JSON string but must surface
  // in document metadata as a parsed string array.
  test("includes files_modified in metadata", () => {
    const obs = db.insertObservation({
      project_id: projectId,
      type: "change",
      title: "Edit file",
      files_modified: '["src/main.ts", "src/utils.ts"]',
      quality: 0.5,
      user_id: "david",
      device_id: "laptop-abc",
    });

    const doc = buildVectorDocument(obs, makeConfig(), {
      canonical_id: "github.com/test/repo",
      name: "repo",
    });

    expect(doc.metadata.files_modified).toEqual([
      "src/main.ts",
      "src/utils.ts",
    ]);
  });
});
|