@producible/cereworker-hippocampus 26.520.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/conversation-extractor.d.ts +23 -0
- package/dist/conversation-extractor.d.ts.map +1 -0
- package/dist/conversation-extractor.js +85 -0
- package/dist/conversation-extractor.js.map +1 -0
- package/dist/conversation-extractor.test.d.ts +2 -0
- package/dist/conversation-extractor.test.d.ts.map +1 -0
- package/dist/conversation-extractor.test.js +105 -0
- package/dist/conversation-extractor.test.js.map +1 -0
- package/dist/curator.d.ts +31 -0
- package/dist/curator.d.ts.map +1 -0
- package/dist/curator.js +219 -0
- package/dist/curator.js.map +1 -0
- package/dist/curator.test.d.ts +2 -0
- package/dist/curator.test.d.ts.map +1 -0
- package/dist/curator.test.js +209 -0
- package/dist/curator.test.js.map +1 -0
- package/dist/finetune-archive.d.ts +47 -0
- package/dist/finetune-archive.d.ts.map +1 -0
- package/dist/finetune-archive.js +203 -0
- package/dist/finetune-archive.js.map +1 -0
- package/dist/finetune-archive.test.d.ts +2 -0
- package/dist/finetune-archive.test.d.ts.map +1 -0
- package/dist/finetune-archive.test.js +74 -0
- package/dist/finetune-archive.test.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -0
- package/dist/store.d.ts +47 -0
- package/dist/store.d.ts.map +1 -0
- package/dist/store.js +277 -0
- package/dist/store.js.map +1 -0
- package/dist/store.test.d.ts +2 -0
- package/dist/store.test.d.ts.map +1 -0
- package/dist/store.test.js +200 -0
- package/dist/store.test.js.map +1 -0
- package/dist/tools.d.ts +37 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +59 -0
- package/dist/tools.js.map +1 -0
- package/dist/tools.test.d.ts +2 -0
- package/dist/tools.test.d.ts.map +1 -0
- package/dist/tools.test.js +85 -0
- package/dist/tools.test.js.map +1 -0
- package/dist/types.d.ts +37 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +32 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
import { describe, it, expect, vi, afterEach } from 'vitest';
|
|
2
|
+
import { mkdtempSync, rmSync, writeFileSync, readFileSync, existsSync, mkdirSync } from 'node:fs';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import { HippocampusStore } from './store.js';
|
|
6
|
+
import { HippocampusCurator } from './curator.js';
|
|
7
|
+
/**
 * Creates an isolated fixture for a single test.
 *
 * A fresh temporary directory backs a new HippocampusStore, and the curator is
 * wired to a stub generator whose `generate` method is a vitest mock.
 *
 * @returns {{ dir: string, store: object, generator: { generate: Function }, curator: object }}
 *   The temp directory path plus the store, stub generator and curator built on it.
 */
function setup() {
  const prefix = join(tmpdir(), 'cereworker-curator-test-');
  const dir = mkdtempSync(prefix);
  const store = new HippocampusStore(dir);
  const generator = { generate: vi.fn() };
  return {
    dir,
    store,
    generator,
    curator: new HippocampusCurator(store, generator),
  };
}
|
|
14
|
+
/**
 * Makes sure the `queue` sub-directory exists under the given fine-tune directory.
 *
 * @param {string} ftDir - Fine-tune root directory; missing parents are created as well.
 * @returns {string} Path of the `queue` directory.
 */
function ensureQueueDir(ftDir) {
  const target = join(ftDir, 'queue');
  // `recursive` makes the call a no-op when the directory is already there.
  mkdirSync(target, { recursive: true });
  return target;
}
|
|
19
|
+
describe('HippocampusCurator', () => {
  // File the tests seed with content before calling curate().
  const MEMORY_FILE = 'MEMORY.md';
  // Queue file (under <finetuneDir>/queue) the tests read and pre-seed with pending pairs.
  const QUEUE_FILE = 'curated-memory.jsonl';

  // Temp directory of the test currently running; removed in afterEach.
  let dir;

  /** Builds a fresh fixture and remembers its temp directory for cleanup. */
  function start() {
    const ctx = setup();
    dir = ctx.dir;
    return ctx;
  }

  /** Writes `text` into the memory file of the current fixture. */
  function seedMemory(text) {
    writeFileSync(join(dir, MEMORY_FILE), text, 'utf-8');
  }

  /** Current UTC calendar day as YYYY-MM-DD. */
  function utcDay() {
    return new Date().toISOString().slice(0, 10);
  }

  afterEach(() => {
    // If setup() threw before `dir` was assigned there is nothing to remove, and
    // rmSync(undefined) would raise its own error and hide the real failure.
    if (dir === undefined) {
      return;
    }
    const target = dir;
    dir = undefined;
    rmSync(target, { recursive: true, force: true });
  });

  describe('curate', () => {
    it('returns empty when no uncurated content', async () => {
      const ctx = start();
      const result = await ctx.curator.curate();
      expect(result.pairs).toHaveLength(0);
      expect(result.skipped).toBe(0);
      expect(result.errors).toHaveLength(0);
    });

    it('sends content to generator and parses JSON response', async () => {
      const ctx = start();
      seedMemory('User prefers dark mode');
      const pairs = [
        {
          instruction: 'What theme does the user prefer?',
          response: 'The user prefers dark mode.',
          source: 'MEMORY.md',
        },
      ];
      ctx.generator.generate.mockResolvedValue(JSON.stringify(pairs));
      const result = await ctx.curator.curate();
      expect(result.pairs).toHaveLength(1);
      expect(result.pairs[0].instruction).toBe('What theme does the user prefer?');
      expect(result.pairs[0].response).toBe('The user prefers dark mode.');
      expect(ctx.generator.generate).toHaveBeenCalledOnce();
    });

    it('handles generator failure gracefully', async () => {
      const ctx = start();
      seedMemory('some content');
      ctx.generator.generate.mockRejectedValue(new Error('LLM down'));
      const result = await ctx.curator.curate();
      expect(result.pairs).toHaveLength(0);
      expect(result.errors).toContain('Cerebrum call failed: LLM down');
    });

    it('handles non-array response', async () => {
      const ctx = start();
      seedMemory('content');
      // Valid JSON, but a string rather than an array of pairs.
      ctx.generator.generate.mockResolvedValue('"not an array"');
      const result = await ctx.curator.curate();
      expect(result.pairs).toHaveLength(0);
      expect(result.errors.some((e) => e.includes('not an array'))).toBe(true);
    });

    it('handles markdown code blocks in response', async () => {
      const ctx = start();
      seedMemory('info');
      const json = JSON.stringify([
        {
          instruction: 'What frontend framework is used in this project?',
          response: 'The project uses React with Ink for TUI rendering.',
          source: 'x',
        },
      ]);
      ctx.generator.generate.mockResolvedValue('```json\n' + json + '\n```');
      const result = await ctx.curator.curate();
      expect(result.pairs).toHaveLength(1);
    });

    it('filters entries missing instruction or response', async () => {
      const ctx = start();
      seedMemory('content');
      const data = [
        {
          instruction: 'What is the preferred theme for this project?',
          response: 'The user prefers dark mode for all editors.',
          source: 'x',
        },
        { instruction: 'no response at all for this' },
        { response: 'no instruction provided here' },
        {},
      ];
      ctx.generator.generate.mockResolvedValue(JSON.stringify(data));
      const result = await ctx.curator.curate();
      expect(result.pairs).toHaveLength(1);
      expect(result.pairs[0].instruction).toBe('What is the preferred theme for this project?');
    });

    it('filters entries with short instruction or response', async () => {
      const ctx = start();
      seedMemory('content');
      const data = [
        { instruction: 'short', response: 'also short', source: 'x' },
        { instruction: 'a bit longer but still short', response: 'tiny', source: 'x' },
        {
          instruction: 'What is the deploy process for production?',
          response: 'Run deploy.sh from the project root directory.',
          source: 'x',
        },
      ];
      ctx.generator.generate.mockResolvedValue(JSON.stringify(data));
      const result = await ctx.curator.curate();
      expect(result.pairs).toHaveLength(1);
      expect(result.pairs[0].instruction).toBe('What is the deploy process for production?');
    });

    it('saves pairs to pending.jsonl', async () => {
      const ctx = start();
      seedMemory('content');
      const pairs = [
        {
          instruction: 'What database engine is used in the project?',
          response: 'SQLite via the node:sqlite built-in module.',
          source: 'x',
        },
      ];
      ctx.generator.generate.mockResolvedValue(JSON.stringify(pairs));
      await ctx.curator.curate();
      // The pending pairs are checked in the queue file, not a file literally named pending.jsonl.
      const pendingPath = join(ctx.store.finetuneDir, 'queue', QUEUE_FILE);
      expect(existsSync(pendingPath)).toBe(true);
      const content = readFileSync(pendingPath, 'utf-8');
      expect(content).toContain('What database engine is used in the project?');
    });

    it('updates curated marker', async () => {
      const ctx = start();
      seedMemory('content');
      ctx.generator.generate.mockResolvedValue('[]');
      await ctx.curator.curate();
      const marker = readFileSync(join(dir, '.curated-marker'), 'utf-8').trim();
      expect(marker).toMatch(/^\d{4}-\d{2}-\d{2}$/);
    });
  });

  describe('getPendingPairs', () => {
    it('returns empty when no pending file', () => {
      const ctx = start();
      expect(ctx.curator.getPendingPairs()).toEqual([]);
    });

    it('parses JSONL lines', () => {
      const ctx = start();
      const queueDir = ensureQueueDir(ctx.store.finetuneDir);
      const lines = [
        JSON.stringify({ instruction: 'q1', response: 'a1', source: 's1', createdAt: 1 }),
        JSON.stringify({ instruction: 'q2', response: 'a2', source: 's2', createdAt: 2 }),
      ].join('\n');
      writeFileSync(join(queueDir, QUEUE_FILE), lines + '\n', 'utf-8');
      const pairs = ctx.curator.getPendingPairs();
      expect(pairs).toHaveLength(2);
      expect(pairs[0].instruction).toBe('q1');
      expect(pairs[1].instruction).toBe('q2');
    });

    it('skips malformed lines', () => {
      const ctx = start();
      const queueDir = ensureQueueDir(ctx.store.finetuneDir);
      const lines = [
        JSON.stringify({ instruction: 'q1', response: 'a1', source: 's1', createdAt: 1 }),
        'not valid json {{{',
      ].join('\n');
      writeFileSync(join(queueDir, QUEUE_FILE), lines + '\n', 'utf-8');
      const pairs = ctx.curator.getPendingPairs();
      expect(pairs).toHaveLength(1);
    });
  });

  describe('markConsumed', () => {
    it('moves pending to consumed dir and clears pending', () => {
      const ctx = start();
      const ftDir = ctx.store.finetuneDir;
      const queueDir = ensureQueueDir(ftDir);
      writeFileSync(join(queueDir, QUEUE_FILE), '{"a":1}\n', 'utf-8');
      // Sample the day on both sides of the call so a run that straddles UTC
      // midnight still finds the archived file under one of the two names.
      const dayBefore = utcDay();
      ctx.curator.markConsumed();
      const dayAfter = utcDay();
      // Pending file should be empty
      expect(readFileSync(join(ftDir, 'queue', QUEUE_FILE), 'utf-8')).toBe('');
      // Consumed dir should exist with today's file
      const archived = [dayBefore, dayAfter].some((day) =>
        existsSync(join(ftDir, 'consumed', `curated-memory-${day}.jsonl`)),
      );
      expect(archived).toBe(true);
    });

    it('is a no-op when no pending file exists', () => {
      const ctx = start();
      // Should not throw
      expect(() => ctx.curator.markConsumed()).not.toThrow();
    });
  });

  describe('deduplication', () => {
    it('does not append duplicate pairs to pending.jsonl', async () => {
      const ctx = start();
      const queueDir = ensureQueueDir(ctx.store.finetuneDir);
      // Pre-seed pending with an existing pair
      const existing = JSON.stringify({
        instruction: 'What database engine is used in the project?',
        response: 'SQLite via the node:sqlite built-in module.',
        source: 'MEMORY.md',
        createdAt: 1,
      });
      writeFileSync(join(queueDir, QUEUE_FILE), existing + '\n', 'utf-8');
      // Curate returns same instruction + a new one
      seedMemory('content');
      const data = [
        {
          instruction: 'What database engine is used in the project?',
          response: 'A different answer for testing.',
          source: 'x',
        },
        {
          instruction: 'What frontend framework is used in the project?',
          response: 'React with Ink for TUI rendering in the terminal.',
          source: 'x',
        },
      ];
      ctx.generator.generate.mockResolvedValue(JSON.stringify(data));
      await ctx.curator.curate();
      const pairs = ctx.curator.getPendingPairs();
      // Should have original + 1 new, not the duplicate
      expect(pairs).toHaveLength(2);
      expect(pairs[0].instruction).toBe('What database engine is used in the project?');
      expect(pairs[1].instruction).toBe('What frontend framework is used in the project?');
    });
  });
});
|
|
209
|
+
//# sourceMappingURL=curator.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"curator.test.js","sourceRoot":"","sources":["../src/curator.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAC7D,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,aAAa,EAAE,YAAY,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAClG,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAC9C,OAAO,EAAE,kBAAkB,EAAsB,MAAM,cAAc,CAAC;AAEtE,SAAS,KAAK;IACZ,MAAM,GAAG,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,0BAA0B,CAAC,CAAC,CAAC;IACpE,MAAM,KAAK,GAAG,IAAI,gBAAgB,CAAC,GAAG,CAAC,CAAC;IACxC,MAAM,SAAS,GAAkB,EAAE,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC;IACvD,MAAM,OAAO,GAAG,IAAI,kBAAkB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IACzD,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC;AAC5C,CAAC;AAED,SAAS,cAAc,CAAC,KAAa;IACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IACtC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzC,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IAClC,IAAI,GAAW,CAAC;IAEhB,SAAS,CAAC,GAAG,EAAE;QACb,MAAM,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,QAAQ,EAAE,GAAG,EAAE;QACtB,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;YACvD,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YAC1C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAC/B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACxC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;YACnE,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,aAAa,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,wBAAwB,EAAE,OAAO,CAAC,CAAC;YAEzE,MAAM,KAAK,GAAG,CAAC,EAAE,WAAW,EAAE,kCAAkC,EAAE,QAAQ,EAAE,6BAA6B,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC,CAAC;YACjI,GAAG,CAAC,SAAS,CAAC,QAAqC,CAAC,iBAAiB,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;YAE9F,MAAM,MAAM,GAAG,MAAM
,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YAC1C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;YAC7E,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;YACrE,MAAM,CAAC,GAAG,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,oBAAoB,EAAE,CAAC;QACxD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;YACpD,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,aAAa,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,cAAc,EAAE,OAAO,CAAC,CAAC;YAC9D,GAAG,CAAC,SAAS,CAAC,QAAqC,CAAC,iBAAiB,CAAC,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC;YAE9F,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YAC1C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,gCAAgC,CAAC,CAAC;QACpE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;YAC1C,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,aAAa,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YACzD,GAAG,CAAC,SAAS,CAAC,QAAqC,CAAC,iBAAiB,CAAC,gBAAgB,CAAC,CAAC;YAEzF,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YAC1C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;YACxD,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,aAAa,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;YACvD,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,WAAW,EAAE,kDAAkD,EAAE,QAAQ,EAAE,oDAAoD,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;YAC/K,GAAG,CAAC,SAAS,CAAC,QAAqC,CAAC,iBAAiB,CAAC,WAAW,GAAG,IAAI,GAAG,OAAO,CAAC,CAAC;YAErG,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YAC1C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,iDAAiD,EAAE,KAAK,IAAI,EA
AE;YAC/D,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,aAAa,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YAC1D,MAAM,IAAI,GAAG;gBACX,EAAE,WAAW,EAAE,+CAA+C,EAAE,QAAQ,EAAE,6CAA6C,EAAE,MAAM,EAAE,GAAG,EAAE;gBACtI,EAAE,WAAW,EAAE,6BAA6B,EAAE;gBAC9C,EAAE,QAAQ,EAAE,8BAA8B,EAAE;gBAC5C,EAAE;aACH,CAAC;YACD,GAAG,CAAC,SAAS,CAAC,QAAqC,CAAC,iBAAiB,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;YAE7F,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YAC1C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;QAC5F,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;YAClE,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,aAAa,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YAC1D,MAAM,IAAI,GAAG;gBACX,EAAE,WAAW,EAAE,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,EAAE,GAAG,EAAE;gBAC7D,EAAE,WAAW,EAAE,8BAA8B,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE;gBAC9E,EAAE,WAAW,EAAE,4CAA4C,EAAE,QAAQ,EAAE,gDAAgD,EAAE,MAAM,EAAE,GAAG,EAAE;aACvI,CAAC;YACD,GAAG,CAAC,SAAS,CAAC,QAAqC,CAAC,iBAAiB,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;YAE7F,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YAC1C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;QACzF,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8BAA8B,EAAE,KAAK,IAAI,EAAE;YAC5C,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,aAAa,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YAC1D,MAAM,KAAK,GAAG,CAAC,EAAE,WAAW,EAAE,8CAA8C,EAAE,QAAQ,EAAE,6CAA6C,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;YACrJ,GAAG,CAAC,SAAS,CAAC,QAAqC,CAAC,iBAAiB,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;YAE9F,MAAM,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YAC3B,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,OAAO,EAAE,sBAAsB,CAAC,CAAC;YACjF,MAAM,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,I
AAI,CAAC,IAAI,CAAC,CAAC;YAC3C,MAAM,OAAO,GAAG,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;YACnD,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,8CAA8C,CAAC,CAAC;QAC5E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wBAAwB,EAAE,KAAK,IAAI,EAAE;YACtC,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,aAAa,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YACzD,GAAG,CAAC,SAAS,CAAC,QAAqC,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;YAE7E,MAAM,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YAC3B,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,iBAAiB,CAAC,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;YAC1E,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,qBAAqB,CAAC,CAAC;QAChD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;QAC/B,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC5C,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,MAAM,CAAC,GAAG,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACpD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oBAAoB,EAAE,GAAG,EAAE;YAC5B,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC;YACpC,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YACvC,MAAM,KAAK,GAAG;gBACZ,IAAI,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;gBACjF,IAAI,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;aAClF,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACb,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,sBAAsB,CAAC,EAAE,KAAK,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;YAE7E,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YAC5C,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC9B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uBAAuB,EAAE,GAAG,EAAE;YAC/B,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC;YACpC,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YACvC,MAAM,KAAK
,GAAG;gBACZ,IAAI,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;gBACjF,oBAAoB;aACrB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACb,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,sBAAsB,CAAC,EAAE,KAAK,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;YAE7E,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YAC5C,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAChC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;QAC5B,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;YAC1D,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC;YACpC,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YACvC,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,sBAAsB,CAAC,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;YAE5E,GAAG,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC;YAE3B,+BAA+B;YAC/B,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,EAAE,OAAO,EAAE,sBAAsB,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACrF,8CAA8C;YAC9C,MAAM,KAAK,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACpD,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,EAAE,UAAU,EAAE,kBAAkB,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1F,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;YAChD,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,mBAAmB;YACnB,MAAM,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,GAAG,CAAC,OAAO,EAAE,CAAC;QACzD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;QAC7B,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;YAChE,MAAM,GAAG,GAAG,KAAK,EAAE,CAAC;YACpB,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC;YACd,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC;YACpC,MAAM,QAAQ,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YAEvC,yCAAyC;YACzC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC;gBAC9B,WAAW,EAAE,8CAA8C;gBAC3D,QAAQ,EAAE,6CAA6C;gBACvD,MAAM,EAAE,WAAW;gBACnB,SAAS,EAAE,CAAC;aACb,CAAC,CAAC;YACH,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,sBAAsB,CAAC,EAAE,QAAQ,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;YAEhF,8CAA8C;YAC9C,aAAa,CAAC,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YAC1D,MAAM,IA
AI,GAAG;gBACX,EAAE,WAAW,EAAE,8CAA8C,EAAE,QAAQ,EAAE,iCAAiC,EAAE,MAAM,EAAE,GAAG,EAAE;gBACzH,EAAE,WAAW,EAAE,iDAAiD,EAAE,QAAQ,EAAE,mDAAmD,EAAE,MAAM,EAAE,GAAG,EAAE;aAC/I,CAAC;YACD,GAAG,CAAC,SAAS,CAAC,QAAqC,CAAC,iBAAiB,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC;YAE7F,MAAM,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;YAC3B,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YAC5C,kDAAkD;YAClD,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC9B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;YAClF,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACvF,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { TrainingPair } from './types.js';
|
|
2
|
+
/** Names of the queues that training pairs can be enqueued under. */
export type FineTuneQueueSource = 'discovery' | 'conversations' | 'curated-memory';
|
|
3
|
+
/** Snapshot of the pairs currently waiting in the fine-tune queue, as returned by getQueuedBatch(). */
export interface FineTuneQueuedBatch {
|
|
4
|
+
/** Queued pairs from every source, flattened into one list. */
pairs: TrainingPair[];
|
|
5
|
+
/** The same pairs grouped under the queue source they were read from. */
bySource: Record<FineTuneQueueSource, TrainingPair[]>;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Metadata record for one fine-tuning round; this is the value returned by
 * FineTuneArchiveStore.createRound() and updateRoundStatus().
 */
export interface FineTuneRoundManifest {
|
|
8
|
+
roundId: string;
|
|
9
|
+
jobId: string;
|
|
10
|
+
instanceId?: string;
|
|
11
|
+
/** Lifecycle state of the round; one of the fields updateRoundStatus() may patch. */
status: 'running' | 'completed' | 'failed';
|
|
12
|
+
// Timestamps are plain strings; NOTE(review): presumably ISO-8601 — confirm against createRound().
createdAt: string;
|
|
13
|
+
startedAt: string;
|
|
14
|
+
completedAt?: string;
|
|
15
|
+
requestedMethod?: string;
|
|
16
|
+
activeCheckpointBefore?: string | null;
|
|
17
|
+
totalPairs: number;
|
|
18
|
+
/** One numeric count per queue source. */
sourceCounts: Record<FineTuneQueueSource, number>;
|
|
19
|
+
exampleClassCounts?: Record<string, number>;
|
|
20
|
+
checkpointPath?: string;
|
|
21
|
+
loss?: number;
|
|
22
|
+
error?: string;
|
|
23
|
+
}
|
|
24
|
+
/**
 * Directory-backed store for queued training pairs and fine-tune round manifests.
 */
export declare class FineTuneArchiveStore {
|
|
25
|
+
private readonly rootDir;
|
|
26
|
+
private readonly queueDir;
|
|
27
|
+
private readonly roundsDir;
|
|
28
|
+
private readonly stateDir;
|
|
29
|
+
/** @param directory Root directory of the archive. */
constructor(directory: string);
|
|
30
|
+
get directory(): string;
|
|
31
|
+
getConversationExtractorStatePath(): string;
|
|
32
|
+
/**
 * Adds pairs to the queue of the given source.
 * @returns `added` — how many pairs were added; `total` — pairs queued across all sources.
 */
enqueue(source: FineTuneQueueSource, pairs: TrainingPair[]): {
|
|
33
|
+
added: number;
|
|
34
|
+
total: number;
|
|
35
|
+
};
|
|
36
|
+
/** Reads the pairs currently queued for every source. */
getQueuedBatch(): FineTuneQueuedBatch;
|
|
37
|
+
/** Removes the pairs contained in `batch` from the queue. */
clearBatch(batch: FineTuneQueuedBatch): void;
|
|
38
|
+
createRound(batch: FineTuneQueuedBatch, options: {
|
|
39
|
+
jobId: string;
|
|
40
|
+
requestedMethod?: string;
|
|
41
|
+
instanceId?: string;
|
|
42
|
+
activeCheckpointBefore?: string | null;
|
|
43
|
+
}): FineTuneRoundManifest;
|
|
44
|
+
/** Applies `patch` (status, completion time, checkpoint path, loss, error) to a round and returns its manifest. */
updateRoundStatus(roundId: string, patch: Partial<Pick<FineTuneRoundManifest, 'status' | 'completedAt' | 'checkpointPath' | 'loss' | 'error'>>): FineTuneRoundManifest;
|
|
45
|
+
private getQueuePath;
|
|
46
|
+
}
|
|
47
|
+
//# sourceMappingURL=finetune-archive.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"finetune-archive.d.ts","sourceRoot":"","sources":["../src/finetune-archive.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE/C,MAAM,MAAM,mBAAmB,GAAG,WAAW,GAAG,eAAe,GAAG,gBAAgB,CAAC;AAEnF,MAAM,WAAW,mBAAmB;IAClC,KAAK,EAAE,YAAY,EAAE,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC,mBAAmB,EAAE,YAAY,EAAE,CAAC,CAAC;CACvD;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,SAAS,GAAG,WAAW,GAAG,QAAQ,CAAC;IAC3C,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,sBAAsB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC,mBAAmB,EAAE,MAAM,CAAC,CAAC;IAClD,kBAAkB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC5C,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AA2DD,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;gBAEtB,SAAS,EAAE,MAAM;IAY7B,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,iCAAiC,IAAI,MAAM;IAI3C,OAAO,CAAC,MAAM,EAAE,mBAAmB,EAAE,KAAK,EAAE,YAAY,EAAE,GAAG;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE;IAkC7F,cAAc,IAAI,mBAAmB;IAarC,UAAU,CAAC,KAAK,EAAE,mBAAmB,GAAG,IAAI;IA6B5C,WAAW,CACT,KAAK,EAAE,mBAAmB,EAC1B,OAAO,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,eAAe,CAAC,EAAE,MAAM,CAAC;QAAC,UAAU,CAAC,EAAE,MAAM,CAAC;QAAC,sBAAsB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,GAChH,qBAAqB;IAuCxB,iBAAiB,CACf,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,OAAO,CAAC,IAAI,CAAC,qBAAqB,EAAE,QAAQ,GAAG,aAAa,GAAG,gBAAgB,GAAG,MAAM,GAAG,OAAO,CAAC,CAAC,GAC1G,qBAAqB;IAgBxB,OAAO,CAAC,YAAY;CAGrB"}
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import { randomUUID } from 'node:crypto';
|
|
2
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
3
|
+
import { join, resolve } from 'node:path';
|
|
4
|
+
import { homedir } from 'node:os';
|
|
5
|
+
import { ensureDir, readJsonFile, withTextStoreLock, writeJsonFileAtomic, writeTextFileAtomic, } from '@producible/cereworker-core';
|
|
6
|
+
// Order in which queue sources are visited: it fixes the order of the flattened
// `pairs` list in getQueuedBatch() and the iteration order in clearBatch().
// Note it differs from the key order of SOURCE_FILES below.
const SOURCE_ORDER = ['discovery', 'curated-memory', 'conversations'];
|
|
7
|
+
// JSONL file name for each queue source.
// NOTE(review): presumably resolved against the queue directory by getQueuePath() — confirm.
const SOURCE_FILES = {
|
|
8
|
+
discovery: 'discovery.jsonl',
|
|
9
|
+
conversations: 'conversations.jsonl',
|
|
10
|
+
'curated-memory': 'curated-memory.jsonl',
|
|
11
|
+
};
|
|
12
|
+
/**
 * Expands a leading `~` in a path to the current user's home directory.
 *
 * Only a bare `~` or a `~/...` prefix is expanded; `~user/...` and a `~` that
 * appears later in the path are left untouched.
 *
 * @param {string} path - Path that may start with `~`.
 * @returns {string} The path with the leading `~` replaced by the home directory.
 */
function expandHome(path) {
  // A replacer function inserts the home directory literally. With a string
  // replacement, sequences such as `$&` or `$1` inside the home path would be
  // interpreted as substitution patterns and corrupt the result.
  return path.replace(/^~(?=\/|$)/, () => homedir());
}
|
|
15
|
+
/**
 * Serializes a training pair to one JSON string with a fixed set and order of fields.
 *
 * Only the seven fields listed below are emitted; any other property on `pair`
 * is dropped, and fields whose value is `undefined` are omitted by JSON.stringify.
 *
 * @param {object} pair - Training pair to serialize.
 * @returns {string} JSON text for the pair (no trailing newline).
 */
function serializePair(pair) {
  const {
    instruction,
    response,
    source,
    createdAt,
    instanceId,
    sessionId,
    exampleClass,
  } = pair;
  // Key order here is the key order of the serialized output.
  const record = {
    instruction,
    response,
    source,
    createdAt,
    instanceId,
    sessionId,
    exampleClass,
  };
  return JSON.stringify(record);
}
|
|
26
|
+
/**
 * Builds the key used to decide whether two training pairs are duplicates.
 *
 * The key joins instruction, response, source, instanceId, sessionId and
 * exampleClass with a NUL separator; `createdAt` is not part of it. Missing
 * fields contribute an empty segment.
 *
 * @param {object} pair - Training pair to fingerprint.
 * @returns {string} NUL-separated identity key.
 */
function pairIdentity(pair) {
  const fields = ['instruction', 'response', 'source', 'instanceId', 'sessionId', 'exampleClass'];
  const segments = fields.map((field) => pair[field] ?? '');
  return segments.join('\u0000');
}
|
|
36
|
+
/**
 * Reads a JSONL queue file and returns the training pairs it contains.
 *
 * Reading is best-effort: a missing or empty file yields an empty list, and any
 * line that is not valid JSON is skipped. Lines that parse to something other
 * than a plain object (numbers, strings, `null`, arrays) are skipped too, since
 * they cannot be pairs and would otherwise be written back as empty records.
 *
 * @param {string} path - Path of the JSONL file.
 * @returns {object[]} Parsed pairs in file order.
 */
function readPairs(path) {
  if (!existsSync(path)) {
    return [];
  }
  const content = readFileSync(path, 'utf-8').trim();
  if (!content) {
    return [];
  }
  const pairs = [];
  for (const line of content.split('\n')) {
    let parsed;
    try {
      parsed = JSON.parse(line);
    }
    catch {
      // Malformed line: skip it and keep reading the rest of the file.
      continue;
    }
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      pairs.push(parsed);
    }
  }
  return pairs;
}
|
|
54
|
+
/**
 * Writes the given pairs to a JSONL file, replacing its previous content.
 *
 * Each pair becomes one newline-terminated line; an empty list produces an
 * empty file. The write goes through writeTextFileAtomic.
 *
 * @param {string} path - Destination file.
 * @param {object[]} pairs - Pairs to persist, in order.
 */
function writePairs(path, pairs) {
  const lines = pairs.map((pair) => `${serializePair(pair)}\n`);
  writeTextFileAtomic(path, lines.join(''));
}
|
|
60
|
+
export class FineTuneArchiveStore {
|
|
61
|
+
rootDir;
|
|
62
|
+
queueDir;
|
|
63
|
+
roundsDir;
|
|
64
|
+
stateDir;
|
|
65
|
+
constructor(directory) {
|
|
66
|
+
this.rootDir = resolve(expandHome(directory));
|
|
67
|
+
this.queueDir = join(this.rootDir, 'queue');
|
|
68
|
+
this.roundsDir = join(this.rootDir, 'rounds');
|
|
69
|
+
this.stateDir = join(this.rootDir, 'state');
|
|
70
|
+
ensureDir(this.rootDir);
|
|
71
|
+
ensureDir(this.queueDir);
|
|
72
|
+
ensureDir(this.roundsDir);
|
|
73
|
+
ensureDir(this.stateDir);
|
|
74
|
+
}
|
|
75
|
+
/** Resolved root directory of the archive (the constructor argument after `~` expansion and path resolution). */
get directory() {
|
|
76
|
+
return this.rootDir;
|
|
77
|
+
}
|
|
78
|
+
/**
 * Returns the path of `conversation-extractor.json` inside the archive's state directory.
 * Only the path is built here; the file itself is not read or created.
 */
getConversationExtractorStatePath() {
|
|
79
|
+
return join(this.stateDir, 'conversation-extractor.json');
|
|
80
|
+
}
|
|
81
|
+
enqueue(source, pairs) {
|
|
82
|
+
if (pairs.length === 0) {
|
|
83
|
+
return { added: 0, total: this.getQueuedBatch().pairs.length };
|
|
84
|
+
}
|
|
85
|
+
const path = this.getQueuePath(source);
|
|
86
|
+
return withTextStoreLock(this.queueDir, () => {
|
|
87
|
+
const existing = readPairs(path);
|
|
88
|
+
const seen = new Set(existing.map((pair) => pairIdentity(pair)));
|
|
89
|
+
const unique = pairs.filter((pair) => {
|
|
90
|
+
const key = pairIdentity(pair);
|
|
91
|
+
if (seen.has(key))
|
|
92
|
+
return false;
|
|
93
|
+
seen.add(key);
|
|
94
|
+
return true;
|
|
95
|
+
});
|
|
96
|
+
const next = unique.length > 0 ? [...existing, ...unique] : existing;
|
|
97
|
+
if (unique.length > 0) {
|
|
98
|
+
writePairs(path, next);
|
|
99
|
+
}
|
|
100
|
+
const sourceCounts = {
|
|
101
|
+
discovery: source === 'discovery' ? next.length : readPairs(this.getQueuePath('discovery')).length,
|
|
102
|
+
conversations: source === 'conversations' ? next.length : readPairs(this.getQueuePath('conversations')).length,
|
|
103
|
+
'curated-memory': source === 'curated-memory' ? next.length : readPairs(this.getQueuePath('curated-memory')).length,
|
|
104
|
+
};
|
|
105
|
+
return {
|
|
106
|
+
added: unique.length,
|
|
107
|
+
total: SOURCE_ORDER.reduce((sum, key) => sum + sourceCounts[key], 0),
|
|
108
|
+
};
|
|
109
|
+
});
|
|
110
|
+
}
|
|
111
|
+
getQueuedBatch() {
|
|
112
|
+
const bySource = {
|
|
113
|
+
discovery: readPairs(this.getQueuePath('discovery')),
|
|
114
|
+
conversations: readPairs(this.getQueuePath('conversations')),
|
|
115
|
+
'curated-memory': readPairs(this.getQueuePath('curated-memory')),
|
|
116
|
+
};
|
|
117
|
+
return {
|
|
118
|
+
pairs: SOURCE_ORDER.flatMap((source) => bySource[source]),
|
|
119
|
+
bySource,
|
|
120
|
+
};
|
|
121
|
+
}
|
|
122
|
+
clearBatch(batch) {
|
|
123
|
+
withTextStoreLock(this.queueDir, () => {
|
|
124
|
+
for (const source of SOURCE_ORDER) {
|
|
125
|
+
const path = this.getQueuePath(source);
|
|
126
|
+
const existing = readPairs(path);
|
|
127
|
+
if (existing.length === 0)
|
|
128
|
+
continue;
|
|
129
|
+
const removals = new Map();
|
|
130
|
+
for (const pair of batch.bySource[source]) {
|
|
131
|
+
const key = serializePair(pair);
|
|
132
|
+
removals.set(key, (removals.get(key) ?? 0) + 1);
|
|
133
|
+
}
|
|
134
|
+
const remaining = [];
|
|
135
|
+
for (const pair of existing) {
|
|
136
|
+
const key = serializePair(pair);
|
|
137
|
+
const count = removals.get(key) ?? 0;
|
|
138
|
+
if (count > 0) {
|
|
139
|
+
removals.set(key, count - 1);
|
|
140
|
+
continue;
|
|
141
|
+
}
|
|
142
|
+
remaining.push(pair);
|
|
143
|
+
}
|
|
144
|
+
writePairs(path, remaining);
|
|
145
|
+
}
|
|
146
|
+
});
|
|
147
|
+
}
|
|
148
|
+
createRound(batch, options) {
|
|
149
|
+
const roundId = options.jobId || `round-${randomUUID()}`;
|
|
150
|
+
const roundDir = join(this.roundsDir, roundId);
|
|
151
|
+
return withTextStoreLock(roundDir, () => {
|
|
152
|
+
const sourcesDir = join(roundDir, 'sources');
|
|
153
|
+
ensureDir(sourcesDir);
|
|
154
|
+
const exampleClassCounts = batch.pairs.reduce((counts, pair) => {
|
|
155
|
+
const key = pair.exampleClass ?? 'unspecified';
|
|
156
|
+
counts[key] = (counts[key] ?? 0) + 1;
|
|
157
|
+
return counts;
|
|
158
|
+
}, {});
|
|
159
|
+
const manifest = {
|
|
160
|
+
roundId,
|
|
161
|
+
jobId: options.jobId,
|
|
162
|
+
instanceId: options.instanceId,
|
|
163
|
+
status: 'running',
|
|
164
|
+
createdAt: new Date().toISOString(),
|
|
165
|
+
startedAt: new Date().toISOString(),
|
|
166
|
+
requestedMethod: options.requestedMethod,
|
|
167
|
+
activeCheckpointBefore: options.activeCheckpointBefore,
|
|
168
|
+
totalPairs: batch.pairs.length,
|
|
169
|
+
sourceCounts: {
|
|
170
|
+
discovery: batch.bySource.discovery.length,
|
|
171
|
+
conversations: batch.bySource.conversations.length,
|
|
172
|
+
'curated-memory': batch.bySource['curated-memory'].length,
|
|
173
|
+
},
|
|
174
|
+
exampleClassCounts,
|
|
175
|
+
};
|
|
176
|
+
writePairs(join(roundDir, 'training.jsonl'), batch.pairs);
|
|
177
|
+
for (const source of SOURCE_ORDER) {
|
|
178
|
+
writePairs(join(sourcesDir, SOURCE_FILES[source]), batch.bySource[source]);
|
|
179
|
+
}
|
|
180
|
+
writeJsonFileAtomic(join(roundDir, 'manifest.json'), manifest);
|
|
181
|
+
return manifest;
|
|
182
|
+
});
|
|
183
|
+
}
|
|
184
|
+
updateRoundStatus(roundId, patch) {
|
|
185
|
+
const manifestPath = join(this.roundsDir, roundId, 'manifest.json');
|
|
186
|
+
return withTextStoreLock(join(this.roundsDir, roundId), () => {
|
|
187
|
+
const manifest = readJsonFile(manifestPath, null);
|
|
188
|
+
if (!manifest) {
|
|
189
|
+
throw new Error(`Fine-tune round not found: ${roundId}`);
|
|
190
|
+
}
|
|
191
|
+
const updated = {
|
|
192
|
+
...manifest,
|
|
193
|
+
...patch,
|
|
194
|
+
};
|
|
195
|
+
writeJsonFileAtomic(manifestPath, updated);
|
|
196
|
+
return updated;
|
|
197
|
+
});
|
|
198
|
+
}
|
|
199
|
+
getQueuePath(source) {
|
|
200
|
+
return join(this.queueDir, SOURCE_FILES[source]);
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
//# sourceMappingURL=finetune-archive.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"finetune-archive.js","sourceRoot":"","sources":["../src/finetune-archive.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EACL,SAAS,EACT,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,mBAAmB,GACpB,MAAM,6BAA6B,CAAC;AA4BrC,MAAM,YAAY,GAA0B,CAAC,WAAW,EAAE,gBAAgB,EAAE,eAAe,CAAC,CAAC;AAC7F,MAAM,YAAY,GAAwC;IACxD,SAAS,EAAE,iBAAiB;IAC5B,aAAa,EAAE,qBAAqB;IACpC,gBAAgB,EAAE,sBAAsB;CACzC,CAAC;AAEF,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,OAAO,EAAE,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,aAAa,CAAC,IAAkB;IACvC,OAAO,IAAI,CAAC,SAAS,CAAC;QACpB,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,UAAU,EAAE,IAAI,CAAC,UAAU;QAC3B,SAAS,EAAE,IAAI,CAAC,SAAS;QACzB,YAAY,EAAE,IAAI,CAAC,YAAY;KAChC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,YAAY,CAAC,IAAkB;IACtC,OAAO;QACL,IAAI,CAAC,WAAW;QAChB,IAAI,CAAC,QAAQ;QACb,IAAI,CAAC,MAAM;QACX,IAAI,CAAC,UAAU,IAAI,EAAE;QACrB,IAAI,CAAC,SAAS,IAAI,EAAE;QACpB,IAAI,CAAC,YAAY,IAAI,EAAE;KACxB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AACnB,CAAC;AAED,SAAS,SAAS,CAAC,IAAY;IAC7B,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC;IACjC,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IACnD,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IACxB,OAAO,OAAO;SACX,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,IAAY,EAAE,EAAE;QACpB,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAiB,CAAC;QAC1C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC,CAAC;SACD,MAAM,CAAC,CAAC,IAAyB,EAAwB,EAAE,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;AAChF,CAAC;AAED,SAAS,UAAU,CAAC,IAAY,EAAE,KAAqB;IACrD,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC;QAC9B,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI;QAC5D,CAAC,CAAC,EAAE,CAAC;IACP,mBAAmB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AACrC,CAAC;AAED,MAAM,OAAO,oBAAoB;IACd,OAAO,CAAS;I
AChB,QAAQ,CAAS;IACjB,SAAS,CAAS;IAClB,QAAQ,CAAS;IAElC,YAAY,SAAiB;QAC3B,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC;QAC9C,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAC5C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAC9C,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAE5C,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACxB,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACzB,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC1B,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC3B,CAAC;IAED,IAAI,SAAS;QACX,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IAED,iCAAiC;QAC/B,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,6BAA6B,CAAC,CAAC;IAC5D,CAAC;IAED,OAAO,CAAC,MAA2B,EAAE,KAAqB;QACxD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,cAAc,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;QACjE,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QACvC,OAAO,iBAAiB,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,EAAE;YAC3C,MAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACjE,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;gBACnC,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;gBAC/B,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;oBAAE,OAAO,KAAK,CAAC;gBAChC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACd,OAAO,IAAI,CAAC;YACd,CAAC,CAAC,CAAC;YAEH,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;YACrE,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;YACzB,CAAC;YAED,MAAM,YAAY,GAAG;gBACnB,SAAS,EAAE,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC,MAAM;gBAClG,aAAa,EAAE,MAAM,KAAK,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,eAAe,CAAC,CAAC,CAAC,MAAM;gBAC9G,gBAAgB,EAAE,MAAM,KAAK,gBAAgB,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,gBAAgB,CAAC,CAAC,CAAC,
MAAM;aACpH,CAAC;YAEF,OAAO;gBACL,KAAK,EAAE,MAAM,CAAC,MAAM;gBACpB,KAAK,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;aACrE,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC;IAED,cAAc;QACZ,MAAM,QAAQ,GAAG;YACf,SAAS,EAAE,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC;YACpD,aAAa,EAAE,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,eAAe,CAAC,CAAC;YAC5D,gBAAgB,EAAE,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,gBAAgB,CAAC,CAAC;SACjE,CAAC;QAEF,OAAO;YACL,KAAK,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACzD,QAAQ;SACT,CAAC;IACJ,CAAC;IAED,UAAU,CAAC,KAA0B;QACnC,iBAAiB,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,EAAE;YACpC,KAAK,MAAM,MAAM,IAAI,YAAY,EAAE,CAAC;gBAClC,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;gBACvC,MAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;gBACjC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;oBAAE,SAAS;gBAEpC,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC;gBAC3C,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC1C,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;oBAChC,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBAClD,CAAC;gBAED,MAAM,SAAS,GAAmB,EAAE,CAAC;gBACrC,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;oBAC5B,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;oBAChC,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;oBACrC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;wBACd,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;wBAC7B,SAAS;oBACX,CAAC;oBACD,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACvB,CAAC;gBAED,UAAU,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAED,WAAW,CACT,KAA0B,EAC1B,OAAiH;QAEjH,MAAM,OAAO,GAAG,OAAO,CAAC,KAAK,IAAI,SAAS,UAAU,EAAE,EAAE,CAAC;QACzD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAC/C,OAAO,iBAAiB,CAAC,QAAQ,EAAE,GAAG,EAAE;YACtC,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YAC7C,SAAS,CAAC,UAAU,CAAC,CAAC;YACtB,MAAM,kBAAkB,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAyB,CAAC,MAAM,EAAE,IAAI,EAAE,EAAE;gBACrF,MAAM,GAAG,GAAG,IAAI,CAAC,YAAY,IAAI,aAAa,CAAC;gBAC/C,MAAM,CAAC,GA
AG,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;gBACrC,OAAO,MAAM,CAAC;YAChB,CAAC,EAAE,EAAE,CAAC,CAAC;YAEP,MAAM,QAAQ,GAA0B;gBACtC,OAAO;gBACP,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,MAAM,EAAE,SAAS;gBACjB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,eAAe,EAAE,OAAO,CAAC,eAAe;gBACxC,sBAAsB,EAAE,OAAO,CAAC,sBAAsB;gBACtD,UAAU,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM;gBAC9B,YAAY,EAAE;oBACZ,SAAS,EAAE,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC,MAAM;oBAC1C,aAAa,EAAE,KAAK,CAAC,QAAQ,CAAC,aAAa,CAAC,MAAM;oBAClD,gBAAgB,EAAE,KAAK,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC,MAAM;iBAC1D;gBACD,kBAAkB;aACnB,CAAC;YAEF,UAAU,CAAC,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;YAC1D,KAAK,MAAM,MAAM,IAAI,YAAY,EAAE,CAAC;gBAClC,UAAU,CAAC,IAAI,CAAC,UAAU,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;YAC7E,CAAC;YACD,mBAAmB,CAAC,IAAI,CAAC,QAAQ,EAAE,eAAe,CAAC,EAAE,QAAQ,CAAC,CAAC;YAC/D,OAAO,QAAQ,CAAC;QAClB,CAAC,CAAC,CAAC;IACL,CAAC;IAED,iBAAiB,CACf,OAAe,EACf,KAA2G;QAE3G,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,eAAe,CAAC,CAAC;QACpE,OAAO,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,EAAE,GAAG,EAAE;YAC3D,MAAM,QAAQ,GAAG,YAAY,CAA+B,YAAY,EAAE,IAAI,CAAC,CAAC;YAChF,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,MAAM,IAAI,KAAK,CAAC,8BAA8B,OAAO,EAAE,CAAC,CAAC;YAC3D,CAAC;YACD,MAAM,OAAO,GAA0B;gBACrC,GAAG,QAAQ;gBACX,GAAG,KAAK;aACT,CAAC;YACF,mBAAmB,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;YAC3C,OAAO,OAAO,CAAC;QACjB,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,YAAY,CAAC,MAA2B;QAC9C,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC;IACnD,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"finetune-archive.test.d.ts","sourceRoot":"","sources":["../src/finetune-archive.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { describe, expect, it, afterEach } from 'vitest';
|
|
2
|
+
import { mkdtempSync, readFileSync, rmSync } from 'node:fs';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import { FineTuneArchiveStore } from './finetune-archive.js';
|
|
6
|
+
// Behavioural tests for FineTuneArchiveStore. Every test builds its store in a
// fresh temporary directory, which is removed again after the test.
describe('FineTuneArchiveStore', () => {
    // Temp directory of the most recently created store.
    let dir;

    // Create a store rooted in a new temp directory and remember that directory.
    const makeStore = () => {
        dir = mkdtempSync(join(tmpdir(), 'cereworker-finetune-archive-'));
        return new FineTuneArchiveStore(dir);
    };

    // Build a training pair whose response is derived from the instruction.
    const makePair = (source, instruction) => ({
        instruction,
        response: `${instruction} response`,
        source,
        createdAt: 1,
        instanceId: 'instance-1',
        sessionId: 'session-1',
        exampleClass: 'conversation',
    });

    afterEach(() => {
        if (!dir) {
            return;
        }
        rmSync(dir, { recursive: true, force: true });
    });

    it('queues pairs by source and deduplicates identical entries', () => {
        const store = makeStore();
        // The same discovery pair is enqueued twice; only one copy may survive.
        store.enqueue('discovery', [makePair('discovery', 'hello')]);
        store.enqueue('discovery', [makePair('discovery', 'hello')]);
        store.enqueue('conversations', [makePair('conversation:1', 'question')]);

        const { bySource, pairs } = store.getQueuedBatch();
        expect(bySource.discovery).toHaveLength(1);
        expect(bySource.conversations).toHaveLength(1);
        expect(pairs).toHaveLength(2);
    });

    it('creates an immutable round archive from the exact queued batch and clears it', () => {
        const store = makeStore();
        store.enqueue('curated-memory', [makePair('MEMORY.md', 'curated fact')]);
        store.enqueue('conversations', [makePair('conversation:abc', 'conv fact')]);

        const batch = store.getQueuedBatch();
        const roundOptions = {
            jobId: 'ft-123',
            requestedMethod: 'auto',
            instanceId: 'instance-1',
            activeCheckpointBefore: '/checkpoints/base',
        };
        const manifest = store.createRound(batch, roundOptions);
        store.clearBatch(batch);

        expect(manifest.totalPairs).toBe(2);
        expect(manifest.instanceId).toBe('instance-1');
        expect(manifest.activeCheckpointBefore).toBe('/checkpoints/base');
        expect(manifest.exampleClassCounts?.conversation).toBe(2);

        // The archived files must contain the queued data.
        const trainingFile = join(dir, 'rounds', 'ft-123', 'training.jsonl');
        expect(readFileSync(trainingFile, 'utf-8')).toContain('curated fact');
        const curatedFile = join(dir, 'rounds', 'ft-123', 'sources', 'curated-memory.jsonl');
        expect(readFileSync(curatedFile, 'utf-8')).toContain('curated fact');

        // Clearing the batch leaves the queue empty.
        expect(store.getQueuedBatch().pairs).toHaveLength(0);
    });

    it('updates manifest status after a round completes', () => {
        const store = makeStore();
        store.enqueue('discovery', [makePair('discovery', 'seed')]);
        store.createRound(store.getQueuedBatch(), { jobId: 'ft-456', requestedMethod: 'lora', instanceId: 'instance-1' });

        const patch = {
            status: 'completed',
            completedAt: '2026-03-30T00:00:00.000Z',
            checkpointPath: '/checkpoints/ft-456',
            loss: 0.42,
        };
        const updated = store.updateRoundStatus('ft-456', patch);

        expect(updated.status).toBe('completed');
        expect(updated.loss).toBe(0.42);
        expect(updated.checkpointPath).toBe('/checkpoints/ft-456');
    });
});
|
|
74
|
+
//# sourceMappingURL=finetune-archive.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"finetune-archive.test.js","sourceRoot":"","sources":["../src/finetune-archive.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAC5D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAG7D,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;IACpC,IAAI,GAAW,CAAC;IAEhB,SAAS,CAAC,GAAG,EAAE;QACb,IAAI,GAAG,EAAE,CAAC;YACR,MAAM,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAChD,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,SAAS,SAAS;QAChB,GAAG,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,8BAA8B,CAAC,CAAC,CAAC;QAClE,OAAO,IAAI,oBAAoB,CAAC,GAAG,CAAC,CAAC;IACvC,CAAC;IAED,SAAS,QAAQ,CAAC,MAAc,EAAE,WAAmB;QACnD,OAAO;YACL,WAAW;YACX,QAAQ,EAAE,GAAG,WAAW,WAAW;YACnC,MAAM;YACN,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,YAAY;YACxB,SAAS,EAAE,WAAW;YACtB,YAAY,EAAE,cAAc;SAC7B,CAAC;IACJ,CAAC;IAED,EAAE,CAAC,2DAA2D,EAAE,GAAG,EAAE;QACnE,MAAM,KAAK,GAAG,SAAS,EAAE,CAAC;QAC1B,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC;QAC7D,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC;QAC7D,KAAK,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC,QAAQ,CAAC,gBAAgB,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC;QAEzE,MAAM,KAAK,GAAG,KAAK,CAAC,cAAc,EAAE,CAAC;QACrC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACjD,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACrD,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8EAA8E,EAAE,GAAG,EAAE;QACtF,MAAM,KAAK,GAAG,SAAS,EAAE,CAAC;QAC1B,KAAK,CAAC,OAAO,CAAC,gBAAgB,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC,CAAC,CAAC;QACzE,KAAK,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC,QAAQ,CAAC,kBAAkB,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC;QAE5E,MAAM,KAAK,GAAG,KAAK,CAAC,cAAc,EAAE,CAAC;QACrC,MAAM,QAAQ,GAAG,KAAK,CAAC,WAAW,CAAC,KAAK,EAAE;YACxC,KAAK,EAAE,QAAQ;YACf,eAAe,EAAE,MAAM;YACvB,UAAU,EAAE,YAAY;YACxB,sBAAsB,EAAE,mB
AAmB;SAC5C,CAAC,CAAC;QACH,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QAExB,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpC,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC/C,MAAM,CAAC,QAAQ,CAAC,sBAAsB,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAClE,MAAM,CAAC,QAAQ,CAAC,kBAAkB,EAAE,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC1D,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,EAAE,QAAQ,EAAE,gBAAgB,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;QACzG,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,sBAAsB,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;QAC1H,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,GAAG,EAAE;QACzD,MAAM,KAAK,GAAG,SAAS,EAAE,CAAC;QAC1B,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;QAC5D,MAAM,KAAK,GAAG,KAAK,CAAC,cAAc,EAAE,CAAC;QACrC,KAAK,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,QAAQ,EAAE,eAAe,EAAE,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,CAAC,CAAC;QACjG,MAAM,OAAO,GAAG,KAAK,CAAC,iBAAiB,CAAC,QAAQ,EAAE;YAChD,MAAM,EAAE,WAAW;YACnB,WAAW,EAAE,0BAA0B;YACvC,cAAc,EAAE,qBAAqB;YACrC,IAAI,EAAE,IAAI;SACX,CAAC,CAAC;QAEH,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACzC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IAC7D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export { HippocampusStore } from './store.js';
|
|
2
|
+
export { createMemoryTools, memoryReadParameters, memoryWriteParameters, memoryLogParameters, memorySearchParameters, } from './tools.js';
|
|
3
|
+
export { HippocampusCurator, type TextGenerator } from './curator.js';
|
|
4
|
+
export { ConversationExtractor, type ConversationSource } from './conversation-extractor.js';
|
|
5
|
+
export { FineTuneArchiveStore } from './finetune-archive.js';
|
|
6
|
+
export type { FineTuneQueueSource, FineTuneQueuedBatch, FineTuneRoundManifest } from './finetune-archive.js';
|
|
7
|
+
export type { TrainingPair, TrainingExampleClass, CurationResult, MemoryEntry, SessionTurnEntry, TrainingCandidate, } from './types.js';
|
|
8
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAC9C,OAAO,EACL,iBAAiB,EACjB,oBAAoB,EACpB,qBAAqB,EACrB,mBAAmB,EACnB,sBAAsB,GACvB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,kBAAkB,EAAE,KAAK,aAAa,EAAE,MAAM,cAAc,CAAC;AACtE,OAAO,EAAE,qBAAqB,EAAE,KAAK,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AAC7F,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAC7D,YAAY,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC7G,YAAY,EACV,YAAY,EACZ,oBAAoB,EACpB,cAAc,EACd,WAAW,EACX,gBAAgB,EAChB,iBAAiB,GAClB,MAAM,YAAY,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export { HippocampusStore } from './store.js';
|
|
2
|
+
export { createMemoryTools, memoryReadParameters, memoryWriteParameters, memoryLogParameters, memorySearchParameters, } from './tools.js';
|
|
3
|
+
export { HippocampusCurator } from './curator.js';
|
|
4
|
+
export { ConversationExtractor } from './conversation-extractor.js';
|
|
5
|
+
export { FineTuneArchiveStore } from './finetune-archive.js';
|
|
6
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAC9C,OAAO,EACL,iBAAiB,EACjB,oBAAoB,EACpB,qBAAqB,EACrB,mBAAmB,EACnB,sBAAsB,GACvB,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,kBAAkB,EAAsB,MAAM,cAAc,CAAC;AACtE,OAAO,EAAE,qBAAqB,EAA2B,MAAM,6BAA6B,CAAC;AAC7F,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC"}
|