@producible/cereworker-hippocampus 26.520.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/conversation-extractor.d.ts +23 -0
- package/dist/conversation-extractor.d.ts.map +1 -0
- package/dist/conversation-extractor.js +85 -0
- package/dist/conversation-extractor.js.map +1 -0
- package/dist/conversation-extractor.test.d.ts +2 -0
- package/dist/conversation-extractor.test.d.ts.map +1 -0
- package/dist/conversation-extractor.test.js +105 -0
- package/dist/conversation-extractor.test.js.map +1 -0
- package/dist/curator.d.ts +31 -0
- package/dist/curator.d.ts.map +1 -0
- package/dist/curator.js +219 -0
- package/dist/curator.js.map +1 -0
- package/dist/curator.test.d.ts +2 -0
- package/dist/curator.test.d.ts.map +1 -0
- package/dist/curator.test.js +209 -0
- package/dist/curator.test.js.map +1 -0
- package/dist/finetune-archive.d.ts +47 -0
- package/dist/finetune-archive.d.ts.map +1 -0
- package/dist/finetune-archive.js +203 -0
- package/dist/finetune-archive.js.map +1 -0
- package/dist/finetune-archive.test.d.ts +2 -0
- package/dist/finetune-archive.test.d.ts.map +1 -0
- package/dist/finetune-archive.test.js +74 -0
- package/dist/finetune-archive.test.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -0
- package/dist/store.d.ts +47 -0
- package/dist/store.d.ts.map +1 -0
- package/dist/store.js +277 -0
- package/dist/store.js.map +1 -0
- package/dist/store.test.d.ts +2 -0
- package/dist/store.test.d.ts.map +1 -0
- package/dist/store.test.js +200 -0
- package/dist/store.test.js.map +1 -0
- package/dist/tools.d.ts +37 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +59 -0
- package/dist/tools.js.map +1 -0
- package/dist/tools.test.d.ts +2 -0
- package/dist/tools.test.d.ts.map +1 -0
- package/dist/tools.test.js +85 -0
- package/dist/tools.test.js.map +1 -0
- package/dist/types.d.ts +37 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +32 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Producible
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { TrainingPair } from './types.js';
|
|
2
|
+
/**
 * Read-only access to stored conversations, as consumed by the extractor.
 */
export interface ConversationSource {
    /** Lists the known conversations, each with its id and `updatedAt` value. */
    list(): {
        id: string;
        updatedAt: number;
    }[];
    /** Returns the role/content messages of the conversation identified by `id`. */
    getMessages(id: string): {
        role: string;
        content: string;
    }[];
}
|
|
12
|
+
/**
 * Produces training pairs from the conversations exposed by a
 * ConversationSource.
 */
export declare class ConversationExtractor {
    private source;
    private lastExtractedAt;
    private processedIds;
    private readonly statePath?;
    private readonly instanceId?;
    /**
     * @param source - Where conversations and their messages are read from.
     * @param statePath - Optional path of the file used to persist extractor state.
     * @param instanceId - Optional identifier attached to the produced pairs.
     */
    constructor(source: ConversationSource, statePath?: string, instanceId?: string);
    /** Returns the training pairs extracted by this call. */
    extractPairs(): Array<TrainingPair>;
    private loadState;
    private saveState;
}
|
|
23
|
+
//# sourceMappingURL=conversation-extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"conversation-extractor.d.ts","sourceRoot":"","sources":["../src/conversation-extractor.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE/C,MAAM,WAAW,kBAAkB;IACjC,IAAI,IAAI,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACjD,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;CACnE;AAKD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,MAAM,CAAqB;IACnC,OAAO,CAAC,eAAe,CAAK;IAC5B,OAAO,CAAC,YAAY,CAAqB;IACzC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;gBAEzB,MAAM,EAAE,kBAAkB,EAAE,SAAS,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,MAAM;IAO/E,YAAY,IAAI,YAAY,EAAE;IAqD9B,OAAO,CAAC,SAAS;IAejB,OAAO,CAAC,SAAS;CAQlB"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import { dirname } from 'node:path';
|
|
3
|
+
/** Minimum trimmed length a user message must have to be used as an instruction. */
const MIN_USER_LENGTH = 20;
/** Minimum trimmed length an assistant reply must have to be used as a response. */
const MIN_ASSISTANT_LENGTH = 50;
/**
 * Extracts instruction/response training pairs from conversations.
 *
 * A conversation is picked up when its `updatedAt` is greater than the stored
 * watermark (`lastExtractedAt`) and its id has not been processed before.
 * When a `statePath` is given, the watermark and the processed ids are loaded
 * from that file on construction and written back after each extraction that
 * handled at least one conversation.
 */
export class ConversationExtractor {
    source;
    lastExtractedAt = 0;
    processedIds = new Set();
    statePath;
    instanceId;
    /**
     * @param source Object exposing `list()` and `getMessages(id)`.
     * @param statePath Optional JSON file used to persist extractor state.
     * @param instanceId Optional identifier copied onto every produced pair.
     */
    constructor(source, statePath, instanceId) {
        this.source = source;
        this.statePath = statePath;
        this.instanceId = instanceId;
        this.loadState();
    }
    /**
     * Scans conversations that are new since the last run and returns one pair
     * per user message that is followed by an assistant (or `cerebrum`) reply
     * before the next user message.
     *
     * Pairs whose trimmed instruction or response is shorter than the module
     * thresholds are dropped, as are messages whose content is not a string.
     *
     * @returns The extracted training pairs (possibly empty).
     */
    extractPairs() {
        const conversations = this.source.list()
            .filter((c) => c.updatedAt > this.lastExtractedAt && !this.processedIds.has(c.id));
        const pairs = [];
        const now = Date.now();
        // Track the newest timestamp while iterating instead of spreading the
        // whole list into Math.max, which can exceed the engine's argument limit.
        let newest = this.lastExtractedAt;
        for (const conv of conversations) {
            const messages = this.source.getMessages(conv.id);
            for (let i = 0; i < messages.length - 1; i++) {
                const msg = messages[i];
                if (msg.role !== 'user')
                    continue;
                // Find the next assistant message (skip tool calls, system, etc.),
                // but stop at the next user message: that turn got no reply.
                let assistant = null;
                for (let j = i + 1; j < messages.length; j++) {
                    const role = messages[j].role;
                    if (role === 'assistant' || role === 'cerebrum') {
                        assistant = messages[j];
                        break;
                    }
                    if (role === 'user')
                        break;
                }
                if (!assistant)
                    continue;
                // A malformed message must not abort the whole extraction run.
                if (typeof msg.content !== 'string' || typeof assistant.content !== 'string')
                    continue;
                const instruction = msg.content.trim();
                const response = assistant.content.trim();
                if (instruction.length < MIN_USER_LENGTH || response.length < MIN_ASSISTANT_LENGTH)
                    continue;
                pairs.push({
                    instruction,
                    response,
                    source: `conversation:${conv.id}`,
                    createdAt: now,
                    instanceId: this.instanceId,
                    sessionId: conv.id,
                    exampleClass: 'conversation',
                });
            }
            this.processedIds.add(conv.id);
            if (conv.updatedAt > newest)
                newest = conv.updatedAt;
        }
        if (conversations.length > 0) {
            this.lastExtractedAt = newest;
            this.saveState();
        }
        return pairs;
    }
    /**
     * Restores the watermark and processed ids from `statePath`, if that file
     * exists. Unreadable, unparsable or wrongly shaped state falls back to the
     * initial empty state rather than throwing.
     */
    loadState() {
        if (!this.statePath || !existsSync(this.statePath))
            return;
        try {
            const parsed = JSON.parse(readFileSync(this.statePath, 'utf-8'));
            const watermark = parsed?.lastExtractedAt;
            const ids = parsed?.processedIds;
            // Only accept values of the shape saveState() writes.
            this.lastExtractedAt = typeof watermark === 'number' && Number.isFinite(watermark) ? watermark : 0;
            this.processedIds = new Set(Array.isArray(ids) ? ids.filter((id) => typeof id === 'string') : []);
        }
        catch {
            this.lastExtractedAt = 0;
            this.processedIds = new Set();
        }
    }
    /**
     * Writes the watermark and processed ids to `statePath` as pretty-printed
     * JSON, creating the parent directory when needed. No-op without a path.
     */
    saveState() {
        if (!this.statePath)
            return;
        mkdirSync(dirname(this.statePath), { recursive: true });
        writeFileSync(this.statePath, JSON.stringify({
            lastExtractedAt: this.lastExtractedAt,
            processedIds: Array.from(this.processedIds),
        }, null, 2) + '\n', 'utf-8');
    }
}
|
|
85
|
+
//# sourceMappingURL=conversation-extractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"conversation-extractor.js","sourceRoot":"","sources":["../src/conversation-extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAC7E,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAQpC,MAAM,eAAe,GAAG,EAAE,CAAC;AAC3B,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAEhC,MAAM,OAAO,qBAAqB;IACxB,MAAM,CAAqB;IAC3B,eAAe,GAAG,CAAC,CAAC;IACpB,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;IACxB,SAAS,CAAU;IACnB,UAAU,CAAU;IAErC,YAAY,MAA0B,EAAE,SAAkB,EAAE,UAAmB;QAC7E,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,SAAS,EAAE,CAAC;IACnB,CAAC;IAED,YAAY;QACV,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE;aACrC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,IAAI,CAAC,eAAe,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAErF,MAAM,KAAK,GAAmB,EAAE,CAAC;QACjC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEvB,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;YACjC,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAElD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC7C,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;gBACxB,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM;oBAAE,SAAS;gBAElC,kEAAkE;gBAClE,IAAI,SAAS,GAA6C,IAAI,CAAC;gBAC/D,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC7C,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;wBACxE,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;wBACxB,MAAM;oBACR,CAAC;oBACD,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM;wBAAE,MAAM;gBACzC,CAAC;gBAED,IAAI,CAAC,SAAS;oBAAE,SAAS;gBAEzB,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;gBACvC,MAAM,QAAQ,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;gBAE1C,IAAI,WAAW,CAAC,MAAM,GAAG,eAAe,IAAI,QAAQ,CAAC,MAAM,GAAG,oBAAoB;oBAAE,SAAS;gBAE7F,KAAK,CAAC,IAAI,CAAC;oBACT,WAAW;oBACX,QAAQ;oBACR,MAAM,EAAE,gBAAgB,IAAI,CAAC,EAAE,EAAE;oBACjC,SAAS,EAAE,GAAG;oBACd,UAAU,EAAE,IAAI,CAAC,UAAU;oBAC3B,SAAS,EAA
E,IAAI,CAAC,EAAE;oBAClB,YAAY,EAAE,cAAc;iBAC7B,CAAC,CAAC;YACL,CAAC;YAED,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjC,CAAC;QAED,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;YAC1E,IAAI,CAAC,SAAS,EAAE,CAAC;QACnB,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,SAAS;QACf,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YAAE,OAAO;QAC3D,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAG9D,CAAC;YACF,IAAI,CAAC,eAAe,GAAG,MAAM,CAAC,eAAe,IAAI,CAAC,CAAC;YACnD,IAAI,CAAC,YAAY,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC;QACzD,CAAC;QAAC,MAAM,CAAC;YACP,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC;YACzB,IAAI,CAAC,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;QACxC,CAAC;IACH,CAAC;IAEO,SAAS;QACf,IAAI,CAAC,IAAI,CAAC,SAAS;YAAE,OAAO;QAC5B,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,aAAa,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC;YAC3C,eAAe,EAAE,IAAI,CAAC,eAAe;YACrC,YAAY,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC;SAC5C,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;IAC/B,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"conversation-extractor.test.d.ts","sourceRoot":"","sources":["../src/conversation-extractor.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import { describe, it, expect, afterEach } from 'vitest';
|
|
2
|
+
import { mkdtempSync, rmSync } from 'node:fs';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import { ConversationExtractor } from './conversation-extractor.js';
|
|
6
|
+
/**
 * Builds an in-memory conversation source for the tests.
 *
 * @param convs Map from conversation id to its list of messages.
 * @param updatedAt Value reported as `updatedAt` for every conversation.
 */
function makeSource(convs, updatedAt = Date.now()) {
    const list = () => {
        const entries = [];
        for (const id of Object.keys(convs)) {
            entries.push({ id, updatedAt });
        }
        return entries;
    };
    const getMessages = (id) => {
        const found = convs[id];
        // Unknown ids yield an empty message list.
        return found ?? [];
    };
    return { list, getMessages };
}
|
|
12
|
+
// Behavioural tests for ConversationExtractor, driven by in-memory sources.
describe('ConversationExtractor', () => {
    // Message builders and a shared question/answer turn used by several tests.
    const user = (content) => ({ role: 'user', content });
    const assistant = (content) => ({ role: 'assistant', content });
    const singleConversation = (...messages) => makeSource({ 'conv-1': messages });
    const frameworkQuestion = 'What testing framework does this project use?';
    const frameworkAnswer = 'The project uses Vitest as the testing framework with colocated test files and v8 coverage.';
    // Temp directory created by the persistence test; removed after each test.
    let dir = '';
    afterEach(() => {
        if (!dir)
            return;
        rmSync(dir, { recursive: true, force: true });
        dir = '';
    });
    it('extracts user/assistant pairs from conversations', () => {
        const source = singleConversation(
            user('How do I configure the database connection settings?'),
            assistant('You can configure the database by editing the config.yaml file and setting the connection string under the database section.'),
        );
        const pairs = new ConversationExtractor(source).extractPairs();
        expect(pairs).toHaveLength(1);
        expect(pairs[0].instruction).toContain('database connection');
        expect(pairs[0].source).toBe('conversation:conv-1');
    });
    it('skips short user messages', () => {
        const source = singleConversation(
            user('ok thanks'),
            assistant('You are welcome! Let me know if you have any other questions about the project configuration.'),
        );
        const pairs = new ConversationExtractor(source).extractPairs();
        expect(pairs).toHaveLength(0);
    });
    it('skips short assistant responses', () => {
        const source = singleConversation(
            user('Can you explain how the deployment pipeline works?'),
            assistant('Sure, it uses GitHub Actions.'),
        );
        const pairs = new ConversationExtractor(source).extractPairs();
        expect(pairs).toHaveLength(0);
    });
    it('handles multiple turns in a conversation', () => {
        const source = singleConversation(
            user(frameworkQuestion),
            assistant(frameworkAnswer),
            user('How do I run tests for a specific package?'),
            assistant('You can run tests for a specific package with pnpm test followed by the path to the test file.'),
        );
        const pairs = new ConversationExtractor(source).extractPairs();
        expect(pairs).toHaveLength(2);
    });
    it('skips tool-call messages between user and assistant', () => {
        const source = singleConversation(
            user('What is the current version of the project?'),
            { role: 'tool', content: '{"version": "26.323.8"}' },
            assistant('The current version of the project is 26.323.8, using CalVer format YY.MMDD.counter.'),
        );
        const pairs = new ConversationExtractor(source).extractPairs();
        expect(pairs).toHaveLength(1);
        expect(pairs[0].response).toContain('26.323.8');
    });
    it('does not re-extract already processed conversations', () => {
        const source = singleConversation(user(frameworkQuestion), assistant(frameworkAnswer));
        const extractor = new ConversationExtractor(source);
        expect(extractor.extractPairs()).toHaveLength(1);
        // Second pass over the same extractor must find nothing new.
        expect(extractor.extractPairs()).toHaveLength(0);
    });
    it('persists extractor state when given a state file', () => {
        dir = mkdtempSync(join(tmpdir(), 'conversation-extractor-state-'));
        const statePath = join(dir, 'extractor.json');
        const source = singleConversation(user(frameworkQuestion), assistant(frameworkAnswer));
        const first = new ConversationExtractor(source, statePath);
        expect(first.extractPairs()).toHaveLength(1);
        // A fresh extractor pointed at the same state file must see the work as done.
        const second = new ConversationExtractor(source, statePath);
        expect(second.extractPairs()).toHaveLength(0);
    });
});
|
|
105
|
+
//# sourceMappingURL=conversation-extractor.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"conversation-extractor.test.js","sourceRoot":"","sources":["../src/conversation-extractor.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,qBAAqB,EAA2B,MAAM,6BAA6B,CAAC;AAE7F,SAAS,UAAU,CAAC,KAA+D,EAAE,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE;IACzG,OAAO;QACL,IAAI,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,SAAS,EAAE,CAAC,CAAC;QAC/D,WAAW,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI,EAAE;KACrC,CAAC;AACJ,CAAC;AAED,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,IAAI,GAAG,GAAG,EAAE,CAAC;IAEb,SAAS,CAAC,GAAG,EAAE;QACb,IAAI,GAAG,EAAE,CAAC;YACR,MAAM,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;YAC9C,GAAG,GAAG,EAAE,CAAC;QACX,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;QAC1D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,QAAQ,EAAE;gBACR,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,sDAAsD,EAAE;gBACjF,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,8HAA8H,EAAE;aAC/J;SACF,CAAC,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QACpD,MAAM,KAAK,GAAG,SAAS,CAAC,YAAY,EAAE,CAAC;QACvC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC;QAC9D,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACnC,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,QAAQ,EAAE;gBACR,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE;gBACtC,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,+FAA+F,EAAE;aAChI;SACF,CAAC,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QACpD,MAAM,CAAC,SAAS,CAAC,YAAY,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,QAAQ,EAAE;gBACR,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,oDAAoD,EAAE;gBAC
/E,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,+BAA+B,EAAE;aAChE;SACF,CAAC,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QACpD,MAAM,CAAC,SAAS,CAAC,YAAY,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,QAAQ,EAAE;gBACR,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,+CAA+C,EAAE;gBAC1E,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,6FAA6F,EAAE;gBAC7H,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,4CAA4C,EAAE;gBACvE,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,gGAAgG,EAAE;aACjI;SACF,CAAC,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QACpD,MAAM,KAAK,GAAG,SAAS,CAAC,YAAY,EAAE,CAAC;QACvC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC7D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,QAAQ,EAAE;gBACR,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,6CAA6C,EAAE;gBACxE,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,yBAAyB,EAAE;gBACpD,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,sFAAsF,EAAE;aACvH;SACF,CAAC,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QACpD,MAAM,KAAK,GAAG,SAAS,CAAC,YAAY,EAAE,CAAC;QACvC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC7D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,QAAQ,EAAE;gBACR,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,+CAA+C,EAAE;gBAC1E,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,6FAA6F,EAAE;aAC9H;SACF,CAAC,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,qBAAqB,CAAC,MAAM,CAAC,CAAC;QACpD,MAAM,CAAC,SAAS,CAAC,YAAY,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACjD,MAAM,CAAC,SAAS,CAAC,YAAY,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;QAC1D,GAAG,GAAG,WAAW,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,+BAA+B,CAAC,CAAC,CAAC;QACnE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,gBAAgB,CAAC,CAAC;QAC9C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,QAAQ,EAAE;gBACR,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,+CAA+C,EAAE;gBAC1E,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,6FAA6F,EAAE;aAC9H;SACF,CAAC,CAA
C;QACH,MAAM,KAAK,GAAG,IAAI,qBAAqB,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;QAC3D,MAAM,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAE7C,MAAM,MAAM,GAAG,IAAI,qBAAqB,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { HippocampusStore } from './store.js';
|
|
2
|
+
import type { TrainingPair, CurationResult } from './types.js';
|
|
3
|
+
/**
 * Minimal text-generation contract used to reach the Cerebrum.
 * Deliberately narrow so that the curator has no dependency on the
 * full CerebrumProvider.
 */
export interface TextGenerator {
    /** Resolves with the text generated for `prompt`. */
    generate(prompt: string): Promise<string>;
}
|
|
10
|
+
/**
 * Reviews stored memories through a TextGenerator and queues the approved
 * training pairs for fine-tuning.
 */
export declare class HippocampusCurator {
    private store;
    private generator;
    private readonly instanceId?;
    /**
     * @param store - Memory store the curator reads from.
     * @param generator - Text generator asked to review the memories.
     * @param instanceId - Optional identifier attached to the produced pairs.
     */
    constructor(store: HippocampusStore, generator: TextGenerator, instanceId?: string);
    /**
     * Runs one curation pass: gathers uncurated content, has the Cerebrum
     * review it, and appends the approved pairs to the curated-memory
     * queue file.
     */
    curate(): Promise<CurationResult>;
    /** Returns the queued curated-memory pairs not yet consumed by fine-tuning. */
    getPendingPairs(): Array<TrainingPair>;
    /** Moves the queued curated-memory pairs to consumed/curated-memory-YYYY-MM-DD.jsonl. */
    markConsumed(): void;
    private parseResponse;
    private appendPending;
    private pendingPath;
    private readMarker;
    private writeMarker;
}
|
|
31
|
+
//# sourceMappingURL=curator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"curator.d.ts","sourceRoot":"","sources":["../src/curator.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AACnD,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AA8C/D;;;GAGG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CAC3C;AAED,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,KAAK,CAAmB;IAChC,OAAO,CAAC,SAAS,CAAgB;IACjC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;gBAEzB,KAAK,EAAE,gBAAgB,EAAE,SAAS,EAAE,aAAa,EAAE,UAAU,CAAC,EAAE,MAAM;IAMlF;;;;OAIG;IACG,MAAM,IAAI,OAAO,CAAC,cAAc,CAAC;IA0DvC,sFAAsF;IACtF,eAAe,IAAI,YAAY,EAAE;IAmBjC,uGAAuG;IACvG,YAAY,IAAI,IAAI;IAoBpB,OAAO,CAAC,aAAa;IAwCrB,OAAO,CAAC,aAAa;IAiBrB,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,UAAU;IAMlB,OAAO,CAAC,WAAW;CAIpB"}
|
package/dist/curator.js
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
/** Name of the marker file (inside the store directory) holding the last curation date. */
const CURATED_MARKER = '.curated-marker';
/** Sub-directory of the fine-tune directory that holds the pending queue file. */
const QUEUE_DIR = 'queue';
/** JSONL file, inside the queue directory, that collects pending curated pairs. */
const PENDING_FILE = 'curated-memory.jsonl';
/** Sub-directory of the fine-tune directory that receives consumed pairs. */
const CONSUMED_DIR = 'consumed';
/**
 * Prompt prefix sent to the Cerebrum. The memories under review are appended
 * directly after the trailing "## Memories to Review" heading.
 */
const CURATION_PROMPT = `You are a memory curator for CereWorker, an AI agent. Your job is to review the agent's temporary memories and decide which contain **durable knowledge** worth permanently learning through fine-tuning.

The training pairs will fine-tune a small model (Cerebellum, Qwen3 0.6B) that answers yes/no questions about task scheduling and tool verification. Create pairs that build contextual understanding of the user's environment.

## Instructions

Review the memories below. For each piece of knowledge that is:
- A user preference, decision, or established fact
- A technical pattern, convention, or architecture decision
- A recurring workflow or process
- Important context that would be useful across many future sessions

Create a training pair. Skip anything that is:
- Ephemeral session details (timestamps, one-off tasks)
- Already obvious or common knowledge
- Too vague to be useful
- Contradicted by later memories

## Output Format

Respond with a JSON array of objects. Each object must have:
- "instruction": a question or prompt that would naturally elicit this knowledge
- "response": the factual answer based on the memory
- "source": the filename the knowledge came from

If no memories are worth fine-tuning, respond with an empty array: []

Respond ONLY with the JSON array, nothing else.

## Examples

Good (operational): {"instruction": "The project uses pnpm monorepo. A shell command ran 'npm install'. Is the tool result likely correct?", "response": "No — this project uses pnpm, not npm. The correct command would be 'pnpm install'.", "source": "MEMORY.md"}
Good (contextual): {"instruction": "What package manager does the user's main project use?", "response": "pnpm, in a monorepo managed by Turborepo.", "source": "MEMORY.md"}
Bad (skip): {"instruction": "What happened?", "response": "Some stuff.", "source": "2026-03-25.md"}

## Memories to Review

`;
|
|
45
|
+
/**
 * Curates stored memories into fine-tuning pairs.
 *
 * One pass reads the memory files from the store, asks the text generator
 * (the Cerebrum) to turn durable knowledge into instruction/response pairs,
 * and appends the accepted pairs to a JSONL queue file under the store's
 * fine-tune directory. A date marker in the store directory records when the
 * last pass ran so that older dated files are not sent again.
 */
export class HippocampusCurator {
    store;
    generator;
    instanceId;
    /**
     * @param store Memory store; this class uses `listAll()`, `readFile()`,
     *   `directory` and `finetuneDir`.
     * @param generator Object whose `generate(prompt)` resolves with the review text.
     * @param instanceId Optional identifier copied onto every produced pair.
     */
    constructor(store, generator, instanceId) {
        this.store = store;
        this.generator = generator;
        this.instanceId = instanceId;
    }
    /**
     * Curate memories for fine-tuning.
     * Reads uncurated content, sends it to the Cerebrum for review,
     * and saves approved training pairs to the curated-memory queue file.
     *
     * @returns The accepted pairs, a `skipped` count (number of reviewed files
     *   minus number of accepted pairs, never negative) and any error messages.
     *   A failing generator call is reported through `errors`, not thrown.
     */
    async curate() {
        const errors = [];
        const lastCurated = this.readMarker();
        // Gather uncurated content
        const uncurated = [];
        for (const file of this.store.listAll()) {
            const content = this.store.readFile(file);
            if (!content)
                continue;
            // Dated files (YYYY-MM-DD prefix) on or before the marker date were
            // already reviewed; MEMORY.md is always included. The comparison is
            // a plain string comparison of the two date strings.
            if (lastCurated && file !== 'MEMORY.md') {
                const dateMatch = file.match(/^(\d{4}-\d{2}-\d{2})/);
                if (dateMatch && dateMatch[1] <= lastCurated)
                    continue;
            }
            uncurated.push({ filename: file, content });
        }
        if (uncurated.length === 0) {
            return { pairs: [], skipped: 0, errors: [] };
        }
        // Build prompt with memory content
        const memoriesText = uncurated
            .map((m) => `### ${m.filename}\n\n${m.content}`)
            .join('\n\n---\n\n');
        const prompt = CURATION_PROMPT + memoriesText;
        // Call Cerebrum
        let responseText;
        try {
            responseText = await this.generator.generate(prompt);
        }
        catch (err) {
            errors.push(`Cerebrum call failed: ${err instanceof Error ? err.message : String(err)}`);
            // The marker is left untouched so the same content is retried next time.
            return { pairs: [], skipped: uncurated.length, errors };
        }
        // Parse response
        const pairs = this.parseResponse(responseText, errors);
        // Save pending pairs
        if (pairs.length > 0) {
            this.appendPending(pairs);
        }
        // Update marker
        // NOTE(review): the marker also advances when the response could not be
        // parsed, so those dated files are not retried — confirm this is intended.
        this.writeMarker();
        return {
            pairs,
            // One file can yield several pairs; never report a negative count.
            skipped: Math.max(0, uncurated.length - pairs.length),
            errors,
        };
    }
    /**
     * Read queued curated-memory pairs that haven't been consumed by fine-tuning yet.
     * Lines that are not valid JSON objects are ignored.
     */
    getPendingPairs() {
        const path = this.pendingPath();
        if (!existsSync(path))
            return [];
        const content = readFileSync(path, 'utf-8').trim();
        if (!content)
            return [];
        const pairs = [];
        for (const line of content.split('\n')) {
            let parsed;
            try {
                parsed = JSON.parse(line);
            }
            catch {
                continue;
            }
            // Valid JSON that is not an object (numbers, strings, arrays, null)
            // cannot be a training pair.
            if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
                pairs.push(parsed);
            }
        }
        return pairs;
    }
    /** Mark queued curated-memory pairs as consumed (move to consumed/curated-memory-YYYY-MM-DD.jsonl). */
    markConsumed() {
        const pendingPath = this.pendingPath();
        if (!existsSync(pendingPath))
            return;
        const consumedDir = join(this.store.finetuneDir, CONSUMED_DIR);
        if (!existsSync(consumedDir)) {
            mkdirSync(consumedDir, { recursive: true });
        }
        const date = new Date().toISOString().slice(0, 10);
        const consumedPath = join(consumedDir, `curated-memory-${date}.jsonl`);
        // Append to consumed file (in case multiple curations happen in one day)
        const content = readFileSync(pendingPath, 'utf-8');
        writeFileSync(consumedPath, content, { flag: 'a' });
        // Clear pending
        writeFileSync(pendingPath, '', 'utf-8');
    }
    /**
     * Turns the generator's reply into training pairs.
     *
     * Accepts a bare JSON array or one wrapped in a Markdown code fence, on
     * several lines or on a single line. Items need a string `instruction` of
     * at least 30 trimmed characters and a string `response` of at least 20;
     * everything else is dropped.
     *
     * @param response Raw text returned by the generator.
     * @param errors Array that receives a message when the reply is unusable.
     * @returns The accepted pairs; empty when the reply cannot be used.
     */
    parseResponse(response, errors) {
        if (typeof response !== 'string') {
            errors.push('Cerebrum response is not a string');
            return [];
        }
        let text = response.trim();
        // Handle markdown code blocks
        if (text.startsWith('```')) {
            const firstBreak = text.indexOf('\n');
            if (firstBreak !== -1) {
                // Multi-line fence: drop the opening line (fence plus optional language tag).
                text = text.slice(firstBreak + 1);
            }
            else {
                // Single-line fence: drop the backticks and a language tag
                // that is separated from the payload by whitespace.
                text = text.slice(3).replace(/^[A-Za-z][\w-]*\s+/, '');
            }
            if (text.endsWith('```')) {
                text = text.slice(0, -3);
            }
            text = text.trim();
        }
        try {
            const parsed = JSON.parse(text);
            if (!Array.isArray(parsed)) {
                errors.push('Cerebrum response is not an array');
                return [];
            }
            const now = Date.now();
            return parsed
                .filter((item) => {
                if (typeof item !== 'object' || !item)
                    return false;
                if (typeof item.instruction !== 'string' || typeof item.response !== 'string')
                    return false;
                return item.instruction.trim().length >= 30 && item.response.trim().length >= 20;
            })
                .map((item) => ({
                instruction: item.instruction,
                response: item.response,
                // The model is asked for a filename; fall back when it sends anything else.
                source: typeof item.source === 'string' ? item.source : 'unknown',
                createdAt: now,
                instanceId: this.instanceId,
                exampleClass: 'curated-memory',
            }));
        }
        catch (err) {
            errors.push(`Failed to parse Cerebrum response: ${err instanceof Error ? err.message : String(err)}`);
            return [];
        }
    }
    /**
     * Appends pairs to the pending queue file, skipping any whose instruction
     * (trimmed, case-insensitive) is already queued or repeated in `pairs`.
     */
    appendPending(pairs) {
        // Deduplicate against existing pending pairs
        const seen = new Set();
        for (const queued of this.getPendingPairs()) {
            // Tolerate queue lines that lack a usable instruction.
            if (typeof queued.instruction === 'string') {
                seen.add(queued.instruction.trim().toLowerCase());
            }
        }
        const unique = pairs.filter((p) => {
            const key = p.instruction.trim().toLowerCase();
            if (seen.has(key))
                return false;
            seen.add(key);
            return true;
        });
        if (unique.length === 0)
            return;
        const path = this.pendingPath();
        const lines = unique.map((p) => JSON.stringify(p)).join('\n') + '\n';
        writeFileSync(path, lines, { flag: 'a' });
    }
    /** Returns the path of the pending queue file, creating the queue directory if missing. */
    pendingPath() {
        const queueDir = join(this.store.finetuneDir, QUEUE_DIR);
        if (!existsSync(queueDir)) {
            mkdirSync(queueDir, { recursive: true });
        }
        return join(queueDir, PENDING_FILE);
    }
    /**
     * Reads the last-curation date from the marker file.
     *
     * @returns The stored `YYYY-MM-DD` string, or `null` when the marker is
     *   missing or does not hold a date in that form.
     */
    readMarker() {
        const path = join(this.store.directory, CURATED_MARKER);
        if (!existsSync(path))
            return null;
        const value = readFileSync(path, 'utf-8').trim();
        // A corrupt marker would otherwise be string-compared against file
        // dates and could hide every dated file from curation.
        return /^\d{4}-\d{2}-\d{2}$/.test(value) ? value : null;
    }
    /** Stores today's date (the first 10 characters of `toISOString()`, i.e. UTC) in the marker file. */
    writeMarker() {
        const date = new Date().toISOString().slice(0, 10);
        writeFileSync(join(this.store.directory, CURATED_MARKER), date, 'utf-8');
    }
}
|
|
219
|
+
//# sourceMappingURL=curator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"curator.js","sourceRoot":"","sources":["../src/curator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAC7E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAIjC,MAAM,cAAc,GAAG,iBAAiB,CAAC;AACzC,MAAM,SAAS,GAAG,OAAO,CAAC;AAC1B,MAAM,YAAY,GAAG,sBAAsB,CAAC;AAC5C,MAAM,YAAY,GAAG,UAAU,CAAC;AAEhC,MAAM,eAAe,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAqCvB,CAAC;AAUF,MAAM,OAAO,kBAAkB;IACrB,KAAK,CAAmB;IACxB,SAAS,CAAgB;IAChB,UAAU,CAAU;IAErC,YAAY,KAAuB,EAAE,SAAwB,EAAE,UAAmB;QAChF,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,MAAM;QACV,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;QACtC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;QAEtC,2BAA2B;QAC3B,MAAM,SAAS,GAA4C,EAAE,CAAC;QAC9D,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;YAC1C,IAAI,CAAC,OAAO;gBAAE,SAAS;YAEvB,kDAAkD;YAClD,IAAI,WAAW,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;gBACxC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;gBACrD,IAAI,SAAS,IAAI,SAAS,CAAC,CAAC,CAAC,IAAI,WAAW;oBAAE,SAAS;YACzD,CAAC;YAED,SAAS,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,CAAC;QAC9C,CAAC;QAED,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;QAC/C,CAAC;QAED,mCAAmC;QACnC,MAAM,YAAY,GAAG,SAAS;aAC3B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,QAAQ,OAAO,CAAC,CAAC,OAAO,EAAE,CAAC;aAC/C,IAAI,CAAC,aAAa,CAAC,CAAC;QAEvB,MAAM,MAAM,GAAG,eAAe,GAAG,YAAY,CAAC;QAE9C,gBAAgB;QAChB,IAAI,YAAoB,CAAC;QACzB,IAAI,CAAC;YACH,YAAY,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACvD,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CAAC,yBAAyB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACzF,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;QAC1D,CAAC;QAED,iBAAiB;QACjB,MAAM,KAAK
,GAAG,IAAI,CAAC,aAAa,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;QAEvD,qBAAqB;QACrB,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAC5B,CAAC;QAED,gBAAgB;QAChB,IAAI,CAAC,WAAW,EAAE,CAAC;QAEnB,OAAO;YACL,KAAK;YACL,OAAO,EAAE,SAAS,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM;YACxC,MAAM;SACP,CAAC;IACJ,CAAC;IAED,sFAAsF;IACtF,eAAe;QACb,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAChC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;YAAE,OAAO,EAAE,CAAC;QAEjC,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;QACnD,IAAI,CAAC,OAAO;YAAE,OAAO,EAAE,CAAC;QAExB,OAAO,OAAO;aACX,KAAK,CAAC,IAAI,CAAC;aACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YACZ,IAAI,CAAC;gBACH,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAiB,CAAC;YAC1C,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC,CAAC;aACD,MAAM,CAAC,CAAC,CAAC,EAAqB,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IAClD,CAAC;IAED,uGAAuG;IACvG,YAAY;QACV,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACvC,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC;YAAE,OAAO;QAErC,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,YAAY,CAAC,CAAC;QAC/D,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YAC7B,SAAS,CAAC,WAAW,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC9C,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACnD,MAAM,YAAY,GAAG,IAAI,CAAC,WAAW,EAAE,kBAAkB,IAAI,QAAQ,CAAC,CAAC;QAEvE,yEAAyE;QACzE,MAAM,OAAO,GAAG,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QACnD,aAAa,CAAC,YAAY,EAAE,OAAO,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC;QAEpD,gBAAgB;QAChB,aAAa,CAAC,WAAW,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;IAC1C,CAAC;IAEO,aAAa,CAAC,QAAgB,EAAE,MAAgB;QACtD,IAAI,IAAI,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;QAE3B,8BAA8B;QAC9B,IAAI,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YAC3B,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACrF,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACzB,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YAC3B,CAAC;YA
CD,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QACrB,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YAChC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC3B,MAAM,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;gBACjD,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACvB,OAAO,MAAM;iBACV,MAAM,CAAC,CAAC,IAA6B,EAAE,EAAE;gBACxC,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,CAAC,IAAI;oBAAE,OAAO,KAAK,CAAC;gBACpD,IAAI,OAAO,IAAI,CAAC,WAAW,KAAK,QAAQ,IAAI,OAAO,IAAI,CAAC,QAAQ,KAAK,QAAQ;oBAAE,OAAO,KAAK,CAAC;gBAC5F,OAAQ,IAAI,CAAC,WAAsB,CAAC,IAAI,EAAE,CAAC,MAAM,IAAI,EAAE,IAAK,IAAI,CAAC,QAAmB,CAAC,IAAI,EAAE,CAAC,MAAM,IAAI,EAAE,CAAC;YAC3G,CAAC,CAAC;iBACD,GAAG,CAAC,CAAC,IAA6B,EAAE,EAAE,CAAC,CAAC;gBACvC,WAAW,EAAE,IAAI,CAAC,WAAqB;gBACvC,QAAQ,EAAE,IAAI,CAAC,QAAkB;gBACjC,MAAM,EAAG,IAAI,CAAC,MAAiB,IAAI,SAAS;gBAC5C,SAAS,EAAE,GAAG;gBACd,UAAU,EAAE,IAAI,CAAC,UAAU;gBAC3B,YAAY,EAAE,gBAAyB;aACxC,CAAC,CAAC,CAAC;QACR,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CAAC,sCAAsC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACtG,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,aAAa,CAAC,KAAqB;QACzC,6CAA6C;QAC7C,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QACxC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAC9E,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;YAChC,MAAM,GAAG,GAAG,CAAC,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YAC/C,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,OAAO,KAAK,CAAC;YAChC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;QACH,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO;QAEhC,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAChC,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;QACrE,aAAa,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC;IAC5C,CAAC;IAEO,WAAW;QACjB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,SAAS,CA
AC,CAAC;QACzD,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1B,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3C,CAAC;QACD,OAAO,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IACtC,CAAC;IAEO,UAAU;QAChB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,cAAc,CAAC,CAAC;QACxD,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC;QACnC,OAAO,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAC5C,CAAC;IAEO,WAAW;QACjB,MAAM,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACnD,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,EAAE,cAAc,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IAC3E,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"curator.test.d.ts","sourceRoot":"","sources":["../src/curator.test.ts"],"names":[],"mappings":""}
|