expressible 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +261 -0
- package/dist/commands/add.d.ts +9 -0
- package/dist/commands/add.d.ts.map +1 -0
- package/dist/commands/add.js +175 -0
- package/dist/commands/add.js.map +1 -0
- package/dist/commands/doctor.d.ts +2 -0
- package/dist/commands/doctor.d.ts.map +1 -0
- package/dist/commands/doctor.js +126 -0
- package/dist/commands/doctor.js.map +1 -0
- package/dist/commands/export.d.ts +2 -0
- package/dist/commands/export.d.ts.map +1 -0
- package/dist/commands/export.js +108 -0
- package/dist/commands/export.js.map +1 -0
- package/dist/commands/init.d.ts +2 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/init.js +37 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/retrain.d.ts +2 -0
- package/dist/commands/retrain.d.ts.map +1 -0
- package/dist/commands/retrain.js +139 -0
- package/dist/commands/retrain.js.map +1 -0
- package/dist/commands/review.d.ts +2 -0
- package/dist/commands/review.d.ts.map +1 -0
- package/dist/commands/review.js +44 -0
- package/dist/commands/review.js.map +1 -0
- package/dist/commands/run.d.ts +2 -0
- package/dist/commands/run.d.ts.map +1 -0
- package/dist/commands/run.js +83 -0
- package/dist/commands/run.js.map +1 -0
- package/dist/commands/setup.d.ts +2 -0
- package/dist/commands/setup.d.ts.map +1 -0
- package/dist/commands/setup.js +15 -0
- package/dist/commands/setup.js.map +1 -0
- package/dist/commands/stats.d.ts +2 -0
- package/dist/commands/stats.d.ts.map +1 -0
- package/dist/commands/stats.js +52 -0
- package/dist/commands/stats.js.map +1 -0
- package/dist/commands/train.d.ts +2 -0
- package/dist/commands/train.d.ts.map +1 -0
- package/dist/commands/train.js +63 -0
- package/dist/commands/train.js.map +1 -0
- package/dist/core/classifier.d.ts +18 -0
- package/dist/core/classifier.d.ts.map +1 -0
- package/dist/core/classifier.js +220 -0
- package/dist/core/classifier.js.map +1 -0
- package/dist/core/config.d.ts +11 -0
- package/dist/core/config.d.ts.map +1 -0
- package/dist/core/config.js +15 -0
- package/dist/core/config.js.map +1 -0
- package/dist/core/data.d.ts +23 -0
- package/dist/core/data.d.ts.map +1 -0
- package/dist/core/data.js +66 -0
- package/dist/core/data.js.map +1 -0
- package/dist/core/embeddings.d.ts +4 -0
- package/dist/core/embeddings.d.ts.map +1 -0
- package/dist/core/embeddings.js +80 -0
- package/dist/core/embeddings.js.map +1 -0
- package/dist/core/model-io.d.ts +11 -0
- package/dist/core/model-io.d.ts.map +1 -0
- package/dist/core/model-io.js +76 -0
- package/dist/core/model-io.js.map +1 -0
- package/dist/core/tf.d.ts +4 -0
- package/dist/core/tf.d.ts.map +1 -0
- package/dist/core/tf.js +21 -0
- package/dist/core/tf.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +161 -0
- package/dist/index.js.map +1 -0
- package/dist/ui/server.d.ts +3 -0
- package/dist/ui/server.d.ts.map +1 -0
- package/dist/ui/server.js +107 -0
- package/dist/ui/server.js.map +1 -0
- package/dist/ui/static/index.html +486 -0
- package/dist/ui/static/static/index.html +486 -0
- package/dist/utils/display.d.ts +9 -0
- package/dist/utils/display.d.ts.map +1 -0
- package/dist/utils/display.js +34 -0
- package/dist/utils/display.js.map +1 -0
- package/dist/utils/fs.d.ts +3 -0
- package/dist/utils/fs.d.ts.map +1 -0
- package/dist/utils/fs.js +31 -0
- package/dist/utils/fs.js.map +1 -0
- package/dist/utils/paths.d.ts +13 -0
- package/dist/utils/paths.d.ts.map +1 -0
- package/dist/utils/paths.js +55 -0
- package/dist/utils/paths.js.map +1 -0
- package/dist/utils/similarity.d.ts +6 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +23 -0
- package/dist/utils/similarity.js.map +1 -0
- package/package.json +45 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { getGlobalModelCacheDir, ensureDir } from '../utils/paths.js';
|
|
2
|
+
import { success, info } from '../utils/display.js';
|
|
3
|
+
/**
 * One-time setup: makes sure the global model cache directory exists and
 * pre-fetches the embedding model into it.
 *
 * The `@xenova/transformers` package is imported lazily, so it is only
 * loaded when this command actually runs.
 *
 * @returns {Promise<void>}
 */
export async function setupCommand() {
    info('Setting up distill...');

    const modelCachePath = getGlobalModelCacheDir();
    ensureDir(modelCachePath);

    info('Downloading embedding model (all-MiniLM-L6-v2, ~80MB)...');
    const { env, pipeline } = await import('@xenova/transformers');
    // Direct the library at our cache directory before the pipeline is built.
    env.cacheDir = modelCachePath;
    env.allowLocalModels = true;
    await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');

    for (const message of [
        'Embedding model downloaded and cached.',
        'Setup complete. You can now use distill offline.',
    ]) {
        success(message);
    }
}
|
|
15
|
+
//# sourceMappingURL=setup.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"setup.js","sourceRoot":"","sources":["../../src/commands/setup.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,sBAAsB,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AACtE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,qBAAqB,CAAC;AAEpD,MAAM,CAAC,KAAK,UAAU,YAAY;IAChC,IAAI,CAAC,uBAAuB,CAAC,CAAC;IAE9B,MAAM,QAAQ,GAAG,sBAAsB,EAAE,CAAC;IAC1C,SAAS,CAAC,QAAQ,CAAC,CAAC;IAEpB,IAAI,CAAC,0DAA0D,CAAC,CAAC;IAEjE,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;IAC1D,YAAY,CAAC,GAAG,CAAC,QAAQ,GAAG,QAAQ,CAAC;IACrC,YAAY,CAAC,GAAG,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAEzC,MAAM,YAAY,CAAC,QAAQ,CAAC,oBAAoB,EAAE,yBAAyB,CAAC,CAAC;IAE7E,OAAO,CAAC,wCAAwC,CAAC,CAAC;IAClD,OAAO,CAAC,kDAAkD,CAAC,CAAC;AAC9D,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stats.d.ts","sourceRoot":"","sources":["../../src/commands/stats.ts"],"names":[],"mappings":"AAOA,wBAAsB,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC,CAoDlD"}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import { findTaskDir, getModelDir, getModelMetadataPath } from '../utils/paths.js';
|
|
3
|
+
import { readConfig } from '../core/config.js';
|
|
4
|
+
import { loadSamples, loadValidationResults } from '../core/data.js';
|
|
5
|
+
import { heading, table } from '../utils/display.js';
|
|
6
|
+
import { getDirectorySize, formatBytes } from '../utils/fs.js';
|
|
7
|
+
/**
 * Prints a summary table for the current task: task type, number of training
 * samples, trained-model details (when `metadata.json` exists), review
 * statistics and the optional project description.
 *
 * @returns {Promise<void>}
 */
export async function statsCommand() {
    const taskDir = findTaskDir();
    const config = readConfig(taskDir);
    const samples = loadSamples(taskDir);
    const validation = loadValidationResults(taskDir);
    heading(`Distill Project: ${config.name}`);
    const rows = [
        ['Task type', config.type],
        ['Training samples', String(samples.length)],
    ];
    // Model info
    const metadataPath = getModelMetadataPath(taskDir);
    if (fs.existsSync(metadataPath)) {
        const metadata = JSON.parse(fs.readFileSync(metadataPath, 'utf-8'));
        const trainedAt = new Date(metadata.trainedAt).toLocaleString();
        rows.push(['Last trained', trainedAt]);
        if (metadata.accuracy !== undefined) {
            rows.push(['Model accuracy', `${(metadata.accuracy * 100).toFixed(1)}%`]);
        }
        if (metadata.categories) {
            rows.push(['Categories', metadata.categories.join(', ')]);
        }
        const modelSize = getDirectorySize(getModelDir(taskDir));
        rows.push(['Model size', formatBytes(modelSize)]);
    }
    else {
        rows.push(['Model', 'Not trained yet']);
    }
    // Validation info
    if (validation.items.length > 0) {
        const reviewedItems = validation.items.filter((i) => i.reviewedAt);
        const reviewed = reviewedItems.length;
        const approved = reviewedItems.filter((i) => i.approved === true).length;
        // The percent sign belongs to the numeric case only; with nothing
        // reviewed yet the cell must read "N/A", not "N/A%".
        const approvalRate = reviewed > 0
            ? `${((approved / reviewed) * 100).toFixed(1)}%`
            : 'N/A';
        rows.push(['Reviewed items', String(reviewed)]);
        rows.push(['Approval rate', approvalRate]);
    }
    else {
        rows.push(['Reviewed items', '0']);
    }
    if (config.description) {
        rows.push(['Description', config.description]);
    }
    table(rows);
    console.log();
}
|
|
52
|
+
//# sourceMappingURL=stats.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stats.js","sourceRoot":"","sources":["../../src/commands/stats.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,WAAW,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACnF,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,WAAW,EAAE,qBAAqB,EAAE,MAAM,iBAAiB,CAAC;AACrE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAc,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAE,gBAAgB,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAE/D,MAAM,CAAC,KAAK,UAAU,YAAY;IAChC,MAAM,OAAO,GAAG,WAAW,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC;IACnC,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IACrC,MAAM,UAAU,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;IAElD,OAAO,CAAC,oBAAoB,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;IAE3C,MAAM,IAAI,GAAuB;QAC/B,CAAC,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC;QAC1B,CAAC,kBAAkB,EAAE,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;KAC7C,CAAC;IAEF,aAAa;IACb,MAAM,YAAY,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;IACnD,IAAI,EAAE,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC;QACpE,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,cAAc,EAAE,CAAC;QAChE,IAAI,CAAC,IAAI,CAAC,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC,CAAC;QAEvC,IAAI,QAAQ,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;YACpC,IAAI,CAAC,IAAI,CAAC,CAAC,gBAAgB,EAAE,GAAG,CAAC,QAAQ,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5E,CAAC;QAED,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;YACxB,IAAI,CAAC,IAAI,CAAC,CAAC,YAAY,EAAE,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5D,CAAC;QAED,MAAM,SAAS,GAAG,gBAAgB,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;QACzD,IAAI,CAAC,IAAI,CAAC,CAAC,YAAY,EAAE,WAAW,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;IACpD,CAAC;SAAM,CAAC;QACN,IAAI,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,iBAAiB,CAAC,CAAC,CAAC;IAC1C,CAAC;IAED,kBAAkB;IAClB,IAAI,UAAU,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC;QACrE,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,QAAQ,KAAK,IAAI,
CAAC,CAAC,MAAM,CAAC;QAC5F,MAAM,YAAY,GAAG,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,QAAQ,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;QAErF,IAAI,CAAC,IAAI,CAAC,CAAC,gBAAgB,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;QAChD,IAAI,CAAC,IAAI,CAAC,CAAC,eAAe,EAAE,GAAG,YAAY,GAAG,CAAC,CAAC,CAAC;IACnD,CAAC;SAAM,CAAC;QACN,IAAI,CAAC,IAAI,CAAC,CAAC,gBAAgB,EAAE,GAAG,CAAC,CAAC,CAAC;IACrC,CAAC;IAED,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC;QACvB,IAAI,CAAC,IAAI,CAAC,CAAC,aAAa,EAAE,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC;IACjD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,CAAC;IACZ,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"train.d.ts","sourceRoot":"","sources":["../../src/commands/train.ts"],"names":[],"mappings":"AAOA,wBAAsB,YAAY,IAAI,OAAO,CAAC,IAAI,CAAC,CAwElD"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { findTaskDir } from '../utils/paths.js';
|
|
2
|
+
import { readConfig } from '../core/config.js';
|
|
3
|
+
import { loadSamples, getUniqueOutputCategories } from '../core/data.js';
|
|
4
|
+
import { embedTexts } from '../core/embeddings.js';
|
|
5
|
+
import { trainClassifier } from '../core/classifier.js';
|
|
6
|
+
import { success, error, warn, info, heading, table } from '../utils/display.js';
|
|
7
|
+
/**
 * Chooses the single post-training caution (if any) that applies to a result.
 * The checks are ordered: tiny validation set, then overfitting, then low
 * validation accuracy.
 *
 * @param {{accuracy: number, valAccuracy: number, numSamples: number}} result
 * @returns {string | null} the warning text, or null when nothing applies
 */
function pickPostTrainingWarning(result) {
    const holdoutShare = result.numSamples < 30 ? 0.1 : 0.2;
    const holdoutSize = Math.max(1, Math.floor(result.numSamples * holdoutShare));
    if (holdoutSize < 5) {
        return `Validation set is very small (${holdoutSize} sample(s)). Accuracy estimate may be unreliable. Add more training data.`;
    }
    if (result.accuracy > 0.9 && result.valAccuracy < 0.5) {
        return `High training accuracy (${(result.accuracy * 100).toFixed(0)}%) but low validation accuracy (${(result.valAccuracy * 100).toFixed(0)}%) suggests overfitting. Add more samples, especially for underrepresented categories.`;
    }
    if (result.valAccuracy < 0.6) {
        return `Validation accuracy is low (${(result.valAccuracy * 100).toFixed(0)}%). Consider adding more diverse training examples.`;
    }
    return null;
}

/**
 * CLI entry point for training: loads the task's samples, embeds their
 * inputs, trains the classifier and prints a result table followed by at
 * most one contextual warning.
 *
 * Calls `process.exit(1)` when fewer than 10 samples are available.
 *
 * @returns {Promise<void>}
 */
export async function trainCommand() {
    const taskDir = findTaskDir();
    const config = readConfig(taskDir);
    const samples = loadSamples(taskDir);

    // Refuse to train on too little data.
    if (samples.length < 10) {
        error(`Not enough training samples. You have ${samples.length}, but at least 10 are required.`);
        info(`Add more samples with: expressible distill add`);
        process.exit(1);
    }

    heading(`Training model "${config.name}"`);
    info(`${samples.length} training samples loaded`);
    const startedAt = Date.now();

    // Turn every input text into an embedding vector.
    info('Generating embeddings for training data...');
    const embeddings = await embedTexts(samples.map((sample) => sample.input), taskDir);
    info(`Embedded ${embeddings.length} samples`);

    const categories = getUniqueOutputCategories(samples);
    info(`Categories found: ${categories.join(', ')}`);

    // Flag sparsely populated categories before spending time on training.
    const perCategory = new Map();
    samples.forEach((sample) => {
        perCategory.set(sample.output, (perCategory.get(sample.output) || 0) + 1);
    });
    perCategory.forEach((count, cat) => {
        if (count < 5) {
            warn(`Category "${cat}" has only ${count} sample(s). Aim for at least 10 per category.`);
        }
    });

    const result = await trainClassifier(embeddings, samples.map((sample) => sample.output), taskDir);
    const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1);
    const asPercent = (ratio) => `${(ratio * 100).toFixed(1)}%`;

    heading('Training Complete');
    table([
        ['Samples', String(result.numSamples)],
        ['Categories', result.categories.join(', ')],
        ['Training accuracy', asPercent(result.accuracy)],
        ['Validation accuracy', asPercent(result.valAccuracy)],
        ['Epochs', String(result.epochs)],
        ['Time elapsed', `${elapsed}s`],
    ]);

    // Contextual post-training warning (at most one).
    const caution = pickPostTrainingWarning(result);
    if (caution !== null) {
        warn(caution);
    }
    success('Model saved to model/');
}
|
|
63
|
+
//# sourceMappingURL=train.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"train.js","sourceRoot":"","sources":["../../src/commands/train.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,WAAW,EAAE,yBAAyB,EAAE,MAAM,iBAAiB,CAAC;AACzE,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,qBAAqB,CAAC;AAEjF,MAAM,CAAC,KAAK,UAAU,YAAY;IAChC,MAAM,OAAO,GAAG,WAAW,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC;IACnC,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IAErC,wBAAwB;IACxB,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;QACxB,KAAK,CACH,yCAAyC,OAAO,CAAC,MAAM,iCAAiC,CACzF,CAAC;QACF,IAAI,CAAC,gDAAgD,CAAC,CAAC;QACvD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,OAAO,CAAC,mBAAmB,MAAM,CAAC,IAAI,GAAG,CAAC,CAAC;IAC3C,IAAI,CAAC,GAAG,OAAO,CAAC,MAAM,0BAA0B,CAAC,CAAC;IAElD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,mBAAmB;IACnB,IAAI,CAAC,4CAA4C,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;IAC3C,MAAM,UAAU,GAAG,MAAM,UAAU,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACrD,IAAI,CAAC,YAAY,UAAU,CAAC,MAAM,UAAU,CAAC,CAAC;IAE9C,MAAM,UAAU,GAAG,yBAAyB,CAAC,OAAO,CAAC,CAAC;IACtD,IAAI,CAAC,qBAAqB,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEnD,yDAAyD;IACzD,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IACjD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACxE,CAAC;IACD,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,cAAc,EAAE,CAAC;QAC1C,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACd,IAAI,CAAC,aAAa,GAAG,cAAc,KAAK,+CAA+C,CAAC,CAAC;QAC3F,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAC5C,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;IAElE,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAE7D,OAAO,CAAC,mBAAm
B,CAAC,CAAC;IAC7B,KAAK,CAAC;QACJ,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QACtC,CAAC,YAAY,EAAE,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5C,CAAC,mBAAmB,EAAE,GAAG,CAAC,MAAM,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;QAC/D,CAAC,qBAAqB,EAAE,GAAG,CAAC,MAAM,CAAC,WAAW,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC;QACpE,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACjC,CAAC,cAAc,EAAE,GAAG,OAAO,GAAG,CAAC;KAChC,CAAC,CAAC;IAEH,oCAAoC;IACpC,MAAM,WAAW,GAAG,MAAM,CAAC,UAAU,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IACvD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC;IAC5E,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;QACnB,IAAI,CACF,iCAAiC,UAAU,2EAA2E,CACvH,CAAC;IACJ,CAAC;SAAM,IAAI,MAAM,CAAC,QAAQ,GAAG,GAAG,IAAI,MAAM,CAAC,WAAW,GAAG,GAAG,EAAE,CAAC;QAC7D,IAAI,CACF,2BAA2B,CAAC,MAAM,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,mCAAmC,CAAC,MAAM,CAAC,WAAW,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,wFAAwF,CAC9N,CAAC;IACJ,CAAC;SAAM,IAAI,MAAM,CAAC,WAAW,GAAG,GAAG,EAAE,CAAC;QACpC,IAAI,CACF,+BAA+B,CAAC,MAAM,CAAC,WAAW,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,qDAAqD,CAC1H,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,uBAAuB,CAAC,CAAC;AACnC,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/** Summary of a completed `trainClassifier` run. */
export interface TrainResult {
|
|
2
|
+
/** Training accuracy (0..1) of the last epoch, or of the best-validation-loss epoch when those weights were restored. */
accuracy: number;
|
|
3
|
+
/** Validation accuracy (0..1) taken from the same epoch as `accuracy`. */
valAccuracy: number;
|
|
4
|
+
/** Number of epochs actually run (early stopping may end training before the maximum). */
epochs: number;
|
|
5
|
+
/** Total number of embeddings passed in (training and validation together). */
numSamples: number;
|
|
6
|
+
/** Distinct labels, sorted; the order matches the model's output units. */
categories: string[];
|
|
7
|
+
}
|
|
8
|
+
/**
 * Trains a classifier on the given embeddings and their labels, then writes
 * the model and a `metadata.json` file into the task's model directory.
 *
 * @param embeddings one embedding vector per sample
 * @param labels category label for each embedding, in the same order
 * @param taskDir task directory whose model directory receives the output
 * @returns accuracy figures, epoch count, sample count and the category list
 */
export declare function trainClassifier(embeddings: number[][], labels: string[], taskDir: string): Promise<TrainResult>;
|
|
9
|
+
/** Outcome of classifying a single embedding with `predict`. */
export interface PredictionResult {
|
|
10
|
+
/** Category with the highest score. */
category: string;
|
|
11
|
+
/** Score the model assigned to `category`. */
confidence: number;
|
|
12
|
+
/** Score for every known category, sorted from highest to lowest confidence. */
allScores: {
|
|
13
|
+
category: string;
|
|
14
|
+
confidence: number;
|
|
15
|
+
}[];
|
|
16
|
+
}
|
|
17
|
+
/**
 * Classifies one embedding with the model stored in the task's model
 * directory. Rejects with an Error when that directory has no `metadata.json`.
 *
 * @param embedding embedding vector of the text to classify
 * @param taskDir task directory whose trained model should be used
 * @returns the best category plus the scores of all categories
 */
export declare function predict(embedding: number[], taskDir: string): Promise<PredictionResult>;
|
|
18
|
+
//# sourceMappingURL=classifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"classifier.d.ts","sourceRoot":"","sources":["../../src/core/classifier.ts"],"names":[],"mappings":"AA6BA,MAAM,WAAW,WAAW;IAC1B,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,EAAE,CAAC;CACtB;AAED,wBAAsB,eAAe,CACnC,UAAU,EAAE,MAAM,EAAE,EAAE,EACtB,MAAM,EAAE,MAAM,EAAE,EAChB,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,WAAW,CAAC,CAqMtB;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;CACvD;AASD,wBAAsB,OAAO,CAC3B,SAAS,EAAE,MAAM,EAAE,EACnB,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,gBAAgB,CAAC,CA6C3B"}
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import crypto from 'node:crypto';
|
|
4
|
+
import { getModelDir, ensureDir } from '../utils/paths.js';
|
|
5
|
+
import { EMBEDDING_DIM } from './embeddings.js';
|
|
6
|
+
import { loadTf } from './tf.js';
|
|
7
|
+
import { fileIOHandler } from './model-io.js';
|
|
8
|
+
import { info, warn } from '../utils/display.js';
|
|
9
|
+
/**
 * Builds a small deterministic pseudo-random generator (mulberry32), used
 * for reproducible shuffles.
 *
 * @param {number} seed initial state; it is coerced to a 32-bit integer
 * @returns {() => number} function yielding the next value in [0, 1)
 */
function seededRandom(seed) {
    let state = seed;
    return function next() {
        // Advance the 32-bit state by the generator's fixed increment.
        state = ((state | 0) + 0x6d2b79f5) | 0;
        const mixed = Math.imul(state ^ (state >>> 15), 1 | state);
        const scrambled = (mixed + Math.imul(mixed ^ (mixed >>> 7), 61 | mixed)) ^ mixed;
        // Reinterpret as unsigned and scale down by 2^32.
        return ((scrambled ^ (scrambled >>> 14)) >>> 0) / 4294967296;
    };
}
|
|
19
|
+
/**
 * Splits sample indices into training and validation sets one category at a
 * time, so each category is represented proportionally in both.
 *
 * Within a category the indices are shuffled with `rand`, then the first
 * `max(1, floor(count * valFraction))` go to validation. A category with a
 * single example is kept entirely for training; holding it out would leave
 * the model with nothing to learn that category from.
 *
 * @param {string[]} labels category label per sample
 * @param {number} valFraction share of each category to hold out (0..1)
 * @param {() => number} rand source of numbers in [0, 1)
 * @returns {{trainIndices: number[], valIndices: number[]}}
 */
function splitStratified(labels, valFraction, rand) {
    const indicesByCategory = new Map();
    for (let i = 0; i < labels.length; i++) {
        const bucket = indicesByCategory.get(labels[i]);
        if (bucket) {
            bucket.push(i);
        }
        else {
            indicesByCategory.set(labels[i], [i]);
        }
    }
    const trainIndices = [];
    const valIndices = [];
    for (const indices of indicesByCategory.values()) {
        // Fisher-Yates shuffle within the category
        for (let i = indices.length - 1; i > 0; i--) {
            const j = Math.floor(rand() * (i + 1));
            [indices[i], indices[j]] = [indices[j], indices[i]];
        }
        const numValForClass = indices.length > 1
            ? Math.max(1, Math.floor(indices.length * valFraction))
            : 0;
        valIndices.push(...indices.slice(0, numValForClass));
        trainIndices.push(...indices.slice(numValForClass));
    }
    return { trainIndices, valIndices };
}

/**
 * Builds and compiles the classifier network: two ReLU dense layers
 * (128 and 64 units), each followed by 20% dropout, and a softmax output
 * with one unit per category.
 *
 * @param tf TensorFlow.js module returned by `loadTf()`
 * @param {number} numCategories number of output units
 * @returns the compiled sequential model
 */
function buildClassifierModel(tf, numCategories) {
    const model = tf.sequential();
    model.add(tf.layers.dense({
        inputShape: [EMBEDDING_DIM],
        units: 128,
        activation: 'relu',
    }));
    model.add(tf.layers.dropout({ rate: 0.2 }));
    model.add(tf.layers.dense({
        units: 64,
        activation: 'relu',
    }));
    model.add(tf.layers.dropout({ rate: 0.2 }));
    model.add(tf.layers.dense({
        units: numCategories,
        activation: 'softmax',
    }));
    model.compile({
        optimizer: tf.train.adam(0.001),
        loss: 'categoricalCrossentropy',
        metrics: ['accuracy'],
    });
    return model;
}

/**
 * Trains a classifier on pre-computed embeddings and saves it, together
 * with a `metadata.json` file, into the task's model directory.
 *
 * Training runs one epoch at a time for up to 100 epochs and stops early
 * once validation loss has not improved for 10 consecutive epochs. The
 * weights of the best epoch are restored before the model is saved. The
 * train/validation split is seeded from the labels, so identical labels
 * give an identical split.
 *
 * @param {number[][]} embeddings one embedding vector per sample
 * @param {string[]} labels category label per sample, same order as `embeddings`
 * @param {string} taskDir task directory whose model directory receives the output
 * @returns {Promise<{accuracy: number, valAccuracy: number, epochs: number, numSamples: number, categories: string[]}>}
 * @throws {Error} if the inputs differ in length, or if no category has
 *   more than one example (no validation set can be formed)
 */
export async function trainClassifier(embeddings, labels, taskDir) {
    if (embeddings.length !== labels.length) {
        throw new Error(`Expected one label per embedding, but got ${embeddings.length} embedding(s) and ${labels.length} label(s).`);
    }
    const tf = await loadTf();
    const categories = Array.from(new Set(labels)).sort();
    const numCategories = categories.length;
    // Check for underrepresented classes
    const classCounts = new Map();
    for (const label of labels) {
        classCounts.set(label, (classCounts.get(label) ?? 0) + 1);
    }
    for (const [category, count] of classCounts) {
        if (count < 3) {
            warn(`Category "${category}" has only ${count} example(s). Consider adding at least 3 examples per category.`);
        }
    }
    // Convert labels to one-hot rows; a Map lookup avoids scanning the
    // category list once per sample.
    const categoryIndex = new Map(categories.map((category, i) => [category, i]));
    const oneHot = labels.map((label) => {
        const row = new Array(numCategories).fill(0);
        row[categoryIndex.get(label)] = 1;
        return row;
    });
    // For small datasets (<30) use a 90/10 split to keep more training data;
    // for larger datasets use the standard 80/20.
    const valFraction = embeddings.length < 30 ? 0.1 : 0.2;
    // Deterministic seed derived from the labels
    const dataHash = crypto.createHash('md5')
        .update(labels.join('|'))
        .digest()
        .readUInt32LE(0);
    const { trainIndices, valIndices } = splitStratified(labels, valFraction, seededRandom(dataHash));
    if (valIndices.length === 0) {
        throw new Error('Cannot hold out a validation set: no category has more than one example. Add more examples per category.');
    }
    const numTrain = trainIndices.length;
    const numVal = valIndices.length;
    // Everything registered here is released in the finally block, so a
    // failure part-way through training does not leak tensor memory.
    const tensors = [];
    const track = (tensor) => {
        tensors.push(tensor);
        return tensor;
    };
    let model = null;
    try {
        const trainX = track(tf.tensor2d(trainIndices.map((i) => embeddings[i])));
        const trainY = track(tf.tensor2d(trainIndices.map((i) => oneHot[i])));
        const valX = track(tf.tensor2d(valIndices.map((i) => embeddings[i])));
        const valY = track(tf.tensor2d(valIndices.map((i) => oneHot[i])));
        model = buildClassifierModel(tf, numCategories);
        info(`Training classifier with ${numTrain} samples, validating on ${numVal}...`);
        // Train with early stopping logic
        const patience = 10;
        const maxEpochs = 100;
        let bestValLoss = Infinity;
        let patienceCounter = 0;
        let bestEpoch = 0;
        let finalAccuracy = 0;
        let finalValAccuracy = 0;
        let bestAccuracy = 0;
        let bestValAccuracy = 0;
        let totalEpochs = 0;
        let bestWeights = null;
        for (let epoch = 0; epoch < maxEpochs; epoch++) {
            const history = await model.fit(trainX, trainY, {
                epochs: 1,
                validationData: [valX, valY],
                verbose: 0,
            });
            // The metric keys are probed under both spellings the code has
            // always accepted; a missing metric counts as 0.
            const h = history.history;
            const valLoss = (h['val_loss']?.[0] ?? h['val_Loss']?.[0] ?? 0);
            const valAcc = (h['val_acc']?.[0] ?? h['val_accuracy']?.[0] ?? 0);
            const trainAcc = (h['acc']?.[0] ?? h['accuracy']?.[0] ?? 0);
            finalAccuracy = trainAcc;
            finalValAccuracy = valAcc;
            totalEpochs = epoch + 1;
            if (valLoss < bestValLoss) {
                bestValLoss = valLoss;
                patienceCounter = 0;
                bestEpoch = epoch;
                bestAccuracy = trainAcc;
                bestValAccuracy = valAcc;
                // Checkpoint best weights as detached copies of the raw data
                bestWeights = model.getWeights().map((w) => w.dataSync().slice().buffer);
            }
            else {
                patienceCounter++;
                if (patienceCounter >= patience) {
                    info(`Early stopping at epoch ${epoch + 1} (best at epoch ${bestEpoch + 1})`);
                    break;
                }
            }
        }
        // Restore best weights before saving (only needed when later epochs
        // ran after the best one).
        if (bestWeights && totalEpochs > bestEpoch + 1) {
            const currentWeights = model.getWeights();
            const restoredWeights = bestWeights.map((buf, i) => tf.tensor(new Float32Array(buf), currentWeights[i].shape));
            model.setWeights(restoredWeights);
            restoredWeights.forEach((w) => w.dispose());
            finalAccuracy = bestAccuracy;
            finalValAccuracy = bestValAccuracy;
        }
        // Invalidate the prediction cache, releasing the stale model first
        cachedClassifier?.model.dispose();
        cachedClassifier = null;
        // Save model
        const modelDir = getModelDir(taskDir);
        ensureDir(modelDir);
        await model.save(fileIOHandler(modelDir));
        // Save metadata (the stored accuracy is the validation accuracy)
        const metadata = {
            type: 'classifier',
            categories,
            trainedAt: new Date().toISOString(),
            numSamples: embeddings.length,
            accuracy: finalValAccuracy,
            epochs: totalEpochs,
        };
        fs.writeFileSync(path.join(modelDir, 'metadata.json'), JSON.stringify(metadata, null, 2) + '\n', 'utf-8');
        return {
            accuracy: finalAccuracy,
            valAccuracy: finalValAccuracy,
            epochs: totalEpochs,
            numSamples: embeddings.length,
            categories,
        };
    }
    finally {
        // Clean up tensors and the in-memory model on success and on failure
        tensors.forEach((tensor) => tensor.dispose());
        model?.dispose();
    }
}
|
|
180
|
+
// Cache for the loaded classifier model, to avoid re-reading it from disk on
// every predict call. Holds `{ modelDir, model, metadata }`, or null when
// nothing is loaded.
let cachedClassifier = null;

/**
 * Classifies one embedding with the model saved in the task's model directory.
 *
 * The loaded model and its metadata are cached per model directory. Asking
 * for a different directory disposes the previously cached model first.
 *
 * @param {number[]} embedding embedding vector of the text to classify
 * @param {string} taskDir task directory whose trained model should be used
 * @returns {Promise<{category: string, confidence: number, allScores: {category: string, confidence: number}[]}>}
 *   the top category and all category scores, sorted by descending confidence
 * @throws {Error} when the model directory has no `metadata.json`
 */
export async function predict(embedding, taskDir) {
    const tf = await loadTf();
    const modelDir = getModelDir(taskDir);
    const metadataPath = path.join(modelDir, 'metadata.json');
    if (!fs.existsSync(metadataPath)) {
        throw new Error('No trained model found. Run "expressible distill train" first.');
    }
    let entry = cachedClassifier;
    if (!entry || entry.modelDir !== modelDir) {
        if (entry) {
            entry.model.dispose();
            // Clear the cache immediately: if loading below fails, the cache
            // must not keep pointing at a model that was already disposed.
            cachedClassifier = null;
        }
        const metadata = JSON.parse(fs.readFileSync(metadataPath, 'utf-8'));
        const model = await tf.loadLayersModel(fileIOHandler(modelDir));
        entry = { modelDir, model, metadata };
        cachedClassifier = entry;
    }
    const inputTensor = tf.tensor2d([embedding]);
    let output;
    try {
        output = entry.model.predict(inputTensor);
        const scores = await output.data();
        const allScores = entry.metadata.categories.map((category, i) => ({
            category,
            confidence: scores[i],
        }));
        allScores.sort((a, b) => b.confidence - a.confidence);
        const best = allScores[0];
        return {
            category: best.category,
            confidence: best.confidence,
            allScores,
        };
    }
    finally {
        // Release tensor memory even when prediction throws
        inputTensor.dispose();
        output?.dispose();
    }
}
|
|
220
|
+
//# sourceMappingURL=classifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"classifier.js","sourceRoot":"","sources":["../../src/core/classifier.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,MAAM,MAAM,aAAa,CAAC;AACjC,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAC3D,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,IAAI,EAAW,MAAM,qBAAqB,CAAC;AAE1D,gEAAgE;AAChE,SAAS,YAAY,CAAC,IAAY;IAChC,OAAO,GAAG,EAAE;QACV,IAAI,IAAI,CAAC,CAAC;QACV,IAAI,GAAG,CAAC,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,IAAI,KAAK,EAAE,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC;QAClD,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC/C,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,UAAU,CAAC;IAC/C,CAAC,CAAC;AACJ,CAAC;AAmBD,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,UAAsB,EACtB,MAAgB,EAChB,OAAe;IAEf,MAAM,EAAE,GAAG,MAAM,MAAM,EAAE,CAAC;IAE1B,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACtD,MAAM,aAAa,GAAG,UAAU,CAAC,MAAM,CAAC;IAExC,qCAAqC;IACrC,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC9C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,WAAW,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5D,CAAC;IACD,KAAK,MAAM,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,WAAW,EAAE,CAAC;QAC5C,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACd,IAAI,CACF,aAAa,QAAQ,cAAc,KAAK,gEAAgE,CACzG,CAAC;QACJ,CAAC;IACH,CAAC;IAED,4BAA4B;IAC5B,MAAM,YAAY,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACtC,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAa,CAAC;QACzD,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACb,OAAO,GAAG,CAAC;IACb,CAAC,CAAC,CAAC;IAEH,yEAAyE;IACzE,uEAAuE;IACvE,0CAA0C;IAC1C,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,
CAAC,CAAC,GAAG,CAAC;IAEvD,uCAAuC;IACvC,MAAM,QAAQ,GAAG,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC;SACtC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;SACxB,MAAM,EAAE;SACR,YAAY,CAAC,CAAC,CAAC,CAAC;IACnB,MAAM,IAAI,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEpC,iDAAiD;IACjD,MAAM,eAAe,GAAG,IAAI,GAAG,EAAoB,CAAC;IACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACtB,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,eAAe,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QAC5D,eAAe,CAAC,GAAG,CAAC,GAAG,CAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,KAAK,MAAM,CAAC,EAAE,OAAO,CAAC,IAAI,eAAe,EAAE,CAAC;QAC1C,0BAA0B;QAC1B,KAAK,IAAI,CAAC,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACvC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QACtD,CAAC;QACD,0EAA0E;QAC1E,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,GAAG,WAAW,CAAC,CAAC,CAAC;QAC7E,UAAU,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC,CAAC;QACrD,YAAY,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC,CAAC;IACtD,CAAC;IAED,MAAM,QAAQ,GAAG,YAAY,CAAC,MAAM,CAAC;IACrC,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC;IAEjC,MAAM,MAAM,GAAG,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACnE,MAAM,MAAM,GAAG,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,IAAI,GAAG,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,IAAI,GAAG,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE3D,cAAc;IACd,MAAM,KAAK,GAAG,EAAE,CAAC,UAAU,EAAE,CAAC;IAC9B,KAAK,CAAC,GAAG,CACP,EAAE,CAAC,MAAM,CAAC,KAAK,C
AAC;QACd,UAAU,EAAE,CAAC,aAAa,CAAC;QAC3B,KAAK,EAAE,GAAG;QACV,UAAU,EAAE,MAAM;KACnB,CAAC,CACH,CAAC;IACF,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;IAC5C,KAAK,CAAC,GAAG,CACP,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC;QACd,KAAK,EAAE,EAAE;QACT,UAAU,EAAE,MAAM;KACnB,CAAC,CACH,CAAC;IACF,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;IAC5C,KAAK,CAAC,GAAG,CACP,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC;QACd,KAAK,EAAE,aAAa;QACpB,UAAU,EAAE,SAAS;KACtB,CAAC,CACH,CAAC;IAEF,KAAK,CAAC,OAAO,CAAC;QACZ,SAAS,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC;QAC/B,IAAI,EAAE,yBAAyB;QAC/B,OAAO,EAAE,CAAC,UAAU,CAAC;KACtB,CAAC,CAAC;IAEH,IAAI,CAAC,4BAA4B,QAAQ,2BAA2B,MAAM,KAAK,CAAC,CAAC;IAEjF,kCAAkC;IAClC,IAAI,WAAW,GAAG,QAAQ,CAAC;IAC3B,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,MAAM,QAAQ,GAAG,EAAE,CAAC;IACpB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,MAAM,SAAS,GAAG,GAAG,CAAC;IAEtB,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,WAAW,GAAyB,IAAI,CAAC;IAE7C,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,SAAS,EAAE,KAAK,EAAE,EAAE,CAAC;QAC/C,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE;YAC9C,MAAM,EAAE,CAAC;YACT,cAAc,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC;YAC5B,OAAO,EAAE,CAAC;SACX,CAAC,CAAC;QAEH,MAAM,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC;QAC1B,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAW,CAAC;QAC1E,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAW,CAAC;QAC5E,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAW,CAAC;QAEtE,aAAa,GAAG,QAAQ,CAAC;QACzB,gBAAgB,GAAG,MAAM,CAAC;QAC1B,WAAW,GAAG,KAAK,GAAG,CAAC,CAAC;QAExB,IAAI,OAAO,GAAG,WAAW,EAAE,CAAC;YAC1B,WAAW,GAAG,OAAO,CAAC;YACtB,eAAe,GAAG,CAAC,CAAC;YACpB,SAAS,GAAG,KAAK,CAAC;YAClB,YAAY,GAAG,QAAQ,CAAC;YACxB,eAAe,GAAG,
MAAM,CAAC;YACzB,0BAA0B;YAC1B,WAAW,GAAG,KAAK,CAAC,UAAU,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC;QAC3E,CAAC;aAAM,CAAC;YACN,eAAe,EAAE,CAAC;YAClB,IAAI,eAAe,IAAI,QAAQ,EAAE,CAAC;gBAChC,IAAI,CAAC,2BAA2B,KAAK,GAAG,CAAC,mBAAmB,SAAS,GAAG,CAAC,GAAG,CAAC,CAAC;gBAC9E,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,IAAI,WAAW,IAAI,WAAW,GAAG,SAAS,GAAG,CAAC,EAAE,CAAC;QAC/C,MAAM,cAAc,GAAG,KAAK,CAAC,UAAU,EAAE,CAAC;QAC1C,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CACjD,EAAE,CAAC,MAAM,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,EAAE,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAC1D,CAAC;QACF,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,CAAC;QAClC,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;QAC5C,aAAa,GAAG,YAAY,CAAC;QAC7B,gBAAgB,GAAG,eAAe,CAAC;IACrC,CAAC;IAED,2CAA2C;IAC3C,gBAAgB,GAAG,IAAI,CAAC;IACxB,MAAM,QAAQ,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IACtC,SAAS,CAAC,QAAQ,CAAC,CAAC;IAEpB,MAAM,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,CAAC;IAE1C,gBAAgB;IAChB,MAAM,QAAQ,GAAuB;QACnC,IAAI,EAAE,YAAY;QAClB,UAAU;QACV,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,UAAU,EAAE,UAAU,CAAC,MAAM;QAC7B,QAAQ,EAAE,gBAAgB;QAC1B,MAAM,EAAE,WAAW;KACpB,CAAC;IACF,EAAE,CAAC,aAAa,CACd,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,eAAe,CAAC,EACpC,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,EACxC,OAAO,CACR,CAAC;IAEF,mBAAmB;IACnB,MAAM,CAAC,OAAO,EAAE,CAAC;IACjB,MAAM,CAAC,OAAO,EAAE,CAAC;IACjB,IAAI,CAAC,OAAO,EAAE,CAAC;IACf,IAAI,CAAC,OAAO,EAAE,CAAC;IACf,KAAK,CAAC,OAAO,EAAE,CAAC;IAEhB,OAAO;QACL,QAAQ,EAAE,aAAa;QACvB,WAAW,EAAE,gBAAgB;QAC7B,MAAM,EAAE,WAAW;QACnB,UAAU,EAAE,UAAU,CAAC,MAAM;QAC7B,UAAU;KACX,CAAC;AACJ,CAAC;AAQD,wFAAwF;AACxF,IAAI,gBAAgB,GAIT,IAAI,CAAC;AAEhB,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,SAAmB,EACnB,OAAe;IAEf,MAAM,EAAE,GAAG,MAAM,MAAM,EAAE,CAAC;IAE1B,MAAM,QAAQ,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IACtC,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,eAAe,CAAC,CAAC;IAE1D,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,gEAAgE,CAAC,CAA
C;IACpF,CAAC;IAED,IAAI,KAAqD,CAAC;IAC1D,IAAI,QAA4B,CAAC;IAEjC,IAAI,gBAAgB,IAAI,gBAAgB,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC/D,KAAK,GAAG,gBAAgB,CAAC,KAAK,CAAC;QAC/B,QAAQ,GAAG,gBAAgB,CAAC,QAAQ,CAAC;IACvC,CAAC;SAAM,CAAC;QACN,IAAI,gBAAgB,EAAE,CAAC;YACrB,gBAAgB,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;QACnC,CAAC;QACD,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC;QAC9D,KAAK,GAAG,MAAM,EAAE,CAAC,eAAe,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC,CAAC;QAC1D,gBAAgB,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;IACnD,CAAC;IAED,MAAM,WAAW,GAAG,EAAE,CAAC,QAAQ,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;IAC7C,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,WAAW,CAAiC,CAAC;IAC1E,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;IAEnC,MAAM,SAAS,GAAG,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAC1D,QAAQ;QACR,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC;KACtB,CAAC,CAAC,CAAC;IACJ,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC;IAEtD,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;IAE1B,WAAW,CAAC,OAAO,EAAE,CAAC;IACtB,MAAM,CAAC,OAAO,EAAE,CAAC;IAEjB,OAAO;QACL,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,UAAU,EAAE,IAAI,CAAC,UAAU;QAC3B,SAAS;KACV,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** Task kinds a project config may declare; `'classify'` is the only member of this union. */
export type TaskType = 'classify';
|
|
2
|
+
/**
 * Shape of the project configuration object: `readConfig` is declared to
 * return it and `writeConfig` is declared to accept it.
 */
export interface DistillConfig {
|
|
3
|
+
/** Name stored in the config. */
name: string;
|
|
4
|
+
/** Task kind; `TaskType` currently admits only `'classify'`. */
type: TaskType;
|
|
5
|
+
/** Free-form description string. */
description: string;
|
|
6
|
+
/** NOTE(review): presumably a timestamp string (e.g. ISO-8601) — confirm against the code that creates the config. */
createdAt: string;
|
|
7
|
+
/** NOTE(review): unclear from this file whether this is a package or a config-schema version — confirm. */
version: string;
|
|
8
|
+
}
|
|
9
|
+
/**
 * Reads the config file for `taskDir` and returns its JSON-parsed content.
 * Throws an `Error` when the config file does not exist.
 */
export declare function readConfig(taskDir: string): DistillConfig;
|
|
10
|
+
/**
 * Writes `config` to the config file for `taskDir` as 2-space-indented JSON
 * followed by a trailing newline.
 */
export declare function writeConfig(taskDir: string, config: DistillConfig): void;
|
|
11
|
+
//# sourceMappingURL=config.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/core/config.ts"],"names":[],"mappings":"AAGA,MAAM,MAAM,QAAQ,GAAG,UAAU,CAAC;AAElC,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,QAAQ,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,wBAAgB,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,aAAa,CASzD;AAED,wBAAgB,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,aAAa,GAAG,IAAI,CAGxE"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import { getConfigPath } from '../utils/paths.js';
|
|
3
|
+
/**
 * Load the project configuration stored for a task directory.
 *
 * @param {string} taskDir - Directory handed to `getConfigPath` to locate the config file.
 * @returns {object} The value obtained by JSON-parsing the config file.
 * @throws {Error} If the config file does not exist.
 * @throws {SyntaxError} If the config file is not valid JSON; the message names the file
 *   and the original parser error is attached as `cause`.
 */
export function readConfig(taskDir) {
    const configPath = getConfigPath(taskDir);
    if (!fs.existsSync(configPath)) {
        throw new Error(`No distill.config.json found in ${taskDir}. Are you in a distill project directory?`);
    }
    const raw = fs.readFileSync(configPath, 'utf-8');
    try {
        return JSON.parse(raw);
    }
    catch (err) {
        // A bare "Unexpected token ..." gives the user no hint which file is broken.
        // Keep the SyntaxError type so existing handlers still match, but add the path.
        throw new SyntaxError(`Invalid JSON in ${configPath}: ${err.message}`, { cause: err });
    }
}
|
|
11
|
+
/**
 * Persist a configuration object for a task directory.
 *
 * @param {string} taskDir - Directory handed to `getConfigPath` to locate the config file.
 * @param {object} config - Value to serialize; written as 2-space-indented JSON plus a newline.
 * @returns {void}
 */
export function writeConfig(taskDir, config) {
    const target = getConfigPath(taskDir);
    const serialized = JSON.stringify(config, null, 2) + '\n';
    fs.writeFileSync(target, serialized, 'utf-8');
}
|
|
15
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/core/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAYlD,MAAM,UAAU,UAAU,CAAC,OAAe;IACxC,MAAM,UAAU,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAC1C,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CACb,mCAAmC,OAAO,2CAA2C,CACtF,CAAC;IACJ,CAAC;IACD,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;IACjD,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAkB,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,OAAe,EAAE,MAAqB;IAChE,MAAM,UAAU,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAC1C,EAAE,CAAC,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;AAChF,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/** One input/output example; `loadSamples` is declared to return an array of these. */
export interface SamplePair {
|
|
2
|
+
/** Sample identifier. */
id: string;
|
|
3
|
+
/** Input text of the example. */
input: string;
|
|
4
|
+
/** Output text of the example; `getUniqueOutputCategories` is declared over arrays of `SamplePair`. */
output: string;
|
|
5
|
+
}
|
|
6
|
+
/** One entry of `ValidationResults.items`. */
export interface ValidationResult {
|
|
7
|
+
/** Identifier of the entry. */
id: string;
|
|
8
|
+
/** Input text the entry refers to. */
input: string;
|
|
9
|
+
/** NOTE(review): presumably the model's prediction for `input` — confirm against the producer. */
predictedOutput: string;
|
|
10
|
+
/** NOTE(review): presumably whether a reviewer accepted `predictedOutput` — confirm. */
approved: boolean;
|
|
11
|
+
/** Optional. NOTE(review): presumably the reviewer-supplied replacement output — confirm. */
correctedOutput?: string;
|
|
12
|
+
/** Optional. NOTE(review): presumably a timestamp string of the review — confirm. */
reviewedAt?: string;
|
|
13
|
+
}
|
|
14
|
+
/**
 * Container read by `loadValidationResults` and written by
 * `saveValidationResults`.
 */
export interface ValidationResults {
|
|
15
|
+
/** The individual validation entries. */
items: ValidationResult[];
|
|
16
|
+
}
|
|
17
|
+
/**
 * Loads the input/output sample pairs stored in the samples directory of
 * `taskDir`. Returns an empty array when that directory does not exist;
 * input files without a matching output file are skipped.
 */
export declare function loadSamples(taskDir: string): SamplePair[];
|
|
18
|
+
/**
 * Returns the id to use for the next sample: one more than the largest
 * numeric id among the existing input files, left-padded with zeros to at
 * least three characters. Returns `'001'` when there are no numbered samples.
 */
export declare function getNextSampleId(taskDir: string): string;
|
|
19
|
+
/**
 * Writes `input` and `output` to `<id>.input.<ext>` and `<id>.output.<ext>`
 * in the samples directory of `taskDir`, creating the directory if needed.
 * `<ext>` is `json` when `isJson` is true and `txt` otherwise (the default).
 */
export declare function saveSample(taskDir: string, id: string, input: string, output: string, isJson?: boolean): void;
|
|
20
|
+
/**
 * Reads and JSON-parses the validation results file for `taskDir`.
 * Returns `{ items: [] }` when the file does not exist.
 */
export declare function loadValidationResults(taskDir: string): ValidationResults;
|
|
21
|
+
/**
 * Writes `results` to the validation results file for `taskDir` as
 * 2-space-indented JSON plus a trailing newline, creating the parent
 * directory if needed.
 */
export declare function saveValidationResults(taskDir: string, results: ValidationResults): void;
|
|
22
|
+
/** Returns the distinct `output` values found in `samples`, sorted with the default array sort. */
export declare function getUniqueOutputCategories(samples: SamplePair[]): string[];
|
|
23
|
+
//# sourceMappingURL=data.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../src/core/data.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,gBAAgB,EAAE,CAAC;CAC3B;AAED,wBAAgB,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,UAAU,EAAE,CA4BzD;AAED,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAgBvD;AAED,wBAAgB,UAAU,CACxB,OAAO,EAAE,MAAM,EACf,EAAE,EAAE,MAAM,EACV,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,MAAM,GAAE,OAAe,GACtB,IAAI,CAON;AAED,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,GAAG,iBAAiB,CAOxE;AAED,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,iBAAiB,GAAG,IAAI,CAKvF;AAED,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,UAAU,EAAE,GAAG,MAAM,EAAE,CAGzE"}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { getSamplesDir, getValidationResultsPath } from '../utils/paths.js';
|
|
4
|
+
/**
 * Collect every input/output sample pair stored for a task.
 *
 * A file whose name contains `.input.` is treated as an input; the text before
 * `.input.` is its id. It is paired with the first directory entry whose name
 * starts with `<id>.output.`. Inputs with no such partner are ignored.
 *
 * @param {string} taskDir - Directory handed to `getSamplesDir` to locate the samples folder.
 * @returns {{id: string, input: string, output: string}[]} Pairs ordered by input
 *   file name, with file contents trimmed. Empty when the samples folder is missing.
 */
export function loadSamples(taskDir) {
    const samplesDir = getSamplesDir(taskDir);
    if (!fs.existsSync(samplesDir)) {
        return [];
    }
    const entries = fs.readdirSync(samplesDir);
    const readTrimmed = (fileName) => fs.readFileSync(path.join(samplesDir, fileName), 'utf-8').trim();
    const pairs = [];
    entries
        .filter((fileName) => fileName.includes('.input.'))
        .sort()
        .forEach((inputName) => {
            const [id] = inputName.split('.input.');
            const outputName = entries.find((fileName) => fileName.startsWith(id + '.output.'));
            if (outputName) {
                // Property order keeps the input file read before the output file.
                pairs.push({ id, input: readTrimmed(inputName), output: readTrimmed(outputName) });
            }
        });
    return pairs;
}
|
|
26
|
+
/**
 * Compute the id for the next sample to be stored for a task.
 *
 * Looks at every file whose name contains `.input.`, parses the text before
 * `.input.` as a base-10 integer, and returns the largest such number plus one,
 * left-padded with zeros to at least three characters.
 *
 * @param {string} taskDir - Directory handed to `getSamplesDir` to locate the samples folder.
 * @returns {string} The next id; `'001'` when the folder is missing or holds no numbered inputs.
 */
export function getNextSampleId(taskDir) {
    const samplesDir = getSamplesDir(taskDir);
    if (!fs.existsSync(samplesDir)) {
        return '001';
    }
    // Track the maximum in one pass instead of `Math.max(...ids)`: spreading one
    // argument per file can exceed the engine's argument limit on huge folders.
    let highest = Number.NEGATIVE_INFINITY;
    for (const fileName of fs.readdirSync(samplesDir)) {
        if (!fileName.includes('.input.')) {
            continue;
        }
        const parsed = Number.parseInt(fileName.split('.input.')[0], 10);
        if (!Number.isNaN(parsed) && parsed > highest) {
            highest = parsed;
        }
    }
    // Still at the sentinel: no input file carried a numeric id.
    if (highest === Number.NEGATIVE_INFINITY) {
        return '001';
    }
    return String(highest + 1).padStart(3, '0');
}
|
|
41
|
+
/**
 * Store one sample as a pair of files in the task's samples folder.
 *
 * @param {string} taskDir - Directory handed to `getSamplesDir` to locate the samples folder.
 * @param {string} id - Sample id used as the file-name prefix.
 * @param {string} input - Content written to `<id>.input.<ext>`.
 * @param {string} output - Content written to `<id>.output.<ext>`.
 * @param {boolean} [isJson=false] - Selects the `json` extension instead of `txt`.
 * @returns {void}
 */
export function saveSample(taskDir, id, input, output, isJson = false) {
    const targetDir = getSamplesDir(taskDir);
    // `recursive` also makes this a no-op when the folder already exists.
    fs.mkdirSync(targetDir, { recursive: true });
    let ext = 'txt';
    if (isJson) {
        ext = 'json';
    }
    const inputPath = path.join(targetDir, `${id}.input.${ext}`);
    const outputPath = path.join(targetDir, `${id}.output.${ext}`);
    fs.writeFileSync(inputPath, input, 'utf-8');
    fs.writeFileSync(outputPath, output, 'utf-8');
}
|
|
48
|
+
/**
 * Load the stored validation results for a task.
 *
 * @param {string} taskDir - Directory handed to `getValidationResultsPath` to locate the file.
 * @returns {object} The JSON-parsed file content, or `{ items: [] }` when the file is missing.
 * @throws {SyntaxError} If the file is not valid JSON; the message names the file
 *   and the original parser error is attached as `cause`.
 */
export function loadValidationResults(taskDir) {
    const resultsPath = getValidationResultsPath(taskDir);
    if (!fs.existsSync(resultsPath)) {
        return { items: [] };
    }
    const raw = fs.readFileSync(resultsPath, 'utf-8');
    try {
        return JSON.parse(raw);
    }
    catch (err) {
        // Same error type as before, but say which file could not be parsed.
        throw new SyntaxError(`Invalid JSON in ${resultsPath}: ${err.message}`, { cause: err });
    }
}
|
|
56
|
+
/**
 * Persist validation results for a task.
 *
 * @param {string} taskDir - Directory handed to `getValidationResultsPath` to locate the file.
 * @param {object} results - Value to serialize; written as 2-space-indented JSON plus a newline.
 * @returns {void}
 */
export function saveValidationResults(taskDir, results) {
    const target = getValidationResultsPath(taskDir);
    // Make sure the containing folder exists before writing.
    fs.mkdirSync(path.dirname(target), { recursive: true });
    const serialized = JSON.stringify(results, null, 2) + '\n';
    fs.writeFileSync(target, serialized, 'utf-8');
}
|
|
62
|
+
/**
 * List the distinct `output` values present in a set of samples.
 *
 * @param {{output: string}[]} samples - Samples to scan.
 * @returns {string[]} Each distinct output once, ordered by the default array sort.
 */
export function getUniqueOutputCategories(samples) {
    const seen = new Set();
    samples.forEach((sample) => {
        seen.add(sample.output);
    });
    const distinct = [...seen];
    distinct.sort();
    return distinct;
}
|
|
66
|
+
//# sourceMappingURL=data.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"data.js","sourceRoot":"","sources":["../../src/core/data.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,wBAAwB,EAAE,MAAM,mBAAmB,CAAC;AAqB5E,MAAM,UAAU,WAAW,CAAC,OAAe;IACzC,MAAM,UAAU,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAC1C,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC/B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;IACzC,MAAM,UAAU,GAAG,KAAK;SACrB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;SACpC,IAAI,EAAE,CAAC;IAEV,MAAM,OAAO,GAAiB,EAAE,CAAC;IAEjC,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,EAAE,GAAG,SAAS,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE,GAAG,UAAU,CAAC,CAAC,CAAC;QAEpE,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,SAAS;QACX,CAAC;QAED,MAAM,KAAK,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,SAAS,CAAC,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;QAChF,MAAM,MAAM,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,CAAC,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;QAElF,OAAO,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;IACtC,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,OAAe;IAC7C,MAAM,UAAU,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAC1C,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC/B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;IACzC,MAAM,GAAG,GAAG,KAAK;SACd,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;SACpC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;SAC/C,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAE5B,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAEnC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC;IAC7B,OAAO,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,UAAU,CACxB,OAAe,EACf,EAAU,EACV,KAAa,EACb,MAAc
,EACd,SAAkB,KAAK;IAEvB,MAAM,UAAU,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAC1C,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE9C,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC;IACpC,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,EAAE,UAAU,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;IAC9E,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,EAAE,WAAW,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;AAClF,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,OAAe;IACnD,MAAM,WAAW,GAAG,wBAAwB,CAAC,OAAO,CAAC,CAAC;IACtD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAChC,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;IACvB,CAAC;IACD,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IAClD,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAsB,CAAC;AAC9C,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,OAAe,EAAE,OAA0B;IAC/E,MAAM,WAAW,GAAG,wBAAwB,CAAC,OAAO,CAAC,CAAC;IACtD,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;IACtC,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACvC,EAAE,CAAC,aAAa,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;AAClF,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,OAAqB;IAC7D,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;IACzD,OAAO,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC;AACvC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../../src/core/embeddings.ts"],"names":[],"mappings":"AAmEA,wBAAsB,UAAU,CAC9B,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,EAAE,MAAM,GACd,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAiCrB;AAED,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAGhF;AAED,eAAO,MAAM,aAAa,MAAM,CAAC"}
|