@neural-tools/fine-tune 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +21 -80
- package/README.md +459 -0
- package/dist/index.d.mts +71 -0
- package/dist/index.d.ts +11 -9
- package/dist/index.js +3 -150
- package/dist/index.mjs +3 -0
- package/package.json +5 -5
package/LICENSE.md
CHANGED
|
@@ -1,80 +1,21 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2025 Luke Amy
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
### 2. Pro/Enterprise License (Proprietary)
|
|
24
|
-
|
|
25
|
-
The following features require a valid Pro or Enterprise license:
|
|
26
|
-
|
|
27
|
-
**Pro Features:**
|
|
28
|
-
- Vector database integration
|
|
29
|
-
- Semantic caching
|
|
30
|
-
- Fine-tuning workflows
|
|
31
|
-
- Cloud deployment templates (AWS/GCP)
|
|
32
|
-
- Premium templates and examples
|
|
33
|
-
- GitHub automation features
|
|
34
|
-
|
|
35
|
-
**Enterprise Features:**
|
|
36
|
-
- White-label support
|
|
37
|
-
- Custom integrations
|
|
38
|
-
- Priority support
|
|
39
|
-
- SLA guarantees
|
|
40
|
-
- Team collaboration features
|
|
41
|
-
|
|
42
|
-
These features are proprietary and may not be used without a valid license key purchased from neural-tools.dev.
|
|
43
|
-
|
|
44
|
-
### License Terms
|
|
45
|
-
|
|
46
|
-
1. **Free Tier**: You may use the free tier features for any purpose, including commercial use, under the MIT License terms.
|
|
47
|
-
|
|
48
|
-
2. **Pro/Enterprise**: You must purchase a license to access Pro or Enterprise features. Each license is:
|
|
49
|
-
- Per-user for individual licenses
|
|
50
|
-
- Per-organization for team/enterprise licenses
|
|
51
|
-
- Non-transferable without written consent
|
|
52
|
-
- Subject to the terms at neural-tools.dev/terms
|
|
53
|
-
|
|
54
|
-
3. **Source Code**: This repository is private. You may not:
|
|
55
|
-
- Redistribute the source code
|
|
56
|
-
- Create derivative works for redistribution
|
|
57
|
-
- Reverse engineer Pro/Enterprise features
|
|
58
|
-
- Remove or circumvent license checks
|
|
59
|
-
|
|
60
|
-
4. **Support**: Support is provided based on your license tier:
|
|
61
|
-
- Free: Community support only
|
|
62
|
-
- Pro: Email support (48-hour response)
|
|
63
|
-
- Enterprise: Priority support with SLA
|
|
64
|
-
|
|
65
|
-
### Warranty Disclaimer
|
|
66
|
-
|
|
67
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
68
|
-
|
|
69
|
-
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
70
|
-
|
|
71
|
-
### Contact
|
|
72
|
-
|
|
73
|
-
For licensing inquiries:
|
|
74
|
-
- Email: licensing@neural-tools.dev
|
|
75
|
-
- Website: https://neural-tools.dev/pricing
|
|
76
|
-
- Support: support@neural-tools.dev
|
|
77
|
-
|
|
78
|
-
---
|
|
79
|
-
|
|
80
|
-
**Last Updated:** January 2025
|
|
1
|
+
# MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Luke Amy
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,459 @@
|
|
|
1
|
+
# @neural-tools/fine-tune
|
|
2
|
+
|
|
3
|
+
> Fine-tuning utilities for Neural Tools
|
|
4
|
+
|
|
5
|
+
[](https://www.npmjs.com/package/@neural-tools/fine-tune)
|
|
6
|
+
[](../../LICENSE.md)
|
|
7
|
+
|
|
8
|
+
Utilities for preparing, validating, and managing fine-tuning datasets for LLMs. Currently supports OpenAI's fine-tuning format.
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
npm install @neural-tools/fine-tune
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
### With OpenAI
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npm install @neural-tools/fine-tune openai
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Features
|
|
23
|
+
|
|
24
|
+
- **Dataset Preparation** - Convert various formats to fine-tuning format
|
|
25
|
+
- **Validation** - Ensure datasets meet LLM requirements
|
|
26
|
+
- **Cost Estimation** - Calculate fine-tuning costs before running
|
|
27
|
+
- **Quality Analysis** - Analyze dataset quality and balance
|
|
28
|
+
- **Format Conversion** - Convert between different training formats
|
|
29
|
+
- **Token Counting** - Accurate token counting for cost estimation
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
```typescript
|
|
34
|
+
import { FineTuneDataset } from '@neural-tools/fine-tune';
|
|
35
|
+
|
|
36
|
+
// Create dataset
|
|
37
|
+
const dataset = new FineTuneDataset();
|
|
38
|
+
|
|
39
|
+
// Add training examples
|
|
40
|
+
dataset.addExample({
|
|
41
|
+
messages: [
|
|
42
|
+
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
43
|
+
{ role: 'user', content: 'What is the capital of France?' },
|
|
44
|
+
{ role: 'assistant', content: 'The capital of France is Paris.' }
|
|
45
|
+
]
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
dataset.addExample({
|
|
49
|
+
messages: [
|
|
50
|
+
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
51
|
+
{ role: 'user', content: 'What is 2 + 2?' },
|
|
52
|
+
{ role: 'assistant', content: '2 + 2 equals 4.' }
|
|
53
|
+
]
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
// Validate dataset
|
|
57
|
+
const validation = await dataset.validate();
|
|
58
|
+
if (!validation.isValid) {
|
|
59
|
+
console.error('Validation errors:', validation.errors);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// Get cost estimate
|
|
63
|
+
const estimate = await dataset.estimateCost('gpt-3.5-turbo');
|
|
64
|
+
console.log(`Estimated cost: $${estimate.totalCost.toFixed(2)}`);
|
|
65
|
+
|
|
66
|
+
// Export for OpenAI
|
|
67
|
+
const jsonl = dataset.toJSONL();
|
|
68
|
+
await fs.writeFile('training-data.jsonl', jsonl);
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## API Reference
|
|
72
|
+
|
|
73
|
+
### FineTuneDataset
|
|
74
|
+
|
|
75
|
+
Main class for managing fine-tuning datasets.
|
|
76
|
+
|
|
77
|
+
#### Constructor
|
|
78
|
+
|
|
79
|
+
```typescript
|
|
80
|
+
new FineTuneDataset(options?: DatasetOptions)
|
|
81
|
+
|
|
82
|
+
interface DatasetOptions {
|
|
83
|
+
format?: 'openai' | 'anthropic'; // Default: 'openai'
|
|
84
|
+
validateOnAdd?: boolean; // Default: true
|
|
85
|
+
}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
#### Methods
|
|
89
|
+
|
|
90
|
+
##### `addExample(example)`
|
|
91
|
+
|
|
92
|
+
Add a training example to the dataset.
|
|
93
|
+
|
|
94
|
+
```typescript
|
|
95
|
+
dataset.addExample({
|
|
96
|
+
messages: [
|
|
97
|
+
{ role: 'system', content: 'System prompt' },
|
|
98
|
+
{ role: 'user', content: 'User message' },
|
|
99
|
+
{ role: 'assistant', content: 'Assistant response' }
|
|
100
|
+
]
|
|
101
|
+
});
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
##### `addExamples(examples)`
|
|
105
|
+
|
|
106
|
+
Add multiple examples at once.
|
|
107
|
+
|
|
108
|
+
```typescript
|
|
109
|
+
dataset.addExamples([
|
|
110
|
+
{ messages: [...] },
|
|
111
|
+
{ messages: [...] },
|
|
112
|
+
{ messages: [...] }
|
|
113
|
+
]);
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
##### `validate()`
|
|
117
|
+
|
|
118
|
+
Validate the dataset.
|
|
119
|
+
|
|
120
|
+
```typescript
|
|
121
|
+
const result = await dataset.validate();
|
|
122
|
+
|
|
123
|
+
interface ValidationResult {
|
|
124
|
+
isValid: boolean;
|
|
125
|
+
errors: string[];
|
|
126
|
+
warnings: string[];
|
|
127
|
+
stats: {
|
|
128
|
+
totalExamples: number;
|
|
129
|
+
avgTokensPerExample: number;
|
|
130
|
+
minTokens: number;
|
|
131
|
+
maxTokens: number;
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
##### `estimateCost(model)`
|
|
137
|
+
|
|
138
|
+
Estimate fine-tuning cost.
|
|
139
|
+
|
|
140
|
+
```typescript
|
|
141
|
+
const estimate = await dataset.estimateCost('gpt-3.5-turbo');
|
|
142
|
+
|
|
143
|
+
interface CostEstimate {
|
|
144
|
+
model: string;
|
|
145
|
+
totalTokens: number;
|
|
146
|
+
trainingCost: number;
|
|
147
|
+
totalCost: number;
|
|
148
|
+
estimatedTime: number; // Minutes
|
|
149
|
+
}
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
##### `analyze()`
|
|
153
|
+
|
|
154
|
+
Get dataset quality metrics.
|
|
155
|
+
|
|
156
|
+
```typescript
|
|
157
|
+
const analysis = await dataset.analyze();
|
|
158
|
+
|
|
159
|
+
interface DatasetAnalysis {
|
|
160
|
+
exampleCount: number;
|
|
161
|
+
avgTokensPerMessage: number;
|
|
162
|
+
tokenDistribution: {
|
|
163
|
+
min: number;
|
|
164
|
+
max: number;
|
|
165
|
+
mean: number;
|
|
166
|
+
median: number;
|
|
167
|
+
};
|
|
168
|
+
roleBalance: {
|
|
169
|
+
system: number;
|
|
170
|
+
user: number;
|
|
171
|
+
assistant: number;
|
|
172
|
+
};
|
|
173
|
+
qualityScore: number; // 0-100
|
|
174
|
+
}
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
##### `toJSONL()`
|
|
178
|
+
|
|
179
|
+
Export dataset as JSONL string.
|
|
180
|
+
|
|
181
|
+
```typescript
|
|
182
|
+
const jsonl = dataset.toJSONL();
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
##### `toJSON()`
|
|
186
|
+
|
|
187
|
+
Export dataset as JSON array.
|
|
188
|
+
|
|
189
|
+
```typescript
|
|
190
|
+
const json = dataset.toJSON();
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
##### `fromJSONL(jsonl)`
|
|
194
|
+
|
|
195
|
+
Load dataset from JSONL string.
|
|
196
|
+
|
|
197
|
+
```typescript
|
|
198
|
+
dataset.fromJSONL(jsonlString);
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
##### `fromJSON(json)`
|
|
202
|
+
|
|
203
|
+
Load dataset from JSON array.
|
|
204
|
+
|
|
205
|
+
```typescript
|
|
206
|
+
dataset.fromJSON(jsonArray);
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
## Usage Examples
|
|
210
|
+
|
|
211
|
+
### Basic Chat Fine-Tuning
|
|
212
|
+
|
|
213
|
+
```typescript
|
|
214
|
+
import { FineTuneDataset } from '@neural-tools/fine-tune';
|
|
215
|
+
|
|
216
|
+
const dataset = new FineTuneDataset();
|
|
217
|
+
|
|
218
|
+
// Add customer support examples
|
|
219
|
+
const supportExamples = [
|
|
220
|
+
{
|
|
221
|
+
messages: [
|
|
222
|
+
{ role: 'system', content: 'You are a customer support agent.' },
|
|
223
|
+
{ role: 'user', content: 'How do I reset my password?' },
|
|
224
|
+
{ role: 'assistant', content: 'To reset your password: 1. Go to login page, 2. Click "Forgot Password", 3. Check your email for reset link.' }
|
|
225
|
+
]
|
|
226
|
+
},
|
|
227
|
+
{
|
|
228
|
+
messages: [
|
|
229
|
+
{ role: 'system', content: 'You are a customer support agent.' },
|
|
230
|
+
{ role: 'user', content: 'My order hasn\'t arrived yet.' },
|
|
231
|
+
{ role: 'assistant', content: 'I can help track your order. Can you provide your order number?' }
|
|
232
|
+
]
|
|
233
|
+
}
|
|
234
|
+
// Add 50-100 more examples for good results
|
|
235
|
+
];
|
|
236
|
+
|
|
237
|
+
dataset.addExamples(supportExamples);
|
|
238
|
+
|
|
239
|
+
// Validate
|
|
240
|
+
const validation = await dataset.validate();
|
|
241
|
+
if (validation.isValid) {
|
|
242
|
+
// Save for upload
|
|
243
|
+
const jsonl = dataset.toJSONL();
|
|
244
|
+
await fs.writeFile('support-training.jsonl', jsonl);
|
|
245
|
+
}
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### Code Generation Fine-Tuning
|
|
249
|
+
|
|
250
|
+
```typescript
|
|
251
|
+
const dataset = new FineTuneDataset();
|
|
252
|
+
|
|
253
|
+
dataset.addExamples([
|
|
254
|
+
{
|
|
255
|
+
messages: [
|
|
256
|
+
{ role: 'system', content: 'You are a Python code generator.' },
|
|
257
|
+
{ role: 'user', content: 'Write a function to reverse a string' },
|
|
258
|
+
{ role: 'assistant', content: 'def reverse_string(s):\n return s[::-1]' }
|
|
259
|
+
]
|
|
260
|
+
},
|
|
261
|
+
{
|
|
262
|
+
messages: [
|
|
263
|
+
{ role: 'system', content: 'You are a Python code generator.' },
|
|
264
|
+
{ role: 'user', content: 'Create a function to check if number is prime' },
|
|
265
|
+
{ role: 'assistant', content: 'def is_prime(n):\n if n < 2:\n return False\n for i in range(2, int(n**0.5) + 1):\n if n % i == 0:\n return False\n return True' }
|
|
266
|
+
]
|
|
267
|
+
}
|
|
268
|
+
]);
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
### Cost Estimation
|
|
272
|
+
|
|
273
|
+
```typescript
|
|
274
|
+
const dataset = new FineTuneDataset();
|
|
275
|
+
// ... add examples ...
|
|
276
|
+
|
|
277
|
+
// Estimate cost before running
|
|
278
|
+
const estimate = await dataset.estimateCost('gpt-3.5-turbo');
|
|
279
|
+
|
|
280
|
+
console.log(`Training ${dataset.size()} examples`);
|
|
281
|
+
console.log(`Total tokens: ${estimate.totalTokens}`);
|
|
282
|
+
console.log(`Estimated cost: $${estimate.totalCost.toFixed(2)}`);
|
|
283
|
+
console.log(`Estimated time: ${estimate.estimatedTime} minutes`);
|
|
284
|
+
|
|
285
|
+
// Only proceed if cost is acceptable
|
|
286
|
+
if (estimate.totalCost < 50) {
|
|
287
|
+
await uploadAndTrain(dataset);
|
|
288
|
+
}
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
### Dataset Quality Analysis
|
|
292
|
+
|
|
293
|
+
```typescript
|
|
294
|
+
const analysis = await dataset.analyze();
|
|
295
|
+
|
|
296
|
+
console.log('Dataset Quality Report:');
|
|
297
|
+
console.log(`Examples: ${analysis.exampleCount}`);
|
|
298
|
+
console.log(`Avg tokens per message: ${analysis.avgTokensPerMessage}`);
|
|
299
|
+
console.log(`Quality score: ${analysis.qualityScore}/100`);
|
|
300
|
+
|
|
301
|
+
if (analysis.qualityScore < 70) {
|
|
302
|
+
console.warn('Dataset quality is low. Add more diverse examples.');
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
if (analysis.exampleCount < 50) {
|
|
306
|
+
console.warn('Dataset is small. Recommend at least 50-100 examples.');
|
|
307
|
+
}
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
### Format Conversion
|
|
311
|
+
|
|
312
|
+
```typescript
|
|
313
|
+
// Load from CSV
|
|
314
|
+
import { csvToFineTune } from '@neural-tools/fine-tune';
|
|
315
|
+
|
|
316
|
+
const csv = `
|
|
317
|
+
question,answer
|
|
318
|
+
"What is AI?","Artificial Intelligence is..."
|
|
319
|
+
"What is ML?","Machine Learning is..."
|
|
320
|
+
`;
|
|
321
|
+
|
|
322
|
+
const dataset = csvToFineTune(csv, {
|
|
323
|
+
systemPrompt: 'You are a helpful AI tutor.',
|
|
324
|
+
questionColumn: 'question',
|
|
325
|
+
answerColumn: 'answer'
|
|
326
|
+
});
|
|
327
|
+
|
|
328
|
+
// Export to JSONL
|
|
329
|
+
const jsonl = dataset.toJSONL();
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
### Validation and Error Handling
|
|
333
|
+
|
|
334
|
+
```typescript
|
|
335
|
+
const dataset = new FineTuneDataset();
|
|
336
|
+
dataset.addExample({
|
|
337
|
+
messages: [
|
|
338
|
+
{ role: 'user', content: 'Hello' },
|
|
339
|
+
{ role: 'assistant', content: 'Hi there!' }
|
|
340
|
+
]
|
|
341
|
+
});
|
|
342
|
+
|
|
343
|
+
const validation = await dataset.validate();
|
|
344
|
+
|
|
345
|
+
if (!validation.isValid) {
|
|
346
|
+
console.error('Errors:');
|
|
347
|
+
validation.errors.forEach(error => console.error(` - ${error}`));
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
if (validation.warnings.length > 0) {
|
|
351
|
+
console.warn('Warnings:');
|
|
352
|
+
validation.warnings.forEach(warning => console.warn(` - ${warning}`));
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
console.log('Stats:', validation.stats);
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
## Fine-Tuning with OpenAI
|
|
359
|
+
|
|
360
|
+
```typescript
|
|
361
|
+
import OpenAI from 'openai';
|
|
362
|
+
import { FineTuneDataset } from '@neural-tools/fine-tune';
|
|
363
|
+
import fs from 'fs/promises';
|
|
364
|
+
|
|
365
|
+
const openai = new OpenAI();
|
|
366
|
+
const dataset = new FineTuneDataset();
|
|
367
|
+
|
|
368
|
+
// 1. Prepare dataset
|
|
369
|
+
dataset.addExamples([/* your examples */]);
|
|
370
|
+
|
|
371
|
+
// 2. Validate
|
|
372
|
+
const validation = await dataset.validate();
|
|
373
|
+
if (!validation.isValid) {
|
|
374
|
+
throw new Error('Invalid dataset');
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
// 3. Save to file
|
|
378
|
+
const jsonl = dataset.toJSONL();
|
|
379
|
+
await fs.writeFile('training.jsonl', jsonl);
|
|
380
|
+
|
|
381
|
+
// 4. Upload file
|
|
382
|
+
const file = await openai.files.create({
|
|
383
|
+
file: await fs.readFile('training.jsonl'),
|
|
384
|
+
purpose: 'fine-tune'
|
|
385
|
+
});
|
|
386
|
+
|
|
387
|
+
// 5. Create fine-tuning job
|
|
388
|
+
const fineTune = await openai.fineTuning.jobs.create({
|
|
389
|
+
training_file: file.id,
|
|
390
|
+
model: 'gpt-3.5-turbo'
|
|
391
|
+
});
|
|
392
|
+
|
|
393
|
+
console.log(`Fine-tune job created: ${fineTune.id}`);
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
## Best Practices
|
|
397
|
+
|
|
398
|
+
### 1. Dataset Size
|
|
399
|
+
|
|
400
|
+
- Minimum: 10 examples (for testing)
|
|
401
|
+
- Recommended: 50-100 examples
|
|
402
|
+
- Optimal: 500+ examples
|
|
403
|
+
|
|
404
|
+
### 2. Example Quality
|
|
405
|
+
|
|
406
|
+
- Clear, consistent formatting
|
|
407
|
+
- Diverse scenarios
|
|
408
|
+
- Accurate, high-quality responses
|
|
409
|
+
- Balanced across use cases
|
|
410
|
+
|
|
411
|
+
### 3. Token Count
|
|
412
|
+
|
|
413
|
+
- Keep examples under 4096 tokens
|
|
414
|
+
- Aim for consistent lengths
|
|
415
|
+
- Monitor token distribution
|
|
416
|
+
|
|
417
|
+
### 4. System Prompts
|
|
418
|
+
|
|
419
|
+
```typescript
|
|
420
|
+
// Good: Specific, consistent
|
|
421
|
+
{ role: 'system', content: 'You are a Python expert who writes clean, documented code.' }
|
|
422
|
+
|
|
423
|
+
// Bad: Generic, vague
|
|
424
|
+
{ role: 'system', content: 'You are helpful.' }
|
|
425
|
+
```
|
|
426
|
+
|
|
427
|
+
## Pricing (as of 2024)
|
|
428
|
+
|
|
429
|
+
OpenAI fine-tuning costs:
|
|
430
|
+
|
|
431
|
+
- **GPT-3.5 Turbo**: ~$0.008 per 1K tokens
|
|
432
|
+
- **GPT-4**: ~$0.030 per 1K tokens
|
|
433
|
+
|
|
434
|
+
Example:
|
|
435
|
+
- 100 examples × 200 tokens = 20K tokens
|
|
436
|
+
- Cost: 20 × $0.008 = **$0.16** (GPT-3.5)
|
|
437
|
+
|
|
438
|
+
## Dependencies
|
|
439
|
+
|
|
440
|
+
- [@neural-tools/core](../core) - Core utilities
|
|
441
|
+
|
|
442
|
+
### Peer Dependencies
|
|
443
|
+
|
|
444
|
+
- `openai` - Optional, for OpenAI integration
|
|
445
|
+
|
|
446
|
+
## Contributing
|
|
447
|
+
|
|
448
|
+
Contributions are welcome! See the [main repository](https://github.com/MacLeanLuke/neural-tools) for guidelines.
|
|
449
|
+
|
|
450
|
+
## License
|
|
451
|
+
|
|
452
|
+
MIT - See [LICENSE.md](../../LICENSE.md) for details.
|
|
453
|
+
|
|
454
|
+
## Links
|
|
455
|
+
|
|
456
|
+
- [Documentation](https://neural-tools.com/docs/fine-tune.html)
|
|
457
|
+
- [GitHub](https://github.com/MacLeanLuke/neural-tools)
|
|
458
|
+
- [npm](https://www.npmjs.com/package/@neural-tools/fine-tune)
|
|
459
|
+
- [OpenAI Fine-Tuning Guide](https://platform.openai.com/docs/guides/fine-tuning)
|
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
interface FineTuneConfig {
|
|
2
|
+
provider: 'openai' | 'anthropic' | 'custom';
|
|
3
|
+
model: string;
|
|
4
|
+
datasetPath: string;
|
|
5
|
+
validationSplit?: number;
|
|
6
|
+
epochs?: number;
|
|
7
|
+
learningRate?: number;
|
|
8
|
+
batchSize?: number;
|
|
9
|
+
}
|
|
10
|
+
interface TrainingExample {
|
|
11
|
+
messages: Array<{
|
|
12
|
+
role: 'system' | 'user' | 'assistant';
|
|
13
|
+
content: string;
|
|
14
|
+
}>;
|
|
15
|
+
}
|
|
16
|
+
interface FineTuneJob {
|
|
17
|
+
id: string;
|
|
18
|
+
status: 'pending' | 'running' | 'completed' | 'failed';
|
|
19
|
+
model: string;
|
|
20
|
+
trainingFile?: string;
|
|
21
|
+
validationFile?: string;
|
|
22
|
+
createdAt: number;
|
|
23
|
+
finishedAt?: number;
|
|
24
|
+
error?: string;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Prepare training data for fine-tuning
|
|
28
|
+
*/
|
|
29
|
+
declare function prepareTrainingData(examples: TrainingExample[], options?: {
|
|
30
|
+
validationSplit?: number;
|
|
31
|
+
outputDir?: string;
|
|
32
|
+
}): Promise<{
|
|
33
|
+
trainingFile: string;
|
|
34
|
+
validationFile?: string;
|
|
35
|
+
}>;
|
|
36
|
+
/**
|
|
37
|
+
* Validate training data format
|
|
38
|
+
*/
|
|
39
|
+
declare function validateTrainingData(examples: TrainingExample[]): {
|
|
40
|
+
valid: boolean;
|
|
41
|
+
errors: string[];
|
|
42
|
+
};
|
|
43
|
+
/**
|
|
44
|
+
* Create a fine-tune job (placeholder - actual implementation would call provider APIs)
|
|
45
|
+
*/
|
|
46
|
+
declare function createFineTuneJob(config: FineTuneConfig): Promise<FineTuneJob>;
|
|
47
|
+
/**
|
|
48
|
+
* Get fine-tune job status (placeholder)
|
|
49
|
+
*/
|
|
50
|
+
declare function getFineTuneJob(jobId: string): Promise<FineTuneJob>;
|
|
51
|
+
/**
|
|
52
|
+
* Convert conversations to training examples
|
|
53
|
+
*/
|
|
54
|
+
declare function conversationsToExamples(conversations: Array<{
|
|
55
|
+
system?: string;
|
|
56
|
+
messages: Array<{
|
|
57
|
+
role: 'user' | 'assistant';
|
|
58
|
+
content: string;
|
|
59
|
+
}>;
|
|
60
|
+
}>): TrainingExample[];
|
|
61
|
+
/**
|
|
62
|
+
* Calculate dataset statistics
|
|
63
|
+
*/
|
|
64
|
+
declare function analyzeDataset(examples: TrainingExample[]): {
|
|
65
|
+
totalExamples: number;
|
|
66
|
+
avgMessagesPerExample: number;
|
|
67
|
+
avgTokensPerMessage: number;
|
|
68
|
+
roleDistribution: Record<string, number>;
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
export { type FineTuneConfig, type FineTuneJob, type TrainingExample, analyzeDataset, conversationsToExamples, createFineTuneJob, getFineTuneJob, prepareTrainingData, validateTrainingData };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
interface FineTuneConfig {
|
|
2
2
|
provider: 'openai' | 'anthropic' | 'custom';
|
|
3
3
|
model: string;
|
|
4
4
|
datasetPath: string;
|
|
@@ -7,13 +7,13 @@ export interface FineTuneConfig {
|
|
|
7
7
|
learningRate?: number;
|
|
8
8
|
batchSize?: number;
|
|
9
9
|
}
|
|
10
|
-
|
|
10
|
+
interface TrainingExample {
|
|
11
11
|
messages: Array<{
|
|
12
12
|
role: 'system' | 'user' | 'assistant';
|
|
13
13
|
content: string;
|
|
14
14
|
}>;
|
|
15
15
|
}
|
|
16
|
-
|
|
16
|
+
interface FineTuneJob {
|
|
17
17
|
id: string;
|
|
18
18
|
status: 'pending' | 'running' | 'completed' | 'failed';
|
|
19
19
|
model: string;
|
|
@@ -26,7 +26,7 @@ export interface FineTuneJob {
|
|
|
26
26
|
/**
|
|
27
27
|
* Prepare training data for fine-tuning
|
|
28
28
|
*/
|
|
29
|
-
|
|
29
|
+
declare function prepareTrainingData(examples: TrainingExample[], options?: {
|
|
30
30
|
validationSplit?: number;
|
|
31
31
|
outputDir?: string;
|
|
32
32
|
}): Promise<{
|
|
@@ -36,22 +36,22 @@ export declare function prepareTrainingData(examples: TrainingExample[], options
|
|
|
36
36
|
/**
|
|
37
37
|
* Validate training data format
|
|
38
38
|
*/
|
|
39
|
-
|
|
39
|
+
declare function validateTrainingData(examples: TrainingExample[]): {
|
|
40
40
|
valid: boolean;
|
|
41
41
|
errors: string[];
|
|
42
42
|
};
|
|
43
43
|
/**
|
|
44
44
|
* Create a fine-tune job (placeholder - actual implementation would call provider APIs)
|
|
45
45
|
*/
|
|
46
|
-
|
|
46
|
+
declare function createFineTuneJob(config: FineTuneConfig): Promise<FineTuneJob>;
|
|
47
47
|
/**
|
|
48
48
|
* Get fine-tune job status (placeholder)
|
|
49
49
|
*/
|
|
50
|
-
|
|
50
|
+
declare function getFineTuneJob(jobId: string): Promise<FineTuneJob>;
|
|
51
51
|
/**
|
|
52
52
|
* Convert conversations to training examples
|
|
53
53
|
*/
|
|
54
|
-
|
|
54
|
+
declare function conversationsToExamples(conversations: Array<{
|
|
55
55
|
system?: string;
|
|
56
56
|
messages: Array<{
|
|
57
57
|
role: 'user' | 'assistant';
|
|
@@ -61,9 +61,11 @@ export declare function conversationsToExamples(conversations: Array<{
|
|
|
61
61
|
/**
|
|
62
62
|
* Calculate dataset statistics
|
|
63
63
|
*/
|
|
64
|
-
|
|
64
|
+
declare function analyzeDataset(examples: TrainingExample[]): {
|
|
65
65
|
totalExamples: number;
|
|
66
66
|
avgMessagesPerExample: number;
|
|
67
67
|
avgTokensPerMessage: number;
|
|
68
68
|
roleDistribution: Record<string, number>;
|
|
69
69
|
};
|
|
70
|
+
|
|
71
|
+
export { type FineTuneConfig, type FineTuneJob, type TrainingExample, analyzeDataset, conversationsToExamples, createFineTuneJob, getFineTuneJob, prepareTrainingData, validateTrainingData };
|
package/dist/index.js
CHANGED
|
@@ -1,150 +1,3 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
};
|
|
5
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.prepareTrainingData = prepareTrainingData;
|
|
7
|
-
exports.validateTrainingData = validateTrainingData;
|
|
8
|
-
exports.createFineTuneJob = createFineTuneJob;
|
|
9
|
-
exports.getFineTuneJob = getFineTuneJob;
|
|
10
|
-
exports.conversationsToExamples = conversationsToExamples;
|
|
11
|
-
exports.analyzeDataset = analyzeDataset;
|
|
12
|
-
const core_1 = require("@neural-tools/core");
|
|
13
|
-
const promises_1 = __importDefault(require("fs/promises"));
|
|
14
|
-
const path_1 = __importDefault(require("path"));
|
|
15
|
-
/**
|
|
16
|
-
* Prepare training data for fine-tuning
|
|
17
|
-
*/
|
|
18
|
-
async function prepareTrainingData(examples, options = {}) {
|
|
19
|
-
await (0, core_1.requireFeature)('fine-tuning', 'Fine-tuning');
|
|
20
|
-
const validationSplit = options.validationSplit || 0;
|
|
21
|
-
const outputDir = options.outputDir || './fine-tune-data';
|
|
22
|
-
// Create output directory
|
|
23
|
-
await promises_1.default.mkdir(outputDir, { recursive: true });
|
|
24
|
-
// Split data
|
|
25
|
-
const splitIndex = Math.floor(examples.length * (1 - validationSplit));
|
|
26
|
-
const trainingExamples = examples.slice(0, splitIndex);
|
|
27
|
-
const validationExamples = validationSplit > 0 ? examples.slice(splitIndex) : [];
|
|
28
|
-
// Write training file
|
|
29
|
-
const trainingFile = path_1.default.join(outputDir, 'training.jsonl');
|
|
30
|
-
const trainingContent = trainingExamples
|
|
31
|
-
.map(ex => JSON.stringify(ex))
|
|
32
|
-
.join('\n');
|
|
33
|
-
await promises_1.default.writeFile(trainingFile, trainingContent, 'utf-8');
|
|
34
|
-
// Write validation file if needed
|
|
35
|
-
let validationFile;
|
|
36
|
-
if (validationExamples.length > 0) {
|
|
37
|
-
validationFile = path_1.default.join(outputDir, 'validation.jsonl');
|
|
38
|
-
const validationContent = validationExamples
|
|
39
|
-
.map(ex => JSON.stringify(ex))
|
|
40
|
-
.join('\n');
|
|
41
|
-
await promises_1.default.writeFile(validationFile, validationContent, 'utf-8');
|
|
42
|
-
}
|
|
43
|
-
return { trainingFile, validationFile };
|
|
44
|
-
}
|
|
45
|
-
/**
|
|
46
|
-
* Validate training data format
|
|
47
|
-
*/
|
|
48
|
-
function validateTrainingData(examples) {
|
|
49
|
-
const errors = [];
|
|
50
|
-
if (examples.length === 0) {
|
|
51
|
-
errors.push('No training examples provided');
|
|
52
|
-
return { valid: false, errors };
|
|
53
|
-
}
|
|
54
|
-
examples.forEach((example, index) => {
|
|
55
|
-
if (!example.messages || !Array.isArray(example.messages)) {
|
|
56
|
-
errors.push(`Example ${index}: Missing or invalid messages array`);
|
|
57
|
-
return;
|
|
58
|
-
}
|
|
59
|
-
if (example.messages.length === 0) {
|
|
60
|
-
errors.push(`Example ${index}: Messages array is empty`);
|
|
61
|
-
}
|
|
62
|
-
example.messages.forEach((message, msgIndex) => {
|
|
63
|
-
if (!message.role || !['system', 'user', 'assistant'].includes(message.role)) {
|
|
64
|
-
errors.push(`Example ${index}, Message ${msgIndex}: Invalid role`);
|
|
65
|
-
}
|
|
66
|
-
if (!message.content || typeof message.content !== 'string') {
|
|
67
|
-
errors.push(`Example ${index}, Message ${msgIndex}: Missing or invalid content`);
|
|
68
|
-
}
|
|
69
|
-
});
|
|
70
|
-
});
|
|
71
|
-
return {
|
|
72
|
-
valid: errors.length === 0,
|
|
73
|
-
errors
|
|
74
|
-
};
|
|
75
|
-
}
|
|
76
|
-
/**
|
|
77
|
-
* Create a fine-tune job (placeholder - actual implementation would call provider APIs)
|
|
78
|
-
*/
|
|
79
|
-
async function createFineTuneJob(config) {
|
|
80
|
-
await (0, core_1.requireFeature)('fine-tuning', 'Fine-tuning');
|
|
81
|
-
// Validate dataset exists
|
|
82
|
-
try {
|
|
83
|
-
await promises_1.default.access(config.datasetPath);
|
|
84
|
-
}
|
|
85
|
-
catch {
|
|
86
|
-
throw new Error(`Dataset file not found: ${config.datasetPath}`);
|
|
87
|
-
}
|
|
88
|
-
// In production, this would:
|
|
89
|
-
// 1. Upload training data to provider
|
|
90
|
-
// 2. Start fine-tuning job
|
|
91
|
-
// 3. Return job details
|
|
92
|
-
const job = {
|
|
93
|
-
id: `ft-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
|
|
94
|
-
status: 'pending',
|
|
95
|
-
model: config.model,
|
|
96
|
-
trainingFile: config.datasetPath,
|
|
97
|
-
createdAt: Date.now()
|
|
98
|
-
};
|
|
99
|
-
return job;
|
|
100
|
-
}
|
|
101
|
-
/**
|
|
102
|
-
* Get fine-tune job status (placeholder)
|
|
103
|
-
*/
|
|
104
|
-
async function getFineTuneJob(jobId) {
|
|
105
|
-
await (0, core_1.requireFeature)('fine-tuning', 'Fine-tuning');
|
|
106
|
-
// In production, this would query the provider API
|
|
107
|
-
throw new Error('Fine-tune job tracking coming soon');
|
|
108
|
-
}
|
|
109
|
-
/**
|
|
110
|
-
* Convert conversations to training examples
|
|
111
|
-
*/
|
|
112
|
-
function conversationsToExamples(conversations) {
|
|
113
|
-
return conversations.map(conv => {
|
|
114
|
-
const messages = [];
|
|
115
|
-
if (conv.system) {
|
|
116
|
-
messages.push({
|
|
117
|
-
role: 'system',
|
|
118
|
-
content: conv.system
|
|
119
|
-
});
|
|
120
|
-
}
|
|
121
|
-
messages.push(...conv.messages);
|
|
122
|
-
return { messages };
|
|
123
|
-
});
|
|
124
|
-
}
|
|
125
|
-
/**
|
|
126
|
-
* Calculate dataset statistics
|
|
127
|
-
*/
|
|
128
|
-
function analyzeDataset(examples) {
|
|
129
|
-
const stats = {
|
|
130
|
-
totalExamples: examples.length,
|
|
131
|
-
avgMessagesPerExample: 0,
|
|
132
|
-
avgTokensPerMessage: 0,
|
|
133
|
-
roleDistribution: {}
|
|
134
|
-
};
|
|
135
|
-
let totalMessages = 0;
|
|
136
|
-
let totalTokens = 0;
|
|
137
|
-
examples.forEach(example => {
|
|
138
|
-
totalMessages += example.messages.length;
|
|
139
|
-
example.messages.forEach(message => {
|
|
140
|
-
// Simple token estimation (real implementation would use tiktoken)
|
|
141
|
-
const tokens = message.content.split(/\s+/).length;
|
|
142
|
-
totalTokens += tokens;
|
|
143
|
-
stats.roleDistribution[message.role] =
|
|
144
|
-
(stats.roleDistribution[message.role] || 0) + 1;
|
|
145
|
-
});
|
|
146
|
-
});
|
|
147
|
-
stats.avgMessagesPerExample = totalMessages / examples.length;
|
|
148
|
-
stats.avgTokensPerMessage = totalTokens / totalMessages;
|
|
149
|
-
return stats;
|
|
150
|
-
}
|
|
1
|
+
"use strict";var v=Object.create;var l=Object.defineProperty;var E=Object.getOwnPropertyDescriptor;var y=Object.getOwnPropertyNames;var F=Object.getPrototypeOf,T=Object.prototype.hasOwnProperty;var x=(n,t)=>{for(var e in t)l(n,e,{get:t[e],enumerable:!0})},d=(n,t,e,r)=>{if(t&&typeof t=="object"||typeof t=="function")for(let i of y(t))!T.call(n,i)&&i!==e&&l(n,i,{get:()=>t[i],enumerable:!(r=E(t,i))||r.enumerable});return n};var f=(n,t,e)=>(e=n!=null?v(F(n)):{},d(t||!n||!n.__esModule?l(e,"default",{value:n,enumerable:!0}):e,n)),w=n=>d(l({},"__esModule",{value:!0}),n);var J={};x(J,{analyzeDataset:()=>A,conversationsToExamples:()=>$,createFineTuneJob:()=>P,getFineTuneJob:()=>j,prepareTrainingData:()=>M,validateTrainingData:()=>D});module.exports=w(J);var g=require("@neural-tools/core"),s=f(require("fs/promises")),p=f(require("path"));async function M(n,t={}){await(0,g.requireFeature)("fine-tuning","Fine-tuning");let e=t.validationSplit||0,r=t.outputDir||"./fine-tune-data";await s.default.mkdir(r,{recursive:!0});let i=Math.floor(n.length*(1-e)),a=n.slice(0,i),o=e>0?n.slice(i):[],c=p.default.join(r,"training.jsonl"),h=a.map(m=>JSON.stringify(m)).join(`
|
|
2
|
+
`);await s.default.writeFile(c,h,"utf-8");let u;if(o.length>0){u=p.default.join(r,"validation.jsonl");let m=o.map(b=>JSON.stringify(b)).join(`
|
|
3
|
+
`);await s.default.writeFile(u,m,"utf-8")}return{trainingFile:c,validationFile:u}}function D(n){let t=[];return n.length===0?(t.push("No training examples provided"),{valid:!1,errors:t}):(n.forEach((e,r)=>{if(!e.messages||!Array.isArray(e.messages)){t.push(`Example ${r}: Missing or invalid messages array`);return}e.messages.length===0&&t.push(`Example ${r}: Messages array is empty`),e.messages.forEach((i,a)=>{(!i.role||!["system","user","assistant"].includes(i.role))&&t.push(`Example ${r}, Message ${a}: Invalid role`),(!i.content||typeof i.content!="string")&&t.push(`Example ${r}, Message ${a}: Missing or invalid content`)})}),{valid:t.length===0,errors:t})}async function P(n){await(0,g.requireFeature)("fine-tuning","Fine-tuning");try{await s.default.access(n.datasetPath)}catch{throw new Error(`Dataset file not found: ${n.datasetPath}`)}return{id:`ft-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,status:"pending",model:n.model,trainingFile:n.datasetPath,createdAt:Date.now()}}async function j(n){throw await(0,g.requireFeature)("fine-tuning","Fine-tuning"),new Error("Fine-tune job tracking coming soon")}function $(n){return n.map(t=>{let e=[];return t.system&&e.push({role:"system",content:t.system}),e.push(...t.messages),{messages:e}})}function A(n){let t={totalExamples:n.length,avgMessagesPerExample:0,avgTokensPerMessage:0,roleDistribution:{}},e=0,r=0;return n.forEach(i=>{e+=i.messages.length,i.messages.forEach(a=>{let o=a.content.split(/\s+/).length;r+=o,t.roleDistribution[a.role]=(t.roleDistribution[a.role]||0)+1})}),t.avgMessagesPerExample=e/n.length,t.avgTokensPerMessage=r/e,t}0&&(module.exports={analyzeDataset,conversationsToExamples,createFineTuneJob,getFineTuneJob,prepareTrainingData,validateTrainingData});
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
/**
 * @neural-tools/fine-tune — ES module bundle (dist/index.mjs).
 *
 * De-minified, documented build of the fine-tuning utilities; mirrors
 * the CommonJS bundle in dist/index.js.
 */
import { requireFeature } from "@neural-tools/core";
import fs from "fs/promises";
import path from "path";

/**
 * Split training examples into train/validation sets and write them to
 * disk as JSONL (one JSON object per line).
 *
 * @param {Array<{messages: Array<object>}>} examples - Training examples.
 * @param {{validationSplit?: number, outputDir?: string}} [options]
 *   validationSplit is a fraction in [0, 1) (default 0 = no validation
 *   file); outputDir defaults to "./fine-tune-data".
 * @returns {Promise<{trainingFile: string, validationFile: string|undefined}>}
 *   Paths of the written files; validationFile is undefined when no
 *   validation split was requested or the split produced no examples.
 */
async function prepareTrainingData(examples, options = {}) {
  await requireFeature("fine-tuning", "Fine-tuning"); // Pro-tier license gate
  const validationSplit = options.validationSplit || 0;
  const outputDir = options.outputDir || "./fine-tune-data";
  await fs.mkdir(outputDir, { recursive: true });

  const trainCount = Math.floor(examples.length * (1 - validationSplit));
  const trainingExamples = examples.slice(0, trainCount);
  const validationExamples = validationSplit > 0 ? examples.slice(trainCount) : [];

  const trainingFile = path.join(outputDir, "training.jsonl");
  await fs.writeFile(
    trainingFile,
    trainingExamples.map((example) => JSON.stringify(example)).join("\n"),
    "utf-8"
  );

  let validationFile;
  if (validationExamples.length > 0) {
    validationFile = path.join(outputDir, "validation.jsonl");
    await fs.writeFile(
      validationFile,
      validationExamples.map((example) => JSON.stringify(example)).join("\n"),
      "utf-8"
    );
  }
  return { trainingFile, validationFile };
}

/**
 * Validate the shape of a training dataset.
 *
 * Each example must have a non-empty `messages` array whose entries have
 * a role of "system" | "user" | "assistant" and a string `content`.
 *
 * @param {Array<object>} examples - Dataset to validate.
 * @returns {{valid: boolean, errors: string[]}} All problems found.
 */
function validateTrainingData(examples) {
  const errors = [];
  if (examples.length === 0) {
    errors.push("No training examples provided");
    return { valid: false, errors };
  }
  examples.forEach((example, exampleIndex) => {
    if (!example.messages || !Array.isArray(example.messages)) {
      errors.push(`Example ${exampleIndex}: Missing or invalid messages array`);
      return; // cannot inspect messages that do not exist
    }
    if (example.messages.length === 0) {
      errors.push(`Example ${exampleIndex}: Messages array is empty`);
    }
    example.messages.forEach((message, messageIndex) => {
      if (!message.role || !["system", "user", "assistant"].includes(message.role)) {
        errors.push(`Example ${exampleIndex}, Message ${messageIndex}: Invalid role`);
      }
      if (!message.content || typeof message.content !== "string") {
        errors.push(`Example ${exampleIndex}, Message ${messageIndex}: Missing or invalid content`);
      }
    });
  });
  return { valid: errors.length === 0, errors };
}

/**
 * Create a (locally tracked) fine-tune job descriptor after verifying the
 * dataset file exists.
 *
 * @param {{datasetPath: string, model: string}} config
 * @returns {Promise<object>} Job record with a generated id and
 *   status "pending".
 * @throws {Error} If the dataset file is not accessible.
 */
async function createFineTuneJob(config) {
  await requireFeature("fine-tuning", "Fine-tuning"); // Pro-tier license gate
  try {
    await fs.access(config.datasetPath);
  } catch {
    throw new Error(`Dataset file not found: ${config.datasetPath}`);
  }
  return {
    // FIX: slice(2, 11) replaces the deprecated String.prototype.substr(2, 9);
    // both take the same 9 characters of the base-36 fraction.
    id: `ft-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
    status: "pending",
    model: config.model,
    trainingFile: config.datasetPath,
    createdAt: Date.now(),
  };
}

/**
 * Look up a fine-tune job by id. Not yet implemented — always throws
 * after the license check.
 *
 * @param {string} jobId - Ignored for now.
 * @throws {Error} Always ("coming soon").
 */
async function getFineTuneJob(jobId) {
  await requireFeature("fine-tuning", "Fine-tuning");
  throw new Error("Fine-tune job tracking coming soon");
}

/**
 * Convert conversations ({system?, messages}) into training examples,
 * prepending the optional system prompt as a system-role message.
 *
 * @param {Array<{system?: string, messages: Array<object>}>} conversations
 * @returns {Array<{messages: Array<object>}>}
 */
function conversationsToExamples(conversations) {
  return conversations.map((conversation) => {
    const messages = [];
    if (conversation.system) {
      messages.push({ role: "system", content: conversation.system });
    }
    messages.push(...conversation.messages);
    return { messages };
  });
}

/**
 * Compute simple statistics over a dataset: example/message counts,
 * whitespace-token averages, and per-role message distribution.
 *
 * @param {Array<{messages: Array<{role: string, content: string}>}>} examples
 * @returns {{totalExamples: number, avgMessagesPerExample: number,
 *            avgTokensPerMessage: number, roleDistribution: Object<string, number>}}
 */
function analyzeDataset(examples) {
  const stats = {
    totalExamples: examples.length,
    avgMessagesPerExample: 0,
    avgTokensPerMessage: 0,
    roleDistribution: {},
  };
  let totalMessages = 0;
  let totalTokens = 0;
  for (const example of examples) {
    totalMessages += example.messages.length;
    for (const message of example.messages) {
      // "Tokens" here are whitespace-separated words, not model tokens.
      totalTokens += message.content.split(/\s+/).length;
      stats.roleDistribution[message.role] =
        (stats.roleDistribution[message.role] || 0) + 1;
    }
  }
  // FIX: guard the divisions so an empty dataset (or one with no
  // messages) reports 0 averages instead of NaN.
  if (examples.length > 0) {
    stats.avgMessagesPerExample = totalMessages / examples.length;
  }
  if (totalMessages > 0) {
    stats.avgTokensPerMessage = totalTokens / totalMessages;
  }
  return stats;
}

export {
  analyzeDataset,
  conversationsToExamples,
  createFineTuneJob,
  getFineTuneJob,
  prepareTrainingData,
  validateTrainingData,
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@neural-tools/fine-tune",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"description": "Fine-tuning utilities for Neural Tools",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
"url": "https://github.com/MacLeanLuke/neural-tools.git",
|
|
14
14
|
"directory": "packages/fine-tune"
|
|
15
15
|
},
|
|
16
|
-
"homepage": "https://neural-tools.com",
|
|
16
|
+
"homepage": "https://neural-tools.com/docs/fine-tune.html",
|
|
17
17
|
"bugs": {
|
|
18
18
|
"url": "https://github.com/MacLeanLuke/neural-tools/issues"
|
|
19
19
|
},
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"ai"
|
|
27
27
|
],
|
|
28
28
|
"dependencies": {
|
|
29
|
-
"@neural-tools/core": "0.1.
|
|
29
|
+
"@neural-tools/core": "0.1.6"
|
|
30
30
|
},
|
|
31
31
|
"devDependencies": {
|
|
32
32
|
"@types/node": "^20.11.5",
|
|
@@ -44,8 +44,8 @@
|
|
|
44
44
|
"dist"
|
|
45
45
|
],
|
|
46
46
|
"scripts": {
|
|
47
|
-
"build": "
|
|
48
|
-
"dev": "
|
|
47
|
+
"build": "tsup",
|
|
48
|
+
"dev": "tsup --watch",
|
|
49
49
|
"clean": "rm -rf dist",
|
|
50
50
|
"test": "echo 'Tests coming soon'"
|
|
51
51
|
}
|