@neural-tools/fine-tune 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +80 -0
- package/dist/index.d.ts +69 -0
- package/dist/index.js +150 -0
- package/package.json +40 -0
package/LICENSE.md
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Neural Tools License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Luke Amy. All rights reserved.
|
|
4
|
+
|
|
5
|
+
## License Agreement
|
|
6
|
+
|
|
7
|
+
This software is provided under a dual-license model:
|
|
8
|
+
|
|
9
|
+
### 1. Free Tier License (MIT)
|
|
10
|
+
|
|
11
|
+
The following components are licensed under the MIT License:
|
|
12
|
+
|
|
13
|
+
- Basic MCP generation functionality
|
|
14
|
+
- Claude command generation
|
|
15
|
+
- Core utilities and types
|
|
16
|
+
- Basic templates
|
|
17
|
+
- Documentation and examples
|
|
18
|
+
|
|
19
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of the free tier components to use, copy, modify, merge, publish, and distribute, subject to the following conditions:
|
|
20
|
+
|
|
21
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
### 2. Pro/Enterprise License (Proprietary)
|
|
24
|
+
|
|
25
|
+
The following features require a valid Pro or Enterprise license:
|
|
26
|
+
|
|
27
|
+
**Pro Features:**
|
|
28
|
+
- Vector database integration
|
|
29
|
+
- Semantic caching
|
|
30
|
+
- Fine-tuning workflows
|
|
31
|
+
- Cloud deployment templates (AWS/GCP)
|
|
32
|
+
- Premium templates and examples
|
|
33
|
+
- GitHub automation features
|
|
34
|
+
|
|
35
|
+
**Enterprise Features:**
|
|
36
|
+
- White-label support
|
|
37
|
+
- Custom integrations
|
|
38
|
+
- Priority support
|
|
39
|
+
- SLA guarantees
|
|
40
|
+
- Team collaboration features
|
|
41
|
+
|
|
42
|
+
These features are proprietary and may not be used without a valid license key purchased from neural-tools.dev.
|
|
43
|
+
|
|
44
|
+
### License Terms
|
|
45
|
+
|
|
46
|
+
1. **Free Tier**: You may use the free tier features for any purpose, including commercial use, under the MIT License terms.
|
|
47
|
+
|
|
48
|
+
2. **Pro/Enterprise**: You must purchase a license to access Pro or Enterprise features. Each license is:
|
|
49
|
+
- Per-user for individual licenses
|
|
50
|
+
- Per-organization for team/enterprise licenses
|
|
51
|
+
- Non-transferable without written consent
|
|
52
|
+
- Subject to the terms at neural-tools.dev/terms
|
|
53
|
+
|
|
54
|
+
3. **Source Code**: This repository is private. You may not:
|
|
55
|
+
- Redistribute the source code
|
|
56
|
+
- Create derivative works for redistribution
|
|
57
|
+
- Reverse engineer Pro/Enterprise features
|
|
58
|
+
- Remove or circumvent license checks
|
|
59
|
+
|
|
60
|
+
4. **Support**: Support is provided based on your license tier:
|
|
61
|
+
- Free: Community support only
|
|
62
|
+
- Pro: Email support (48-hour response)
|
|
63
|
+
- Enterprise: Priority support with SLA
|
|
64
|
+
|
|
65
|
+
### Warranty Disclaimer
|
|
66
|
+
|
|
67
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
68
|
+
|
|
69
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
70
|
+
|
|
71
|
+
### Contact
|
|
72
|
+
|
|
73
|
+
For licensing inquiries:
|
|
74
|
+
- Email: licensing@neural-tools.dev
|
|
75
|
+
- Website: https://neural-tools.dev/pricing
|
|
76
|
+
- Support: support@neural-tools.dev
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
**Last Updated:** January 2025
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/** Configuration for a fine-tuning run. */
export interface FineTuneConfig {
    /** API provider that will host the fine-tune job. */
    provider: 'openai' | 'anthropic' | 'custom';
    /** Identifier of the base model to fine-tune. */
    model: string;
    /** Path to the training dataset file on disk. */
    datasetPath: string;
    /** Fraction of examples to hold out for validation (0-1). */
    validationSplit?: number;
    /** Number of training passes over the dataset. */
    epochs?: number;
    /** Optimizer learning rate. */
    learningRate?: number;
    /** Training batch size. */
    batchSize?: number;
}
/** A single chat-format training example (one conversation). */
export interface TrainingExample {
    messages: Array<{
        role: 'system' | 'user' | 'assistant';
        content: string;
    }>;
}
/** Status record for a submitted fine-tune job. */
export interface FineTuneJob {
    id: string;
    status: 'pending' | 'running' | 'completed' | 'failed';
    model: string;
    trainingFile?: string;
    validationFile?: string;
    /** Creation time, Unix epoch milliseconds. */
    createdAt: number;
    /** Completion time, Unix epoch milliseconds, once the job ends. */
    finishedAt?: number;
    /** Error message, if the job failed. */
    error?: string;
}
/**
 * Prepare training data for fine-tuning
 */
export declare function prepareTrainingData(examples: TrainingExample[], options?: {
    validationSplit?: number;
    outputDir?: string;
}): Promise<{
    trainingFile: string;
    validationFile?: string;
}>;
/**
 * Validate training data format
 */
export declare function validateTrainingData(examples: TrainingExample[]): {
    valid: boolean;
    errors: string[];
};
/**
 * Create a fine-tune job (placeholder - actual implementation would call provider APIs)
 */
export declare function createFineTuneJob(config: FineTuneConfig): Promise<FineTuneJob>;
/**
 * Get fine-tune job status (placeholder)
 */
export declare function getFineTuneJob(jobId: string): Promise<FineTuneJob>;
/**
 * Convert conversations to training examples
 */
export declare function conversationsToExamples(conversations: Array<{
    system?: string;
    messages: Array<{
        role: 'user' | 'assistant';
        content: string;
    }>;
}>): TrainingExample[];
/**
 * Calculate dataset statistics
 */
export declare function analyzeDataset(examples: TrainingExample[]): {
    totalExamples: number;
    avgMessagesPerExample: number;
    avgTokensPerMessage: number;
    roleDistribution: Record<string, number>;
};
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.prepareTrainingData = prepareTrainingData;
|
|
7
|
+
exports.validateTrainingData = validateTrainingData;
|
|
8
|
+
exports.createFineTuneJob = createFineTuneJob;
|
|
9
|
+
exports.getFineTuneJob = getFineTuneJob;
|
|
10
|
+
exports.conversationsToExamples = conversationsToExamples;
|
|
11
|
+
exports.analyzeDataset = analyzeDataset;
|
|
12
|
+
const core_1 = require("@neural-tools/core");
|
|
13
|
+
const promises_1 = __importDefault(require("fs/promises"));
|
|
14
|
+
const path_1 = __importDefault(require("path"));
|
|
15
|
+
/**
 * Prepare training data for fine-tuning.
 *
 * Writes `examples` as JSONL (one JSON object per line) to
 * `training.jsonl` under `outputDir`, optionally holding out the final
 * `validationSplit` fraction of examples in a separate `validation.jsonl`.
 *
 * Pro-tier feature: `requireFeature` rejects without a valid license.
 *
 * @param {TrainingExample[]} examples Chat-format training examples.
 * @param {{validationSplit?: number, outputDir?: string}} [options]
 * @returns {Promise<{trainingFile: string, validationFile?: string}>}
 *   Paths of the files written.
 * @throws {RangeError} If `validationSplit` is outside [0, 1).
 */
async function prepareTrainingData(examples, options = {}) {
    await (0, core_1.requireFeature)('fine-tuning', 'Fine-tuning');
    // `??` (not `||`) so an explicit 0 passed by the caller is honored as-is.
    const validationSplit = options.validationSplit ?? 0;
    // Reject nonsense splits up front: a split of 1 (or more) would silently
    // produce an EMPTY training file, and negative values misplace the cut.
    if (validationSplit < 0 || validationSplit >= 1) {
        throw new RangeError(`validationSplit must be in [0, 1), got ${validationSplit}`);
    }
    const outputDir = options.outputDir || './fine-tune-data';
    // Create output directory
    await promises_1.default.mkdir(outputDir, { recursive: true });
    // Split data: first (1 - validationSplit) fraction trains, remainder validates.
    const splitIndex = Math.floor(examples.length * (1 - validationSplit));
    const trainingExamples = examples.slice(0, splitIndex);
    const validationExamples = validationSplit > 0 ? examples.slice(splitIndex) : [];
    // Write training file
    const trainingFile = path_1.default.join(outputDir, 'training.jsonl');
    const trainingContent = trainingExamples
        .map(ex => JSON.stringify(ex))
        .join('\n');
    await promises_1.default.writeFile(trainingFile, trainingContent, 'utf-8');
    // Write validation file if needed
    let validationFile;
    if (validationExamples.length > 0) {
        validationFile = path_1.default.join(outputDir, 'validation.jsonl');
        const validationContent = validationExamples
            .map(ex => JSON.stringify(ex))
            .join('\n');
        await promises_1.default.writeFile(validationFile, validationContent, 'utf-8');
    }
    return { trainingFile, validationFile };
}
|
|
45
|
+
/**
 * Validate training data format.
 *
 * Checks that every example carries a non-empty `messages` array and that
 * each message has a recognized role and a string `content`. All problems
 * are collected rather than failing on the first one.
 *
 * @param {TrainingExample[]} examples Examples to check.
 * @returns {{valid: boolean, errors: string[]}} `valid` is true iff no
 *   errors were found; `errors` describes each problem by example index.
 */
function validateTrainingData(examples) {
    const errors = [];
    // An empty dataset is its own terminal error — nothing else to inspect.
    if (examples.length === 0) {
        errors.push('No training examples provided');
        return { valid: false, errors };
    }
    const knownRoles = ['system', 'user', 'assistant'];
    for (const [index, example] of examples.entries()) {
        if (!example.messages || !Array.isArray(example.messages)) {
            errors.push(`Example ${index}: Missing or invalid messages array`);
            continue; // no messages to iterate for this example
        }
        if (example.messages.length === 0) {
            errors.push(`Example ${index}: Messages array is empty`);
        }
        for (const [msgIndex, message] of example.messages.entries()) {
            if (!message.role || !knownRoles.includes(message.role)) {
                errors.push(`Example ${index}, Message ${msgIndex}: Invalid role`);
            }
            if (!message.content || typeof message.content !== 'string') {
                errors.push(`Example ${index}, Message ${msgIndex}: Missing or invalid content`);
            }
        }
    }
    return {
        valid: errors.length === 0,
        errors
    };
}
|
|
76
|
+
/**
 * Create a fine-tune job (placeholder - actual implementation would call provider APIs).
 *
 * Verifies the dataset file exists, then returns a synthetic `pending` job
 * record. No data is uploaded and no provider job is started yet.
 *
 * Pro-tier feature: `requireFeature` rejects without a valid license.
 *
 * @param {FineTuneConfig} config Job configuration; `datasetPath` must exist.
 * @returns {Promise<FineTuneJob>} The newly created (local) job record.
 * @throws {Error} If the dataset file does not exist.
 */
async function createFineTuneJob(config) {
    await (0, core_1.requireFeature)('fine-tuning', 'Fine-tuning');
    // Validate dataset exists — fail fast with a clear message rather than
    // at (future) upload time.
    try {
        await promises_1.default.access(config.datasetPath);
    }
    catch {
        throw new Error(`Dataset file not found: ${config.datasetPath}`);
    }
    // In production, this would:
    // 1. Upload training data to provider
    // 2. Start fine-tuning job
    // 3. Return job details
    const job = {
        // slice(2, 11) replaces the deprecated String.prototype.substr(2, 9);
        // same 9-char random suffix.
        id: `ft-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
        status: 'pending',
        model: config.model,
        trainingFile: config.datasetPath,
        createdAt: Date.now()
    };
    return job;
}
|
|
101
|
+
/**
 * Get fine-tune job status (placeholder).
 *
 * Performs the Pro-tier license check, then always throws: provider-side
 * job tracking is not implemented in this version.
 *
 * @param {string} jobId Identifier returned by createFineTuneJob (unused here).
 * @returns {Promise<FineTuneJob>} Never resolves in this version.
 * @throws {Error} Always, after the license check passes.
 */
async function getFineTuneJob(jobId) {
    await (0, core_1.requireFeature)('fine-tuning', 'Fine-tuning');
    // In production, this would query the provider API
    throw new Error('Fine-tune job tracking coming soon');
}
|
|
109
|
+
/**
 * Convert conversations to training examples.
 *
 * Prepends each conversation's optional system prompt to its message list
 * and wraps the result in the `TrainingExample` shape. Message objects are
 * reused (not copied), matching the original behavior.
 *
 * @param {Array<{system?: string, messages: Array<{role: string, content: string}>}>} conversations
 * @returns {TrainingExample[]} One example per conversation.
 */
function conversationsToExamples(conversations) {
    return conversations.map(({ system, messages }) => ({
        // A falsy/absent system prompt contributes no system message.
        messages: system
            ? [{ role: 'system', content: system }, ...messages]
            : [...messages],
    }));
}
|
|
125
|
+
/**
 * Calculate dataset statistics.
 *
 * Token counts are a rough whitespace-split estimate; a real implementation
 * would use the provider's tokenizer (e.g. tiktoken).
 *
 * @param {TrainingExample[]} examples Examples to summarize.
 * @returns {{totalExamples: number, avgMessagesPerExample: number,
 *   avgTokensPerMessage: number, roleDistribution: Record<string, number>}}
 *   Averages are 0 (not NaN) for an empty dataset.
 */
function analyzeDataset(examples) {
    const stats = {
        totalExamples: examples.length,
        avgMessagesPerExample: 0,
        avgTokensPerMessage: 0,
        roleDistribution: {}
    };
    let totalMessages = 0;
    let totalTokens = 0;
    for (const example of examples) {
        totalMessages += example.messages.length;
        for (const message of example.messages) {
            // Simple token estimation (real implementation would use tiktoken)
            totalTokens += message.content.split(/\s+/).length;
            stats.roleDistribution[message.role] =
                (stats.roleDistribution[message.role] || 0) + 1;
        }
    }
    // Guard the divisions: previously an empty dataset (or one with zero
    // messages) produced NaN averages.
    if (examples.length > 0) {
        stats.avgMessagesPerExample = totalMessages / examples.length;
    }
    if (totalMessages > 0) {
        stats.avgTokensPerMessage = totalTokens / totalMessages;
    }
    return stats;
}
|
package/package.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@neural-tools/fine-tune",
|
|
3
|
+
"version": "0.1.3",
|
|
4
|
+
"description": "Fine-tuning utilities for Neural Tools",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"license": "SEE LICENSE IN ../../LICENSE.md",
|
|
8
|
+
"publishConfig": {
|
|
9
|
+
"access": "public"
|
|
10
|
+
},
|
|
11
|
+
"repository": {
|
|
12
|
+
"type": "git",
|
|
13
|
+
"url": "https://github.com/MacLeanLuke/ai-toolkit.git",
|
|
14
|
+
"directory": "packages/fine-tune"
|
|
15
|
+
},
|
|
16
|
+
"dependencies": {
|
|
17
|
+
"@neural-tools/core": "0.1.3"
|
|
18
|
+
},
|
|
19
|
+
"devDependencies": {
|
|
20
|
+
"@types/node": "^20.11.5",
|
|
21
|
+
"typescript": "^5.3.3"
|
|
22
|
+
},
|
|
23
|
+
"peerDependencies": {
|
|
24
|
+
"openai": "^4.0.0"
|
|
25
|
+
},
|
|
26
|
+
"peerDependenciesMeta": {
|
|
27
|
+
"openai": {
|
|
28
|
+
"optional": true
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"files": [
|
|
32
|
+
"dist"
|
|
33
|
+
],
|
|
34
|
+
"scripts": {
|
|
35
|
+
"build": "tsc",
|
|
36
|
+
"dev": "tsc --watch",
|
|
37
|
+
"clean": "rm -rf dist",
|
|
38
|
+
"test": "echo 'Tests coming soon'"
|
|
39
|
+
}
|
|
40
|
+
}
|