@neural-tools/fine-tune 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,71 @@
1
+ interface FineTuneConfig {
2
+ provider: 'openai' | 'anthropic' | 'custom';
3
+ model: string;
4
+ datasetPath: string;
5
+ validationSplit?: number;
6
+ epochs?: number;
7
+ learningRate?: number;
8
+ batchSize?: number;
9
+ }
10
+ interface TrainingExample {
11
+ messages: Array<{
12
+ role: 'system' | 'user' | 'assistant';
13
+ content: string;
14
+ }>;
15
+ }
16
+ interface FineTuneJob {
17
+ id: string;
18
+ status: 'pending' | 'running' | 'completed' | 'failed';
19
+ model: string;
20
+ trainingFile?: string;
21
+ validationFile?: string;
22
+ createdAt: number;
23
+ finishedAt?: number;
24
+ error?: string;
25
+ }
26
+ /**
27
+ * Prepare training data for fine-tuning
28
+ */
29
+ declare function prepareTrainingData(examples: TrainingExample[], options?: {
30
+ validationSplit?: number;
31
+ outputDir?: string;
32
+ }): Promise<{
33
+ trainingFile: string;
34
+ validationFile?: string;
35
+ }>;
36
+ /**
37
+ * Validate training data format
38
+ */
39
+ declare function validateTrainingData(examples: TrainingExample[]): {
40
+ valid: boolean;
41
+ errors: string[];
42
+ };
43
+ /**
44
+ * Create a fine-tune job (placeholder - actual implementation would call provider APIs)
45
+ */
46
+ declare function createFineTuneJob(config: FineTuneConfig): Promise<FineTuneJob>;
47
+ /**
48
+ * Get fine-tune job status (placeholder)
49
+ */
50
+ declare function getFineTuneJob(jobId: string): Promise<FineTuneJob>;
51
+ /**
52
+ * Convert conversations to training examples
53
+ */
54
+ declare function conversationsToExamples(conversations: Array<{
55
+ system?: string;
56
+ messages: Array<{
57
+ role: 'user' | 'assistant';
58
+ content: string;
59
+ }>;
60
+ }>): TrainingExample[];
61
+ /**
62
+ * Calculate dataset statistics
63
+ */
64
+ declare function analyzeDataset(examples: TrainingExample[]): {
65
+ totalExamples: number;
66
+ avgMessagesPerExample: number;
67
+ avgTokensPerMessage: number;
68
+ roleDistribution: Record<string, number>;
69
+ };
70
+
71
+ export { type FineTuneConfig, type FineTuneJob, type TrainingExample, analyzeDataset, conversationsToExamples, createFineTuneJob, getFineTuneJob, prepareTrainingData, validateTrainingData };
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- export interface FineTuneConfig {
1
+ interface FineTuneConfig {
2
2
  provider: 'openai' | 'anthropic' | 'custom';
3
3
  model: string;
4
4
  datasetPath: string;
@@ -7,13 +7,13 @@ export interface FineTuneConfig {
7
7
  learningRate?: number;
8
8
  batchSize?: number;
9
9
  }
10
- export interface TrainingExample {
10
+ interface TrainingExample {
11
11
  messages: Array<{
12
12
  role: 'system' | 'user' | 'assistant';
13
13
  content: string;
14
14
  }>;
15
15
  }
16
- export interface FineTuneJob {
16
+ interface FineTuneJob {
17
17
  id: string;
18
18
  status: 'pending' | 'running' | 'completed' | 'failed';
19
19
  model: string;
@@ -26,7 +26,7 @@ export interface FineTuneJob {
26
26
  /**
27
27
  * Prepare training data for fine-tuning
28
28
  */
29
- export declare function prepareTrainingData(examples: TrainingExample[], options?: {
29
+ declare function prepareTrainingData(examples: TrainingExample[], options?: {
30
30
  validationSplit?: number;
31
31
  outputDir?: string;
32
32
  }): Promise<{
@@ -36,22 +36,22 @@ export declare function prepareTrainingData(examples: TrainingExample[], options
36
36
  /**
37
37
  * Validate training data format
38
38
  */
39
- export declare function validateTrainingData(examples: TrainingExample[]): {
39
+ declare function validateTrainingData(examples: TrainingExample[]): {
40
40
  valid: boolean;
41
41
  errors: string[];
42
42
  };
43
43
  /**
44
44
  * Create a fine-tune job (placeholder - actual implementation would call provider APIs)
45
45
  */
46
- export declare function createFineTuneJob(config: FineTuneConfig): Promise<FineTuneJob>;
46
+ declare function createFineTuneJob(config: FineTuneConfig): Promise<FineTuneJob>;
47
47
  /**
48
48
  * Get fine-tune job status (placeholder)
49
49
  */
50
- export declare function getFineTuneJob(jobId: string): Promise<FineTuneJob>;
50
+ declare function getFineTuneJob(jobId: string): Promise<FineTuneJob>;
51
51
  /**
52
52
  * Convert conversations to training examples
53
53
  */
54
- export declare function conversationsToExamples(conversations: Array<{
54
+ declare function conversationsToExamples(conversations: Array<{
55
55
  system?: string;
56
56
  messages: Array<{
57
57
  role: 'user' | 'assistant';
@@ -61,9 +61,11 @@ export declare function conversationsToExamples(conversations: Array<{
61
61
  /**
62
62
  * Calculate dataset statistics
63
63
  */
64
- export declare function analyzeDataset(examples: TrainingExample[]): {
64
+ declare function analyzeDataset(examples: TrainingExample[]): {
65
65
  totalExamples: number;
66
66
  avgMessagesPerExample: number;
67
67
  avgTokensPerMessage: number;
68
68
  roleDistribution: Record<string, number>;
69
69
  };
70
+
71
+ export { type FineTuneConfig, type FineTuneJob, type TrainingExample, analyzeDataset, conversationsToExamples, createFineTuneJob, getFineTuneJob, prepareTrainingData, validateTrainingData };
package/dist/index.js CHANGED
@@ -1,150 +1,3 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.prepareTrainingData = prepareTrainingData;
7
- exports.validateTrainingData = validateTrainingData;
8
- exports.createFineTuneJob = createFineTuneJob;
9
- exports.getFineTuneJob = getFineTuneJob;
10
- exports.conversationsToExamples = conversationsToExamples;
11
- exports.analyzeDataset = analyzeDataset;
12
- const core_1 = require("@neural-tools/core");
13
- const promises_1 = __importDefault(require("fs/promises"));
14
- const path_1 = __importDefault(require("path"));
15
- /**
16
- * Prepare training data for fine-tuning
17
- */
18
- async function prepareTrainingData(examples, options = {}) {
19
- await (0, core_1.requireFeature)('fine-tuning', 'Fine-tuning');
20
- const validationSplit = options.validationSplit || 0;
21
- const outputDir = options.outputDir || './fine-tune-data';
22
- // Create output directory
23
- await promises_1.default.mkdir(outputDir, { recursive: true });
24
- // Split data
25
- const splitIndex = Math.floor(examples.length * (1 - validationSplit));
26
- const trainingExamples = examples.slice(0, splitIndex);
27
- const validationExamples = validationSplit > 0 ? examples.slice(splitIndex) : [];
28
- // Write training file
29
- const trainingFile = path_1.default.join(outputDir, 'training.jsonl');
30
- const trainingContent = trainingExamples
31
- .map(ex => JSON.stringify(ex))
32
- .join('\n');
33
- await promises_1.default.writeFile(trainingFile, trainingContent, 'utf-8');
34
- // Write validation file if needed
35
- let validationFile;
36
- if (validationExamples.length > 0) {
37
- validationFile = path_1.default.join(outputDir, 'validation.jsonl');
38
- const validationContent = validationExamples
39
- .map(ex => JSON.stringify(ex))
40
- .join('\n');
41
- await promises_1.default.writeFile(validationFile, validationContent, 'utf-8');
42
- }
43
- return { trainingFile, validationFile };
44
- }
45
- /**
46
- * Validate training data format
47
- */
48
- function validateTrainingData(examples) {
49
- const errors = [];
50
- if (examples.length === 0) {
51
- errors.push('No training examples provided');
52
- return { valid: false, errors };
53
- }
54
- examples.forEach((example, index) => {
55
- if (!example.messages || !Array.isArray(example.messages)) {
56
- errors.push(`Example ${index}: Missing or invalid messages array`);
57
- return;
58
- }
59
- if (example.messages.length === 0) {
60
- errors.push(`Example ${index}: Messages array is empty`);
61
- }
62
- example.messages.forEach((message, msgIndex) => {
63
- if (!message.role || !['system', 'user', 'assistant'].includes(message.role)) {
64
- errors.push(`Example ${index}, Message ${msgIndex}: Invalid role`);
65
- }
66
- if (!message.content || typeof message.content !== 'string') {
67
- errors.push(`Example ${index}, Message ${msgIndex}: Missing or invalid content`);
68
- }
69
- });
70
- });
71
- return {
72
- valid: errors.length === 0,
73
- errors
74
- };
75
- }
76
- /**
77
- * Create a fine-tune job (placeholder - actual implementation would call provider APIs)
78
- */
79
- async function createFineTuneJob(config) {
80
- await (0, core_1.requireFeature)('fine-tuning', 'Fine-tuning');
81
- // Validate dataset exists
82
- try {
83
- await promises_1.default.access(config.datasetPath);
84
- }
85
- catch {
86
- throw new Error(`Dataset file not found: ${config.datasetPath}`);
87
- }
88
- // In production, this would:
89
- // 1. Upload training data to provider
90
- // 2. Start fine-tuning job
91
- // 3. Return job details
92
- const job = {
93
- id: `ft-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
94
- status: 'pending',
95
- model: config.model,
96
- trainingFile: config.datasetPath,
97
- createdAt: Date.now()
98
- };
99
- return job;
100
- }
101
- /**
102
- * Get fine-tune job status (placeholder)
103
- */
104
- async function getFineTuneJob(jobId) {
105
- await (0, core_1.requireFeature)('fine-tuning', 'Fine-tuning');
106
- // In production, this would query the provider API
107
- throw new Error('Fine-tune job tracking coming soon');
108
- }
109
- /**
110
- * Convert conversations to training examples
111
- */
112
- function conversationsToExamples(conversations) {
113
- return conversations.map(conv => {
114
- const messages = [];
115
- if (conv.system) {
116
- messages.push({
117
- role: 'system',
118
- content: conv.system
119
- });
120
- }
121
- messages.push(...conv.messages);
122
- return { messages };
123
- });
124
- }
125
- /**
126
- * Calculate dataset statistics
127
- */
128
- function analyzeDataset(examples) {
129
- const stats = {
130
- totalExamples: examples.length,
131
- avgMessagesPerExample: 0,
132
- avgTokensPerMessage: 0,
133
- roleDistribution: {}
134
- };
135
- let totalMessages = 0;
136
- let totalTokens = 0;
137
- examples.forEach(example => {
138
- totalMessages += example.messages.length;
139
- example.messages.forEach(message => {
140
- // Simple token estimation (real implementation would use tiktoken)
141
- const tokens = message.content.split(/\s+/).length;
142
- totalTokens += tokens;
143
- stats.roleDistribution[message.role] =
144
- (stats.roleDistribution[message.role] || 0) + 1;
145
- });
146
- });
147
- stats.avgMessagesPerExample = totalMessages / examples.length;
148
- stats.avgTokensPerMessage = totalTokens / totalMessages;
149
- return stats;
150
- }
1
+ "use strict";var v=Object.create;var l=Object.defineProperty;var E=Object.getOwnPropertyDescriptor;var y=Object.getOwnPropertyNames;var F=Object.getPrototypeOf,T=Object.prototype.hasOwnProperty;var x=(n,t)=>{for(var e in t)l(n,e,{get:t[e],enumerable:!0})},d=(n,t,e,r)=>{if(t&&typeof t=="object"||typeof t=="function")for(let i of y(t))!T.call(n,i)&&i!==e&&l(n,i,{get:()=>t[i],enumerable:!(r=E(t,i))||r.enumerable});return n};var f=(n,t,e)=>(e=n!=null?v(F(n)):{},d(t||!n||!n.__esModule?l(e,"default",{value:n,enumerable:!0}):e,n)),w=n=>d(l({},"__esModule",{value:!0}),n);var J={};x(J,{analyzeDataset:()=>A,conversationsToExamples:()=>$,createFineTuneJob:()=>P,getFineTuneJob:()=>j,prepareTrainingData:()=>M,validateTrainingData:()=>D});module.exports=w(J);var g=require("@neural-tools/core"),s=f(require("fs/promises")),p=f(require("path"));async function M(n,t={}){await(0,g.requireFeature)("fine-tuning","Fine-tuning");let e=t.validationSplit||0,r=t.outputDir||"./fine-tune-data";await s.default.mkdir(r,{recursive:!0});let i=Math.floor(n.length*(1-e)),a=n.slice(0,i),o=e>0?n.slice(i):[],c=p.default.join(r,"training.jsonl"),h=a.map(m=>JSON.stringify(m)).join(`
2
+ `);await s.default.writeFile(c,h,"utf-8");let u;if(o.length>0){u=p.default.join(r,"validation.jsonl");let m=o.map(b=>JSON.stringify(b)).join(`
3
+ `);await s.default.writeFile(u,m,"utf-8")}return{trainingFile:c,validationFile:u}}function D(n){let t=[];return n.length===0?(t.push("No training examples provided"),{valid:!1,errors:t}):(n.forEach((e,r)=>{if(!e.messages||!Array.isArray(e.messages)){t.push(`Example ${r}: Missing or invalid messages array`);return}e.messages.length===0&&t.push(`Example ${r}: Messages array is empty`),e.messages.forEach((i,a)=>{(!i.role||!["system","user","assistant"].includes(i.role))&&t.push(`Example ${r}, Message ${a}: Invalid role`),(!i.content||typeof i.content!="string")&&t.push(`Example ${r}, Message ${a}: Missing or invalid content`)})}),{valid:t.length===0,errors:t})}async function P(n){await(0,g.requireFeature)("fine-tuning","Fine-tuning");try{await s.default.access(n.datasetPath)}catch{throw new Error(`Dataset file not found: ${n.datasetPath}`)}return{id:`ft-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,status:"pending",model:n.model,trainingFile:n.datasetPath,createdAt:Date.now()}}async function j(n){throw await(0,g.requireFeature)("fine-tuning","Fine-tuning"),new Error("Fine-tune job tracking coming soon")}function $(n){return n.map(t=>{let e=[];return t.system&&e.push({role:"system",content:t.system}),e.push(...t.messages),{messages:e}})}function A(n){let t={totalExamples:n.length,avgMessagesPerExample:0,avgTokensPerMessage:0,roleDistribution:{}},e=0,r=0;return n.forEach(i=>{e+=i.messages.length,i.messages.forEach(a=>{let o=a.content.split(/\s+/).length;r+=o,t.roleDistribution[a.role]=(t.roleDistribution[a.role]||0)+1})}),t.avgMessagesPerExample=e/n.length,t.avgTokensPerMessage=r/e,t}0&&(module.exports={analyzeDataset,conversationsToExamples,createFineTuneJob,getFineTuneJob,prepareTrainingData,validateTrainingData});
package/dist/index.mjs ADDED
@@ -0,0 +1,3 @@
1
+ import{requireFeature as u}from"@neural-tools/core";import o from"fs/promises";import p from"path";async function v(n,t={}){await u("fine-tuning","Fine-tuning");let e=t.validationSplit||0,i=t.outputDir||"./fine-tune-data";await o.mkdir(i,{recursive:!0});let r=Math.floor(n.length*(1-e)),a=n.slice(0,r),s=e>0?n.slice(r):[],m=p.join(i,"training.jsonl"),c=a.map(g=>JSON.stringify(g)).join(`
2
+ `);await o.writeFile(m,c,"utf-8");let l;if(s.length>0){l=p.join(i,"validation.jsonl");let g=s.map(d=>JSON.stringify(d)).join(`
3
+ `);await o.writeFile(l,g,"utf-8")}return{trainingFile:m,validationFile:l}}function E(n){let t=[];return n.length===0?(t.push("No training examples provided"),{valid:!1,errors:t}):(n.forEach((e,i)=>{if(!e.messages||!Array.isArray(e.messages)){t.push(`Example ${i}: Missing or invalid messages array`);return}e.messages.length===0&&t.push(`Example ${i}: Messages array is empty`),e.messages.forEach((r,a)=>{(!r.role||!["system","user","assistant"].includes(r.role))&&t.push(`Example ${i}, Message ${a}: Invalid role`),(!r.content||typeof r.content!="string")&&t.push(`Example ${i}, Message ${a}: Missing or invalid content`)})}),{valid:t.length===0,errors:t})}async function y(n){await u("fine-tuning","Fine-tuning");try{await o.access(n.datasetPath)}catch{throw new Error(`Dataset file not found: ${n.datasetPath}`)}return{id:`ft-${Date.now()}-${Math.random().toString(36).substr(2,9)}`,status:"pending",model:n.model,trainingFile:n.datasetPath,createdAt:Date.now()}}async function F(n){throw await u("fine-tuning","Fine-tuning"),new Error("Fine-tune job tracking coming soon")}function T(n){return n.map(t=>{let e=[];return t.system&&e.push({role:"system",content:t.system}),e.push(...t.messages),{messages:e}})}function x(n){let t={totalExamples:n.length,avgMessagesPerExample:0,avgTokensPerMessage:0,roleDistribution:{}},e=0,i=0;return n.forEach(r=>{e+=r.messages.length,r.messages.forEach(a=>{let s=a.content.split(/\s+/).length;i+=s,t.roleDistribution[a.role]=(t.roleDistribution[a.role]||0)+1})}),t.avgMessagesPerExample=e/n.length,t.avgTokensPerMessage=i/e,t}export{x as analyzeDataset,T as conversationsToExamples,y as createFineTuneJob,F as getFineTuneJob,v as prepareTrainingData,E as validateTrainingData};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@neural-tools/fine-tune",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "Fine-tuning utilities for Neural Tools",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -10,11 +10,23 @@
10
10
  },
11
11
  "repository": {
12
12
  "type": "git",
13
- "url": "https://github.com/MacLeanLuke/ai-toolkit.git",
13
+ "url": "https://github.com/MacLeanLuke/neural-tools.git",
14
14
  "directory": "packages/fine-tune"
15
15
  },
16
+ "homepage": "https://neural-tools.com/docs/fine-tune.html",
17
+ "bugs": {
18
+ "url": "https://github.com/MacLeanLuke/neural-tools/issues"
19
+ },
20
+ "keywords": [
21
+ "fine-tuning",
22
+ "openai",
23
+ "llm",
24
+ "neural-tools",
25
+ "machine-learning",
26
+ "ai"
27
+ ],
16
28
  "dependencies": {
17
- "@neural-tools/core": "0.1.3"
29
+ "@neural-tools/core": "0.1.5"
18
30
  },
19
31
  "devDependencies": {
20
32
  "@types/node": "^20.11.5",
@@ -32,8 +44,8 @@
32
44
  "dist"
33
45
  ],
34
46
  "scripts": {
35
- "build": "tsc",
36
- "dev": "tsc --watch",
47
+ "build": "tsup",
48
+ "dev": "tsup --watch",
37
49
  "clean": "rm -rf dist",
38
50
  "test": "echo 'Tests coming soon'"
39
51
  }