kvat 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -0
- package/cli.js +46 -0
- package/index.js +198 -0
- package/package.json +47 -0
- package/postinstall.js +79 -0
package/README.md
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# kvat - KVCache Auto-Tuner
|
|
2
|
+
|
|
3
|
+
[npm package](https://www.npmjs.com/package/kvat)
|
|
4
|
+
[PyPI package](https://pypi.org/project/kvat/)
|
|
5
|
+
[License](https://github.com/Keyvanhardani/kvcache-autotune/blob/main/LICENSE)
|
|
6
|
+
|
|
7
|
+
**Automatic KV-Cache Optimization for HuggingFace Transformers**
|
|
8
|
+
|
|
9
|
+
Find the optimal cache strategy, attention backend, and configuration for your model and hardware.
|
|
10
|
+
|
|
11
|
+
## Requirements
|
|
12
|
+
|
|
13
|
+
- **Node.js** 14.0+
|
|
14
|
+
- **Python** 3.9+
|
|
15
|
+
- **PyTorch** 2.0+
|
|
16
|
+
- **Transformers** 4.35+
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
npm install kvat
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Then install the Python package:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install "kvat[full]"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## CLI Usage
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
# Optimize any HuggingFace model
|
|
34
|
+
kvat tune meta-llama/Llama-3.2-1B --profile chat-agent
|
|
35
|
+
|
|
36
|
+
# Quick test
|
|
37
|
+
kvat tune gpt2 --profile ci-micro -v
|
|
38
|
+
|
|
39
|
+
# Show system info
|
|
40
|
+
kvat info
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## JavaScript API
|
|
44
|
+
|
|
45
|
+
```javascript
|
|
46
|
+
const kvat = require('kvat');
|
|
47
|
+
|
|
48
|
+
// Check if kvat is installed
|
|
49
|
+
if (!kvat.isKvatInstalled()) {
|
|
50
|
+
await kvat.installKvat(); // Install Python package
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// Run tuning
|
|
54
|
+
const result = await kvat.tune('gpt2', {
|
|
55
|
+
profile: 'chat-agent',
|
|
56
|
+
outputDir: './results',
|
|
57
|
+
verbose: true
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
console.log('Results saved to:', result.outputDir);
|
|
61
|
+
|
|
62
|
+
// Get system info
|
|
63
|
+
const info = await kvat.info();
|
|
64
|
+
console.log(info);
|
|
65
|
+
|
|
66
|
+
// Run arbitrary command
|
|
67
|
+
const { stdout, stderr, code } = await kvat.run(['profiles']);
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## API Reference
|
|
71
|
+
|
|
72
|
+
### `isKvatInstalled()`
|
|
73
|
+
|
|
74
|
+
Check if the kvat Python package is installed.
|
|
75
|
+
|
|
76
|
+
Returns: `boolean`
|
|
77
|
+
|
|
78
|
+
### `installKvat(full = true)`
|
|
79
|
+
|
|
80
|
+
Install the kvat Python package.
|
|
81
|
+
|
|
82
|
+
- `full` (boolean): Install with full dependencies (default: true)
|
|
83
|
+
|
|
84
|
+
Returns: `Promise<void>`
|
|
85
|
+
|
|
86
|
+
### `tune(modelId, options)`
|
|
87
|
+
|
|
88
|
+
Run kvat tune command.
|
|
89
|
+
|
|
90
|
+
- `modelId` (string): HuggingFace model ID
|
|
91
|
+
- `options.profile` (string): Profile name (default: 'chat-agent')
|
|
92
|
+
- `options.device` (string): Device cuda/cpu/mps (default: 'cuda')
|
|
93
|
+
- `options.outputDir` (string): Output directory (default: './kvat_results')
|
|
94
|
+
- `options.verbose` (boolean): Verbose output (default: false)
|
|
95
|
+
|
|
96
|
+
Returns: `Promise<{success: boolean, outputDir: string, stdout: string, stderr: string}>`
|
|
97
|
+
|
|
98
|
+
### `info()`
|
|
99
|
+
|
|
100
|
+
Get system information.
|
|
101
|
+
|
|
102
|
+
Returns: `Promise<string>`
|
|
103
|
+
|
|
104
|
+
### `run(args)`
|
|
105
|
+
|
|
106
|
+
Run arbitrary kvat command.
|
|
107
|
+
|
|
108
|
+
- `args` (string[]): Command arguments
|
|
109
|
+
|
|
110
|
+
Returns: `Promise<{stdout: string, stderr: string, code: number}>`
|
|
111
|
+
|
|
112
|
+
## Available Profiles
|
|
113
|
+
|
|
114
|
+
| Profile | Context | Output | Focus |
|
|
115
|
+
|---------|---------|--------|-------|
|
|
116
|
+
| `chat-agent` | 2-8K | 64-256 | TTFT (latency) |
|
|
117
|
+
| `rag` | 8-32K | 256-512 | Balanced |
|
|
118
|
+
| `longform` | 4-8K | 1-2K | Throughput |
|
|
119
|
+
| `ci-micro` | 512 | 32 | Quick testing |
|
|
120
|
+
|
|
121
|
+
## Links
|
|
122
|
+
|
|
123
|
+
- **GitHub**: https://github.com/Keyvanhardani/kvcache-autotune
|
|
124
|
+
- **PyPI**: https://pypi.org/project/kvat/
|
|
125
|
+
- **Documentation**: https://github.com/Keyvanhardani/kvcache-autotune#readme
|
|
126
|
+
|
|
127
|
+
## License
|
|
128
|
+
|
|
129
|
+
Apache 2.0
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
<p align="center">
|
|
134
|
+
<a href="https://keyvan.ai"><strong>Keyvan.ai</strong></a> | <a href="https://www.linkedin.com/in/keyvanhardani">LinkedIn</a>
|
|
135
|
+
</p>
|
|
136
|
+
<p align="center">
|
|
137
|
+
Made in Germany with dedication for the HuggingFace Community
|
|
138
|
+
</p>
|
package/cli.js
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* KVCache Auto-Tuner CLI Wrapper
|
|
5
|
+
*
|
|
6
|
+
* This script forwards all arguments to the Python kvat CLI.
|
|
7
|
+
* Usage: kvat <command> [options]
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
const { spawn } = require('child_process');
|
|
11
|
+
const { getPythonCommand, isKvatInstalled } = require('./index');
|
|
12
|
+
|
|
13
|
+
/**
 * CLI entry point: forwards every command-line argument to `python -m kvat`.
 *
 * Exits with status 1 (after printing install instructions) when the kvat
 * Python package cannot be found, when the interpreter cannot be started, or
 * when the child process is terminated by a signal. Otherwise exits with the
 * child's own exit code.
 */
async function main() {
  const args = process.argv.slice(2);

  // Bail out early with instructions if the Python side is missing.
  if (!isKvatInstalled()) {
    console.error('Error: kvat Python package is not installed.');
    console.error('');
    console.error('Please install it first:');
    // Quoted so shells that glob on [] (e.g. zsh) accept the command verbatim.
    console.error(' pip install "kvat[full]"');
    console.error('');
    console.error('Or use the JavaScript API to install:');
    console.error(' const kvat = require("kvat");');
    console.error(' await kvat.installKvat();');
    process.exit(1);
  }

  const python = getPythonCommand();

  // Forward all arguments to Python kvat, sharing this process's stdio.
  const proc = spawn(python, ['-m', 'kvat', ...args], {
    stdio: 'inherit'
  });

  proc.on('close', (code, signal) => {
    // `code` is null when the child was killed by a signal; passing null to
    // process.exit() would report success, so map that case to a failure.
    if (code === null) {
      console.error(`kvat was terminated by signal ${signal}`);
      process.exit(1);
    }
    process.exit(code);
  });

  proc.on('error', (err) => {
    console.error('Failed to start kvat:', err.message);
    process.exit(1);
  });
}
|
|
45
|
+
|
|
46
|
+
main();
|
package/index.js
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* KVCache Auto-Tuner - JavaScript Wrapper
|
|
3
|
+
*
|
|
4
|
+
* This package provides a JavaScript wrapper for the kvat Python CLI.
|
|
5
|
+
* It requires Python 3.9+ and pip to be installed on the system.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const { spawn, execSync } = require('child_process');
|
|
9
|
+
const path = require('path');
|
|
10
|
+
|
|
11
|
+
/**
 * Report whether the kvat Python module can be run.
 *
 * Tries `python -m kvat --version` first and `python3 -m kvat --version`
 * second; the first probe that exits successfully wins.
 *
 * @returns {boolean} True if either interpreter can run kvat, false otherwise
 */
function isKvatInstalled() {
  const interpreters = ['python', 'python3'];

  return interpreters.some((interpreter) => {
    try {
      execSync(`${interpreter} -m kvat --version`, { stdio: 'pipe' });
      return true;
    } catch {
      // Probe failed (interpreter missing or kvat not importable): try the next one.
      return false;
    }
  });
}
|
|
28
|
+
|
|
29
|
+
/**
 * Get the Python command to use.
 *
 * Probes `python` and then `python3`, returning the first one that is a
 * Python 3.9+ interpreter. A plain `--version` check is not enough because
 * `python` may resolve to Python 2, which also exits successfully.
 *
 * @returns {string} 'python' or 'python3' ('python3' when neither probe succeeds)
 */
function getPythonCommand() {
  // Exits 0 only on a supported interpreter; the tuple comparison is valid on Python 2 as well.
  const versionProbe = '-c "import sys; sys.exit(0 if sys.version_info >= (3, 9) else 1)"';

  for (const candidate of ['python', 'python3']) {
    try {
      execSync(`${candidate} ${versionProbe}`, { stdio: 'pipe' });
      return candidate;
    } catch {
      // Missing or too old: fall through to the next candidate.
    }
  }

  // Same fallback as before: let the caller's spawn report the failure.
  return 'python3';
}
|
|
41
|
+
|
|
42
|
+
/**
 * Install the kvat Python package with pip.
 *
 * Runs `<python> -m pip install <package>` with the interpreter returned by
 * getPythonCommand(); pip's output goes straight to this process's stdio.
 *
 * @param {boolean} [full=true] - Install `kvat[full]` instead of plain `kvat`
 * @returns {Promise<void>} Resolves when pip exits with code 0. Rejects with
 *   an Error when pip exits with any other status or when the interpreter
 *   cannot be started.
 */
async function installKvat(full = true) {
  const python = getPythonCommand();
  const pkg = full ? 'kvat[full]' : 'kvat';

  return new Promise((resolve, reject) => {
    const proc = spawn(python, ['-m', 'pip', 'install', pkg], {
      stdio: 'inherit'
    });

    // Without this listener a spawn failure (e.g. interpreter not on PATH)
    // is emitted as an unhandled 'error' event and crashes the process.
    proc.on('error', (err) => {
      reject(new Error(`Failed to start ${python}: ${err.message}`));
    });

    proc.on('close', (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`Failed to install kvat (exit code ${code})`));
      }
    });
  });
}
|
|
65
|
+
|
|
66
|
+
/**
 * Run `kvat tune` for a model and collect its output.
 *
 * NOTE(review): `options.device` is part of the documented interface but is
 * not forwarded to the Python CLI (the original code read it and never used
 * it). The CLI flag name is not visible from this file; confirm it before
 * wiring the option through.
 *
 * @param {string} modelId - HuggingFace model ID
 * @param {Object} [options] - Tuning options
 * @param {string} [options.profile='chat-agent'] - Profile name
 * @param {string} [options.device='cuda'] - Device (cuda, cpu, mps); currently ignored
 * @param {string} [options.outputDir='./kvat_results'] - Output directory
 * @param {boolean} [options.verbose=false] - Pass `-v` and mirror child output live
 * @returns {Promise<{success: boolean, outputDir: string, stdout: string, stderr: string}>}
 *   Resolves when the command exits with code 0. Rejects with an Error
 *   (including captured stderr) on any other exit status, or when the
 *   interpreter cannot be started.
 */
async function tune(modelId, options = {}) {
  const {
    profile = 'chat-agent',
    outputDir = './kvat_results',
    verbose = false
  } = options;

  const python = getPythonCommand();
  const args = ['-m', 'kvat', 'tune', modelId, '--profile', profile, '-o', outputDir];

  if (verbose) {
    args.push('-v');
  }

  return new Promise((resolve, reject) => {
    let stdout = '';
    let stderr = '';

    const proc = spawn(python, args, {
      stdio: ['pipe', 'pipe', 'pipe']
    });

    // Buffer everything for the result; echo it live only in verbose mode.
    proc.stdout.on('data', (data) => {
      stdout += data.toString();
      if (verbose) {
        process.stdout.write(data);
      }
    });

    proc.stderr.on('data', (data) => {
      stderr += data.toString();
      if (verbose) {
        process.stderr.write(data);
      }
    });

    // Without this listener a spawn failure (e.g. interpreter not on PATH)
    // is emitted as an unhandled 'error' event and crashes the process.
    proc.on('error', (err) => {
      reject(new Error(`Failed to start kvat tune: ${err.message}`));
    });

    proc.on('close', (code) => {
      if (code === 0) {
        resolve({
          success: true,
          outputDir,
          stdout,
          stderr
        });
      } else {
        reject(new Error(`kvat tune failed (exit code ${code}): ${stderr}`));
      }
    });
  });
}
|
|
127
|
+
|
|
128
|
+
/**
 * Run `kvat info` and return what it prints.
 *
 * @returns {Promise<string>} Resolves with the command's captured stdout when
 *   it exits with code 0. Rejects with an Error (including captured stderr)
 *   on any other exit status, or when the interpreter cannot be started.
 */
async function info() {
  const python = getPythonCommand();

  return new Promise((resolve, reject) => {
    const proc = spawn(python, ['-m', 'kvat', 'info'], {
      stdio: ['pipe', 'pipe', 'pipe']
    });

    let stdout = '';
    let stderr = '';

    proc.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    proc.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    // Without this listener a spawn failure (e.g. interpreter not on PATH)
    // is emitted as an unhandled 'error' event and crashes the process.
    proc.on('error', (err) => {
      reject(new Error(`Failed to start kvat info: ${err.message}`));
    });

    proc.on('close', (code) => {
      if (code === 0) {
        resolve(stdout);
      } else {
        reject(new Error(`kvat info failed: ${stderr}`));
      }
    });
  });
}
|
|
160
|
+
|
|
161
|
+
/**
 * Run an arbitrary kvat command and capture its output.
 *
 * Unlike tune() and info(), a non-zero exit status does not reject: the exit
 * code is returned to the caller alongside the captured streams.
 *
 * @param {string[]} [args=[]] - Arguments placed after `python -m kvat`
 * @returns {Promise<{stdout: string, stderr: string, code: number}>}
 *   Resolves when the child closes. Rejects with an Error only when the
 *   interpreter cannot be started.
 */
async function run(args = []) {
  const python = getPythonCommand();

  return new Promise((resolve, reject) => {
    const proc = spawn(python, ['-m', 'kvat', ...args], {
      stdio: ['pipe', 'pipe', 'pipe']
    });

    let stdout = '';
    let stderr = '';

    proc.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    proc.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    // Without this listener a spawn failure (e.g. interpreter not on PATH)
    // is emitted as an unhandled 'error' event and crashes the process.
    proc.on('error', (err) => {
      reject(new Error(`Failed to start kvat: ${err.message}`));
    });

    proc.on('close', (code) => {
      resolve({ stdout, stderr, code });
    });
  });
}
|
|
190
|
+
|
|
191
|
+
module.exports = {
|
|
192
|
+
isKvatInstalled,
|
|
193
|
+
installKvat,
|
|
194
|
+
tune,
|
|
195
|
+
info,
|
|
196
|
+
run,
|
|
197
|
+
getPythonCommand
|
|
198
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "kvat",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Automatic KV-Cache Optimization for HuggingFace Transformers - Find the optimal cache strategy, attention backend, and configuration for your model and hardware.",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"kvat": "./cli.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"postinstall": "node postinstall.js",
|
|
11
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
12
|
+
},
|
|
13
|
+
"keywords": [
|
|
14
|
+
"transformers",
|
|
15
|
+
"llm",
|
|
16
|
+
"kv-cache",
|
|
17
|
+
"optimization",
|
|
18
|
+
"inference",
|
|
19
|
+
"huggingface",
|
|
20
|
+
"deep-learning",
|
|
21
|
+
"machine-learning",
|
|
22
|
+
"pytorch",
|
|
23
|
+
"flash-attention",
|
|
24
|
+
"ai",
|
|
25
|
+
"ml"
|
|
26
|
+
],
|
|
27
|
+
"author": "Keyvanhardani",
|
|
28
|
+
"license": "Apache-2.0",
|
|
29
|
+
"repository": {
|
|
30
|
+
"type": "git",
|
|
31
|
+
"url": "git+https://github.com/Keyvanhardani/kvcache-autotune.git"
|
|
32
|
+
},
|
|
33
|
+
"bugs": {
|
|
34
|
+
"url": "https://github.com/Keyvanhardani/kvcache-autotune/issues"
|
|
35
|
+
},
|
|
36
|
+
"homepage": "https://github.com/Keyvanhardani/kvcache-autotune#readme",
|
|
37
|
+
"engines": {
|
|
38
|
+
"node": ">=14.0.0"
|
|
39
|
+
},
|
|
40
|
+
"dependencies": {},
|
|
41
|
+
"files": [
|
|
42
|
+
"index.js",
|
|
43
|
+
"cli.js",
|
|
44
|
+
"postinstall.js",
|
|
45
|
+
"README.md"
|
|
46
|
+
]
|
|
47
|
+
}
|
package/postinstall.js
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* KVCache Auto-Tuner - Post-install Script
|
|
5
|
+
*
|
|
6
|
+
* Checks for Python and provides installation instructions.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
const { execSync } = require('child_process');
|
|
10
|
+
|
|
11
|
+
/**
 * Locate a Python interpreter on PATH.
 *
 * Runs `python --version`, then `python3 --version`, and reports the first
 * command that succeeds along with the trimmed text it wrote to stdout.
 *
 * @returns {{available: boolean, version?: string, command?: string}}
 *   `{ available: false }` when neither command succeeds.
 */
function checkPython() {
  for (const command of ['python', 'python3']) {
    try {
      const output = execSync(`${command} --version`, { stdio: 'pipe' });
      return { available: true, version: output.toString().trim(), command };
    } catch {
      // This candidate is unusable; move on to the next one.
    }
  }

  return { available: false };
}
|
|
24
|
+
|
|
25
|
+
/**
 * Report whether the given interpreter can run the kvat module.
 *
 * @param {string} pythonCmd - Interpreter command to probe with `-m kvat --version`
 * @returns {boolean} True when the probe command exits successfully
 */
function checkKvat(pythonCmd) {
  const probe = `${pythonCmd} -m kvat --version`;
  let installed = true;

  try {
    execSync(probe, { stdio: 'pipe' });
  } catch {
    // Any failure (missing interpreter, missing module) counts as "not installed".
    installed = false;
  }

  return installed;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Print the post-install status report.
 *
 * Reports whether a Python interpreter was found and whether it can run the
 * kvat module, and prints the matching next steps. This only writes to
 * stdout; it never installs anything.
 */
function main() {
  console.log('');
  console.log('KVCache Auto-Tuner (kvat) - npm package installed');
  console.log('='.repeat(50));
  console.log('');

  const python = checkPython();

  if (!python.available) {
    console.log('WARNING: Python is not installed or not in PATH.');
    console.log('');
    console.log('kvat requires Python 3.9+ to run. Please install Python first:');
    console.log(' - Windows: https://www.python.org/downloads/');
    console.log(' - macOS: brew install python3');
    console.log(' - Linux: sudo apt install python3 python3-pip');
    console.log('');
    return;
  }

  console.log(`Python found: ${python.version}`);

  const kvatInstalled = checkKvat(python.command);

  if (kvatInstalled) {
    console.log('kvat Python package: Installed');
    console.log('');
    console.log('You can now use kvat:');
    console.log(' kvat tune gpt2 --profile ci-micro');
    console.log(' kvat info');
  } else {
    console.log('kvat Python package: Not installed');
    console.log('');
    console.log('To install the Python package, run:');
    // Quoted so shells that glob on [] (e.g. zsh) accept the command verbatim.
    console.log(` ${python.command} -m pip install "kvat[full]"`);
    console.log('');
    console.log('Or use the JavaScript API:');
    console.log(' const kvat = require("kvat");');
    console.log(' await kvat.installKvat();');
  }

  console.log('');
  console.log('Documentation: https://github.com/Keyvanhardani/kvcache-autotune');
  console.log('');
}
|
|
78
|
+
|
|
79
|
+
main();
|