kvat 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,138 @@
1
+ # kvat - KVCache Auto-Tuner
2
+
3
+ [![npm version](https://img.shields.io/npm/v/kvat.svg)](https://www.npmjs.com/package/kvat)
4
+ [![PyPI](https://img.shields.io/pypi/v/kvat.svg)](https://pypi.org/project/kvat/)
5
+ [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/Keyvanhardani/kvcache-autotune/blob/main/LICENSE)
6
+
7
+ **Automatic KV-Cache Optimization for HuggingFace Transformers**
8
+
9
+ Find the optimal cache strategy, attention backend, and configuration for your model and hardware.
10
+
11
+ ## Requirements
12
+
13
+ - **Node.js** 14.0+
14
+ - **Python** 3.9+
15
+ - **PyTorch** 2.0+
16
+ - **Transformers** 4.35+
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ npm install kvat
22
+ ```
23
+
24
+ Then install the Python package:
25
+
26
+ ```bash
27
+ pip install "kvat[full]"
28
+ ```
29
+
30
+ ## CLI Usage
31
+
32
+ ```bash
33
+ # Optimize any HuggingFace model
34
+ kvat tune meta-llama/Llama-3.2-1B --profile chat-agent
35
+
36
+ # Quick test
37
+ kvat tune gpt2 --profile ci-micro -v
38
+
39
+ # Show system info
40
+ kvat info
41
+ ```
42
+
43
+ ## JavaScript API
44
+
45
+ ```javascript
46
+ const kvat = require('kvat');
47
+
48
+ // Check if kvat is installed (run this inside an async function: CommonJS scripts have no top-level await)
49
+ if (!kvat.isKvatInstalled()) {
50
+ await kvat.installKvat(); // Install Python package
51
+ }
52
+
53
+ // Run tuning
54
+ const result = await kvat.tune('gpt2', {
55
+ profile: 'chat-agent',
56
+ outputDir: './results',
57
+ verbose: true
58
+ });
59
+
60
+ console.log('Results saved to:', result.outputDir);
61
+
62
+ // Get system info
63
+ const info = await kvat.info();
64
+ console.log(info);
65
+
66
+ // Run arbitrary command
67
+ const { stdout, stderr, code } = await kvat.run(['profiles']);
68
+ ```
69
+
70
+ ## API Reference
71
+
72
+ ### `isKvatInstalled()`
73
+
74
+ Check if the kvat Python package is installed.
75
+
76
+ Returns: `boolean`
77
+
78
+ ### `installKvat(full = true)`
79
+
80
+ Install the kvat Python package.
81
+
82
+ - `full` (boolean): Install with full dependencies (default: true)
83
+
84
+ Returns: `Promise<void>`
85
+
86
+ ### `tune(modelId, options)`
87
+
88
+ Run the `kvat tune` command.
89
+
90
+ - `modelId` (string): HuggingFace model ID
91
+ - `options.profile` (string): Profile name (default: 'chat-agent')
92
+ - `options.device` (string): Device cuda/cpu/mps (default: 'cuda'). Note: this option is accepted but is not currently forwarded to the Python CLI.
93
+ - `options.outputDir` (string): Output directory (default: './kvat_results')
94
+ - `options.verbose` (boolean): Verbose output (default: false)
95
+
96
+ Returns: `Promise<{success: boolean, outputDir: string, stdout: string, stderr: string}>`
97
+
98
+ ### `info()`
99
+
100
+ Get system information.
101
+
102
+ Returns: `Promise<string>`
103
+
104
+ ### `run(args)`
105
+
106
+ Run an arbitrary kvat command.
107
+
108
+ - `args` (string[]): Command arguments
109
+
110
+ Returns: `Promise<{stdout: string, stderr: string, code: number}>`
111
+
112
+ ## Available Profiles
113
+
114
+ | Profile | Context | Output | Focus |
115
+ |---------|---------|--------|-------|
116
+ | `chat-agent` | 2-8K | 64-256 | TTFT (latency) |
117
+ | `rag` | 8-32K | 256-512 | Balanced |
118
+ | `longform` | 4-8K | 1-2K | Throughput |
119
+ | `ci-micro` | 512 | 32 | Quick testing |
120
+
121
+ ## Links
122
+
123
+ - **GitHub**: https://github.com/Keyvanhardani/kvcache-autotune
124
+ - **PyPI**: https://pypi.org/project/kvat/
125
+ - **Documentation**: https://github.com/Keyvanhardani/kvcache-autotune#readme
126
+
127
+ ## License
128
+
129
+ Apache 2.0
130
+
131
+ ---
132
+
133
+ <p align="center">
134
+ <a href="https://keyvan.ai"><strong>Keyvan.ai</strong></a> | <a href="https://www.linkedin.com/in/keyvanhardani">LinkedIn</a>
135
+ </p>
136
+ <p align="center">
137
+ Made in Germany with dedication for the HuggingFace Community
138
+ </p>
package/cli.js ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * KVCache Auto-Tuner CLI Wrapper
5
+ *
6
+ * This script forwards all arguments to the Python kvat CLI.
7
+ * Usage: kvat <command> [options]
8
+ */
9
+
10
+ const { spawn } = require('child_process');
11
+ const { getPythonCommand, isKvatInstalled } = require('./index');
12
+
13
/**
 * Entry point of the `kvat` command-line wrapper.
 *
 * Verifies that the kvat Python package can be executed, then spawns
 * `<python> -m kvat` with every argument passed to this script. The child
 * shares this process's stdio, and its exit status becomes ours.
 */
async function main() {
  const args = process.argv.slice(2);

  // Check if kvat is installed
  if (!isKvatInstalled()) {
    console.error('Error: kvat Python package is not installed.');
    console.error('');
    console.error('Please install it first:');
    console.error(' pip install kvat[full]');
    console.error('');
    console.error('Or use the JavaScript API to install:');
    console.error(' const kvat = require("kvat");');
    console.error(' await kvat.installKvat();');
    process.exit(1);
  }

  const python = getPythonCommand();

  // Forward all arguments to Python kvat
  const proc = spawn(python, ['-m', 'kvat', ...args], {
    stdio: 'inherit'
  });

  proc.on('close', (code) => {
    // `code` is null when the child was terminated by a signal. Handing null
    // to process.exit() would report success, so treat it as a failure.
    process.exit(code === null ? 1 : code);
  });

  proc.on('error', (err) => {
    console.error('Failed to start kvat:', err.message);
    process.exit(1);
  });
}
45
+
46
+ main();
package/index.js ADDED
@@ -0,0 +1,198 @@
1
+ /**
2
+ * KVCache Auto-Tuner - JavaScript Wrapper
3
+ *
4
+ * This package provides a JavaScript wrapper for the kvat Python CLI.
5
+ * It requires Python 3.9+ and pip to be installed on the system.
6
+ */
7
+
8
+ const { spawn, execSync } = require('child_process');
9
+ const path = require('path');
10
+
11
/**
 * Report whether the kvat Python module can be executed.
 *
 * Runs `<interpreter> -m kvat --version`, trying `python` first and `python3`
 * second, and stops at the first command that completes without an error.
 *
 * @returns {boolean} True if either interpreter ran the command successfully
 */
function isKvatInstalled() {
  const interpreters = ['python', 'python3'];

  return interpreters.some((interpreter) => {
    try {
      execSync(`${interpreter} -m kvat --version`, { stdio: 'pipe' });
      return true;
    } catch {
      // This interpreter is missing or cannot run kvat; try the next one.
      return false;
    }
  });
}
28
+
29
/**
 * Choose the interpreter name used to launch Python.
 *
 * `python` is selected when `python --version` runs without an error;
 * otherwise `python3` is returned without being probed.
 *
 * @returns {string} 'python' or 'python3'
 */
function getPythonCommand() {
  let command = 'python3';

  try {
    execSync('python --version', { stdio: 'pipe' });
    command = 'python';
  } catch {
    // `python` could not be run; keep the `python3` fallback.
  }

  return command;
}
41
+
42
/**
 * Install the kvat Python package with pip.
 *
 * Spawns `<python> -m pip install <package>` with inherited stdio, so pip's
 * output goes straight to the caller's terminal.
 *
 * @param {boolean} full - Install `kvat[full]` instead of plain `kvat`
 * @returns {Promise<void>} Resolves when pip exits with code 0; rejects with
 *   an Error if the process cannot be started or exits with another status.
 */
async function installKvat(full = true) {
  const python = getPythonCommand();
  const pkg = full ? 'kvat[full]' : 'kvat';

  return new Promise((resolve, reject) => {
    const proc = spawn(python, ['-m', 'pip', 'install', pkg], {
      stdio: 'inherit'
    });

    // Without this listener a missing interpreter raises an unhandled
    // 'error' event instead of rejecting the promise.
    proc.on('error', (err) => {
      reject(new Error(`Failed to start ${python}: ${err.message}`));
    });

    proc.on('close', (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`Failed to install kvat (exit code ${code})`));
      }
    });
  });
}
65
+
66
/**
 * Run the `kvat tune` command and collect its output.
 *
 * @param {string} modelId - HuggingFace model ID
 * @param {Object} options - Tuning options
 * @param {string} [options.profile='chat-agent'] - Profile name
 * @param {string} [options.device='cuda'] - Device (cuda, cpu, mps)
 * @param {string} [options.outputDir='./kvat_results'] - Output directory
 * @param {boolean} [options.verbose=false] - Pass `-v` and echo the child's
 *   output to this process while it runs
 * @returns {Promise<{success: boolean, outputDir: string, stdout: string, stderr: string}>}
 *   Resolves when the command exits with code 0; rejects with an Error if the
 *   process cannot be started or exits with any other status.
 */
async function tune(modelId, options = {}) {
  const {
    profile = 'chat-agent',
    // NOTE(review): `device` is accepted but never added to the command line
    // below. Confirm the Python CLI's flag name before forwarding it.
    device = 'cuda',
    outputDir = './kvat_results',
    verbose = false
  } = options;

  const python = getPythonCommand();
  const args = ['-m', 'kvat', 'tune', modelId, '--profile', profile, '-o', outputDir];

  if (verbose) {
    args.push('-v');
  }

  return new Promise((resolve, reject) => {
    let stdout = '';
    let stderr = '';

    const proc = spawn(python, args, {
      stdio: ['pipe', 'pipe', 'pipe']
    });

    // Without this listener a missing interpreter raises an unhandled
    // 'error' event instead of rejecting the promise.
    proc.on('error', (err) => {
      reject(new Error(`Failed to start kvat tune: ${err.message}`));
    });

    proc.stdout.on('data', (data) => {
      stdout += data.toString();
      if (verbose) {
        process.stdout.write(data);
      }
    });

    proc.stderr.on('data', (data) => {
      stderr += data.toString();
      if (verbose) {
        process.stderr.write(data);
      }
    });

    proc.on('close', (code) => {
      if (code === 0) {
        resolve({
          success: true,
          outputDir,
          stdout,
          stderr
        });
      } else {
        reject(new Error(`kvat tune failed (exit code ${code}): ${stderr}`));
      }
    });
  });
}
127
+
128
/**
 * Run the `kvat info` command and return what it prints.
 *
 * @returns {Promise<string>} Resolves with the command's stdout when it exits
 *   with code 0; rejects with an Error if the process cannot be started or
 *   exits with any other status.
 */
async function info() {
  const python = getPythonCommand();

  return new Promise((resolve, reject) => {
    const proc = spawn(python, ['-m', 'kvat', 'info'], {
      stdio: ['pipe', 'pipe', 'pipe']
    });

    let stdout = '';
    let stderr = '';

    // Without this listener a missing interpreter raises an unhandled
    // 'error' event instead of rejecting the promise.
    proc.on('error', (err) => {
      reject(new Error(`Failed to start kvat info: ${err.message}`));
    });

    proc.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    proc.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    proc.on('close', (code) => {
      if (code === 0) {
        resolve(stdout);
      } else {
        reject(new Error(`kvat info failed: ${stderr}`));
      }
    });
  });
}
160
+
161
/**
 * Run an arbitrary kvat command and capture its output.
 *
 * The promise resolves for every exit status, so callers must inspect `code`
 * themselves. `code` is null if the child was terminated by a signal.
 *
 * @param {string[]} [args=[]] - Command arguments appended after `-m kvat`
 * @returns {Promise<{stdout: string, stderr: string, code: number}>}
 *   Rejects with an Error only when the process cannot be started.
 */
async function run(args = []) {
  const python = getPythonCommand();

  return new Promise((resolve, reject) => {
    const proc = spawn(python, ['-m', 'kvat', ...args], {
      stdio: ['pipe', 'pipe', 'pipe']
    });

    let stdout = '';
    let stderr = '';

    // Without this listener a missing interpreter raises an unhandled
    // 'error' event and the promise is never settled through `reject`.
    proc.on('error', (err) => {
      reject(new Error(`Failed to start kvat: ${err.message}`));
    });

    proc.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    proc.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    proc.on('close', (code) => {
      resolve({ stdout, stderr, code });
    });
  });
}
190
+
191
+ module.exports = {
192
+ isKvatInstalled,
193
+ installKvat,
194
+ tune,
195
+ info,
196
+ run,
197
+ getPythonCommand
198
+ };
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "kvat",
3
+ "version": "0.1.0",
4
+ "description": "Automatic KV-Cache Optimization for HuggingFace Transformers - Find the optimal cache strategy, attention backend, and configuration for your model and hardware.",
5
+ "main": "index.js",
6
+ "bin": {
7
+ "kvat": "./cli.js"
8
+ },
9
+ "scripts": {
10
+ "postinstall": "node postinstall.js",
11
+ "test": "echo \"Error: no test specified\" && exit 1"
12
+ },
13
+ "keywords": [
14
+ "transformers",
15
+ "llm",
16
+ "kv-cache",
17
+ "optimization",
18
+ "inference",
19
+ "huggingface",
20
+ "deep-learning",
21
+ "machine-learning",
22
+ "pytorch",
23
+ "flash-attention",
24
+ "ai",
25
+ "ml"
26
+ ],
27
+ "author": "Keyvanhardani",
28
+ "license": "Apache-2.0",
29
+ "repository": {
30
+ "type": "git",
31
+ "url": "git+https://github.com/Keyvanhardani/kvcache-autotune.git"
32
+ },
33
+ "bugs": {
34
+ "url": "https://github.com/Keyvanhardani/kvcache-autotune/issues"
35
+ },
36
+ "homepage": "https://github.com/Keyvanhardani/kvcache-autotune#readme",
37
+ "engines": {
38
+ "node": ">=14.0.0"
39
+ },
40
+ "dependencies": {},
41
+ "files": [
42
+ "index.js",
43
+ "cli.js",
44
+ "postinstall.js",
45
+ "README.md"
46
+ ]
47
+ }
package/postinstall.js ADDED
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * KVCache Auto-Tuner - Post-install Script
5
+ *
6
+ * Checks for Python and provides installation instructions.
7
+ */
8
+
9
+ const { execSync } = require('child_process');
10
+
11
/**
 * Look for a runnable Python interpreter.
 *
 * Tries `python --version` and then `python3 --version`; the first command
 * that runs without an error determines the result.
 *
 * @returns {{available: boolean, version?: string, command?: string}}
 *   `version` is the trimmed stdout of the `--version` call and `command` is
 *   the interpreter name that worked; both are absent when none was found.
 */
function checkPython() {
  for (const command of ['python', 'python3']) {
    try {
      const output = execSync(`${command} --version`, { stdio: 'pipe' });
      const version = output.toString().trim();
      return { available: true, version, command };
    } catch {
      // This interpreter could not be run; fall through to the next one.
    }
  }

  return { available: false };
}
24
+
25
/**
 * Report whether `<pythonCmd> -m kvat --version` runs without an error.
 *
 * @param {string} pythonCmd - Interpreter command used to launch Python
 * @returns {boolean} True when the command succeeded, false otherwise
 */
function checkKvat(pythonCmd) {
  const probe = `${pythonCmd} -m kvat --version`;
  let installed = true;

  try {
    execSync(probe, { stdio: 'pipe' });
  } catch {
    // Any failure (missing interpreter, missing module, non-zero exit)
    // counts as "not installed".
    installed = false;
  }

  return installed;
}
33
+
34
/**
 * Print the post-install report.
 *
 * Shows a banner, then either a warning with installation hints when no
 * Python interpreter is found, or the detected Python version followed by the
 * status of the kvat Python package and the documentation link.
 */
function main() {
  // Emit each entry with its own console.log call, in order.
  const printLines = (lines) => {
    lines.forEach((line) => console.log(line));
  };

  printLines([
    '',
    'KVCache Auto-Tuner (kvat) - npm package installed',
    '='.repeat(50),
    ''
  ]);

  const python = checkPython();

  if (!python.available) {
    printLines([
      'WARNING: Python is not installed or not in PATH.',
      '',
      'kvat requires Python 3.9+ to run. Please install Python first:',
      ' - Windows: https://www.python.org/downloads/',
      ' - macOS: brew install python3',
      ' - Linux: sudo apt install python3 python3-pip',
      ''
    ]);
    return;
  }

  console.log(`Python found: ${python.version}`);

  const statusLines = checkKvat(python.command)
    ? [
        'kvat Python package: Installed',
        '',
        'You can now use kvat:',
        ' kvat tune gpt2 --profile ci-micro',
        ' kvat info'
      ]
    : [
        'kvat Python package: Not installed',
        '',
        'To install the Python package, run:',
        ` ${python.command} -m pip install kvat[full]`,
        '',
        'Or use the JavaScript API:',
        ' const kvat = require("kvat");',
        ' await kvat.installKvat();'
      ];
  printLines(statusLines);

  printLines([
    '',
    'Documentation: https://github.com/Keyvanhardani/kvcache-autotune',
    ''
  ]);
}
78
+
79
+ main();