@coder-ka/ollama-as-webapi 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -0
- package/dist/cli.js +26 -0
- package/dist/ollama.js +32 -0
- package/dist/server.js +37 -0
- package/package.json +48 -0
package/README.md
ADDED
@@ -0,0 +1,74 @@
# ollama-as-webapi

This repository provides a simple CLI tool to serve Ollama models as a Web API.

## Installation

```bash
npm i -g @coder-ka/ollama-as-webapi
```

## Usage

```bash
ollama-as-webapi
```

### CLI Options

| Option | Description | Default |
| --- | --- | --- |
| `--model` | Model name | `gemma3:4b` |
| `--port` | Port number | `3000` |
| `--host` | Host name | `localhost` |
| `--gpu` | Use GPU | `true` |
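
For example, to serve a different model on another port (the flag values here are illustrative):

```bash
ollama-as-webapi --model gemma3:12b --port 8080 --gpu false
```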

## Web API Reference

### Content Generation

`POST /generate`

#### Request Body

| Field | Required | Type | Description | Default |
| --- | --- | --- | --- | --- |
| `prompt` | Yes | string | Prompt to generate content for | `""` |
| `maxTokens` | No | number | Maximum number of tokens to generate | `256` |
| `temperature` | No | number | Temperature for sampling | `0.7` |
| `topP` | No | number | Top-p sampling | `0.9` |
| `topK` | No | number | Top-k sampling | `40` |
| `n` | No | number | Number of samples to generate | `1` |
| `stop` | No | string[] | Stop sequences | `[]` |
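
A request body using the optional sampling fields might look like this (all values are illustrative):

```json
{
  "prompt": "Write a haiku about the sea.",
  "maxTokens": 128,
  "temperature": 0.7,
  "topP": 0.9,
  "stop": ["\n\n"]
}
```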

#### Response Body

| Field | Type | Description |
| --- | --- | --- |
| `text` | string | Generated content |
| `tokens` | number | Number of tokens generated |
| `finishReason` | string | Reason for finishing generation |
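
An illustrative response (the exact text and token count will vary):

```json
{
  "text": "Hello! How can I help you today?",
  "tokens": 9,
  "finishReason": "stop"
}
```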

#### Example

```bash
curl -X POST http://localhost:3000/generate \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Hello"}'
```
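
A request with a missing or empty `prompt` is rejected with HTTP 400, and other failures return HTTP 500, in both cases as a JSON `error` object (see `dist/server.js`):

```bash
# Returns 400 with {"error":"Prompt is required"}
curl -X POST http://localhost:3000/generate \
  -H "Content-Type: application/json" \
  -d '{}'
```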

## License

MIT

## How to contribute

1. Fork the repository
2. Create a feature branch
3. Commit your changes
4. Push to the branch
5. Open a Pull Request

## Author

[Katsuyuki Oeda](https://github.com/coder-ka)
package/dist/cli.js
ADDED
@@ -0,0 +1,26 @@
#!/usr/bin/env node
// The shebang above is required so the "bin" entry in package.json runs as a CLI.
import { Command } from 'commander';
import { startServer } from './server.js';
import { pullModelIfNeeded } from './ollama.js';

const program = new Command();

program
  .name('ollama-as-webapi')
  .description('Serve Ollama models as a Web API')
  .version('1.0.0');

program
  .option('--model <name>', 'Model name', 'gemma3:4b')
  .option('--port <number>', 'Port number', '3000')
  .option('--host <name>', 'Host name', 'localhost')
  .option('--gpu <boolean>', 'Use GPU', 'true')
  .action(async (options) => {
    const port = parseInt(options.port, 10);
    const gpu = options.gpu === 'true';
    try {
      await pullModelIfNeeded(options.model);
      startServer(options.host, port, options.model, gpu);
    } catch (error) {
      console.error('Failed to start server:', error);
      process.exit(1);
    }
  });

program.parse();
package/dist/ollama.js
ADDED
@@ -0,0 +1,32 @@
import ollama from 'ollama';

export async function generateContent(options) {
  // Ollama decides GPU usage itself from the system configuration and the model;
  // the `gpu` flag is accepted for interface compatibility but not forwarded here.
  const response = await ollama.generate({
    model: options.model,
    prompt: options.prompt,
    options: {
      num_predict: options.maxTokens,
      temperature: options.temperature,
      top_p: options.topP,
      top_k: options.topK,
      stop: options.stop,
    },
    stream: false,
  });

  return {
    text: response.response,
    tokens: response.eval_count || 0,
    // Ollama reports why generation ended in `done_reason` (e.g. 'stop', 'length');
    // fall back to 'stop' for older servers that omit the field.
    finishReason: response.done_reason || 'stop',
  };
}

export async function pullModelIfNeeded(model) {
  const models = await ollama.list();
  // Match exact names as well as tagged variants (e.g. 'gemma3' matches 'gemma3:4b').
  const exists = models.models.some(
    (m) => m.name === model || m.name.startsWith(model + ':')
  );
  if (!exists) {
    console.log(`Pulling model ${model}...`);
    await ollama.pull({ model });
    console.log(`Model ${model} pulled successfully.`);
  }
}
package/dist/server.js
ADDED
@@ -0,0 +1,37 @@
import { Hono } from 'hono';
import { serve } from '@hono/node-server';
import { generateContent } from './ollama.js';

export function startServer(host, port, model, gpu) {
  const app = new Hono();

  app.post('/generate', async (c) => {
    try {
      const body = await c.req.json();
      const options = {
        model,
        prompt: body.prompt || '',
        maxTokens: body.maxTokens,
        temperature: body.temperature,
        topP: body.topP,
        topK: body.topK,
        stop: body.stop,
        gpu,
      };
      if (!options.prompt) {
        return c.json({ error: 'Prompt is required' }, 400);
      }
      const result = await generateContent(options);
      return c.json(result);
    } catch (error) {
      console.error('Error generating content:', error);
      return c.json({ error: error.message || 'Internal Server Error' }, 500);
    }
  });

  console.log(`Starting server on http://${host}:${port}`);
  console.log(`Using model: ${model}`);

  serve({
    fetch: app.fetch,
    port,
    hostname: host,
  });
}
package/package.json
ADDED
@@ -0,0 +1,48 @@
{
  "name": "@coder-ka/ollama-as-webapi",
  "version": "1.0.0",
  "description": "A simple CLI tool to serve Ollama models as a Web API.",
  "type": "module",
  "bin": {
    "ollama-as-webapi": "dist/cli.js"
  },
  "scripts": {
    "build": "tsc",
    "start": "node dist/cli.js",
    "dev": "tsx src/cli.ts",
    "prepublishOnly": "npm run build"
  },
  "author": "Katsuyuki Oeda",
  "license": "MIT",
  "repository": {
    "type": "git",
    "url": "git+https://github.com/coder-ka/ollama-as-webapi.git"
  },
  "bugs": {
    "url": "https://github.com/coder-ka/ollama-as-webapi/issues"
  },
  "homepage": "https://github.com/coder-ka/ollama-as-webapi#readme",
  "keywords": [
    "ollama",
    "ai",
    "llm",
    "webapi",
    "cli"
  ],
  "files": [
    "dist",
    "package.json"
  ],
  "dependencies": {
    "@hono/node-server": "^1.19.9",
    "commander": "^14.0.3",
    "hono": "^4.12.0",
    "ollama": "^0.6.3"
  },
  "devDependencies": {
    "@types/commander": "^2.12.0",
    "@types/node": "^25.3.0",
    "tsx": "^4.21.0",
    "typescript": "^5.9.3"
  }
}