@coder-ka/ollama-as-webapi 1.0.0

package/README.md ADDED
@@ -0,0 +1,74 @@
+ # ollama-as-webapi
+
+ This repository provides a simple CLI tool to serve Ollama models as a Web API.
+
+ ## Installation
+
+ ```bash
+ npm i -g @coder-ka/ollama-as-webapi
+ ```
+
+ ## Usage
+
+ ```bash
+ ollama-as-webapi
+ ```
+
+ ### CLI Options
+
+ | Option | Description | Default |
+ | --- | --- | --- |
+ | `--model` | Model name | `gemma3:4b` |
+ | `--port` | Port number | `3000` |
+ | `--host` | Host name | `localhost` |
+ | `--gpu` | Use GPU (currently informational only; Ollama manages GPU use automatically) | `true` |
+
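+ For example, to serve a specific model on a different host and port (the values below are only illustrative):
+
+ ```bash
+ ollama-as-webapi --model gemma3:4b --host 0.0.0.0 --port 8080
+ ```
+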
+ ## Web API Reference
+
+ ### Content Generation
+
+ `POST /generate`
+
+ #### Request Body
+
+ | Field | Required | Type | Description | Default |
+ | --- | --- | --- | --- | --- |
+ | `prompt` | Yes | string | Prompt to generate content for | (none) |
+ | `maxTokens` | No | number | Maximum number of tokens to generate | Ollama default |
+ | `temperature` | No | number | Temperature for sampling | Ollama default |
+ | `topP` | No | number | Top-p sampling | Ollama default |
+ | `topK` | No | number | Top-k sampling | Ollama default |
+ | `n` | No | number | Number of samples to generate (not currently used by the server) | `1` |
+ | `stop` | No | string[] | Stop sequences | `[]` |
+
+ #### Response Body
+
+ | Field | Type | Description |
+ | --- | --- | --- |
+ | `text` | string | Generated content |
+ | `tokens` | number | Number of tokens generated |
+ | `finishReason` | string | Reason generation finished (currently always `stop`) |
+
+ #### Example
+
+ ```bash
+ curl -X POST http://localhost:3000/generate \
+   -H "Content-Type: application/json" \
+   -d '{"prompt": "Hello"}'
+ ```
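+
+ A request can also set any of the optional fields from the request body table above (the values here are only illustrative):
+
+ ```bash
+ curl -X POST http://localhost:3000/generate \
+   -H "Content-Type: application/json" \
+   -d '{
+     "prompt": "Write a haiku about the sea.",
+     "maxTokens": 128,
+     "temperature": 0.2,
+     "topP": 0.9,
+     "stop": ["\n\n"]
+   }'
+ ```
+
+ The response is a JSON object such as `{"text": "...", "tokens": 42, "finishReason": "stop"}`, where `tokens` will vary with the generated output.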
+
+ ## License
+
+ MIT
+
+ ## How to contribute
+
+ 1. Fork the repository
+ 2. Create a feature branch
+ 3. Commit your changes
+ 4. Push to the branch
+ 5. Open a Pull Request
+
+ ## Author
+
+ [Katsuyuki Oeda](https://github.com/coder-ka)
package/dist/cli.js ADDED
@@ -0,0 +1,26 @@
+ import { Command } from 'commander';
+ import { startServer } from './server.js';
+ import { pullModelIfNeeded } from './ollama.js';
+ const program = new Command();
+ program
+     .name('ollama-as-webapi')
+     .description('Serve Ollama models as a Web API')
+     .version('1.0.0');
+ program
+     .option('--model <name>', 'Model name', 'gemma3:4b')
+     .option('--port <number>', 'Port number', '3000')
+     .option('--host <name>', 'Host name', 'localhost')
+     .option('--gpu <boolean>', 'Use GPU', 'true')
+     .action(async (options) => {
+     // commander passes option values as strings, so coerce the port and gpu flags here
+     const port = parseInt(options.port, 10);
+     const gpu = options.gpu === 'true';
+     try {
+         await pullModelIfNeeded(options.model);
+         startServer(options.host, port, options.model, gpu);
+     }
+     catch (error) {
+         console.error('Failed to start server:', error);
+         process.exit(1);
+     }
+ });
+ program.parse();
package/dist/ollama.js ADDED
@@ -0,0 +1,32 @@
+ import ollama from 'ollama';
+ export async function generateContent(options) {
+     // Note: Ollama decides GPU usage automatically from the system configuration and the model,
+     // so the CLI's `gpu` flag is not forwarded here;
+     // only the sampling options below are passed through.
+     const response = await ollama.generate({
+         model: options.model,
+         prompt: options.prompt,
+         options: {
+             num_predict: options.maxTokens,
+             temperature: options.temperature,
+             top_p: options.topP,
+             top_k: options.topK,
+             stop: options.stop,
+         },
+         stream: false,
+     });
+     return {
+         text: response.response,
+         tokens: response.eval_count || 0,
+         finishReason: 'stop', // Ollama's generate response has no explicit finish reason; 'stop' is the conventional value for a non-streamed call.
+     };
+ }
+ export async function pullModelIfNeeded(model) {
+     const models = await ollama.list();
+     const exists = models.models.some((m) => m.name === model || m.name.startsWith(model + ':'));
+     if (!exists) {
+         console.log(`Pulling model ${model}...`);
+         await ollama.pull({ model });
+         console.log(`Model ${model} pulled successfully.`);
+     }
+ }
package/dist/server.js ADDED
@@ -0,0 +1,37 @@
+ import { Hono } from 'hono';
+ import { serve } from '@hono/node-server';
+ import { generateContent } from './ollama.js';
+ export function startServer(host, port, model, gpu) {
+     const app = new Hono();
+     app.post('/generate', async (c) => {
+         try {
+             const body = await c.req.json();
+             const options = {
+                 model,
+                 prompt: body.prompt || '',
+                 maxTokens: body.maxTokens,
+                 temperature: body.temperature,
+                 topP: body.topP,
+                 topK: body.topK,
+                 stop: body.stop,
+                 gpu, // currently unused by generateContent; Ollama manages GPU use itself
+             };
+             if (!options.prompt) {
+                 return c.json({ error: 'Prompt is required' }, 400);
+             }
+             const result = await generateContent(options);
+             return c.json(result);
+         }
+         catch (error) {
+             console.error('Error generating content:', error);
+             return c.json({ error: error.message || 'Internal Server Error' }, 500);
+         }
+     });
+     console.log(`Starting server on http://${host}:${port}`);
+     console.log(`Using model: ${model}`);
+     serve({
+         fetch: app.fetch,
+         port,
+         hostname: host,
+     });
+ }
package/package.json ADDED
@@ -0,0 +1,48 @@
+ {
+   "name": "@coder-ka/ollama-as-webapi",
+   "version": "1.0.0",
+   "description": "This repository provides a simple CLI tool to serve Ollama models as a Web API.",
+   "type": "module",
+   "bin": {
+     "ollama-as-webapi": "dist/cli.js"
+   },
+   "scripts": {
+     "build": "tsc",
+     "start": "node dist/cli.js",
+     "dev": "tsx src/cli.ts",
+     "prepublishOnly": "npm run build"
+   },
+   "author": "Katsuyuki Oeda",
+   "license": "MIT",
+   "repository": {
+     "type": "git",
+     "url": "git+https://github.com/coder-ka/ollama-as-webapi.git"
+   },
+   "bugs": {
+     "url": "https://github.com/coder-ka/ollama-as-webapi/issues"
+   },
+   "homepage": "https://github.com/coder-ka/ollama-as-webapi#readme",
+   "keywords": [
+     "ollama",
+     "ai",
+     "llm",
+     "webapi",
+     "cli"
+   ],
+   "files": [
+     "dist",
+     "package.json"
+   ],
+   "dependencies": {
+     "@hono/node-server": "^1.19.9",
+     "commander": "^14.0.3",
+     "hono": "^4.12.0",
+     "ollama": "^0.6.3"
+   },
+   "devDependencies": {
+     "@types/commander": "^2.12.0",
+     "@types/node": "^25.3.0",
+     "tsx": "^4.21.0",
+     "typescript": "^5.9.3"
+   }
+ }