inference-server 1.0.0-beta.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. package/README.md +216 -0
  2. package/dist/api/openai/enums.d.ts +4 -0
  3. package/dist/api/openai/enums.js +17 -0
  4. package/dist/api/openai/enums.js.map +1 -0
  5. package/dist/api/openai/handlers/chat.d.ts +3 -0
  6. package/dist/api/openai/handlers/chat.js +358 -0
  7. package/dist/api/openai/handlers/chat.js.map +1 -0
  8. package/dist/api/openai/handlers/completions.d.ts +3 -0
  9. package/dist/api/openai/handlers/completions.js +169 -0
  10. package/dist/api/openai/handlers/completions.js.map +1 -0
  11. package/dist/api/openai/handlers/embeddings.d.ts +3 -0
  12. package/dist/api/openai/handlers/embeddings.js +74 -0
  13. package/dist/api/openai/handlers/embeddings.js.map +1 -0
  14. package/dist/api/openai/handlers/images.d.ts +0 -0
  15. package/dist/api/openai/handlers/images.js +4 -0
  16. package/dist/api/openai/handlers/images.js.map +1 -0
  17. package/dist/api/openai/handlers/models.d.ts +3 -0
  18. package/dist/api/openai/handlers/models.js +23 -0
  19. package/dist/api/openai/handlers/models.js.map +1 -0
  20. package/dist/api/openai/handlers/transcription.d.ts +0 -0
  21. package/dist/api/openai/handlers/transcription.js +4 -0
  22. package/dist/api/openai/handlers/transcription.js.map +1 -0
  23. package/dist/api/openai/index.d.ts +7 -0
  24. package/dist/api/openai/index.js +14 -0
  25. package/dist/api/openai/index.js.map +1 -0
  26. package/dist/api/parseJSONRequestBody.d.ts +2 -0
  27. package/dist/api/parseJSONRequestBody.js +24 -0
  28. package/dist/api/parseJSONRequestBody.js.map +1 -0
  29. package/dist/api/v1/index.d.ts +2 -0
  30. package/dist/api/v1/index.js +29 -0
  31. package/dist/api/v1/index.js.map +1 -0
  32. package/dist/cli.d.ts +1 -0
  33. package/dist/cli.js +10 -0
  34. package/dist/cli.js.map +1 -0
  35. package/dist/engines/gpt4all/engine.d.ts +34 -0
  36. package/dist/engines/gpt4all/engine.js +357 -0
  37. package/dist/engines/gpt4all/engine.js.map +1 -0
  38. package/dist/engines/gpt4all/util.d.ts +3 -0
  39. package/dist/engines/gpt4all/util.js +29 -0
  40. package/dist/engines/gpt4all/util.js.map +1 -0
  41. package/dist/engines/index.d.ts +19 -0
  42. package/dist/engines/index.js +21 -0
  43. package/dist/engines/index.js.map +1 -0
  44. package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
  45. package/dist/engines/node-llama-cpp/engine.js +666 -0
  46. package/dist/engines/node-llama-cpp/engine.js.map +1 -0
  47. package/dist/engines/node-llama-cpp/types.d.ts +13 -0
  48. package/dist/engines/node-llama-cpp/types.js +2 -0
  49. package/dist/engines/node-llama-cpp/types.js.map +1 -0
  50. package/dist/engines/node-llama-cpp/util.d.ts +15 -0
  51. package/dist/engines/node-llama-cpp/util.js +84 -0
  52. package/dist/engines/node-llama-cpp/util.js.map +1 -0
  53. package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
  54. package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
  55. package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
  56. package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
  57. package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
  58. package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
  59. package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
  60. package/dist/engines/stable-diffusion-cpp/types.js +2 -0
  61. package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
  62. package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
  63. package/dist/engines/stable-diffusion-cpp/util.js +55 -0
  64. package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
  65. package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
  66. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
  67. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
  68. package/dist/engines/transformers-js/engine.d.ts +37 -0
  69. package/dist/engines/transformers-js/engine.js +538 -0
  70. package/dist/engines/transformers-js/engine.js.map +1 -0
  71. package/dist/engines/transformers-js/types.d.ts +7 -0
  72. package/dist/engines/transformers-js/types.js +2 -0
  73. package/dist/engines/transformers-js/types.js.map +1 -0
  74. package/dist/engines/transformers-js/util.d.ts +7 -0
  75. package/dist/engines/transformers-js/util.js +36 -0
  76. package/dist/engines/transformers-js/util.js.map +1 -0
  77. package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
  78. package/dist/engines/transformers-js/validateModelFiles.js +133 -0
  79. package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
  80. package/dist/experiments/ChatWithVision.d.ts +11 -0
  81. package/dist/experiments/ChatWithVision.js +91 -0
  82. package/dist/experiments/ChatWithVision.js.map +1 -0
  83. package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
  84. package/dist/experiments/StableDiffPromptGenerator.js +4 -0
  85. package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
  86. package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
  87. package/dist/experiments/VoiceFunctionCall.js +51 -0
  88. package/dist/experiments/VoiceFunctionCall.js.map +1 -0
  89. package/dist/http.d.ts +19 -0
  90. package/dist/http.js +54 -0
  91. package/dist/http.js.map +1 -0
  92. package/dist/index.d.ts +7 -0
  93. package/dist/index.js +8 -0
  94. package/dist/index.js.map +1 -0
  95. package/dist/instance.d.ts +88 -0
  96. package/dist/instance.js +594 -0
  97. package/dist/instance.js.map +1 -0
  98. package/dist/lib/acquireFileLock.d.ts +7 -0
  99. package/dist/lib/acquireFileLock.js +38 -0
  100. package/dist/lib/acquireFileLock.js.map +1 -0
  101. package/dist/lib/calculateContextIdentity.d.ts +7 -0
  102. package/dist/lib/calculateContextIdentity.js +39 -0
  103. package/dist/lib/calculateContextIdentity.js.map +1 -0
  104. package/dist/lib/calculateFileChecksum.d.ts +1 -0
  105. package/dist/lib/calculateFileChecksum.js +16 -0
  106. package/dist/lib/calculateFileChecksum.js.map +1 -0
  107. package/dist/lib/copyDirectory.d.ts +6 -0
  108. package/dist/lib/copyDirectory.js +27 -0
  109. package/dist/lib/copyDirectory.js.map +1 -0
  110. package/dist/lib/decodeAudio.d.ts +1 -0
  111. package/dist/lib/decodeAudio.js +26 -0
  112. package/dist/lib/decodeAudio.js.map +1 -0
  113. package/dist/lib/downloadModelFile.d.ts +10 -0
  114. package/dist/lib/downloadModelFile.js +58 -0
  115. package/dist/lib/downloadModelFile.js.map +1 -0
  116. package/dist/lib/flattenMessageTextContent.d.ts +2 -0
  117. package/dist/lib/flattenMessageTextContent.js +11 -0
  118. package/dist/lib/flattenMessageTextContent.js.map +1 -0
  119. package/dist/lib/getCacheDirPath.d.ts +12 -0
  120. package/dist/lib/getCacheDirPath.js +31 -0
  121. package/dist/lib/getCacheDirPath.js.map +1 -0
  122. package/dist/lib/loadImage.d.ts +12 -0
  123. package/dist/lib/loadImage.js +30 -0
  124. package/dist/lib/loadImage.js.map +1 -0
  125. package/dist/lib/logger.d.ts +12 -0
  126. package/dist/lib/logger.js +98 -0
  127. package/dist/lib/logger.js.map +1 -0
  128. package/dist/lib/math.d.ts +7 -0
  129. package/dist/lib/math.js +30 -0
  130. package/dist/lib/math.js.map +1 -0
  131. package/dist/lib/resolveModelFileLocation.d.ts +15 -0
  132. package/dist/lib/resolveModelFileLocation.js +41 -0
  133. package/dist/lib/resolveModelFileLocation.js.map +1 -0
  134. package/dist/lib/util.d.ts +7 -0
  135. package/dist/lib/util.js +61 -0
  136. package/dist/lib/util.js.map +1 -0
  137. package/dist/lib/validateModelFile.d.ts +9 -0
  138. package/dist/lib/validateModelFile.js +62 -0
  139. package/dist/lib/validateModelFile.js.map +1 -0
  140. package/dist/lib/validateModelOptions.d.ts +3 -0
  141. package/dist/lib/validateModelOptions.js +23 -0
  142. package/dist/lib/validateModelOptions.js.map +1 -0
  143. package/dist/pool.d.ts +61 -0
  144. package/dist/pool.js +512 -0
  145. package/dist/pool.js.map +1 -0
  146. package/dist/server.d.ts +59 -0
  147. package/dist/server.js +221 -0
  148. package/dist/server.js.map +1 -0
  149. package/dist/standalone.d.ts +1 -0
  150. package/dist/standalone.js +306 -0
  151. package/dist/standalone.js.map +1 -0
  152. package/dist/store.d.ts +60 -0
  153. package/dist/store.js +203 -0
  154. package/dist/store.js.map +1 -0
  155. package/dist/types/completions.d.ts +57 -0
  156. package/dist/types/completions.js +2 -0
  157. package/dist/types/completions.js.map +1 -0
  158. package/dist/types/index.d.ts +326 -0
  159. package/dist/types/index.js +2 -0
  160. package/dist/types/index.js.map +1 -0
  161. package/docs/engines.md +28 -0
  162. package/docs/gpu.md +72 -0
  163. package/docs/http-api.md +147 -0
  164. package/examples/all-options.js +108 -0
  165. package/examples/chat-cli.js +56 -0
  166. package/examples/chat-server.js +65 -0
  167. package/examples/concurrency.js +70 -0
  168. package/examples/express.js +70 -0
  169. package/examples/pool.js +91 -0
  170. package/package.json +113 -0
  171. package/src/api/openai/enums.ts +20 -0
  172. package/src/api/openai/handlers/chat.ts +408 -0
  173. package/src/api/openai/handlers/completions.ts +196 -0
  174. package/src/api/openai/handlers/embeddings.ts +92 -0
  175. package/src/api/openai/handlers/images.ts +3 -0
  176. package/src/api/openai/handlers/models.ts +33 -0
  177. package/src/api/openai/handlers/transcription.ts +2 -0
  178. package/src/api/openai/index.ts +16 -0
  179. package/src/api/parseJSONRequestBody.ts +26 -0
  180. package/src/api/v1/DRAFT.md +16 -0
  181. package/src/api/v1/index.ts +37 -0
  182. package/src/cli.ts +9 -0
  183. package/src/engines/gpt4all/engine.ts +441 -0
  184. package/src/engines/gpt4all/util.ts +31 -0
  185. package/src/engines/index.ts +28 -0
  186. package/src/engines/node-llama-cpp/engine.ts +811 -0
  187. package/src/engines/node-llama-cpp/types.ts +17 -0
  188. package/src/engines/node-llama-cpp/util.ts +126 -0
  189. package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
  190. package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
  191. package/src/engines/stable-diffusion-cpp/types.ts +54 -0
  192. package/src/engines/stable-diffusion-cpp/util.ts +58 -0
  193. package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
  194. package/src/engines/transformers-js/engine.ts +659 -0
  195. package/src/engines/transformers-js/types.ts +25 -0
  196. package/src/engines/transformers-js/util.ts +40 -0
  197. package/src/engines/transformers-js/validateModelFiles.ts +168 -0
  198. package/src/experiments/ChatWithVision.ts +103 -0
  199. package/src/experiments/StableDiffPromptGenerator.ts +2 -0
  200. package/src/experiments/VoiceFunctionCall.ts +71 -0
  201. package/src/http.ts +72 -0
  202. package/src/index.ts +7 -0
  203. package/src/instance.ts +723 -0
  204. package/src/lib/acquireFileLock.ts +38 -0
  205. package/src/lib/calculateContextIdentity.ts +53 -0
  206. package/src/lib/calculateFileChecksum.ts +18 -0
  207. package/src/lib/copyDirectory.ts +29 -0
  208. package/src/lib/decodeAudio.ts +39 -0
  209. package/src/lib/downloadModelFile.ts +70 -0
  210. package/src/lib/flattenMessageTextContent.ts +19 -0
  211. package/src/lib/getCacheDirPath.ts +34 -0
  212. package/src/lib/loadImage.ts +46 -0
  213. package/src/lib/logger.ts +112 -0
  214. package/src/lib/math.ts +31 -0
  215. package/src/lib/resolveModelFileLocation.ts +49 -0
  216. package/src/lib/util.ts +75 -0
  217. package/src/lib/validateModelFile.ts +71 -0
  218. package/src/lib/validateModelOptions.ts +31 -0
  219. package/src/pool.ts +651 -0
  220. package/src/server.ts +270 -0
  221. package/src/standalone.ts +320 -0
  222. package/src/store.ts +278 -0
  223. package/src/types/completions.ts +86 -0
  224. package/src/types/index.ts +488 -0
  225. package/tsconfig.json +29 -0
  226. package/tsconfig.release.json +11 -0
  227. package/vitest.config.ts +18 -0
package/package.json ADDED
@@ -0,0 +1,113 @@
1
+ {
2
+ "name": "inference-server",
3
+ "version": "1.0.0-beta.19",
4
+ "description": "Libraries and server to build AI applications. Adapters to various native bindings allowing local inference. Integrate it with your application, or use as a microservice.",
5
+ "main": "dist/index.js",
6
+ "source": "src/index.ts",
7
+ "types": "dist/index.d.ts",
8
+ "type": "module",
9
+ "license": "MIT",
10
+ "bin": {
11
+ "lllms": "./dist/cli.js"
12
+ },
13
+ "repository": "github:iimez/inference-server",
14
+ "bugs": {
15
+ "url": "https://github.com/iimez/inference-server/issues"
16
+ },
17
+ "scripts": {
18
+ "upgrade": "npx npm-check-updates -i",
19
+ "reinstall": "rimraf node_modules && npm install",
20
+ "clean": "rimraf dist",
21
+ "download-test-models": "node scripts/download-test-models.js",
22
+ "prebuild": "npm run clean",
23
+ "build": "tsc -p tsconfig.release.json && tsc-alias -p tsconfig.release.json",
24
+ "test": "vitest --run",
25
+ "test:pool": "vitest tests/pool.test.ts",
26
+ "test:openai": "vitest tests/openai.test.ts",
27
+ "test:gpt4all": "vitest tests/engines/gpt4all.test.ts",
28
+ "test:llama": "vitest tests/engines/node-llama-cpp.test.ts",
29
+ "test:transformers": "vitest tests/engines/transformers.test.ts",
30
+ "test:sd": "vitest tests/engines/stable-diffusion.test.ts",
31
+ "test:experiments": "vitest tests/engines/experiments.test.ts",
32
+ "test:server": "vitest tests/server.test.ts",
33
+ "prewatch": "npm run clean",
34
+ "watch": "tsc -w -p tsconfig.release.json",
35
+ "start": "cross-env NODE_ENV=production node dist/standalone.js"
36
+ },
37
+ "keywords": [
38
+ "local ai",
39
+ "inference server",
40
+ "model pool",
41
+ "gpt4all",
42
+ "node-llama-cpp",
43
+ "transformers.js",
44
+ "llama.cpp",
45
+ "chatbot",
46
+ "bot",
47
+ "llm",
48
+ "ai",
49
+ "nlp",
50
+ "openai api"
51
+ ],
52
+ "engines": {
53
+ "node": ">=18.16.0"
54
+ },
55
+ "imports": {
56
+ "#package/*": "./dist/*"
57
+ },
58
+ "peerDependencies": {
59
+ "@huggingface/transformers": ">=3.0.0",
60
+ "gpt4all": ">=4.0.0",
61
+ "node-llama-cpp": ">=3.0.0",
62
+ "@lmagder/node-stable-diffusion-cpp": ">=0.1.6"
63
+ },
64
+ "peerDependenciesMeta": {
65
+ "node-llama-cpp": {
66
+ "optional": true
67
+ },
68
+ "gpt4all": {
69
+ "optional": true
70
+ },
71
+ "@huggingface/transformers": {
72
+ "optional": true
73
+ },
74
+ "@lmagder/node-stable-diffusion-cpp": {
75
+ "optional": true
76
+ }
77
+ },
78
+ "dependencies": {
79
+ "@alexanderolsen/libsamplerate-js": "^2.1.2",
80
+ "@huggingface/gguf": "^0.1.12",
81
+ "ajv": "^8.17.1",
82
+ "audio-decode": "^2.2.2",
83
+ "chalk": "^5.3.0",
84
+ "cors": "^2.8.5",
85
+ "express": "^4.21.1",
86
+ "ipull": "^3.9.0",
87
+ "nanoid": "^5.0.8",
88
+ "onnxruntime-node": "^1.19.2",
89
+ "p-queue": "^8.0.1",
90
+ "pretty-bytes": "^6.1.1",
91
+ "pretty-ms": "^9.1.0",
92
+ "proper-lockfile": "^4.1.2",
93
+ "sharp": "^0.33.5"
94
+ },
95
+ "devDependencies": {
96
+ "@huggingface/transformers": "^3.0.1",
97
+ "@lmagder/node-stable-diffusion-cpp": "^0.1.6",
98
+ "@types/cors": "^2.8.17",
99
+ "@types/express": "^5.0.0",
100
+ "@types/node": "^22.8.2",
101
+ "@types/proper-lockfile": "^4.1.4",
102
+ "@types/supertest": "^6.0.2",
103
+ "cross-env": "^7.0.3",
104
+ "gpt4all": "^4.0.0",
105
+ "node-llama-cpp": "^3.1.1",
106
+ "openai": "^4.68.4",
107
+ "supertest": "^7.0.0",
108
+ "tsc-alias": "^1.8.10",
109
+ "typescript": "^5.6.3",
110
+ "vite-tsconfig-paths": "^5.0.1",
111
+ "vitest": "^2.1.4"
112
+ }
113
+ }
@@ -0,0 +1,20 @@
1
+ import { CompletionFinishReason, ChatMessage } from '#package/types/index.js'
2
+ import OpenAI from 'openai'
3
+
4
+ export const finishReasonMap: Record<CompletionFinishReason, OpenAI.ChatCompletion.Choice['finish_reason']> = {
5
+ maxTokens: 'length',
6
+ toolCalls: 'tool_calls',
7
+ eogToken: 'stop',
8
+ stopTrigger: 'stop',
9
+ timeout: 'stop',
10
+ cancel: 'stop',
11
+ abort: 'stop',
12
+ } as const
13
+
14
+ export const messageRoleMap: Record<OpenAI.ChatCompletionMessageParam['role'], ChatMessage['role']> = {
15
+ user: 'user',
16
+ system: 'system',
17
+ assistant: 'assistant',
18
+ tool: 'tool',
19
+ function: 'tool',
20
+ }
@@ -0,0 +1,408 @@
1
+ import type { IncomingMessage, ServerResponse } from 'node:http'
2
+ import type { OpenAI } from 'openai'
3
+ import { ChatCompletionMessageParam } from 'openai/resources/chat/completions.js'
4
+ import type { ModelServer } from '#package/server.js'
5
+ import {
6
+ ChatCompletionRequest,
7
+ ToolDefinition,
8
+ ChatMessage,
9
+ MessageContentPart,
10
+ Image,
11
+ } from '#package/types/index.js'
12
+ import { parseJSONRequestBody } from '#package/api/parseJSONRequestBody.js'
13
+ import { omitEmptyValues } from '#package/lib/util.js'
14
+ import { loadImageFromUrl } from '#package/lib/loadImage.js'
15
+ import { finishReasonMap, messageRoleMap } from '../enums.js'
16
+
17
// Request body accepted by the chat completion endpoint. Extends the OpenAI
// streaming params but makes `stream` optional (one handler serves both
// streaming and non-streaming requests) and adds non-spec sampling
// parameters supported by the local inference engines.
interface OpenAIChatCompletionParams
	extends Omit<OpenAI.ChatCompletionCreateParamsStreaming, 'stream'> {
	stream?: boolean
	// non-spec: top-k sampling cutoff
	top_k?: number
	// non-spec: min-p sampling cutoff
	min_p?: number
	// non-spec: number of recent tokens considered for repeat penalty
	repeat_penalty_num?: number
}

// SSE chunk shape written to the stream; adds the optional usage payload
// that is sent with the final chunk when stream_options.include_usage is set.
interface OpenAIChatCompletionChunk extends OpenAI.ChatCompletionChunk {
	usage?: OpenAI.CompletionUsage
}
28
+
29
+ async function prepareIncomingMessages(
30
+ messages: ChatCompletionMessageParam[]
31
+ ): Promise<ChatMessage[]> {
32
+ const downloadPromises: Record<string, Promise<Image>> = {}
33
+ const resultMessages: ChatMessage[] = []
34
+
35
+ for (const message of messages) {
36
+ const role = messageRoleMap[message.role]
37
+ const resultMessage: any = {
38
+ role,
39
+ content: [],
40
+ }
41
+ if (role === 'tool' && 'tool_call_id' in message) {
42
+ resultMessage.callId = message.tool_call_id
43
+ }
44
+
45
+ if (typeof message.content === 'string') {
46
+ resultMessage.content.push({
47
+ type: 'text',
48
+ text: message.content,
49
+ })
50
+ } else if (Array.isArray(message.content)) {
51
+ for (const part of message.content) {
52
+
53
+ if (typeof part === 'string') {
54
+ resultMessage.content.push({
55
+ type: 'text',
56
+ text: part,
57
+ })
58
+ } else if (part.type === 'text') {
59
+ resultMessage.content.push({
60
+ type: 'text',
61
+ text: part.text,
62
+ })
63
+ } else if (part.type === 'image_url') {
64
+ if (!downloadPromises[part.image_url.url]) {
65
+ downloadPromises[part.image_url.url] = loadImageFromUrl(part.image_url.url)
66
+ }
67
+ const content: Partial<MessageContentPart> = {
68
+ type: 'image',
69
+ }
70
+ resultMessage.content.push(content)
71
+ downloadPromises[part.image_url.url].then((image) => {
72
+ content.image = image
73
+ })
74
+ } else if (part.type === 'input_audio') {
75
+ resultMessage.content.push({
76
+ type: 'audio',
77
+ audio: part.input_audio,
78
+ })
79
+ } else if (part.type === 'refusal') {
80
+ resultMessage.content.push({
81
+ type: 'text',
82
+ text: part.refusal,
83
+ })
84
+ }
85
+ }
86
+ } else {
87
+ throw new Error('Invalid message content')
88
+ }
89
+
90
+ resultMessages.push(resultMessage)
91
+ }
92
+
93
+ await Promise.all(Object.values(downloadPromises))
94
+
95
+ return resultMessages
96
+
97
+ }
98
+
99
+ function createResponseMessageContent(
100
+ content: string | MessageContentPart[]
101
+ ): OpenAI.ChatCompletionMessage['content'] {
102
+ if (!content) {
103
+ return null
104
+ }
105
+ if (typeof content === 'string') {
106
+ return content
107
+ }
108
+ if (!Array.isArray(content)) {
109
+ throw new Error('Invalid response message content')
110
+ }
111
+
112
+ let text = ''
113
+ for (const part of content) {
114
+ if (part.type === 'text') {
115
+ text += part.text
116
+ }
117
+ // assistant may only respond with text in openai chat completions
118
+ }
119
+ return text
120
+ }
121
+
122
+ // v1/chat/completions
123
+ // https://platform.openai.com/docs/api-reference/chat/create
124
+ export function createChatCompletionHandler(modelServer: ModelServer) {
125
+ return async (req: IncomingMessage, res: ServerResponse) => {
126
+ let args: OpenAIChatCompletionParams
127
+
128
+ try {
129
+ const body = await parseJSONRequestBody(req)
130
+ args = body
131
+ } catch (e) {
132
+ console.error(e)
133
+ res.writeHead(400, { 'Content-Type': 'application/json' })
134
+ res.end(JSON.stringify({ error: 'Invalid request' }))
135
+ return
136
+ }
137
+
138
+ // TODO ajv schema validation?
139
+ if (!args.model || !args.messages) {
140
+ res.writeHead(400, { 'Content-Type': 'application/json' })
141
+ res.end(JSON.stringify({ error: 'Invalid request (need at least model and messages)' }))
142
+ return
143
+ }
144
+
145
+ if (!modelServer.modelExists(args.model)) {
146
+ res.writeHead(400, { 'Content-Type': 'application/json' })
147
+ res.end(JSON.stringify({ error: 'Model does not exist' }))
148
+ return
149
+ }
150
+
151
+ const controller = new AbortController()
152
+ req.on('close', () => {
153
+ console.debug('Client closed connection')
154
+ controller.abort()
155
+ })
156
+ req.on('end', () => {
157
+ console.debug('Client ended connection')
158
+ controller.abort()
159
+ })
160
+ req.on('aborted', () => {
161
+ console.debug('Client aborted connection')
162
+ controller.abort()
163
+ })
164
+ req.on('error', () => {
165
+ console.debug('Client error')
166
+ controller.abort()
167
+ })
168
+
169
+ try {
170
+ let ssePing: NodeJS.Timeout | undefined
171
+ if (args.stream) {
172
+ res.writeHead(200, {
173
+ 'Content-Type': 'text/event-stream',
174
+ 'Cache-Control': 'no-cache',
175
+ Connection: 'keep-alive',
176
+ })
177
+ res.flushHeaders()
178
+ ssePing = setInterval(() => {
179
+ res.write(':ping\n\n')
180
+ }, 30000)
181
+ }
182
+
183
+ let stop = args.stop ? args.stop : undefined
184
+ if (typeof stop === 'string') {
185
+ stop = [stop]
186
+ }
187
+
188
+ let completionGrammar: 'json' | undefined
189
+ if (args.response_format) {
190
+ if (args.response_format.type === 'json_object') {
191
+ completionGrammar = 'json'
192
+ }
193
+ }
194
+
195
+ let completionTools:
196
+ | Record<string, ToolDefinition>
197
+ | undefined
198
+
199
+ if (args.tools) {
200
+ const functionTools = args.tools
201
+ .filter((tool) => tool.type === 'function')
202
+ .map((tool) => {
203
+ return {
204
+ name: tool.function.name,
205
+ description: tool.function.description,
206
+ parameters: tool.function.parameters,
207
+ }
208
+ })
209
+ if (functionTools.length) {
210
+ if (!completionTools) {
211
+ completionTools = {}
212
+ }
213
+ for (const tool of functionTools) {
214
+ completionTools[tool.name] = {
215
+ description: tool.description,
216
+ parameters: tool.parameters,
217
+ } as ToolDefinition
218
+ }
219
+ }
220
+ }
221
+
222
+ const messages = await prepareIncomingMessages(args.messages)
223
+ const completionReq = omitEmptyValues<ChatCompletionRequest>({
224
+ model: args.model,
225
+ messages,
226
+ temperature: args.temperature ? args.temperature : undefined,
227
+ stream: args.stream ? Boolean(args.stream) : false,
228
+ maxTokens: args.max_tokens ? args.max_tokens : undefined,
229
+ seed: args.seed ? args.seed : undefined,
230
+ stop,
231
+ frequencyPenalty: args.frequency_penalty
232
+ ? args.frequency_penalty
233
+ : undefined,
234
+ presencePenalty: args.presence_penalty
235
+ ? args.presence_penalty
236
+ : undefined,
237
+ topP: args.top_p ? args.top_p : undefined,
238
+ tokenBias: args.logit_bias ? args.logit_bias : undefined,
239
+ grammar: completionGrammar,
240
+ tools: completionTools,
241
+ // additional non-spec params
242
+ repeatPenaltyNum: args.repeat_penalty_num
243
+ ? args.repeat_penalty_num
244
+ : undefined,
245
+ minP: args.min_p ? args.min_p : undefined,
246
+ topK: args.top_k ? args.top_k : undefined,
247
+ })
248
+ const { instance, release } = await modelServer.requestInstance(
249
+ completionReq,
250
+ controller.signal,
251
+ )
252
+
253
+ if (ssePing) {
254
+ clearInterval(ssePing)
255
+ }
256
+ const task = instance.processChatCompletionTask(completionReq, {
257
+ signal: controller.signal,
258
+ onChunk: (chunk) => {
259
+ if (args.stream) {
260
+ const chunkData: OpenAIChatCompletionChunk = {
261
+ id: task.id,
262
+ object: 'chat.completion.chunk',
263
+ model: task.model,
264
+ created: Math.floor(task.createdAt.getTime() / 1000),
265
+ choices: [
266
+ {
267
+ index: 0,
268
+ delta: {
269
+ role: 'assistant',
270
+ content: chunk.text,
271
+ },
272
+ logprobs: null,
273
+ finish_reason: null,
274
+ },
275
+ ],
276
+ }
277
+ res.write(`data: ${JSON.stringify(chunkData)}\n\n`)
278
+ }
279
+ },
280
+ })
281
+
282
+ const result = await task.result
283
+
284
+ release()
285
+
286
+ if (args.stream) {
287
+ if (result.finishReason === 'toolCalls') {
288
+ // currently not possible to stream function calls
289
+ // imitating a stream here by sending two chunks. makes it work with the openai client
290
+ const streamedToolCallChunk: OpenAIChatCompletionChunk = {
291
+ id: task.id,
292
+ object: 'chat.completion.chunk',
293
+ model: task.model,
294
+ created: Math.floor(task.createdAt.getTime() / 1000),
295
+ choices: [
296
+ {
297
+ index: 0,
298
+ delta: {
299
+ role: 'assistant',
300
+ content: null,
301
+ },
302
+ logprobs: null,
303
+ finish_reason: result.finishReason
304
+ ? finishReasonMap[result.finishReason]
305
+ : 'stop',
306
+ },
307
+ ],
308
+ }
309
+
310
+ const toolCalls: OpenAI.ChatCompletionChunk.Choice.Delta.ToolCall[] =
311
+ result.message.toolCalls!.map((call, index) => {
312
+ return {
313
+ index,
314
+ id: call.id,
315
+ type: 'function',
316
+ function: {
317
+ name: call.name,
318
+ arguments: JSON.stringify(call.parameters),
319
+ },
320
+ }
321
+ })
322
+ streamedToolCallChunk.choices[0].delta.tool_calls = toolCalls
323
+ res.write(`data: ${JSON.stringify(streamedToolCallChunk)}\n\n`)
324
+ }
325
+ if (args.stream_options?.include_usage) {
326
+ const finalChunk: OpenAIChatCompletionChunk = {
327
+ id: task.id,
328
+ object: 'chat.completion.chunk',
329
+ model: task.model,
330
+ created: Math.floor(task.createdAt.getTime() / 1000),
331
+ system_fingerprint: instance.fingerprint,
332
+ choices: [
333
+ {
334
+ index: 0,
335
+ delta: {},
336
+ logprobs: null,
337
+ finish_reason: result.finishReason
338
+ ? finishReasonMap[result.finishReason]
339
+ : 'stop',
340
+ },
341
+ ],
342
+ usage: {
343
+ prompt_tokens: result.promptTokens,
344
+ completion_tokens: result.completionTokens,
345
+ total_tokens: result.contextTokens,
346
+ },
347
+ }
348
+ res.write(`data: ${JSON.stringify(finalChunk)}\n\n`)
349
+ }
350
+ res.write('data: [DONE]')
351
+ res.end()
352
+ } else {
353
+ const response: OpenAI.ChatCompletion = {
354
+ id: task.id,
355
+ model: task.model,
356
+ object: 'chat.completion',
357
+ created: Math.floor(task.createdAt.getTime() / 1000),
358
+ system_fingerprint: instance.fingerprint,
359
+ choices: [
360
+ {
361
+ index: 0,
362
+ message: {
363
+ role: 'assistant',
364
+ content: createResponseMessageContent(result.message.content),
365
+ refusal: null,
366
+ },
367
+ logprobs: null,
368
+ finish_reason: result.finishReason
369
+ ? finishReasonMap[result.finishReason]
370
+ : 'stop',
371
+ },
372
+ ],
373
+ usage: {
374
+ prompt_tokens: result.promptTokens,
375
+ completion_tokens: result.completionTokens,
376
+ total_tokens: result.contextTokens,
377
+ },
378
+ }
379
+ if (
380
+ 'toolCalls' in result.message &&
381
+ result.message.toolCalls?.length
382
+ ) {
383
+ response.choices[0].message.tool_calls =
384
+ result.message.toolCalls.map((call) => {
385
+ return {
386
+ id: call.id,
387
+ type: 'function',
388
+ function: {
389
+ name: call.name,
390
+ arguments: JSON.stringify(call.parameters),
391
+ },
392
+ }
393
+ })
394
+ }
395
+ res.writeHead(200, { 'Content-Type': 'application/json' })
396
+ res.end(JSON.stringify(response, null, 2))
397
+ }
398
+ } catch (e) {
399
+ console.error(e)
400
+ if (args.stream) {
401
+ res.write('data: [ERROR]')
402
+ } else {
403
+ res.writeHead(500, { 'Content-Type': 'application/json' })
404
+ res.end(JSON.stringify({ error: 'Internal server error' }))
405
+ }
406
+ }
407
+ }
408
+ }