inference-server 1.0.0-beta.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. package/README.md +216 -0
  2. package/dist/api/openai/enums.d.ts +4 -0
  3. package/dist/api/openai/enums.js +17 -0
  4. package/dist/api/openai/enums.js.map +1 -0
  5. package/dist/api/openai/handlers/chat.d.ts +3 -0
  6. package/dist/api/openai/handlers/chat.js +358 -0
  7. package/dist/api/openai/handlers/chat.js.map +1 -0
  8. package/dist/api/openai/handlers/completions.d.ts +3 -0
  9. package/dist/api/openai/handlers/completions.js +169 -0
  10. package/dist/api/openai/handlers/completions.js.map +1 -0
  11. package/dist/api/openai/handlers/embeddings.d.ts +3 -0
  12. package/dist/api/openai/handlers/embeddings.js +74 -0
  13. package/dist/api/openai/handlers/embeddings.js.map +1 -0
  14. package/dist/api/openai/handlers/images.d.ts +0 -0
  15. package/dist/api/openai/handlers/images.js +4 -0
  16. package/dist/api/openai/handlers/images.js.map +1 -0
  17. package/dist/api/openai/handlers/models.d.ts +3 -0
  18. package/dist/api/openai/handlers/models.js +23 -0
  19. package/dist/api/openai/handlers/models.js.map +1 -0
  20. package/dist/api/openai/handlers/transcription.d.ts +0 -0
  21. package/dist/api/openai/handlers/transcription.js +4 -0
  22. package/dist/api/openai/handlers/transcription.js.map +1 -0
  23. package/dist/api/openai/index.d.ts +7 -0
  24. package/dist/api/openai/index.js +14 -0
  25. package/dist/api/openai/index.js.map +1 -0
  26. package/dist/api/parseJSONRequestBody.d.ts +2 -0
  27. package/dist/api/parseJSONRequestBody.js +24 -0
  28. package/dist/api/parseJSONRequestBody.js.map +1 -0
  29. package/dist/api/v1/index.d.ts +2 -0
  30. package/dist/api/v1/index.js +29 -0
  31. package/dist/api/v1/index.js.map +1 -0
  32. package/dist/cli.d.ts +1 -0
  33. package/dist/cli.js +10 -0
  34. package/dist/cli.js.map +1 -0
  35. package/dist/engines/gpt4all/engine.d.ts +34 -0
  36. package/dist/engines/gpt4all/engine.js +357 -0
  37. package/dist/engines/gpt4all/engine.js.map +1 -0
  38. package/dist/engines/gpt4all/util.d.ts +3 -0
  39. package/dist/engines/gpt4all/util.js +29 -0
  40. package/dist/engines/gpt4all/util.js.map +1 -0
  41. package/dist/engines/index.d.ts +19 -0
  42. package/dist/engines/index.js +21 -0
  43. package/dist/engines/index.js.map +1 -0
  44. package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
  45. package/dist/engines/node-llama-cpp/engine.js +666 -0
  46. package/dist/engines/node-llama-cpp/engine.js.map +1 -0
  47. package/dist/engines/node-llama-cpp/types.d.ts +13 -0
  48. package/dist/engines/node-llama-cpp/types.js +2 -0
  49. package/dist/engines/node-llama-cpp/types.js.map +1 -0
  50. package/dist/engines/node-llama-cpp/util.d.ts +15 -0
  51. package/dist/engines/node-llama-cpp/util.js +84 -0
  52. package/dist/engines/node-llama-cpp/util.js.map +1 -0
  53. package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
  54. package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
  55. package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
  56. package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
  57. package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
  58. package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
  59. package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
  60. package/dist/engines/stable-diffusion-cpp/types.js +2 -0
  61. package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
  62. package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
  63. package/dist/engines/stable-diffusion-cpp/util.js +55 -0
  64. package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
  65. package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
  66. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
  67. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
  68. package/dist/engines/transformers-js/engine.d.ts +37 -0
  69. package/dist/engines/transformers-js/engine.js +538 -0
  70. package/dist/engines/transformers-js/engine.js.map +1 -0
  71. package/dist/engines/transformers-js/types.d.ts +7 -0
  72. package/dist/engines/transformers-js/types.js +2 -0
  73. package/dist/engines/transformers-js/types.js.map +1 -0
  74. package/dist/engines/transformers-js/util.d.ts +7 -0
  75. package/dist/engines/transformers-js/util.js +36 -0
  76. package/dist/engines/transformers-js/util.js.map +1 -0
  77. package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
  78. package/dist/engines/transformers-js/validateModelFiles.js +133 -0
  79. package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
  80. package/dist/experiments/ChatWithVision.d.ts +11 -0
  81. package/dist/experiments/ChatWithVision.js +91 -0
  82. package/dist/experiments/ChatWithVision.js.map +1 -0
  83. package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
  84. package/dist/experiments/StableDiffPromptGenerator.js +4 -0
  85. package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
  86. package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
  87. package/dist/experiments/VoiceFunctionCall.js +51 -0
  88. package/dist/experiments/VoiceFunctionCall.js.map +1 -0
  89. package/dist/http.d.ts +19 -0
  90. package/dist/http.js +54 -0
  91. package/dist/http.js.map +1 -0
  92. package/dist/index.d.ts +7 -0
  93. package/dist/index.js +8 -0
  94. package/dist/index.js.map +1 -0
  95. package/dist/instance.d.ts +88 -0
  96. package/dist/instance.js +594 -0
  97. package/dist/instance.js.map +1 -0
  98. package/dist/lib/acquireFileLock.d.ts +7 -0
  99. package/dist/lib/acquireFileLock.js +38 -0
  100. package/dist/lib/acquireFileLock.js.map +1 -0
  101. package/dist/lib/calculateContextIdentity.d.ts +7 -0
  102. package/dist/lib/calculateContextIdentity.js +39 -0
  103. package/dist/lib/calculateContextIdentity.js.map +1 -0
  104. package/dist/lib/calculateFileChecksum.d.ts +1 -0
  105. package/dist/lib/calculateFileChecksum.js +16 -0
  106. package/dist/lib/calculateFileChecksum.js.map +1 -0
  107. package/dist/lib/copyDirectory.d.ts +6 -0
  108. package/dist/lib/copyDirectory.js +27 -0
  109. package/dist/lib/copyDirectory.js.map +1 -0
  110. package/dist/lib/decodeAudio.d.ts +1 -0
  111. package/dist/lib/decodeAudio.js +26 -0
  112. package/dist/lib/decodeAudio.js.map +1 -0
  113. package/dist/lib/downloadModelFile.d.ts +10 -0
  114. package/dist/lib/downloadModelFile.js +58 -0
  115. package/dist/lib/downloadModelFile.js.map +1 -0
  116. package/dist/lib/flattenMessageTextContent.d.ts +2 -0
  117. package/dist/lib/flattenMessageTextContent.js +11 -0
  118. package/dist/lib/flattenMessageTextContent.js.map +1 -0
  119. package/dist/lib/getCacheDirPath.d.ts +12 -0
  120. package/dist/lib/getCacheDirPath.js +31 -0
  121. package/dist/lib/getCacheDirPath.js.map +1 -0
  122. package/dist/lib/loadImage.d.ts +12 -0
  123. package/dist/lib/loadImage.js +30 -0
  124. package/dist/lib/loadImage.js.map +1 -0
  125. package/dist/lib/logger.d.ts +12 -0
  126. package/dist/lib/logger.js +98 -0
  127. package/dist/lib/logger.js.map +1 -0
  128. package/dist/lib/math.d.ts +7 -0
  129. package/dist/lib/math.js +30 -0
  130. package/dist/lib/math.js.map +1 -0
  131. package/dist/lib/resolveModelFileLocation.d.ts +15 -0
  132. package/dist/lib/resolveModelFileLocation.js +41 -0
  133. package/dist/lib/resolveModelFileLocation.js.map +1 -0
  134. package/dist/lib/util.d.ts +7 -0
  135. package/dist/lib/util.js +61 -0
  136. package/dist/lib/util.js.map +1 -0
  137. package/dist/lib/validateModelFile.d.ts +9 -0
  138. package/dist/lib/validateModelFile.js +62 -0
  139. package/dist/lib/validateModelFile.js.map +1 -0
  140. package/dist/lib/validateModelOptions.d.ts +3 -0
  141. package/dist/lib/validateModelOptions.js +23 -0
  142. package/dist/lib/validateModelOptions.js.map +1 -0
  143. package/dist/pool.d.ts +61 -0
  144. package/dist/pool.js +512 -0
  145. package/dist/pool.js.map +1 -0
  146. package/dist/server.d.ts +59 -0
  147. package/dist/server.js +221 -0
  148. package/dist/server.js.map +1 -0
  149. package/dist/standalone.d.ts +1 -0
  150. package/dist/standalone.js +306 -0
  151. package/dist/standalone.js.map +1 -0
  152. package/dist/store.d.ts +60 -0
  153. package/dist/store.js +203 -0
  154. package/dist/store.js.map +1 -0
  155. package/dist/types/completions.d.ts +57 -0
  156. package/dist/types/completions.js +2 -0
  157. package/dist/types/completions.js.map +1 -0
  158. package/dist/types/index.d.ts +326 -0
  159. package/dist/types/index.js +2 -0
  160. package/dist/types/index.js.map +1 -0
  161. package/docs/engines.md +28 -0
  162. package/docs/gpu.md +72 -0
  163. package/docs/http-api.md +147 -0
  164. package/examples/all-options.js +108 -0
  165. package/examples/chat-cli.js +56 -0
  166. package/examples/chat-server.js +65 -0
  167. package/examples/concurrency.js +70 -0
  168. package/examples/express.js +70 -0
  169. package/examples/pool.js +91 -0
  170. package/package.json +113 -0
  171. package/src/api/openai/enums.ts +20 -0
  172. package/src/api/openai/handlers/chat.ts +408 -0
  173. package/src/api/openai/handlers/completions.ts +196 -0
  174. package/src/api/openai/handlers/embeddings.ts +92 -0
  175. package/src/api/openai/handlers/images.ts +3 -0
  176. package/src/api/openai/handlers/models.ts +33 -0
  177. package/src/api/openai/handlers/transcription.ts +2 -0
  178. package/src/api/openai/index.ts +16 -0
  179. package/src/api/parseJSONRequestBody.ts +26 -0
  180. package/src/api/v1/DRAFT.md +16 -0
  181. package/src/api/v1/index.ts +37 -0
  182. package/src/cli.ts +9 -0
  183. package/src/engines/gpt4all/engine.ts +441 -0
  184. package/src/engines/gpt4all/util.ts +31 -0
  185. package/src/engines/index.ts +28 -0
  186. package/src/engines/node-llama-cpp/engine.ts +811 -0
  187. package/src/engines/node-llama-cpp/types.ts +17 -0
  188. package/src/engines/node-llama-cpp/util.ts +126 -0
  189. package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
  190. package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
  191. package/src/engines/stable-diffusion-cpp/types.ts +54 -0
  192. package/src/engines/stable-diffusion-cpp/util.ts +58 -0
  193. package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
  194. package/src/engines/transformers-js/engine.ts +659 -0
  195. package/src/engines/transformers-js/types.ts +25 -0
  196. package/src/engines/transformers-js/util.ts +40 -0
  197. package/src/engines/transformers-js/validateModelFiles.ts +168 -0
  198. package/src/experiments/ChatWithVision.ts +103 -0
  199. package/src/experiments/StableDiffPromptGenerator.ts +2 -0
  200. package/src/experiments/VoiceFunctionCall.ts +71 -0
  201. package/src/http.ts +72 -0
  202. package/src/index.ts +7 -0
  203. package/src/instance.ts +723 -0
  204. package/src/lib/acquireFileLock.ts +38 -0
  205. package/src/lib/calculateContextIdentity.ts +53 -0
  206. package/src/lib/calculateFileChecksum.ts +18 -0
  207. package/src/lib/copyDirectory.ts +29 -0
  208. package/src/lib/decodeAudio.ts +39 -0
  209. package/src/lib/downloadModelFile.ts +70 -0
  210. package/src/lib/flattenMessageTextContent.ts +19 -0
  211. package/src/lib/getCacheDirPath.ts +34 -0
  212. package/src/lib/loadImage.ts +46 -0
  213. package/src/lib/logger.ts +112 -0
  214. package/src/lib/math.ts +31 -0
  215. package/src/lib/resolveModelFileLocation.ts +49 -0
  216. package/src/lib/util.ts +75 -0
  217. package/src/lib/validateModelFile.ts +71 -0
  218. package/src/lib/validateModelOptions.ts +31 -0
  219. package/src/pool.ts +651 -0
  220. package/src/server.ts +270 -0
  221. package/src/standalone.ts +320 -0
  222. package/src/store.ts +278 -0
  223. package/src/types/completions.ts +86 -0
  224. package/src/types/index.ts +488 -0
  225. package/tsconfig.json +29 -0
  226. package/tsconfig.release.json +11 -0
  227. package/vitest.config.ts +18 -0
package/README.md ADDED
@@ -0,0 +1,216 @@
1
+ ## inference-server
2
+
3
+ Libraries and server to build AI applications doing local inference in node. Use it within your application, or as a microservice. Adapters to [llama.cpp](https://github.com/ggerganov/llama.cpp/) via [node-llama-cpp](https://github.com/withcatai/node-llama-cpp) and [gpt4all](https://github.com/nomic-ai/gpt4all). And [transformers.js](https://github.com/xenova/transformers.js/) using [ONNX](https://github.com/microsoft/onnxruntime/tree/main/js#onnxruntime-node)!
4
+
5
+ The project includes a model resource pool, an inference queue and an HTTP API server. Model file management is abstracted away as much as possible - configure a URL and go. This package is useful for quick model evaluations and experiments (in JavaScript), small-scale chatbots, resource efficient assistants on edge devices, or any applications where private & offline are interesting criteria. For other, non-Node-based solutions, check out the [related solutions](#related-solutions) section.
6
+
7
+ ⚠️ This package is currently in beta. APIs may change. Things may break. [Help is welcome](#contributing).
8
+
9
+ ### Features
10
+
11
+ - Configure as many models as you want, they will be downloaded and cached to disk. You may provide them as abs file paths if you already have models downloaded.
12
+ - Adjust the pool `concurrency`, and the models' `maxInstances`, `ttl` and `contextSize` to fit your use case. Combine multiple pools for more complex setups.
13
+ - Can be tuned to either use no resources when idle or to always keep a model ready with context preloaded.
14
+ - A chat session cache that will effectively reuse context across multiple turns or stateless requests.
15
+ - OpenAI spec API endpoints. See [HTTP API docs](./docs/http-api.md) for details. A "native" HTTP API is not yet implemented.
16
+ - BYO web server or use the provided express server and middleware. Incoming requests are queued - stall, if needed - and processed as soon as resources are available.
17
+ - Have as many ModelServers running as you want, they can share the same cache directory. (Multiple processes can as well)
18
+ - Use the [ModelPool](./examples/pool.js) class directly for a lower-level transaction-like API to acquire/release model instances.
19
+ - Use [custom engines](./docs/engines.md#custom-engines) to combine multiple models (or do RAG) behind the scenes.
20
+
21
+ ### Usage
22
+
23
+ Example with minimal configuration:
24
+
25
+ ```ts basic.ts
26
+ import { ModelServer } from 'inference-server'
27
+
28
+ const modelServer = new ModelServer({
29
+ log: 'info', // default is 'warn'
30
+ models: {
31
+ 'my-model': { // Identifiers can use a-zA-Z0-9_:\-\.
32
+ // Required are `task`, `engine`, `url` and/or `file`.
33
+ task: 'text-completion', // text-completion models can be used for chat and text generation tasks
34
+ engine: 'node-llama-cpp', // each engine comes with a peer dep. `npm install node-llama-cpp@3`
35
+ url: 'https://huggingface.co/HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF/blob/main/smollm-135m-instruct-add-basics-q8_0.gguf',
36
+ },
37
+ },
38
+ })
39
+ await modelServer.start()
40
+ const result = await modelServer.processChatCompletionTask({
41
+ model: 'my-model',
42
+ messages: [
43
+ {
44
+ role: 'user',
45
+ content: 'Why are bananas rather blue than bread at night?',
46
+ },
47
+ ],
48
+ })
49
+ console.debug(result)
50
+ modelServer.stop()
51
+ ```
52
+
53
+ Or, to start an OAI compatible HTTP server with two concurrent instances of the same model:
54
+
55
+ ```ts http-api.ts
56
+ import { startHTTPServer } from 'inference-server'
57
+ import OpenAI from 'openai'
58
+
59
+ const server = await startHTTPServer({
60
+ listen: { port: 3000 }, // apart from `listen` options are identical to ModelServer
61
+ concurrency: 2, // two inference processes may run at the same time
62
+ models: {
63
+ 'smollm': {
64
+ task: 'text-completion',
65
+ engine: 'node-llama-cpp',
66
+ url: 'https://huggingface.co/HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF/blob/main/smollm-135m-instruct-add-basics-q8_0.gguf',
67
+ maxInstances: 2, // two instances of this model may be loaded into memory
68
+ device: {
69
+ cpuThreads: 4, // limit cpu threads so we dont occupy all cores
70
+ }
71
+ },
72
+ },
73
+ })
74
+
75
+ const client = new OpenAI({
76
+ baseURL: 'http://localhost:3000/openai/v1/',
77
+ apiKey: 'yes',
78
+ })
79
+ const completion = await client.beta.chat.completions.stream({
80
+ stream_options: { include_usage: true },
81
+ model: 'smollm',
82
+ messages: [
83
+ { role: 'user', content: 'lets count to 10, but only whisper every second number' },
84
+ ],
85
+ })
86
+ for await (const chunk of completion) {
87
+ if (chunk.choices[0]?.delta?.content) {
88
+ process.stdout.write(chunk.choices[0].delta.content)
89
+ }
90
+ }
91
+ server.stop()
92
+ ```
93
+
94
+ More usage examples:
95
+ - Using all available options / model options API doc [./examples/all-options](./examples/all-options.js).
96
+ - Custom engines [./tests/engines/experiments.test.ts](./tests/engines/experiments.test.ts).
97
+ - A chat cli [./examples/chat-cli](./examples/chat-cli.js).
98
+ - `concurrency` behavior [./examples/concurrency](./examples/concurrency.js).
99
+ - Using the ModelPool directly [./examples/pool](./examples/pool.js).
100
+ - Using the express middleware [./examples/express](./examples/express.js).
101
+
102
+ Currently supported inference engines are:
103
+
104
+ | Engine | Peer Dependency |
105
+ | --- | --- |
106
+ | node-llama-cpp | `node-llama-cpp >= 3.0.0` |
107
+ | gpt4all | `gpt4all >= 4.0.0` |
108
+ | transformers-js | `@huggingface/transformers >= 3.0.0` |
109
+
110
+ See [engine docs](./docs/engines.md) for more information on each.
111
+
112
+ #### Limitations and Known Issues
113
+
114
+ ##### Only one model can run on GPU at a time
115
+ Llama.cpp bindings currently do not support running multiple models on gpu at the same time. This can/will likely be improved in the future. See [GPU docs](./docs/gpu.md) for more information on how to work around that.
116
+
117
+ ##### System Messages
118
+ System role messages are supported only as the first message in a chat completion session. All other system messages will be ignored. This is only for simplicity reasons and might change in the future.
119
+
120
+ ##### Chat Context Cache / Reusing the correct instance on stateless requests
121
+ Note that the current context cache implementation only works if (apart from the final user message) the _same messages_ are resent in the _same order_. This is because the messages will be hashed to be compared during follow up turns, to match requests to the correct session. If no hash matches everything will still work, but slower. Because a fresh context will be used and the whole input conversation will be reingested, instead of just the new user message.
122
+
123
+ ##### Function Calling
124
+ Only available when using node-llama-cpp and a model that supports function calling. `tool_choice` can currently not be controlled and will always be `auto`. GBNF grammars cannot be used together with function calling.
125
+
126
+ ##### Huge node_modules when installing all engines
127
+ CUDA binaries are distributed with each engine separately, which leads to an extra 0.5-1GB of disk use. Unfortunately there is nothing I can do about that.
128
+
129
+ #### TODO / Roadmap
130
+
131
+ Not in any particular order:
132
+
133
+ - [x] Automatic download of GGUF's with ipull
134
+ - [x] Engine abstraction
135
+ - [x] Model instance pool and queue
136
+ - [x] Basic OpenAI API compatibility
137
+ - [x] POC of chat context reuse across requests
138
+ - [x] Tests for context reuse and context leaking
139
+ - [x] Logging Interface
140
+ - [x] Better Examples
141
+ - [x] GPU support
142
+ - [x] node-llama-cpp context reuse
143
+ - [x] Instance TTL
144
+ - [x] Allow configuring model hashes / verification
145
+ - [x] Improve template code / stop trigger support
146
+ - [x] Support configuring a timeout on completion processing
147
+ - [x] Logit bias / Token bias support
148
+ - [x] Improve tests for longer conversations / context window shifting
149
+ - [x] Embeddings APIs
150
+ - [x] Improve node-llama-cpp token usage counts / TokenMeter
151
+ - [x] Reuse download logic from node-llama-cpp to support split ggufs.
152
+ - [x] Support preloading instances with context, like a long system message or few shot examples
153
+ - [x] transformers.js engine
154
+ - [x] Support custom engine implementations
155
+ - [x] Make sure nothing clashes if multiple servers/stores are using the same cache directory
156
+ - [x] See if we can install supported engines as peer deps
157
+ - [x] Improve types, simpler node-llama-cpp grammar integration
158
+ - [x] Restructure docs, add function calling & grammar usage docs
159
+ - [x] TTL=0 should immediately dispose of instances instead of waiting (currently on avg 30s) for the next TTL check
160
+ - [x] Expose node-llama-cpp context shift strategy, lora, allow json schema as input for `grammar`
161
+ - [x] Improve types for tool definitions / json schema
162
+ - [x] Make pool dispose / stop more robust
163
+ - [x] Tests for cancellation and timeouts
164
+ - [x] transformer.js text embeddings
165
+ - [x] transformer.js image embeddings
166
+ - [x] transformer.js multimodal image/text embeddings (see [jina-clip-v1](https://github.com/xenova/transformers.js/issues/793) and [nomic-embed-vision](https://github.com/xenova/transformers.js/issues/848) issues.)
167
+ - [x] Allow "prefilling" (partial) assistant responses like outlined [here](https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/prefill-claudes-response#how-to-prefill-claudes-response)
168
+ - [x] non-chat text completions: Allow reuse of context
169
+ - [x] non-chat text completions: Support preloading of prefixes
170
+ - [ ] Add some light jsdoc for server/pool/store methods
171
+ - [ ] utilize node-llama-cpp's support to reuse LlamaModel instances with multiple contexts
172
+ - [ ] Support transformer.js for text-completion tasks ([not yet supported in Node.js](https://github.com/huggingface/transformers.js/blob/e129c47c65a049173f35e6263fd8d9f660dfc1a7/src/models.js#L240-L242))
173
+ - [ ] Implement more transformer.js tasks (`imageToImage`, `textToImage`, `textToSpeech`?)
174
+ - [ ] Infill completion support https://github.com/withcatai/node-llama-cpp/blob/beta/src/evaluator/LlamaCompletion.ts#L322-L336
175
+ - [ ] Find a way to type available custom engines (and their options?)
176
+ - [ ] Rework GPU+device usage / lock (Support multiple models on gpu in cases where its possible)
177
+ - [ ] Add engine interfaces for resource use (and estimates, see https://github.com/ggerganov/llama.cpp/issues/4315 and https://github.com/withcatai/node-llama-cpp/blob/beta/src/gguf/insights/utils/resolveContextContextSizeOption.ts)
178
+ - [ ] Allow configuring a pools max memory usage
179
+ - [ ] Test deno/bun support
180
+ - [ ] Add image generation endpoint in oai api
181
+ - [ ] Add transcript endpoint in oai api
182
+ - [ ] Add `n` parameter support to node-llama-cpp chat completions
183
+ - [ ] [CLI](https://github.com/iimez/inference-server/discussions/7)
184
+ - [ ] Replace express with tinyhttp
185
+ - [ ] Add stable-diffusion engine
186
+
187
+ ### Contributing
188
+
189
+ If you are using this package - let me know where [you would like this to go](https://github.com/iimez/inference-server/discussions). Code also welcome. You'll find the things I'm planning to do (eventually) above, and the wishlist below.
190
+
191
+ #### Possible Future Goals
192
+
193
+ - See if it would make sense to implement engines for [leejet/stable-diffusion.cpp](https://github.com/leejet/stable-diffusion.cpp) and [onnxruntime-node](https://www.npmjs.com/package/onnxruntime-node)
194
+ - Create a separate HTTP API that's independent of the OpenAI spec and stateful. See [discussion](https://github.com/iimez/inference-server/discussions/8).
195
+ - Add a clientside library (React hooks?) for use of above API.
196
+ - Provide a Docker image. And maybe a Prometheus endpoint.
197
+ - Logprobs support for node-llama-cpp.
198
+
199
+ #### Currently not the Goals
200
+
201
+ - A facade to LLM cloud hoster HTTP API's. The strengths here are local/private/offline use.
202
+ - Worry too much about authentication or rate limiting or misuse. Host this with caution, it's likely DDoS-able.
203
+ - Some kind of distributed or multi-node setup. That should probably be something designed for this purpose from the ground up.
204
+ - Other common related tooling like vector stores, Chat GUIs, etc. Scope would probably get out of hand.
205
+
206
+ ### Related Solutions
207
+
208
+ If you look at this package, you might also want to take a look at these other solutions:
209
+
210
+ - [ollama API](https://github.com/ollama/ollama/blob/main/docs/api.md) - Uses llama.cpp and provides a HTTP API. Also has experimental OpenAI API compatibility.
211
+ - [llama.cpp Server](https://github.com/ggerganov/llama.cpp/tree/master/examples/server#llamacpp-http-server) - The official llama.cpp HTTP API.
212
+ - [VLLM](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html) - A more production ready solution for hosting large language models.
213
+ - [LM Studio](https://lmstudio.ai/docs/local-server) - Also has a local server.
214
+ - [LocalAI](https://github.com/mudler/LocalAI) - Similar project in go.
215
+ - [Petals](https://github.com/bigscience-workshop/petals) - Local (and distributed!) inference in python.
216
+ - [cortex.cpp](https://github.com/janhq/cortex.cpp/)
@@ -0,0 +1,4 @@
1
+ import { CompletionFinishReason, ChatMessage } from '../../types/index.js';
2
+ import OpenAI from 'openai';
3
+ export declare const finishReasonMap: Record<CompletionFinishReason, OpenAI.ChatCompletion.Choice['finish_reason']>;
4
+ export declare const messageRoleMap: Record<OpenAI.ChatCompletionMessageParam['role'], ChatMessage['role']>;
@@ -0,0 +1,17 @@
1
// Translates internal completion finish reasons into the OpenAI
// `finish_reason` values permitted on chat completion choices.
// Everything that is not a token limit or a tool call collapses to 'stop'.
const OAI_STOP = 'stop';
export const finishReasonMap = {
    maxTokens: 'length',
    toolCalls: 'tool_calls',
    eogToken: OAI_STOP,
    stopTrigger: OAI_STOP,
    timeout: OAI_STOP,
    cancel: OAI_STOP,
    abort: OAI_STOP,
};
// Translates OpenAI message roles into internal ChatMessage roles.
// The deprecated 'function' role is folded into 'tool'.
export const messageRoleMap = {
    user: 'user',
    system: 'system',
    assistant: 'assistant',
    tool: 'tool',
    function: 'tool',
};
//# sourceMappingURL=enums.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"enums.js","sourceRoot":"","sources":["../../../src/api/openai/enums.ts"],"names":[],"mappings":"AAGA,MAAM,CAAC,MAAM,eAAe,GAAkF;IAC7G,SAAS,EAAE,QAAQ;IACnB,SAAS,EAAE,YAAY;IACvB,QAAQ,EAAE,MAAM;IAChB,WAAW,EAAE,MAAM;IACnB,OAAO,EAAE,MAAM;IACf,MAAM,EAAE,MAAM;IACd,KAAK,EAAE,MAAM;CACJ,CAAA;AAEV,MAAM,CAAC,MAAM,cAAc,GAA2E;IACrG,IAAI,EAAE,MAAM;IACZ,MAAM,EAAE,QAAQ;IAChB,SAAS,EAAE,WAAW;IACtB,IAAI,EAAE,MAAM;IACZ,QAAQ,EAAE,MAAM;CAChB,CAAA"}
@@ -0,0 +1,3 @@
1
+ import type { IncomingMessage, ServerResponse } from 'node:http';
2
+ import type { ModelServer } from '../../../server.js';
3
+ export declare function createChatCompletionHandler(modelServer: ModelServer): (req: IncomingMessage, res: ServerResponse) => Promise<void>;
@@ -0,0 +1,358 @@
1
+ import { parseJSONRequestBody } from '../../../api/parseJSONRequestBody.js';
2
+ import { omitEmptyValues } from '../../../lib/util.js';
3
+ import { loadImageFromUrl } from '../../../lib/loadImage.js';
4
+ import { finishReasonMap, messageRoleMap } from '../enums.js';
5
+ async function prepareIncomingMessages(messages) {
6
+ const downloadPromises = {};
7
+ const resultMessages = [];
8
+ for (const message of messages) {
9
+ const role = messageRoleMap[message.role];
10
+ const resultMessage = {
11
+ role,
12
+ content: [],
13
+ };
14
+ if (role === 'tool' && 'tool_call_id' in message) {
15
+ resultMessage.callId = message.tool_call_id;
16
+ }
17
+ if (typeof message.content === 'string') {
18
+ resultMessage.content.push({
19
+ type: 'text',
20
+ text: message.content,
21
+ });
22
+ }
23
+ else if (Array.isArray(message.content)) {
24
+ for (const part of message.content) {
25
+ if (typeof part === 'string') {
26
+ resultMessage.content.push({
27
+ type: 'text',
28
+ text: part,
29
+ });
30
+ }
31
+ else if (part.type === 'text') {
32
+ resultMessage.content.push({
33
+ type: 'text',
34
+ text: part.text,
35
+ });
36
+ }
37
+ else if (part.type === 'image_url') {
38
+ if (!downloadPromises[part.image_url.url]) {
39
+ downloadPromises[part.image_url.url] = loadImageFromUrl(part.image_url.url);
40
+ }
41
+ const content = {
42
+ type: 'image',
43
+ };
44
+ resultMessage.content.push(content);
45
+ downloadPromises[part.image_url.url].then((image) => {
46
+ content.image = image;
47
+ });
48
+ }
49
+ else if (part.type === 'input_audio') {
50
+ resultMessage.content.push({
51
+ type: 'audio',
52
+ audio: part.input_audio,
53
+ });
54
+ }
55
+ else if (part.type === 'refusal') {
56
+ resultMessage.content.push({
57
+ type: 'text',
58
+ text: part.refusal,
59
+ });
60
+ }
61
+ }
62
+ }
63
+ else {
64
+ throw new Error('Invalid message content');
65
+ }
66
+ resultMessages.push(resultMessage);
67
+ }
68
+ await Promise.all(Object.values(downloadPromises));
69
+ return resultMessages;
70
+ }
71
+ function createResponseMessageContent(content) {
72
+ if (!content) {
73
+ return null;
74
+ }
75
+ if (typeof content === 'string') {
76
+ return content;
77
+ }
78
+ if (!Array.isArray(content)) {
79
+ throw new Error('Invalid response message content');
80
+ }
81
+ let text = '';
82
+ for (const part of content) {
83
+ if (part.type === 'text') {
84
+ text += part.text;
85
+ }
86
+ // assistant may only respond with text in openai chat completions
87
+ }
88
+ return text;
89
+ }
90
+ // v1/chat/completions
91
+ // https://platform.openai.com/docs/api-reference/chat/create
92
+ export function createChatCompletionHandler(modelServer) {
93
+ return async (req, res) => {
94
+ let args;
95
+ try {
96
+ const body = await parseJSONRequestBody(req);
97
+ args = body;
98
+ }
99
+ catch (e) {
100
+ console.error(e);
101
+ res.writeHead(400, { 'Content-Type': 'application/json' });
102
+ res.end(JSON.stringify({ error: 'Invalid request' }));
103
+ return;
104
+ }
105
+ // TODO ajv schema validation?
106
+ if (!args.model || !args.messages) {
107
+ res.writeHead(400, { 'Content-Type': 'application/json' });
108
+ res.end(JSON.stringify({ error: 'Invalid request (need at least model and messages)' }));
109
+ return;
110
+ }
111
+ if (!modelServer.modelExists(args.model)) {
112
+ res.writeHead(400, { 'Content-Type': 'application/json' });
113
+ res.end(JSON.stringify({ error: 'Model does not exist' }));
114
+ return;
115
+ }
116
+ const controller = new AbortController();
117
+ req.on('close', () => {
118
+ console.debug('Client closed connection');
119
+ controller.abort();
120
+ });
121
+ req.on('end', () => {
122
+ console.debug('Client ended connection');
123
+ controller.abort();
124
+ });
125
+ req.on('aborted', () => {
126
+ console.debug('Client aborted connection');
127
+ controller.abort();
128
+ });
129
+ req.on('error', () => {
130
+ console.debug('Client error');
131
+ controller.abort();
132
+ });
133
+ try {
134
+ let ssePing;
135
+ if (args.stream) {
136
+ res.writeHead(200, {
137
+ 'Content-Type': 'text/event-stream',
138
+ 'Cache-Control': 'no-cache',
139
+ Connection: 'keep-alive',
140
+ });
141
+ res.flushHeaders();
142
+ ssePing = setInterval(() => {
143
+ res.write(':ping\n\n');
144
+ }, 30000);
145
+ }
146
+ let stop = args.stop ? args.stop : undefined;
147
+ if (typeof stop === 'string') {
148
+ stop = [stop];
149
+ }
150
+ let completionGrammar;
151
+ if (args.response_format) {
152
+ if (args.response_format.type === 'json_object') {
153
+ completionGrammar = 'json';
154
+ }
155
+ }
156
+ let completionTools;
157
+ if (args.tools) {
158
+ const functionTools = args.tools
159
+ .filter((tool) => tool.type === 'function')
160
+ .map((tool) => {
161
+ return {
162
+ name: tool.function.name,
163
+ description: tool.function.description,
164
+ parameters: tool.function.parameters,
165
+ };
166
+ });
167
+ if (functionTools.length) {
168
+ if (!completionTools) {
169
+ completionTools = {};
170
+ }
171
+ for (const tool of functionTools) {
172
+ completionTools[tool.name] = {
173
+ description: tool.description,
174
+ parameters: tool.parameters,
175
+ };
176
+ }
177
+ }
178
+ }
179
+ const messages = await prepareIncomingMessages(args.messages);
180
+ const completionReq = omitEmptyValues({
181
+ model: args.model,
182
+ messages,
183
+ temperature: args.temperature ? args.temperature : undefined,
184
+ stream: args.stream ? Boolean(args.stream) : false,
185
+ maxTokens: args.max_tokens ? args.max_tokens : undefined,
186
+ seed: args.seed ? args.seed : undefined,
187
+ stop,
188
+ frequencyPenalty: args.frequency_penalty
189
+ ? args.frequency_penalty
190
+ : undefined,
191
+ presencePenalty: args.presence_penalty
192
+ ? args.presence_penalty
193
+ : undefined,
194
+ topP: args.top_p ? args.top_p : undefined,
195
+ tokenBias: args.logit_bias ? args.logit_bias : undefined,
196
+ grammar: completionGrammar,
197
+ tools: completionTools,
198
+ // additional non-spec params
199
+ repeatPenaltyNum: args.repeat_penalty_num
200
+ ? args.repeat_penalty_num
201
+ : undefined,
202
+ minP: args.min_p ? args.min_p : undefined,
203
+ topK: args.top_k ? args.top_k : undefined,
204
+ });
205
+ const { instance, release } = await modelServer.requestInstance(completionReq, controller.signal);
206
+ if (ssePing) {
207
+ clearInterval(ssePing);
208
+ }
209
+ const task = instance.processChatCompletionTask(completionReq, {
210
+ signal: controller.signal,
211
+ onChunk: (chunk) => {
212
+ if (args.stream) {
213
+ const chunkData = {
214
+ id: task.id,
215
+ object: 'chat.completion.chunk',
216
+ model: task.model,
217
+ created: Math.floor(task.createdAt.getTime() / 1000),
218
+ choices: [
219
+ {
220
+ index: 0,
221
+ delta: {
222
+ role: 'assistant',
223
+ content: chunk.text,
224
+ },
225
+ logprobs: null,
226
+ finish_reason: null,
227
+ },
228
+ ],
229
+ };
230
+ res.write(`data: ${JSON.stringify(chunkData)}\n\n`);
231
+ }
232
+ },
233
+ });
234
+ const result = await task.result;
235
+ release();
236
+ if (args.stream) {
237
+ if (result.finishReason === 'toolCalls') {
238
+ // currently not possible to stream function calls
239
+ // imitating a stream here by sending two chunks. makes it work with the openai client
240
+ const streamedToolCallChunk = {
241
+ id: task.id,
242
+ object: 'chat.completion.chunk',
243
+ model: task.model,
244
+ created: Math.floor(task.createdAt.getTime() / 1000),
245
+ choices: [
246
+ {
247
+ index: 0,
248
+ delta: {
249
+ role: 'assistant',
250
+ content: null,
251
+ },
252
+ logprobs: null,
253
+ finish_reason: result.finishReason
254
+ ? finishReasonMap[result.finishReason]
255
+ : 'stop',
256
+ },
257
+ ],
258
+ };
259
+ const toolCalls = result.message.toolCalls.map((call, index) => {
260
+ return {
261
+ index,
262
+ id: call.id,
263
+ type: 'function',
264
+ function: {
265
+ name: call.name,
266
+ arguments: JSON.stringify(call.parameters),
267
+ },
268
+ };
269
+ });
270
+ streamedToolCallChunk.choices[0].delta.tool_calls = toolCalls;
271
+ res.write(`data: ${JSON.stringify(streamedToolCallChunk)}\n\n`);
272
+ }
273
+ if (args.stream_options?.include_usage) {
274
+ const finalChunk = {
275
+ id: task.id,
276
+ object: 'chat.completion.chunk',
277
+ model: task.model,
278
+ created: Math.floor(task.createdAt.getTime() / 1000),
279
+ system_fingerprint: instance.fingerprint,
280
+ choices: [
281
+ {
282
+ index: 0,
283
+ delta: {},
284
+ logprobs: null,
285
+ finish_reason: result.finishReason
286
+ ? finishReasonMap[result.finishReason]
287
+ : 'stop',
288
+ },
289
+ ],
290
+ usage: {
291
+ prompt_tokens: result.promptTokens,
292
+ completion_tokens: result.completionTokens,
293
+ total_tokens: result.contextTokens,
294
+ },
295
+ };
296
+ res.write(`data: ${JSON.stringify(finalChunk)}\n\n`);
297
+ }
298
+ res.write('data: [DONE]');
299
+ res.end();
300
+ }
301
+ else {
302
+ const response = {
303
+ id: task.id,
304
+ model: task.model,
305
+ object: 'chat.completion',
306
+ created: Math.floor(task.createdAt.getTime() / 1000),
307
+ system_fingerprint: instance.fingerprint,
308
+ choices: [
309
+ {
310
+ index: 0,
311
+ message: {
312
+ role: 'assistant',
313
+ content: createResponseMessageContent(result.message.content),
314
+ refusal: null,
315
+ },
316
+ logprobs: null,
317
+ finish_reason: result.finishReason
318
+ ? finishReasonMap[result.finishReason]
319
+ : 'stop',
320
+ },
321
+ ],
322
+ usage: {
323
+ prompt_tokens: result.promptTokens,
324
+ completion_tokens: result.completionTokens,
325
+ total_tokens: result.contextTokens,
326
+ },
327
+ };
328
+ if ('toolCalls' in result.message &&
329
+ result.message.toolCalls?.length) {
330
+ response.choices[0].message.tool_calls =
331
+ result.message.toolCalls.map((call) => {
332
+ return {
333
+ id: call.id,
334
+ type: 'function',
335
+ function: {
336
+ name: call.name,
337
+ arguments: JSON.stringify(call.parameters),
338
+ },
339
+ };
340
+ });
341
+ }
342
+ res.writeHead(200, { 'Content-Type': 'application/json' });
343
+ res.end(JSON.stringify(response, null, 2));
344
+ }
345
+ }
346
+ catch (e) {
347
+ console.error(e);
348
+ if (args.stream) {
349
+ res.write('data: [ERROR]');
350
+ }
351
+ else {
352
+ res.writeHead(500, { 'Content-Type': 'application/json' });
353
+ res.end(JSON.stringify({ error: 'Internal server error' }));
354
+ }
355
+ }
356
+ };
357
+ }
358
+ //# sourceMappingURL=chat.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chat.js","sourceRoot":"","sources":["../../../../src/api/openai/handlers/chat.ts"],"names":[],"mappings":"AAWA,OAAO,EAAE,oBAAoB,EAAE,MAAM,sCAAsC,CAAA;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AACtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAA;AAC5D,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAc7D,KAAK,UAAU,uBAAuB,CACrC,QAAsC;IAEtC,MAAM,gBAAgB,GAAmC,EAAE,CAAA;IAC3D,MAAM,cAAc,GAAkB,EAAE,CAAA;IAExC,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,cAAc,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QACzC,MAAM,aAAa,GAAQ;YAC1B,IAAI;YACJ,OAAO,EAAE,EAAE;SACX,CAAA;QACD,IAAI,IAAI,KAAK,MAAM,IAAI,cAAc,IAAI,OAAO,EAAE,CAAC;YAClD,aAAa,CAAC,MAAM,GAAG,OAAO,CAAC,YAAY,CAAA;QAC5C,CAAC;QAED,IAAI,OAAO,OAAO,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACzC,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC;gBAC1B,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,OAAO,CAAC,OAAO;aACrB,CAAC,CAAA;QACH,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3C,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;gBAEpC,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;oBAC9B,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC;wBAC1B,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,IAAI;qBACV,CAAC,CAAA;gBACH,CAAC;qBAAM,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;oBACjC,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC;wBAC1B,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,IAAI,CAAC,IAAI;qBACf,CAAC,CAAA;gBACH,CAAC;qBAAM,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;oBACtC,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC3C,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAA;oBAC5E,CAAC;oBACD,MAAM,OAAO,GAAgC;wBAC5C,IAAI,EAAE,OAAO;qBACb,CAAA;oBACD,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;oBACnC,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE;wBACnD,OAAO,CAAC,KAAK,GAAG,KAAK,CAAA;oBACtB,CAAC,CAAC,CAAA;gBACH,CAAC;qBAAM,IAAI,IAAI,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;oBACxC,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC;wBAC1B,IAAI,EAAE,OAAO;wBACb,KAAK,EAAE,IAAI,CAAC,WAAW;qBACvB,CAAC,CAAA;gBACH,CAAC;qBAAM,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;oBACpC,aAAa,CAAC,OAAO,CAAC,IAA
I,CAAC;wBAC1B,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,IAAI,CAAC,OAAO;qBAClB,CAAC,CAAA;gBACH,CAAC;YACF,CAAC;QACF,CAAC;aAAM,CAAC;YACP,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAA;QAC3C,CAAC;QAED,cAAc,CAAC,IAAI,CAAC,aAAa,CAAC,CAAA;IACnC,CAAC;IAED,MAAM,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC,CAAA;IAElD,OAAO,cAAc,CAAA;AAEtB,CAAC;AAED,SAAS,4BAA4B,CACpC,OAAsC;IAEtC,IAAI,CAAC,OAAO,EAAE,CAAC;QACd,OAAO,IAAI,CAAA;IACZ,CAAC;IACD,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;QACjC,OAAO,OAAO,CAAA;IACf,CAAC;IACD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAA;IACpD,CAAC;IAED,IAAI,IAAI,GAAG,EAAE,CAAA;IACb,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAC1B,IAAI,IAAI,IAAI,CAAC,IAAI,CAAA;QAClB,CAAC;QACD,kEAAkE;IACnE,CAAC;IACD,OAAO,IAAI,CAAA;AACZ,CAAC;AAED,sBAAsB;AACtB,6DAA6D;AAC7D,MAAM,UAAU,2BAA2B,CAAC,WAAwB;IACnE,OAAO,KAAK,EAAE,GAAoB,EAAE,GAAmB,EAAE,EAAE;QAC1D,IAAI,IAAgC,CAAA;QAEpC,IAAI,CAAC;YACJ,MAAM,IAAI,GAAG,MAAM,oBAAoB,CAAC,GAAG,CAAC,CAAA;YAC5C,IAAI,GAAG,IAAI,CAAA;QACZ,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACZ,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;YAChB,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAA;YAC1D,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE,CAAC,CAAC,CAAA;YACrD,OAAM;QACP,CAAC;QAED,8BAA8B;QAC9B,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnC,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAA;YAC1D,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,oDAAoD,EAAE,CAAC,CAAC,CAAA;YACxF,OAAM;QACP,CAAC;QAED,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YAC1C,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAA;YAC1D,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,sBAAsB,EAAE,CAAC,CAAC,CAAA;YAC1D,OAAM;QACP,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAA;QACxC,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YACpB,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAA;YACzC,UAAU,CAAC,KAAK,EAAE,CAAA;QACnB,CAAC,CAAC,CAAA;QACF,GAAG,CAAC,EAAE,
CAAC,KAAK,EAAE,GAAG,EAAE;YAClB,OAAO,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAA;YACxC,UAAU,CAAC,KAAK,EAAE,CAAA;QACnB,CAAC,CAAC,CAAA;QACF,GAAG,CAAC,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE;YACtB,OAAO,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAA;YAC1C,UAAU,CAAC,KAAK,EAAE,CAAA;QACnB,CAAC,CAAC,CAAA;QACF,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YACpB,OAAO,CAAC,KAAK,CAAC,cAAc,CAAC,CAAA;YAC7B,UAAU,CAAC,KAAK,EAAE,CAAA;QACnB,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC;YACJ,IAAI,OAAmC,CAAA;YACvC,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE;oBAClB,cAAc,EAAE,mBAAmB;oBACnC,eAAe,EAAE,UAAU;oBAC3B,UAAU,EAAE,YAAY;iBACxB,CAAC,CAAA;gBACF,GAAG,CAAC,YAAY,EAAE,CAAA;gBAClB,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE;oBAC1B,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,CAAA;gBACvB,CAAC,EAAE,KAAK,CAAC,CAAA;YACV,CAAC;YAED,IAAI,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAA;YAC5C,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC9B,IAAI,GAAG,CAAC,IAAI,CAAC,CAAA;YACd,CAAC;YAED,IAAI,iBAAqC,CAAA;YACzC,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;gBAC1B,IAAI,IAAI,CAAC,eAAe,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;oBACjD,iBAAiB,GAAG,MAAM,CAAA;gBAC3B,CAAC;YACF,CAAC;YAED,IAAI,eAEQ,CAAA;YAEZ,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBAChB,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK;qBAC9B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,KAAK,UAAU,CAAC;qBAC1C,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;oBACb,OAAO;wBACN,IAAI,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI;wBACxB,WAAW,EAAE,IAAI,CAAC,QAAQ,CAAC,WAAW;wBACtC,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC,UAAU;qBACpC,CAAA;gBACF,CAAC,CAAC,CAAA;gBACH,IAAI,aAAa,CAAC,MAAM,EAAE,CAAC;oBAC1B,IAAI,CAAC,eAAe,EAAE,CAAC;wBACtB,eAAe,GAAG,EAAE,CAAA;oBACrB,CAAC;oBACD,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;wBAClC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;4BAC5B,WAAW,EAAE,IAAI,CAAC,WAAW;4BAC7B,UAAU,EAAE,IAAI,CAAC,UAAU;yBACT,CAAA;oBACpB,CAAC;gBACF,CAAC;YACF,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,uBAAuB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;YAC7D,MAAM,aAAa,GAAG,eAAe,CAAwB;gBAC5D,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ;gBACR,WAAW,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,SAAS;gBAC5D,MAAM,EAAE,IAAI,CAA
C,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK;gBAClD,SAAS,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;gBACxD,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;gBACvC,IAAI;gBACJ,gBAAgB,EAAE,IAAI,CAAC,iBAAiB;oBACvC,CAAC,CAAC,IAAI,CAAC,iBAAiB;oBACxB,CAAC,CAAC,SAAS;gBACZ,eAAe,EAAE,IAAI,CAAC,gBAAgB;oBACrC,CAAC,CAAC,IAAI,CAAC,gBAAgB;oBACvB,CAAC,CAAC,SAAS;gBACZ,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACzC,SAAS,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;gBACxD,OAAO,EAAE,iBAAiB;gBAC1B,KAAK,EAAE,eAAe;gBACtB,6BAA6B;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,kBAAkB;oBACxC,CAAC,CAAC,IAAI,CAAC,kBAAkB;oBACzB,CAAC,CAAC,SAAS;gBACZ,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;gBACzC,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;aACzC,CAAC,CAAA;YACF,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,MAAM,WAAW,CAAC,eAAe,CAC9D,aAAa,EACb,UAAU,CAAC,MAAM,CACjB,CAAA;YAED,IAAI,OAAO,EAAE,CAAC;gBACb,aAAa,CAAC,OAAO,CAAC,CAAA;YACvB,CAAC;YACD,MAAM,IAAI,GAAG,QAAQ,CAAC,yBAAyB,CAAC,aAAa,EAAE;gBAC9D,MAAM,EAAE,UAAU,CAAC,MAAM;gBACzB,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;oBAClB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;wBACjB,MAAM,SAAS,GAA8B;4BAC5C,EAAE,EAAE,IAAI,CAAC,EAAE;4BACX,MAAM,EAAE,uBAAuB;4BAC/B,KAAK,EAAE,IAAI,CAAC,KAAK;4BACjB,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC;4BACpD,OAAO,EAAE;gCACR;oCACC,KAAK,EAAE,CAAC;oCACR,KAAK,EAAE;wCACN,IAAI,EAAE,WAAW;wCACjB,OAAO,EAAE,KAAK,CAAC,IAAI;qCACnB;oCACD,QAAQ,EAAE,IAAI;oCACd,aAAa,EAAE,IAAI;iCACnB;6BACD;yBACD,CAAA;wBACD,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;oBACpD,CAAC;gBACF,CAAC;aACD,CAAC,CAAA;YAEF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAA;YAEhC,OAAO,EAAE,CAAA;YAET,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,IAAI,MAAM,CAAC,YAAY,KAAK,WAAW,EAAE,CAAC;oBACzC,kDAAkD;oBAClD,sFAAsF;oBACtF,MAAM,qBAAqB,GAA8B;wBACxD,EAAE,EAAE,IAAI,CAAC,EAAE;wBACX,MAAM,EAAE,uBAAuB;wBA
C/B,KAAK,EAAE,IAAI,CAAC,KAAK;wBACjB,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC;wBACpD,OAAO,EAAE;4BACR;gCACC,KAAK,EAAE,CAAC;gCACR,KAAK,EAAE;oCACN,IAAI,EAAE,WAAW;oCACjB,OAAO,EAAE,IAAI;iCACb;gCACD,QAAQ,EAAE,IAAI;gCACd,aAAa,EAAE,MAAM,CAAC,YAAY;oCACjC,CAAC,CAAC,eAAe,CAAC,MAAM,CAAC,YAAY,CAAC;oCACtC,CAAC,CAAC,MAAM;6BACT;yBACD;qBACD,CAAA;oBAED,MAAM,SAAS,GACd,MAAM,CAAC,OAAO,CAAC,SAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;wBAC7C,OAAO;4BACN,KAAK;4BACL,EAAE,EAAE,IAAI,CAAC,EAAE;4BACX,IAAI,EAAE,UAAU;4BAChB,QAAQ,EAAE;gCACT,IAAI,EAAE,IAAI,CAAC,IAAI;gCACf,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC;6BAC1C;yBACD,CAAA;oBACF,CAAC,CAAC,CAAA;oBACH,qBAAqB,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,UAAU,GAAG,SAAS,CAAA;oBAC7D,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,qBAAqB,CAAC,MAAM,CAAC,CAAA;gBAChE,CAAC;gBACD,IAAI,IAAI,CAAC,cAAc,EAAE,aAAa,EAAE,CAAC;oBACxC,MAAM,UAAU,GAA8B;wBAC7C,EAAE,EAAE,IAAI,CAAC,EAAE;wBACX,MAAM,EAAE,uBAAuB;wBAC/B,KAAK,EAAE,IAAI,CAAC,KAAK;wBACjB,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC;wBACpD,kBAAkB,EAAE,QAAQ,CAAC,WAAW;wBACxC,OAAO,EAAE;4BACR;gCACC,KAAK,EAAE,CAAC;gCACR,KAAK,EAAE,EAAE;gCACT,QAAQ,EAAE,IAAI;gCACd,aAAa,EAAE,MAAM,CAAC,YAAY;oCACjC,CAAC,CAAC,eAAe,CAAC,MAAM,CAAC,YAAY,CAAC;oCACtC,CAAC,CAAC,MAAM;6BACT;yBACD;wBACD,KAAK,EAAE;4BACN,aAAa,EAAE,MAAM,CAAC,YAAY;4BAClC,iBAAiB,EAAE,MAAM,CAAC,gBAAgB;4BAC1C,YAAY,EAAE,MAAM,CAAC,aAAa;yBAClC;qBACD,CAAA;oBACD,GAAG,CAAC,KAAK,CAAC,SAAS,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,MAAM,CAAC,CAAA;gBACrD,CAAC;gBACD,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAA;gBACzB,GAAG,CAAC,GAAG,EAAE,CAAA;YACV,CAAC;iBAAM,CAAC;gBACP,MAAM,QAAQ,GAA0B;oBACvC,EAAE,EAAE,IAAI,CAAC,EAAE;oBACX,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,iBAAiB;oBACzB,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC;oBACpD,kBAAkB,EAAE,QAAQ,CAAC,WAAW;oBACxC,OAAO,EAAE;wBACR;4BACC,KAAK,EAAE,CAAC;4BACR,OAAO,EAAE;gCACR,IAAI,EAAE,WAAW;gCACjB,OAAO,EAAE,4BAA4B,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC;gCAC7D,OAAO,E
AAE,IAAI;6BACb;4BACD,QAAQ,EAAE,IAAI;4BACd,aAAa,EAAE,MAAM,CAAC,YAAY;gCACjC,CAAC,CAAC,eAAe,CAAC,MAAM,CAAC,YAAY,CAAC;gCACtC,CAAC,CAAC,MAAM;yBACT;qBACD;oBACD,KAAK,EAAE;wBACN,aAAa,EAAE,MAAM,CAAC,YAAY;wBAClC,iBAAiB,EAAE,MAAM,CAAC,gBAAgB;wBAC1C,YAAY,EAAE,MAAM,CAAC,aAAa;qBAClC;iBACD,CAAA;gBACD,IACC,WAAW,IAAI,MAAM,CAAC,OAAO;oBAC7B,MAAM,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,EAC/B,CAAC;oBACF,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU;wBACrC,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;4BACrC,OAAO;gCACN,EAAE,EAAE,IAAI,CAAC,EAAE;gCACX,IAAI,EAAE,UAAU;gCAChB,QAAQ,EAAE;oCACT,IAAI,EAAE,IAAI,CAAC,IAAI;oCACf,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC;iCAC1C;6BACD,CAAA;wBACF,CAAC,CAAC,CAAA;gBACJ,CAAC;gBACD,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAA;gBAC1D,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAA;YAC3C,CAAC;QACF,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACZ,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;YAChB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,GAAG,CAAC,KAAK,CAAC,eAAe,CAAC,CAAA;YAC3B,CAAC;iBAAM,CAAC;gBACP,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,CAAC,CAAA;gBAC1D,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE,CAAC,CAAC,CAAA;YAC5D,CAAC;QACF,CAAC;IACF,CAAC,CAAA;AACF,CAAC"}
@@ -0,0 +1,3 @@
1
import type { IncomingMessage, ServerResponse } from 'node:http';
import type { ModelServer } from '../../../server.js';
/**
 * Creates an HTTP handler for the OpenAI-style completions endpoint,
 * backed by the given model server.
 *
 * @param modelServer - Server used to resolve models and run completions.
 * @returns An async `(req, res)` request handler.
 */
export declare function createCompletionHandler(modelServer: ModelServer): (req: IncomingMessage, res: ServerResponse) => Promise<void>;