inference-server 1.0.0-beta.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. package/README.md +216 -0
  2. package/dist/api/openai/enums.d.ts +4 -0
  3. package/dist/api/openai/enums.js +17 -0
  4. package/dist/api/openai/enums.js.map +1 -0
  5. package/dist/api/openai/handlers/chat.d.ts +3 -0
  6. package/dist/api/openai/handlers/chat.js +358 -0
  7. package/dist/api/openai/handlers/chat.js.map +1 -0
  8. package/dist/api/openai/handlers/completions.d.ts +3 -0
  9. package/dist/api/openai/handlers/completions.js +169 -0
  10. package/dist/api/openai/handlers/completions.js.map +1 -0
  11. package/dist/api/openai/handlers/embeddings.d.ts +3 -0
  12. package/dist/api/openai/handlers/embeddings.js +74 -0
  13. package/dist/api/openai/handlers/embeddings.js.map +1 -0
  14. package/dist/api/openai/handlers/images.d.ts +0 -0
  15. package/dist/api/openai/handlers/images.js +4 -0
  16. package/dist/api/openai/handlers/images.js.map +1 -0
  17. package/dist/api/openai/handlers/models.d.ts +3 -0
  18. package/dist/api/openai/handlers/models.js +23 -0
  19. package/dist/api/openai/handlers/models.js.map +1 -0
  20. package/dist/api/openai/handlers/transcription.d.ts +0 -0
  21. package/dist/api/openai/handlers/transcription.js +4 -0
  22. package/dist/api/openai/handlers/transcription.js.map +1 -0
  23. package/dist/api/openai/index.d.ts +7 -0
  24. package/dist/api/openai/index.js +14 -0
  25. package/dist/api/openai/index.js.map +1 -0
  26. package/dist/api/parseJSONRequestBody.d.ts +2 -0
  27. package/dist/api/parseJSONRequestBody.js +24 -0
  28. package/dist/api/parseJSONRequestBody.js.map +1 -0
  29. package/dist/api/v1/index.d.ts +2 -0
  30. package/dist/api/v1/index.js +29 -0
  31. package/dist/api/v1/index.js.map +1 -0
  32. package/dist/cli.d.ts +1 -0
  33. package/dist/cli.js +10 -0
  34. package/dist/cli.js.map +1 -0
  35. package/dist/engines/gpt4all/engine.d.ts +34 -0
  36. package/dist/engines/gpt4all/engine.js +357 -0
  37. package/dist/engines/gpt4all/engine.js.map +1 -0
  38. package/dist/engines/gpt4all/util.d.ts +3 -0
  39. package/dist/engines/gpt4all/util.js +29 -0
  40. package/dist/engines/gpt4all/util.js.map +1 -0
  41. package/dist/engines/index.d.ts +19 -0
  42. package/dist/engines/index.js +21 -0
  43. package/dist/engines/index.js.map +1 -0
  44. package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
  45. package/dist/engines/node-llama-cpp/engine.js +666 -0
  46. package/dist/engines/node-llama-cpp/engine.js.map +1 -0
  47. package/dist/engines/node-llama-cpp/types.d.ts +13 -0
  48. package/dist/engines/node-llama-cpp/types.js +2 -0
  49. package/dist/engines/node-llama-cpp/types.js.map +1 -0
  50. package/dist/engines/node-llama-cpp/util.d.ts +15 -0
  51. package/dist/engines/node-llama-cpp/util.js +84 -0
  52. package/dist/engines/node-llama-cpp/util.js.map +1 -0
  53. package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
  54. package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
  55. package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
  56. package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
  57. package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
  58. package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
  59. package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
  60. package/dist/engines/stable-diffusion-cpp/types.js +2 -0
  61. package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
  62. package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
  63. package/dist/engines/stable-diffusion-cpp/util.js +55 -0
  64. package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
  65. package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
  66. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
  67. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
  68. package/dist/engines/transformers-js/engine.d.ts +37 -0
  69. package/dist/engines/transformers-js/engine.js +538 -0
  70. package/dist/engines/transformers-js/engine.js.map +1 -0
  71. package/dist/engines/transformers-js/types.d.ts +7 -0
  72. package/dist/engines/transformers-js/types.js +2 -0
  73. package/dist/engines/transformers-js/types.js.map +1 -0
  74. package/dist/engines/transformers-js/util.d.ts +7 -0
  75. package/dist/engines/transformers-js/util.js +36 -0
  76. package/dist/engines/transformers-js/util.js.map +1 -0
  77. package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
  78. package/dist/engines/transformers-js/validateModelFiles.js +133 -0
  79. package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
  80. package/dist/experiments/ChatWithVision.d.ts +11 -0
  81. package/dist/experiments/ChatWithVision.js +91 -0
  82. package/dist/experiments/ChatWithVision.js.map +1 -0
  83. package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
  84. package/dist/experiments/StableDiffPromptGenerator.js +4 -0
  85. package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
  86. package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
  87. package/dist/experiments/VoiceFunctionCall.js +51 -0
  88. package/dist/experiments/VoiceFunctionCall.js.map +1 -0
  89. package/dist/http.d.ts +19 -0
  90. package/dist/http.js +54 -0
  91. package/dist/http.js.map +1 -0
  92. package/dist/index.d.ts +7 -0
  93. package/dist/index.js +8 -0
  94. package/dist/index.js.map +1 -0
  95. package/dist/instance.d.ts +88 -0
  96. package/dist/instance.js +594 -0
  97. package/dist/instance.js.map +1 -0
  98. package/dist/lib/acquireFileLock.d.ts +7 -0
  99. package/dist/lib/acquireFileLock.js +38 -0
  100. package/dist/lib/acquireFileLock.js.map +1 -0
  101. package/dist/lib/calculateContextIdentity.d.ts +7 -0
  102. package/dist/lib/calculateContextIdentity.js +39 -0
  103. package/dist/lib/calculateContextIdentity.js.map +1 -0
  104. package/dist/lib/calculateFileChecksum.d.ts +1 -0
  105. package/dist/lib/calculateFileChecksum.js +16 -0
  106. package/dist/lib/calculateFileChecksum.js.map +1 -0
  107. package/dist/lib/copyDirectory.d.ts +6 -0
  108. package/dist/lib/copyDirectory.js +27 -0
  109. package/dist/lib/copyDirectory.js.map +1 -0
  110. package/dist/lib/decodeAudio.d.ts +1 -0
  111. package/dist/lib/decodeAudio.js +26 -0
  112. package/dist/lib/decodeAudio.js.map +1 -0
  113. package/dist/lib/downloadModelFile.d.ts +10 -0
  114. package/dist/lib/downloadModelFile.js +58 -0
  115. package/dist/lib/downloadModelFile.js.map +1 -0
  116. package/dist/lib/flattenMessageTextContent.d.ts +2 -0
  117. package/dist/lib/flattenMessageTextContent.js +11 -0
  118. package/dist/lib/flattenMessageTextContent.js.map +1 -0
  119. package/dist/lib/getCacheDirPath.d.ts +12 -0
  120. package/dist/lib/getCacheDirPath.js +31 -0
  121. package/dist/lib/getCacheDirPath.js.map +1 -0
  122. package/dist/lib/loadImage.d.ts +12 -0
  123. package/dist/lib/loadImage.js +30 -0
  124. package/dist/lib/loadImage.js.map +1 -0
  125. package/dist/lib/logger.d.ts +12 -0
  126. package/dist/lib/logger.js +98 -0
  127. package/dist/lib/logger.js.map +1 -0
  128. package/dist/lib/math.d.ts +7 -0
  129. package/dist/lib/math.js +30 -0
  130. package/dist/lib/math.js.map +1 -0
  131. package/dist/lib/resolveModelFileLocation.d.ts +15 -0
  132. package/dist/lib/resolveModelFileLocation.js +41 -0
  133. package/dist/lib/resolveModelFileLocation.js.map +1 -0
  134. package/dist/lib/util.d.ts +7 -0
  135. package/dist/lib/util.js +61 -0
  136. package/dist/lib/util.js.map +1 -0
  137. package/dist/lib/validateModelFile.d.ts +9 -0
  138. package/dist/lib/validateModelFile.js +62 -0
  139. package/dist/lib/validateModelFile.js.map +1 -0
  140. package/dist/lib/validateModelOptions.d.ts +3 -0
  141. package/dist/lib/validateModelOptions.js +23 -0
  142. package/dist/lib/validateModelOptions.js.map +1 -0
  143. package/dist/pool.d.ts +61 -0
  144. package/dist/pool.js +512 -0
  145. package/dist/pool.js.map +1 -0
  146. package/dist/server.d.ts +59 -0
  147. package/dist/server.js +221 -0
  148. package/dist/server.js.map +1 -0
  149. package/dist/standalone.d.ts +1 -0
  150. package/dist/standalone.js +306 -0
  151. package/dist/standalone.js.map +1 -0
  152. package/dist/store.d.ts +60 -0
  153. package/dist/store.js +203 -0
  154. package/dist/store.js.map +1 -0
  155. package/dist/types/completions.d.ts +57 -0
  156. package/dist/types/completions.js +2 -0
  157. package/dist/types/completions.js.map +1 -0
  158. package/dist/types/index.d.ts +326 -0
  159. package/dist/types/index.js +2 -0
  160. package/dist/types/index.js.map +1 -0
  161. package/docs/engines.md +28 -0
  162. package/docs/gpu.md +72 -0
  163. package/docs/http-api.md +147 -0
  164. package/examples/all-options.js +108 -0
  165. package/examples/chat-cli.js +56 -0
  166. package/examples/chat-server.js +65 -0
  167. package/examples/concurrency.js +70 -0
  168. package/examples/express.js +70 -0
  169. package/examples/pool.js +91 -0
  170. package/package.json +113 -0
  171. package/src/api/openai/enums.ts +20 -0
  172. package/src/api/openai/handlers/chat.ts +408 -0
  173. package/src/api/openai/handlers/completions.ts +196 -0
  174. package/src/api/openai/handlers/embeddings.ts +92 -0
  175. package/src/api/openai/handlers/images.ts +3 -0
  176. package/src/api/openai/handlers/models.ts +33 -0
  177. package/src/api/openai/handlers/transcription.ts +2 -0
  178. package/src/api/openai/index.ts +16 -0
  179. package/src/api/parseJSONRequestBody.ts +26 -0
  180. package/src/api/v1/DRAFT.md +16 -0
  181. package/src/api/v1/index.ts +37 -0
  182. package/src/cli.ts +9 -0
  183. package/src/engines/gpt4all/engine.ts +441 -0
  184. package/src/engines/gpt4all/util.ts +31 -0
  185. package/src/engines/index.ts +28 -0
  186. package/src/engines/node-llama-cpp/engine.ts +811 -0
  187. package/src/engines/node-llama-cpp/types.ts +17 -0
  188. package/src/engines/node-llama-cpp/util.ts +126 -0
  189. package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
  190. package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
  191. package/src/engines/stable-diffusion-cpp/types.ts +54 -0
  192. package/src/engines/stable-diffusion-cpp/util.ts +58 -0
  193. package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
  194. package/src/engines/transformers-js/engine.ts +659 -0
  195. package/src/engines/transformers-js/types.ts +25 -0
  196. package/src/engines/transformers-js/util.ts +40 -0
  197. package/src/engines/transformers-js/validateModelFiles.ts +168 -0
  198. package/src/experiments/ChatWithVision.ts +103 -0
  199. package/src/experiments/StableDiffPromptGenerator.ts +2 -0
  200. package/src/experiments/VoiceFunctionCall.ts +71 -0
  201. package/src/http.ts +72 -0
  202. package/src/index.ts +7 -0
  203. package/src/instance.ts +723 -0
  204. package/src/lib/acquireFileLock.ts +38 -0
  205. package/src/lib/calculateContextIdentity.ts +53 -0
  206. package/src/lib/calculateFileChecksum.ts +18 -0
  207. package/src/lib/copyDirectory.ts +29 -0
  208. package/src/lib/decodeAudio.ts +39 -0
  209. package/src/lib/downloadModelFile.ts +70 -0
  210. package/src/lib/flattenMessageTextContent.ts +19 -0
  211. package/src/lib/getCacheDirPath.ts +34 -0
  212. package/src/lib/loadImage.ts +46 -0
  213. package/src/lib/logger.ts +112 -0
  214. package/src/lib/math.ts +31 -0
  215. package/src/lib/resolveModelFileLocation.ts +49 -0
  216. package/src/lib/util.ts +75 -0
  217. package/src/lib/validateModelFile.ts +71 -0
  218. package/src/lib/validateModelOptions.ts +31 -0
  219. package/src/pool.ts +651 -0
  220. package/src/server.ts +270 -0
  221. package/src/standalone.ts +320 -0
  222. package/src/store.ts +278 -0
  223. package/src/types/completions.ts +86 -0
  224. package/src/types/index.ts +488 -0
  225. package/tsconfig.json +29 -0
  226. package/tsconfig.release.json +11 -0
  227. package/vitest.config.ts +18 -0
@@ -0,0 +1,196 @@
1
+ import type { IncomingMessage, ServerResponse } from 'node:http'
2
+ import type { OpenAI } from 'openai'
3
+ import type { ModelServer } from '#package/server.js'
4
+ import { TextCompletionRequest } from '#package/types/index.js'
5
+ import { parseJSONRequestBody } from '#package/api/parseJSONRequestBody.js'
6
+ import { omitEmptyValues } from '#package/lib/util.js'
7
+ import { finishReasonMap } from '../enums.js'
8
+
9
+ interface OpenAICompletionParams
10
+ extends Omit<OpenAI.CompletionCreateParamsStreaming, 'stream'> {
11
+ stream?: boolean
12
+ top_k?: number
13
+ min_p?: number
14
+ repeat_penalty_num?: number
15
+ }
16
+
17
+ interface OpenAICompletionChunk extends OpenAI.Completions.Completion {
18
+ usage?: OpenAI.CompletionUsage
19
+ }
20
+
21
+ // v1/completions
22
+ // https://platform.openai.com/docs/api-reference/completions/create
23
+ export function createCompletionHandler(modelServer: ModelServer) {
24
+ return async (req: IncomingMessage, res: ServerResponse) => {
25
+ let args: OpenAICompletionParams
26
+
27
+ try {
28
+ const body = await parseJSONRequestBody(req)
29
+ args = body
30
+ } catch (e) {
31
+ console.error(e)
32
+ res.writeHead(400, { 'Content-Type': 'application/json' })
33
+ res.end(JSON.stringify({ error: 'Invalid request' }))
34
+ return
35
+ }
36
+
37
+ // TODO ajv schema validation?
38
+ if (!args.model || !args.prompt) {
39
+ res.writeHead(400, { 'Content-Type': 'application/json' })
40
+ res.end(JSON.stringify({ error: 'Invalid request' }))
41
+ return
42
+ }
43
+ if (!modelServer.modelExists(args.model)) {
44
+ res.writeHead(400, { 'Content-Type': 'application/json' })
45
+ res.end(JSON.stringify({ error: 'Invalid model' }))
46
+ return
47
+ }
48
+
49
+ const controller = new AbortController()
50
+ req.on('close', () => {
51
+ console.debug('Client closed connection')
52
+ controller.abort()
53
+ })
54
+ req.on('end', () => {
55
+ console.debug('Client ended connection')
56
+ controller.abort()
57
+ })
58
+
59
+ try {
60
+ if (args.stream) {
61
+ res.writeHead(200, {
62
+ 'Content-Type': 'text/event-stream',
63
+ 'Cache-Control': 'no-cache',
64
+ Connection: 'keep-alive',
65
+ })
66
+ res.flushHeaders()
67
+ }
68
+
69
+ let prompt = args.prompt
70
+
71
+ if (typeof prompt !== 'string') {
72
+ throw new Error('Prompt must be a string')
73
+ }
74
+
75
+ let stop = args.stop ? args.stop : undefined
76
+ if (typeof stop === 'string') {
77
+ stop = [stop]
78
+ }
79
+
80
+ const completionReq = omitEmptyValues<TextCompletionRequest>({
81
+ model: args.model,
82
+ prompt: args.prompt as string,
83
+ temperature: args.temperature ? args.temperature : undefined,
84
+ stream: args.stream ? Boolean(args.stream) : false,
85
+ maxTokens: args.max_tokens ? args.max_tokens : undefined,
86
+ seed: args.seed ? args.seed : undefined,
87
+ stop,
88
+ frequencyPenalty: args.frequency_penalty
89
+ ? args.frequency_penalty
90
+ : undefined,
91
+ presencePenalty: args.presence_penalty
92
+ ? args.presence_penalty
93
+ : undefined,
94
+ tokenBias: args.logit_bias ? args.logit_bias : undefined,
95
+ topP: args.top_p ? args.top_p : undefined,
96
+ // additional non-spec params
97
+ repeatPenaltyNum: args.repeat_penalty_num
98
+ ? args.repeat_penalty_num
99
+ : undefined,
100
+ minP: args.min_p ? args.min_p : undefined,
101
+ topK: args.top_k ? args.top_k : undefined,
102
+ })
103
+
104
+ const { instance, release } = await modelServer.requestInstance(
105
+ completionReq,
106
+ controller.signal,
107
+ )
108
+ const task = instance.processTextCompletionTask(completionReq, {
109
+ signal: controller.signal,
110
+ onChunk: (chunk) => {
111
+ if (args.stream) {
112
+ const chunkData: OpenAICompletionChunk = {
113
+ id: task.id,
114
+ model: task.model,
115
+ object: 'text_completion',
116
+ created: Math.floor(task.createdAt.getTime() / 1000),
117
+ choices: [
118
+ {
119
+ index: 0,
120
+ text: chunk.text,
121
+ logprobs: null,
122
+ // @ts-ignore official api returns null here in the same case
123
+ finish_reason: null,
124
+ },
125
+ ],
126
+ }
127
+ res.write(`data: ${JSON.stringify(chunkData)}\n\n`)
128
+ }
129
+ },
130
+ })
131
+ const result = await task.result
132
+ release()
133
+
134
+ if (args.stream) {
135
+ if (args.stream_options?.include_usage) {
136
+ const finalChunk: OpenAICompletionChunk = {
137
+ id: task.id,
138
+ model: task.model,
139
+ object: 'text_completion',
140
+ created: Math.floor(task.createdAt.getTime() / 1000),
141
+ choices: [
142
+ {
143
+ index: 0,
144
+ text: '',
145
+ logprobs: null,
146
+ // @ts-ignore
147
+ finish_reason: result.finishReason
148
+ ? finishReasonMap[result.finishReason]
149
+ : 'stop',
150
+ },
151
+ ],
152
+ }
153
+ res.write(
154
+ `data: ${JSON.stringify(finalChunk)}\n\n`,
155
+ )
156
+ }
157
+ res.write('data: [DONE]')
158
+ res.end()
159
+ } else {
160
+ const response: OpenAI.Completions.Completion = {
161
+ id: task.id,
162
+ model: task.model,
163
+ object: 'text_completion',
164
+ created: Math.floor(task.createdAt.getTime() / 1000),
165
+ system_fingerprint: instance.fingerprint,
166
+ choices: [
167
+ {
168
+ index: 0,
169
+ text: result.text,
170
+ logprobs: null,
171
+ // @ts-ignore
172
+ finish_reason: result.finishReason
173
+ ? finishReasonMap[result.finishReason]
174
+ : 'stop',
175
+ },
176
+ ],
177
+ usage: {
178
+ prompt_tokens: result.promptTokens,
179
+ completion_tokens: result.completionTokens,
180
+ total_tokens: result.contextTokens,
181
+ },
182
+ }
183
+ res.writeHead(200, { 'Content-Type': 'application/json' })
184
+ res.end(JSON.stringify(response, null, 2))
185
+ }
186
+ } catch (err) {
187
+ console.error(err)
188
+ if (args.stream) {
189
+ res.write('data: [ERROR]')
190
+ } else {
191
+ res.writeHead(500, { 'Content-Type': 'application/json' })
192
+ res.end(JSON.stringify({ error: 'Internal server error' }))
193
+ }
194
+ }
195
+ }
196
+ }
@@ -0,0 +1,92 @@
1
+ import { IncomingMessage, ServerResponse } from 'node:http'
2
+ import type { OpenAI } from 'openai'
3
+ import { EmbeddingRequest } from '#package/types/index.js'
4
+ import { parseJSONRequestBody } from '#package/api/parseJSONRequestBody.js'
5
+ import { omitEmptyValues } from '#package/lib/util.js'
6
+ import { ModelServer } from '#package/server.js'
7
+
8
+ type OpenAIEmbeddingsParams = OpenAI.EmbeddingCreateParams
9
+
10
+ // v1/embeddings
11
+ // https://platform.openai.com/docs/api-reference/embeddings
12
+ export function createEmbeddingsHandler(modelServer: ModelServer) {
13
+ return async (req: IncomingMessage, res: ServerResponse) => {
14
+ let args: OpenAIEmbeddingsParams
15
+
16
+ try {
17
+ const body = await parseJSONRequestBody(req)
18
+ args = body
19
+ } catch (e) {
20
+ console.error(e)
21
+ res.writeHead(400, { 'Content-Type': 'application/json' })
22
+ res.end(JSON.stringify({ error: 'Invalid request' }))
23
+ return
24
+ }
25
+
26
+ // TODO ajv schema validation?
27
+ if (!args.model || !args.input) {
28
+ res.writeHead(400, { 'Content-Type': 'application/json' })
29
+ res.end(JSON.stringify({ error: 'Invalid request' }))
30
+ return
31
+ }
32
+ if (!modelServer.modelExists(args.model)) {
33
+ res.writeHead(400, { 'Content-Type': 'application/json' })
34
+ res.end(JSON.stringify({ error: 'Invalid model' }))
35
+ return
36
+ }
37
+
38
+ const controller = new AbortController()
39
+ req.on('close', () => {
40
+ console.debug('Client closed connection')
41
+ controller.abort()
42
+ })
43
+ req.on('end', () => {
44
+ console.debug('Client ended connection')
45
+ controller.abort()
46
+ })
47
+
48
+ try {
49
+
50
+
51
+ let input = args.input
52
+
53
+ if (typeof input !== 'string') {
54
+ throw new Error('Input must be a string')
55
+ }
56
+
57
+ const embeddingsReq = omitEmptyValues<EmbeddingRequest>({
58
+ model: args.model,
59
+ input: args.input as string,
60
+ })
61
+
62
+ const { instance, release } = await modelServer.requestInstance(
63
+ embeddingsReq,
64
+ controller.signal,
65
+ )
66
+ const task = instance.processEmbeddingTask(embeddingsReq)
67
+ const result = await task.result
68
+ release()
69
+
70
+ const response: OpenAI.CreateEmbeddingResponse = {
71
+ model: instance.modelId,
72
+ object: 'list',
73
+ data: result.embeddings.map((embedding, index) => ({
74
+ embedding: Array.from(embedding),
75
+ index,
76
+ object: 'embedding',
77
+ })),
78
+ usage: {
79
+ prompt_tokens: result.inputTokens,
80
+ total_tokens: result.inputTokens,
81
+ },
82
+ }
83
+ res.writeHead(200, { 'Content-Type': 'application/json' })
84
+ res.end(JSON.stringify(response, null, 2))
85
+
86
+ } catch (err) {
87
+ console.error(err)
88
+ res.writeHead(500, { 'Content-Type': 'application/json' })
89
+ res.end(JSON.stringify({ error: 'Internal server error' }))
90
+ }
91
+ }
92
+ }
@@ -0,0 +1,3 @@
1
+
2
+ // v1/images endpoints
3
+ // https://platform.openai.com/docs/api-reference/images/create
@@ -0,0 +1,33 @@
1
+ import type { IncomingMessage, ServerResponse } from 'node:http'
2
+ import path from 'node:path'
3
+ import type { OpenAI } from 'openai'
4
+ import type { ModelServer } from '#package/server'
5
+
6
+ // https://platform.openai.com/docs/api-reference/models/list
7
+ export function createModelsHandler(modelServer: ModelServer) {
8
+ return async (req: IncomingMessage, res: ServerResponse) => {
9
+
10
+ const models = modelServer.store.getStatus()
11
+ const data: OpenAI.Model[] = Object.entries(models).map(
12
+ ([id, info]) => {
13
+ // const lastModDate = new Date(info.source.lastModified)
14
+ // const created = Math.floor(lastModDate.getTime() / 1000)
15
+
16
+ // const dirPath = path.dirname(info.source.file);
17
+ // const lastDir = path.basename(dirPath);
18
+ // const baseName = path.basename(info.source.file);
19
+ const owned_by = info.engine// + ':' + path.join(lastDir, baseName);
20
+
21
+ return {
22
+ object: 'model',
23
+ id,
24
+ created: 0,
25
+ owned_by,
26
+ }
27
+ },
28
+ )
29
+
30
+ res.writeHead(200, { 'Content-Type': 'application/json' })
31
+ res.end(JSON.stringify({ object: 'list', data }, null, 2))
32
+ }
33
+ }
@@ -0,0 +1,2 @@
1
+ // TODO
2
+ // https://platform.openai.com/docs/api-reference/audio/createTranscription
@@ -0,0 +1,16 @@
1
+ import type { ModelServer } from '#package/server.js'
2
+ import { createChatCompletionHandler } from './handlers/chat.js'
3
+ import { createCompletionHandler } from './handlers/completions.js'
4
+ import { createModelsHandler } from './handlers/models.js'
5
+ import { createEmbeddingsHandler } from './handlers/embeddings.js'
6
+
7
+
8
+ // See OpenAI API specs at https://github.com/openai/openai-openapi/blob/master/openapi.yaml
9
+ export function createOpenAIRequestHandlers(modelServer: ModelServer) {
10
+ return {
11
+ chatCompletions: createChatCompletionHandler(modelServer),
12
+ completions: createCompletionHandler(modelServer),
13
+ models: createModelsHandler(modelServer),
14
+ embeddings: createEmbeddingsHandler(modelServer),
15
+ }
16
+ }
@@ -0,0 +1,26 @@
1
+ import { IncomingMessage } from 'node:http'
2
+
3
+ export function parseJSONRequestBody(req: IncomingMessage | Request): Promise<any> {
4
+ return new Promise((resolve, reject) => {
5
+
6
+ // if request is from express theres no need to parse anything
7
+ if ('body' in req) {
8
+ resolve(req.body)
9
+ return
10
+ }
11
+
12
+ // for native http server
13
+ let body = ''
14
+ req.on('data', (chunk) => {
15
+ body += chunk.toString()
16
+ })
17
+ req.on('end', () => {
18
+ try {
19
+ const data = JSON.parse(body)
20
+ resolve(data)
21
+ } catch (error) {
22
+ reject(error)
23
+ }
24
+ })
25
+ })
26
+ }
@@ -0,0 +1,16 @@
1
+ See [discussion](https://github.com/iimez/inference-server/discussions/8) for more details.
2
+
3
+ ### Task API
4
+
5
+ - `POST /tasks/text-completion`
6
+ - `POST /tasks/chat-completion` should continue to be stateless
7
+ - `GET /tasks/{task_id}`
8
+ - `DELETE /tasks/{task_id}`
9
+
10
+ ### Thread API
11
+
12
+ - `POST /threads`
13
+ - `POST /threads/{thread_id}` mutate state without generating anything
14
+ - `POST /threads/{thread_id}/generate` to generate a new assistant message
15
+ - `GET /threads/{thread_id}`
16
+ - `DELETE /threads/{thread_id}`
@@ -0,0 +1,37 @@
1
+ import { IncomingMessage, ServerResponse } from 'node:http'
2
+ import express from 'express'
3
+ import { ModelPool } from '#package/pool.js'
4
+ import { ModelServer } from '#package/server.js'
5
+
6
+ export function createAPIMiddleware(server: ModelServer) {
7
+ const router = express.Router()
8
+
9
+ router.use((req, res, next) => {
10
+ console.debug('API call', {
11
+ method: req.method,
12
+ url: req.url,
13
+ body: req.body,
14
+ })
15
+ next()
16
+ })
17
+ return router
18
+ // return async (req: IncomingMessage, res: ServerResponse) => {
19
+
20
+ // let args: any
21
+
22
+ // try {
23
+ // const body = await parseJSONRequestBody(req)
24
+ // args = body
25
+ // } catch (e) {
26
+ // console.error(e)
27
+ // res.writeHead(400, { 'Content-Type': 'application/json' })
28
+ // res.end(JSON.stringify({ error: 'Invalid request' }))
29
+ // return
30
+ // }
31
+
32
+ // console.debug('Handler', JSON.stringify(args, null, 2))
33
+ // res.writeHead(200, { 'Content-Type': 'application/json' })
34
+ // res.end(JSON.stringify({ message: 'Hello' }))
35
+
36
+ // }
37
+ }
package/src/cli.ts ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env node
2
+ process.stdout.write("Nothing here, yet.\n");
3
+
4
+ /* API Ideas:
5
+ - `infsrv ls` - list stored models
6
+ - `infsrv rm <name>` - remove a stored model
7
+ - `infsrv show <name>` - show details of a stored model
8
+ - `infsrv prepare <config>` - download everything required for given config
9
+ */