inference-server 1.0.0-beta.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +216 -0
- package/dist/api/openai/enums.d.ts +4 -0
- package/dist/api/openai/enums.js +17 -0
- package/dist/api/openai/enums.js.map +1 -0
- package/dist/api/openai/handlers/chat.d.ts +3 -0
- package/dist/api/openai/handlers/chat.js +358 -0
- package/dist/api/openai/handlers/chat.js.map +1 -0
- package/dist/api/openai/handlers/completions.d.ts +3 -0
- package/dist/api/openai/handlers/completions.js +169 -0
- package/dist/api/openai/handlers/completions.js.map +1 -0
- package/dist/api/openai/handlers/embeddings.d.ts +3 -0
- package/dist/api/openai/handlers/embeddings.js +74 -0
- package/dist/api/openai/handlers/embeddings.js.map +1 -0
- package/dist/api/openai/handlers/images.d.ts +0 -0
- package/dist/api/openai/handlers/images.js +4 -0
- package/dist/api/openai/handlers/images.js.map +1 -0
- package/dist/api/openai/handlers/models.d.ts +3 -0
- package/dist/api/openai/handlers/models.js +23 -0
- package/dist/api/openai/handlers/models.js.map +1 -0
- package/dist/api/openai/handlers/transcription.d.ts +0 -0
- package/dist/api/openai/handlers/transcription.js +4 -0
- package/dist/api/openai/handlers/transcription.js.map +1 -0
- package/dist/api/openai/index.d.ts +7 -0
- package/dist/api/openai/index.js +14 -0
- package/dist/api/openai/index.js.map +1 -0
- package/dist/api/parseJSONRequestBody.d.ts +2 -0
- package/dist/api/parseJSONRequestBody.js +24 -0
- package/dist/api/parseJSONRequestBody.js.map +1 -0
- package/dist/api/v1/index.d.ts +2 -0
- package/dist/api/v1/index.js +29 -0
- package/dist/api/v1/index.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +10 -0
- package/dist/cli.js.map +1 -0
- package/dist/engines/gpt4all/engine.d.ts +34 -0
- package/dist/engines/gpt4all/engine.js +357 -0
- package/dist/engines/gpt4all/engine.js.map +1 -0
- package/dist/engines/gpt4all/util.d.ts +3 -0
- package/dist/engines/gpt4all/util.js +29 -0
- package/dist/engines/gpt4all/util.js.map +1 -0
- package/dist/engines/index.d.ts +19 -0
- package/dist/engines/index.js +21 -0
- package/dist/engines/index.js.map +1 -0
- package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
- package/dist/engines/node-llama-cpp/engine.js +666 -0
- package/dist/engines/node-llama-cpp/engine.js.map +1 -0
- package/dist/engines/node-llama-cpp/types.d.ts +13 -0
- package/dist/engines/node-llama-cpp/types.js +2 -0
- package/dist/engines/node-llama-cpp/types.js.map +1 -0
- package/dist/engines/node-llama-cpp/util.d.ts +15 -0
- package/dist/engines/node-llama-cpp/util.js +84 -0
- package/dist/engines/node-llama-cpp/util.js.map +1 -0
- package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
- package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
- package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
- package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
- package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
- package/dist/engines/stable-diffusion-cpp/types.js +2 -0
- package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
- package/dist/engines/stable-diffusion-cpp/util.js +55 -0
- package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
- package/dist/engines/transformers-js/engine.d.ts +37 -0
- package/dist/engines/transformers-js/engine.js +538 -0
- package/dist/engines/transformers-js/engine.js.map +1 -0
- package/dist/engines/transformers-js/types.d.ts +7 -0
- package/dist/engines/transformers-js/types.js +2 -0
- package/dist/engines/transformers-js/types.js.map +1 -0
- package/dist/engines/transformers-js/util.d.ts +7 -0
- package/dist/engines/transformers-js/util.js +36 -0
- package/dist/engines/transformers-js/util.js.map +1 -0
- package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
- package/dist/engines/transformers-js/validateModelFiles.js +133 -0
- package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
- package/dist/experiments/ChatWithVision.d.ts +11 -0
- package/dist/experiments/ChatWithVision.js +91 -0
- package/dist/experiments/ChatWithVision.js.map +1 -0
- package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
- package/dist/experiments/StableDiffPromptGenerator.js +4 -0
- package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
- package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
- package/dist/experiments/VoiceFunctionCall.js +51 -0
- package/dist/experiments/VoiceFunctionCall.js.map +1 -0
- package/dist/http.d.ts +19 -0
- package/dist/http.js +54 -0
- package/dist/http.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -0
- package/dist/instance.d.ts +88 -0
- package/dist/instance.js +594 -0
- package/dist/instance.js.map +1 -0
- package/dist/lib/acquireFileLock.d.ts +7 -0
- package/dist/lib/acquireFileLock.js +38 -0
- package/dist/lib/acquireFileLock.js.map +1 -0
- package/dist/lib/calculateContextIdentity.d.ts +7 -0
- package/dist/lib/calculateContextIdentity.js +39 -0
- package/dist/lib/calculateContextIdentity.js.map +1 -0
- package/dist/lib/calculateFileChecksum.d.ts +1 -0
- package/dist/lib/calculateFileChecksum.js +16 -0
- package/dist/lib/calculateFileChecksum.js.map +1 -0
- package/dist/lib/copyDirectory.d.ts +6 -0
- package/dist/lib/copyDirectory.js +27 -0
- package/dist/lib/copyDirectory.js.map +1 -0
- package/dist/lib/decodeAudio.d.ts +1 -0
- package/dist/lib/decodeAudio.js +26 -0
- package/dist/lib/decodeAudio.js.map +1 -0
- package/dist/lib/downloadModelFile.d.ts +10 -0
- package/dist/lib/downloadModelFile.js +58 -0
- package/dist/lib/downloadModelFile.js.map +1 -0
- package/dist/lib/flattenMessageTextContent.d.ts +2 -0
- package/dist/lib/flattenMessageTextContent.js +11 -0
- package/dist/lib/flattenMessageTextContent.js.map +1 -0
- package/dist/lib/getCacheDirPath.d.ts +12 -0
- package/dist/lib/getCacheDirPath.js +31 -0
- package/dist/lib/getCacheDirPath.js.map +1 -0
- package/dist/lib/loadImage.d.ts +12 -0
- package/dist/lib/loadImage.js +30 -0
- package/dist/lib/loadImage.js.map +1 -0
- package/dist/lib/logger.d.ts +12 -0
- package/dist/lib/logger.js +98 -0
- package/dist/lib/logger.js.map +1 -0
- package/dist/lib/math.d.ts +7 -0
- package/dist/lib/math.js +30 -0
- package/dist/lib/math.js.map +1 -0
- package/dist/lib/resolveModelFileLocation.d.ts +15 -0
- package/dist/lib/resolveModelFileLocation.js +41 -0
- package/dist/lib/resolveModelFileLocation.js.map +1 -0
- package/dist/lib/util.d.ts +7 -0
- package/dist/lib/util.js +61 -0
- package/dist/lib/util.js.map +1 -0
- package/dist/lib/validateModelFile.d.ts +9 -0
- package/dist/lib/validateModelFile.js +62 -0
- package/dist/lib/validateModelFile.js.map +1 -0
- package/dist/lib/validateModelOptions.d.ts +3 -0
- package/dist/lib/validateModelOptions.js +23 -0
- package/dist/lib/validateModelOptions.js.map +1 -0
- package/dist/pool.d.ts +61 -0
- package/dist/pool.js +512 -0
- package/dist/pool.js.map +1 -0
- package/dist/server.d.ts +59 -0
- package/dist/server.js +221 -0
- package/dist/server.js.map +1 -0
- package/dist/standalone.d.ts +1 -0
- package/dist/standalone.js +306 -0
- package/dist/standalone.js.map +1 -0
- package/dist/store.d.ts +60 -0
- package/dist/store.js +203 -0
- package/dist/store.js.map +1 -0
- package/dist/types/completions.d.ts +57 -0
- package/dist/types/completions.js +2 -0
- package/dist/types/completions.js.map +1 -0
- package/dist/types/index.d.ts +326 -0
- package/dist/types/index.js +2 -0
- package/dist/types/index.js.map +1 -0
- package/docs/engines.md +28 -0
- package/docs/gpu.md +72 -0
- package/docs/http-api.md +147 -0
- package/examples/all-options.js +108 -0
- package/examples/chat-cli.js +56 -0
- package/examples/chat-server.js +65 -0
- package/examples/concurrency.js +70 -0
- package/examples/express.js +70 -0
- package/examples/pool.js +91 -0
- package/package.json +113 -0
- package/src/api/openai/enums.ts +20 -0
- package/src/api/openai/handlers/chat.ts +408 -0
- package/src/api/openai/handlers/completions.ts +196 -0
- package/src/api/openai/handlers/embeddings.ts +92 -0
- package/src/api/openai/handlers/images.ts +3 -0
- package/src/api/openai/handlers/models.ts +33 -0
- package/src/api/openai/handlers/transcription.ts +2 -0
- package/src/api/openai/index.ts +16 -0
- package/src/api/parseJSONRequestBody.ts +26 -0
- package/src/api/v1/DRAFT.md +16 -0
- package/src/api/v1/index.ts +37 -0
- package/src/cli.ts +9 -0
- package/src/engines/gpt4all/engine.ts +441 -0
- package/src/engines/gpt4all/util.ts +31 -0
- package/src/engines/index.ts +28 -0
- package/src/engines/node-llama-cpp/engine.ts +811 -0
- package/src/engines/node-llama-cpp/types.ts +17 -0
- package/src/engines/node-llama-cpp/util.ts +126 -0
- package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
- package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
- package/src/engines/stable-diffusion-cpp/types.ts +54 -0
- package/src/engines/stable-diffusion-cpp/util.ts +58 -0
- package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
- package/src/engines/transformers-js/engine.ts +659 -0
- package/src/engines/transformers-js/types.ts +25 -0
- package/src/engines/transformers-js/util.ts +40 -0
- package/src/engines/transformers-js/validateModelFiles.ts +168 -0
- package/src/experiments/ChatWithVision.ts +103 -0
- package/src/experiments/StableDiffPromptGenerator.ts +2 -0
- package/src/experiments/VoiceFunctionCall.ts +71 -0
- package/src/http.ts +72 -0
- package/src/index.ts +7 -0
- package/src/instance.ts +723 -0
- package/src/lib/acquireFileLock.ts +38 -0
- package/src/lib/calculateContextIdentity.ts +53 -0
- package/src/lib/calculateFileChecksum.ts +18 -0
- package/src/lib/copyDirectory.ts +29 -0
- package/src/lib/decodeAudio.ts +39 -0
- package/src/lib/downloadModelFile.ts +70 -0
- package/src/lib/flattenMessageTextContent.ts +19 -0
- package/src/lib/getCacheDirPath.ts +34 -0
- package/src/lib/loadImage.ts +46 -0
- package/src/lib/logger.ts +112 -0
- package/src/lib/math.ts +31 -0
- package/src/lib/resolveModelFileLocation.ts +49 -0
- package/src/lib/util.ts +75 -0
- package/src/lib/validateModelFile.ts +71 -0
- package/src/lib/validateModelOptions.ts +31 -0
- package/src/pool.ts +651 -0
- package/src/server.ts +270 -0
- package/src/standalone.ts +320 -0
- package/src/store.ts +278 -0
- package/src/types/completions.ts +86 -0
- package/src/types/index.ts +488 -0
- package/tsconfig.json +29 -0
- package/tsconfig.release.json +11 -0
- package/vitest.config.ts +18 -0
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import type { IncomingMessage, ServerResponse } from 'node:http'
|
|
2
|
+
import type { OpenAI } from 'openai'
|
|
3
|
+
import type { ModelServer } from '#package/server.js'
|
|
4
|
+
import { TextCompletionRequest } from '#package/types/index.js'
|
|
5
|
+
import { parseJSONRequestBody } from '#package/api/parseJSONRequestBody.js'
|
|
6
|
+
import { omitEmptyValues } from '#package/lib/util.js'
|
|
7
|
+
import { finishReasonMap } from '../enums.js'
|
|
8
|
+
|
|
9
|
+
// Request body accepted by v1/completions. Based on the OpenAI streaming
// params, but `stream` is optional here (non-streaming requests are allowed)
// plus a few non-spec sampling knobs that are forwarded to the engines.
interface OpenAICompletionParams
	extends Omit<OpenAI.CompletionCreateParamsStreaming, 'stream'> {
	stream?: boolean
	// non-spec: top-k sampling cutoff
	top_k?: number
	// non-spec: min-p sampling threshold
	min_p?: number
	// non-spec: number of recent tokens considered for the repetition penalty
	repeat_penalty_num?: number
}

// Completion object as written to the SSE stream. `usage` may be attached
// to the final chunk (see stream_options.include_usage handling below).
interface OpenAICompletionChunk extends OpenAI.Completions.Completion {
	usage?: OpenAI.CompletionUsage
}
|
|
20
|
+
|
|
21
|
+
// v1/completions
|
|
22
|
+
// https://platform.openai.com/docs/api-reference/completions/create
|
|
23
|
+
export function createCompletionHandler(modelServer: ModelServer) {
|
|
24
|
+
return async (req: IncomingMessage, res: ServerResponse) => {
|
|
25
|
+
let args: OpenAICompletionParams
|
|
26
|
+
|
|
27
|
+
try {
|
|
28
|
+
const body = await parseJSONRequestBody(req)
|
|
29
|
+
args = body
|
|
30
|
+
} catch (e) {
|
|
31
|
+
console.error(e)
|
|
32
|
+
res.writeHead(400, { 'Content-Type': 'application/json' })
|
|
33
|
+
res.end(JSON.stringify({ error: 'Invalid request' }))
|
|
34
|
+
return
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// TODO ajv schema validation?
|
|
38
|
+
if (!args.model || !args.prompt) {
|
|
39
|
+
res.writeHead(400, { 'Content-Type': 'application/json' })
|
|
40
|
+
res.end(JSON.stringify({ error: 'Invalid request' }))
|
|
41
|
+
return
|
|
42
|
+
}
|
|
43
|
+
if (!modelServer.modelExists(args.model)) {
|
|
44
|
+
res.writeHead(400, { 'Content-Type': 'application/json' })
|
|
45
|
+
res.end(JSON.stringify({ error: 'Invalid model' }))
|
|
46
|
+
return
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const controller = new AbortController()
|
|
50
|
+
req.on('close', () => {
|
|
51
|
+
console.debug('Client closed connection')
|
|
52
|
+
controller.abort()
|
|
53
|
+
})
|
|
54
|
+
req.on('end', () => {
|
|
55
|
+
console.debug('Client ended connection')
|
|
56
|
+
controller.abort()
|
|
57
|
+
})
|
|
58
|
+
|
|
59
|
+
try {
|
|
60
|
+
if (args.stream) {
|
|
61
|
+
res.writeHead(200, {
|
|
62
|
+
'Content-Type': 'text/event-stream',
|
|
63
|
+
'Cache-Control': 'no-cache',
|
|
64
|
+
Connection: 'keep-alive',
|
|
65
|
+
})
|
|
66
|
+
res.flushHeaders()
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
let prompt = args.prompt
|
|
70
|
+
|
|
71
|
+
if (typeof prompt !== 'string') {
|
|
72
|
+
throw new Error('Prompt must be a string')
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
let stop = args.stop ? args.stop : undefined
|
|
76
|
+
if (typeof stop === 'string') {
|
|
77
|
+
stop = [stop]
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const completionReq = omitEmptyValues<TextCompletionRequest>({
|
|
81
|
+
model: args.model,
|
|
82
|
+
prompt: args.prompt as string,
|
|
83
|
+
temperature: args.temperature ? args.temperature : undefined,
|
|
84
|
+
stream: args.stream ? Boolean(args.stream) : false,
|
|
85
|
+
maxTokens: args.max_tokens ? args.max_tokens : undefined,
|
|
86
|
+
seed: args.seed ? args.seed : undefined,
|
|
87
|
+
stop,
|
|
88
|
+
frequencyPenalty: args.frequency_penalty
|
|
89
|
+
? args.frequency_penalty
|
|
90
|
+
: undefined,
|
|
91
|
+
presencePenalty: args.presence_penalty
|
|
92
|
+
? args.presence_penalty
|
|
93
|
+
: undefined,
|
|
94
|
+
tokenBias: args.logit_bias ? args.logit_bias : undefined,
|
|
95
|
+
topP: args.top_p ? args.top_p : undefined,
|
|
96
|
+
// additional non-spec params
|
|
97
|
+
repeatPenaltyNum: args.repeat_penalty_num
|
|
98
|
+
? args.repeat_penalty_num
|
|
99
|
+
: undefined,
|
|
100
|
+
minP: args.min_p ? args.min_p : undefined,
|
|
101
|
+
topK: args.top_k ? args.top_k : undefined,
|
|
102
|
+
})
|
|
103
|
+
|
|
104
|
+
const { instance, release } = await modelServer.requestInstance(
|
|
105
|
+
completionReq,
|
|
106
|
+
controller.signal,
|
|
107
|
+
)
|
|
108
|
+
const task = instance.processTextCompletionTask(completionReq, {
|
|
109
|
+
signal: controller.signal,
|
|
110
|
+
onChunk: (chunk) => {
|
|
111
|
+
if (args.stream) {
|
|
112
|
+
const chunkData: OpenAICompletionChunk = {
|
|
113
|
+
id: task.id,
|
|
114
|
+
model: task.model,
|
|
115
|
+
object: 'text_completion',
|
|
116
|
+
created: Math.floor(task.createdAt.getTime() / 1000),
|
|
117
|
+
choices: [
|
|
118
|
+
{
|
|
119
|
+
index: 0,
|
|
120
|
+
text: chunk.text,
|
|
121
|
+
logprobs: null,
|
|
122
|
+
// @ts-ignore official api returns null here in the same case
|
|
123
|
+
finish_reason: null,
|
|
124
|
+
},
|
|
125
|
+
],
|
|
126
|
+
}
|
|
127
|
+
res.write(`data: ${JSON.stringify(chunkData)}\n\n`)
|
|
128
|
+
}
|
|
129
|
+
},
|
|
130
|
+
})
|
|
131
|
+
const result = await task.result
|
|
132
|
+
release()
|
|
133
|
+
|
|
134
|
+
if (args.stream) {
|
|
135
|
+
if (args.stream_options?.include_usage) {
|
|
136
|
+
const finalChunk: OpenAICompletionChunk = {
|
|
137
|
+
id: task.id,
|
|
138
|
+
model: task.model,
|
|
139
|
+
object: 'text_completion',
|
|
140
|
+
created: Math.floor(task.createdAt.getTime() / 1000),
|
|
141
|
+
choices: [
|
|
142
|
+
{
|
|
143
|
+
index: 0,
|
|
144
|
+
text: '',
|
|
145
|
+
logprobs: null,
|
|
146
|
+
// @ts-ignore
|
|
147
|
+
finish_reason: result.finishReason
|
|
148
|
+
? finishReasonMap[result.finishReason]
|
|
149
|
+
: 'stop',
|
|
150
|
+
},
|
|
151
|
+
],
|
|
152
|
+
}
|
|
153
|
+
res.write(
|
|
154
|
+
`data: ${JSON.stringify(finalChunk)}\n\n`,
|
|
155
|
+
)
|
|
156
|
+
}
|
|
157
|
+
res.write('data: [DONE]')
|
|
158
|
+
res.end()
|
|
159
|
+
} else {
|
|
160
|
+
const response: OpenAI.Completions.Completion = {
|
|
161
|
+
id: task.id,
|
|
162
|
+
model: task.model,
|
|
163
|
+
object: 'text_completion',
|
|
164
|
+
created: Math.floor(task.createdAt.getTime() / 1000),
|
|
165
|
+
system_fingerprint: instance.fingerprint,
|
|
166
|
+
choices: [
|
|
167
|
+
{
|
|
168
|
+
index: 0,
|
|
169
|
+
text: result.text,
|
|
170
|
+
logprobs: null,
|
|
171
|
+
// @ts-ignore
|
|
172
|
+
finish_reason: result.finishReason
|
|
173
|
+
? finishReasonMap[result.finishReason]
|
|
174
|
+
: 'stop',
|
|
175
|
+
},
|
|
176
|
+
],
|
|
177
|
+
usage: {
|
|
178
|
+
prompt_tokens: result.promptTokens,
|
|
179
|
+
completion_tokens: result.completionTokens,
|
|
180
|
+
total_tokens: result.contextTokens,
|
|
181
|
+
},
|
|
182
|
+
}
|
|
183
|
+
res.writeHead(200, { 'Content-Type': 'application/json' })
|
|
184
|
+
res.end(JSON.stringify(response, null, 2))
|
|
185
|
+
}
|
|
186
|
+
} catch (err) {
|
|
187
|
+
console.error(err)
|
|
188
|
+
if (args.stream) {
|
|
189
|
+
res.write('data: [ERROR]')
|
|
190
|
+
} else {
|
|
191
|
+
res.writeHead(500, { 'Content-Type': 'application/json' })
|
|
192
|
+
res.end(JSON.stringify({ error: 'Internal server error' }))
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { IncomingMessage, ServerResponse } from 'node:http'
|
|
2
|
+
import type { OpenAI } from 'openai'
|
|
3
|
+
import { EmbeddingRequest } from '#package/types/index.js'
|
|
4
|
+
import { parseJSONRequestBody } from '#package/api/parseJSONRequestBody.js'
|
|
5
|
+
import { omitEmptyValues } from '#package/lib/util.js'
|
|
6
|
+
import { ModelServer } from '#package/server.js'
|
|
7
|
+
|
|
8
|
+
type OpenAIEmbeddingsParams = OpenAI.EmbeddingCreateParams
|
|
9
|
+
|
|
10
|
+
// v1/embeddings
|
|
11
|
+
// https://platform.openai.com/docs/api-reference/embeddings
|
|
12
|
+
export function createEmbeddingsHandler(modelServer: ModelServer) {
|
|
13
|
+
return async (req: IncomingMessage, res: ServerResponse) => {
|
|
14
|
+
let args: OpenAIEmbeddingsParams
|
|
15
|
+
|
|
16
|
+
try {
|
|
17
|
+
const body = await parseJSONRequestBody(req)
|
|
18
|
+
args = body
|
|
19
|
+
} catch (e) {
|
|
20
|
+
console.error(e)
|
|
21
|
+
res.writeHead(400, { 'Content-Type': 'application/json' })
|
|
22
|
+
res.end(JSON.stringify({ error: 'Invalid request' }))
|
|
23
|
+
return
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
// TODO ajv schema validation?
|
|
27
|
+
if (!args.model || !args.input) {
|
|
28
|
+
res.writeHead(400, { 'Content-Type': 'application/json' })
|
|
29
|
+
res.end(JSON.stringify({ error: 'Invalid request' }))
|
|
30
|
+
return
|
|
31
|
+
}
|
|
32
|
+
if (!modelServer.modelExists(args.model)) {
|
|
33
|
+
res.writeHead(400, { 'Content-Type': 'application/json' })
|
|
34
|
+
res.end(JSON.stringify({ error: 'Invalid model' }))
|
|
35
|
+
return
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
const controller = new AbortController()
|
|
39
|
+
req.on('close', () => {
|
|
40
|
+
console.debug('Client closed connection')
|
|
41
|
+
controller.abort()
|
|
42
|
+
})
|
|
43
|
+
req.on('end', () => {
|
|
44
|
+
console.debug('Client ended connection')
|
|
45
|
+
controller.abort()
|
|
46
|
+
})
|
|
47
|
+
|
|
48
|
+
try {
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
let input = args.input
|
|
52
|
+
|
|
53
|
+
if (typeof input !== 'string') {
|
|
54
|
+
throw new Error('Input must be a string')
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
const embeddingsReq = omitEmptyValues<EmbeddingRequest>({
|
|
58
|
+
model: args.model,
|
|
59
|
+
input: args.input as string,
|
|
60
|
+
})
|
|
61
|
+
|
|
62
|
+
const { instance, release } = await modelServer.requestInstance(
|
|
63
|
+
embeddingsReq,
|
|
64
|
+
controller.signal,
|
|
65
|
+
)
|
|
66
|
+
const task = instance.processEmbeddingTask(embeddingsReq)
|
|
67
|
+
const result = await task.result
|
|
68
|
+
release()
|
|
69
|
+
|
|
70
|
+
const response: OpenAI.CreateEmbeddingResponse = {
|
|
71
|
+
model: instance.modelId,
|
|
72
|
+
object: 'list',
|
|
73
|
+
data: result.embeddings.map((embedding, index) => ({
|
|
74
|
+
embedding: Array.from(embedding),
|
|
75
|
+
index,
|
|
76
|
+
object: 'embedding',
|
|
77
|
+
})),
|
|
78
|
+
usage: {
|
|
79
|
+
prompt_tokens: result.inputTokens,
|
|
80
|
+
total_tokens: result.inputTokens,
|
|
81
|
+
},
|
|
82
|
+
}
|
|
83
|
+
res.writeHead(200, { 'Content-Type': 'application/json' })
|
|
84
|
+
res.end(JSON.stringify(response, null, 2))
|
|
85
|
+
|
|
86
|
+
} catch (err) {
|
|
87
|
+
console.error(err)
|
|
88
|
+
res.writeHead(500, { 'Content-Type': 'application/json' })
|
|
89
|
+
res.end(JSON.stringify({ error: 'Internal server error' }))
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { IncomingMessage, ServerResponse } from 'node:http'
|
|
2
|
+
import path from 'node:path'
|
|
3
|
+
import type { OpenAI } from 'openai'
|
|
4
|
+
import type { ModelServer } from '#package/server'
|
|
5
|
+
|
|
6
|
+
// https://platform.openai.com/docs/api-reference/models/list
|
|
7
|
+
export function createModelsHandler(modelServer: ModelServer) {
|
|
8
|
+
return async (req: IncomingMessage, res: ServerResponse) => {
|
|
9
|
+
|
|
10
|
+
const models = modelServer.store.getStatus()
|
|
11
|
+
const data: OpenAI.Model[] = Object.entries(models).map(
|
|
12
|
+
([id, info]) => {
|
|
13
|
+
// const lastModDate = new Date(info.source.lastModified)
|
|
14
|
+
// const created = Math.floor(lastModDate.getTime() / 1000)
|
|
15
|
+
|
|
16
|
+
// const dirPath = path.dirname(info.source.file);
|
|
17
|
+
// const lastDir = path.basename(dirPath);
|
|
18
|
+
// const baseName = path.basename(info.source.file);
|
|
19
|
+
const owned_by = info.engine// + ':' + path.join(lastDir, baseName);
|
|
20
|
+
|
|
21
|
+
return {
|
|
22
|
+
object: 'model',
|
|
23
|
+
id,
|
|
24
|
+
created: 0,
|
|
25
|
+
owned_by,
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
res.writeHead(200, { 'Content-Type': 'application/json' })
|
|
31
|
+
res.end(JSON.stringify({ object: 'list', data }, null, 2))
|
|
32
|
+
}
|
|
33
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { ModelServer } from '#package/server.js'
|
|
2
|
+
import { createChatCompletionHandler } from './handlers/chat.js'
|
|
3
|
+
import { createCompletionHandler } from './handlers/completions.js'
|
|
4
|
+
import { createModelsHandler } from './handlers/models.js'
|
|
5
|
+
import { createEmbeddingsHandler } from './handlers/embeddings.js'
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
// See OpenAI API specs at https://github.com/openai/openai-openapi/blob/master/openapi.yaml
|
|
9
|
+
export function createOpenAIRequestHandlers(modelServer: ModelServer) {
|
|
10
|
+
return {
|
|
11
|
+
chatCompletions: createChatCompletionHandler(modelServer),
|
|
12
|
+
completions: createCompletionHandler(modelServer),
|
|
13
|
+
models: createModelsHandler(modelServer),
|
|
14
|
+
embeddings: createEmbeddingsHandler(modelServer),
|
|
15
|
+
}
|
|
16
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { IncomingMessage } from 'node:http'
|
|
2
|
+
|
|
3
|
+
export function parseJSONRequestBody(req: IncomingMessage | Request): Promise<any> {
|
|
4
|
+
return new Promise((resolve, reject) => {
|
|
5
|
+
|
|
6
|
+
// if request is from express theres no need to parse anything
|
|
7
|
+
if ('body' in req) {
|
|
8
|
+
resolve(req.body)
|
|
9
|
+
return
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
// for native http server
|
|
13
|
+
let body = ''
|
|
14
|
+
req.on('data', (chunk) => {
|
|
15
|
+
body += chunk.toString()
|
|
16
|
+
})
|
|
17
|
+
req.on('end', () => {
|
|
18
|
+
try {
|
|
19
|
+
const data = JSON.parse(body)
|
|
20
|
+
resolve(data)
|
|
21
|
+
} catch (error) {
|
|
22
|
+
reject(error)
|
|
23
|
+
}
|
|
24
|
+
})
|
|
25
|
+
})
|
|
26
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
See [discussion](https://github.com/iimez/inference-server/discussions/8) for more details.
|
|
2
|
+
|
|
3
|
+
### Task API
|
|
4
|
+
|
|
5
|
+
- `POST /tasks/text-completion`
|
|
6
|
+
- `POST /tasks/chat-completion` should continue to be stateless
|
|
7
|
+
- `GET /tasks/{task_id}`
|
|
8
|
+
- `DELETE /tasks/{task_id}`
|
|
9
|
+
|
|
10
|
+
### Thread API
|
|
11
|
+
|
|
12
|
+
- `POST /threads`
|
|
13
|
+
- `POST /threads/{thread_id}` mutate state without generating anything
|
|
14
|
+
- `POST /threads/{thread_id}/generate` to generate a new assistant message
|
|
15
|
+
- `GET /threads/{thread_id}`
|
|
16
|
+
- `DELETE /threads/{thread_id}`
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { IncomingMessage, ServerResponse } from 'node:http'
|
|
2
|
+
import express from 'express'
|
|
3
|
+
import { ModelPool } from '#package/pool.js'
|
|
4
|
+
import { ModelServer } from '#package/server.js'
|
|
5
|
+
|
|
6
|
+
export function createAPIMiddleware(server: ModelServer) {
|
|
7
|
+
const router = express.Router()
|
|
8
|
+
|
|
9
|
+
router.use((req, res, next) => {
|
|
10
|
+
console.debug('API call', {
|
|
11
|
+
method: req.method,
|
|
12
|
+
url: req.url,
|
|
13
|
+
body: req.body,
|
|
14
|
+
})
|
|
15
|
+
next()
|
|
16
|
+
})
|
|
17
|
+
return router
|
|
18
|
+
// return async (req: IncomingMessage, res: ServerResponse) => {
|
|
19
|
+
|
|
20
|
+
// let args: any
|
|
21
|
+
|
|
22
|
+
// try {
|
|
23
|
+
// const body = await parseJSONRequestBody(req)
|
|
24
|
+
// args = body
|
|
25
|
+
// } catch (e) {
|
|
26
|
+
// console.error(e)
|
|
27
|
+
// res.writeHead(400, { 'Content-Type': 'application/json' })
|
|
28
|
+
// res.end(JSON.stringify({ error: 'Invalid request' }))
|
|
29
|
+
// return
|
|
30
|
+
// }
|
|
31
|
+
|
|
32
|
+
// console.debug('Handler', JSON.stringify(args, null, 2))
|
|
33
|
+
// res.writeHead(200, { 'Content-Type': 'application/json' })
|
|
34
|
+
// res.end(JSON.stringify({ message: 'Hello' }))
|
|
35
|
+
|
|
36
|
+
// }
|
|
37
|
+
}
|
package/src/cli.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
process.stdout.write("Nothing here, yet.\n");
|
|
3
|
+
|
|
4
|
+
/* API Ideas:
|
|
5
|
+
- `infsrv ls` - list stored models
|
|
6
|
+
- `infsrv rm <name>` - remove a stored model
|
|
7
|
+
- `infsrv show <name>` - show details of a stored model
|
|
8
|
+
- `infsrv prepare <config>` - download everything required for given config
|
|
9
|
+
*/
|