inference-server 1.0.0-beta.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +216 -0
- package/dist/api/openai/enums.d.ts +4 -0
- package/dist/api/openai/enums.js +17 -0
- package/dist/api/openai/enums.js.map +1 -0
- package/dist/api/openai/handlers/chat.d.ts +3 -0
- package/dist/api/openai/handlers/chat.js +358 -0
- package/dist/api/openai/handlers/chat.js.map +1 -0
- package/dist/api/openai/handlers/completions.d.ts +3 -0
- package/dist/api/openai/handlers/completions.js +169 -0
- package/dist/api/openai/handlers/completions.js.map +1 -0
- package/dist/api/openai/handlers/embeddings.d.ts +3 -0
- package/dist/api/openai/handlers/embeddings.js +74 -0
- package/dist/api/openai/handlers/embeddings.js.map +1 -0
- package/dist/api/openai/handlers/images.d.ts +0 -0
- package/dist/api/openai/handlers/images.js +4 -0
- package/dist/api/openai/handlers/images.js.map +1 -0
- package/dist/api/openai/handlers/models.d.ts +3 -0
- package/dist/api/openai/handlers/models.js +23 -0
- package/dist/api/openai/handlers/models.js.map +1 -0
- package/dist/api/openai/handlers/transcription.d.ts +0 -0
- package/dist/api/openai/handlers/transcription.js +4 -0
- package/dist/api/openai/handlers/transcription.js.map +1 -0
- package/dist/api/openai/index.d.ts +7 -0
- package/dist/api/openai/index.js +14 -0
- package/dist/api/openai/index.js.map +1 -0
- package/dist/api/parseJSONRequestBody.d.ts +2 -0
- package/dist/api/parseJSONRequestBody.js +24 -0
- package/dist/api/parseJSONRequestBody.js.map +1 -0
- package/dist/api/v1/index.d.ts +2 -0
- package/dist/api/v1/index.js +29 -0
- package/dist/api/v1/index.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +10 -0
- package/dist/cli.js.map +1 -0
- package/dist/engines/gpt4all/engine.d.ts +34 -0
- package/dist/engines/gpt4all/engine.js +357 -0
- package/dist/engines/gpt4all/engine.js.map +1 -0
- package/dist/engines/gpt4all/util.d.ts +3 -0
- package/dist/engines/gpt4all/util.js +29 -0
- package/dist/engines/gpt4all/util.js.map +1 -0
- package/dist/engines/index.d.ts +19 -0
- package/dist/engines/index.js +21 -0
- package/dist/engines/index.js.map +1 -0
- package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
- package/dist/engines/node-llama-cpp/engine.js +666 -0
- package/dist/engines/node-llama-cpp/engine.js.map +1 -0
- package/dist/engines/node-llama-cpp/types.d.ts +13 -0
- package/dist/engines/node-llama-cpp/types.js +2 -0
- package/dist/engines/node-llama-cpp/types.js.map +1 -0
- package/dist/engines/node-llama-cpp/util.d.ts +15 -0
- package/dist/engines/node-llama-cpp/util.js +84 -0
- package/dist/engines/node-llama-cpp/util.js.map +1 -0
- package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
- package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
- package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
- package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
- package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
- package/dist/engines/stable-diffusion-cpp/types.js +2 -0
- package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
- package/dist/engines/stable-diffusion-cpp/util.js +55 -0
- package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
- package/dist/engines/transformers-js/engine.d.ts +37 -0
- package/dist/engines/transformers-js/engine.js +538 -0
- package/dist/engines/transformers-js/engine.js.map +1 -0
- package/dist/engines/transformers-js/types.d.ts +7 -0
- package/dist/engines/transformers-js/types.js +2 -0
- package/dist/engines/transformers-js/types.js.map +1 -0
- package/dist/engines/transformers-js/util.d.ts +7 -0
- package/dist/engines/transformers-js/util.js +36 -0
- package/dist/engines/transformers-js/util.js.map +1 -0
- package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
- package/dist/engines/transformers-js/validateModelFiles.js +133 -0
- package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
- package/dist/experiments/ChatWithVision.d.ts +11 -0
- package/dist/experiments/ChatWithVision.js +91 -0
- package/dist/experiments/ChatWithVision.js.map +1 -0
- package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
- package/dist/experiments/StableDiffPromptGenerator.js +4 -0
- package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
- package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
- package/dist/experiments/VoiceFunctionCall.js +51 -0
- package/dist/experiments/VoiceFunctionCall.js.map +1 -0
- package/dist/http.d.ts +19 -0
- package/dist/http.js +54 -0
- package/dist/http.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -0
- package/dist/instance.d.ts +88 -0
- package/dist/instance.js +594 -0
- package/dist/instance.js.map +1 -0
- package/dist/lib/acquireFileLock.d.ts +7 -0
- package/dist/lib/acquireFileLock.js +38 -0
- package/dist/lib/acquireFileLock.js.map +1 -0
- package/dist/lib/calculateContextIdentity.d.ts +7 -0
- package/dist/lib/calculateContextIdentity.js +39 -0
- package/dist/lib/calculateContextIdentity.js.map +1 -0
- package/dist/lib/calculateFileChecksum.d.ts +1 -0
- package/dist/lib/calculateFileChecksum.js +16 -0
- package/dist/lib/calculateFileChecksum.js.map +1 -0
- package/dist/lib/copyDirectory.d.ts +6 -0
- package/dist/lib/copyDirectory.js +27 -0
- package/dist/lib/copyDirectory.js.map +1 -0
- package/dist/lib/decodeAudio.d.ts +1 -0
- package/dist/lib/decodeAudio.js +26 -0
- package/dist/lib/decodeAudio.js.map +1 -0
- package/dist/lib/downloadModelFile.d.ts +10 -0
- package/dist/lib/downloadModelFile.js +58 -0
- package/dist/lib/downloadModelFile.js.map +1 -0
- package/dist/lib/flattenMessageTextContent.d.ts +2 -0
- package/dist/lib/flattenMessageTextContent.js +11 -0
- package/dist/lib/flattenMessageTextContent.js.map +1 -0
- package/dist/lib/getCacheDirPath.d.ts +12 -0
- package/dist/lib/getCacheDirPath.js +31 -0
- package/dist/lib/getCacheDirPath.js.map +1 -0
- package/dist/lib/loadImage.d.ts +12 -0
- package/dist/lib/loadImage.js +30 -0
- package/dist/lib/loadImage.js.map +1 -0
- package/dist/lib/logger.d.ts +12 -0
- package/dist/lib/logger.js +98 -0
- package/dist/lib/logger.js.map +1 -0
- package/dist/lib/math.d.ts +7 -0
- package/dist/lib/math.js +30 -0
- package/dist/lib/math.js.map +1 -0
- package/dist/lib/resolveModelFileLocation.d.ts +15 -0
- package/dist/lib/resolveModelFileLocation.js +41 -0
- package/dist/lib/resolveModelFileLocation.js.map +1 -0
- package/dist/lib/util.d.ts +7 -0
- package/dist/lib/util.js +61 -0
- package/dist/lib/util.js.map +1 -0
- package/dist/lib/validateModelFile.d.ts +9 -0
- package/dist/lib/validateModelFile.js +62 -0
- package/dist/lib/validateModelFile.js.map +1 -0
- package/dist/lib/validateModelOptions.d.ts +3 -0
- package/dist/lib/validateModelOptions.js +23 -0
- package/dist/lib/validateModelOptions.js.map +1 -0
- package/dist/pool.d.ts +61 -0
- package/dist/pool.js +512 -0
- package/dist/pool.js.map +1 -0
- package/dist/server.d.ts +59 -0
- package/dist/server.js +221 -0
- package/dist/server.js.map +1 -0
- package/dist/standalone.d.ts +1 -0
- package/dist/standalone.js +306 -0
- package/dist/standalone.js.map +1 -0
- package/dist/store.d.ts +60 -0
- package/dist/store.js +203 -0
- package/dist/store.js.map +1 -0
- package/dist/types/completions.d.ts +57 -0
- package/dist/types/completions.js +2 -0
- package/dist/types/completions.js.map +1 -0
- package/dist/types/index.d.ts +326 -0
- package/dist/types/index.js +2 -0
- package/dist/types/index.js.map +1 -0
- package/docs/engines.md +28 -0
- package/docs/gpu.md +72 -0
- package/docs/http-api.md +147 -0
- package/examples/all-options.js +108 -0
- package/examples/chat-cli.js +56 -0
- package/examples/chat-server.js +65 -0
- package/examples/concurrency.js +70 -0
- package/examples/express.js +70 -0
- package/examples/pool.js +91 -0
- package/package.json +113 -0
- package/src/api/openai/enums.ts +20 -0
- package/src/api/openai/handlers/chat.ts +408 -0
- package/src/api/openai/handlers/completions.ts +196 -0
- package/src/api/openai/handlers/embeddings.ts +92 -0
- package/src/api/openai/handlers/images.ts +3 -0
- package/src/api/openai/handlers/models.ts +33 -0
- package/src/api/openai/handlers/transcription.ts +2 -0
- package/src/api/openai/index.ts +16 -0
- package/src/api/parseJSONRequestBody.ts +26 -0
- package/src/api/v1/DRAFT.md +16 -0
- package/src/api/v1/index.ts +37 -0
- package/src/cli.ts +9 -0
- package/src/engines/gpt4all/engine.ts +441 -0
- package/src/engines/gpt4all/util.ts +31 -0
- package/src/engines/index.ts +28 -0
- package/src/engines/node-llama-cpp/engine.ts +811 -0
- package/src/engines/node-llama-cpp/types.ts +17 -0
- package/src/engines/node-llama-cpp/util.ts +126 -0
- package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
- package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
- package/src/engines/stable-diffusion-cpp/types.ts +54 -0
- package/src/engines/stable-diffusion-cpp/util.ts +58 -0
- package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
- package/src/engines/transformers-js/engine.ts +659 -0
- package/src/engines/transformers-js/types.ts +25 -0
- package/src/engines/transformers-js/util.ts +40 -0
- package/src/engines/transformers-js/validateModelFiles.ts +168 -0
- package/src/experiments/ChatWithVision.ts +103 -0
- package/src/experiments/StableDiffPromptGenerator.ts +2 -0
- package/src/experiments/VoiceFunctionCall.ts +71 -0
- package/src/http.ts +72 -0
- package/src/index.ts +7 -0
- package/src/instance.ts +723 -0
- package/src/lib/acquireFileLock.ts +38 -0
- package/src/lib/calculateContextIdentity.ts +53 -0
- package/src/lib/calculateFileChecksum.ts +18 -0
- package/src/lib/copyDirectory.ts +29 -0
- package/src/lib/decodeAudio.ts +39 -0
- package/src/lib/downloadModelFile.ts +70 -0
- package/src/lib/flattenMessageTextContent.ts +19 -0
- package/src/lib/getCacheDirPath.ts +34 -0
- package/src/lib/loadImage.ts +46 -0
- package/src/lib/logger.ts +112 -0
- package/src/lib/math.ts +31 -0
- package/src/lib/resolveModelFileLocation.ts +49 -0
- package/src/lib/util.ts +75 -0
- package/src/lib/validateModelFile.ts +71 -0
- package/src/lib/validateModelOptions.ts +31 -0
- package/src/pool.ts +651 -0
- package/src/server.ts +270 -0
- package/src/standalone.ts +320 -0
- package/src/store.ts +278 -0
- package/src/types/completions.ts +86 -0
- package/src/types/index.ts +488 -0
- package/tsconfig.json +29 -0
- package/tsconfig.release.json +11 -0
- package/vitest.config.ts +18 -0
package/src/server.ts
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
import path from 'node:path'
|
|
2
|
+
import { builtInEngineNames } from '#package/engines/index.js'
|
|
3
|
+
import { ModelPool } from '#package/pool.js'
|
|
4
|
+
import { ModelInstance } from '#package/instance.js'
|
|
5
|
+
import { ModelStore, StoredModel } from '#package/store.js'
|
|
6
|
+
import {
|
|
7
|
+
ModelOptions,
|
|
8
|
+
IncomingRequest,
|
|
9
|
+
CompletionProcessingOptions,
|
|
10
|
+
ChatCompletionRequest,
|
|
11
|
+
EmbeddingRequest,
|
|
12
|
+
ProcessingOptions,
|
|
13
|
+
TextCompletionRequest,
|
|
14
|
+
ModelEngine,
|
|
15
|
+
ImageToTextRequest,
|
|
16
|
+
SpeechToTextRequest,
|
|
17
|
+
SpeechToTextProcessingOptions,
|
|
18
|
+
BuiltInModelOptions,
|
|
19
|
+
CustomEngineModelOptions,
|
|
20
|
+
ModelConfigBase,
|
|
21
|
+
TextToImageRequest,
|
|
22
|
+
ImageToImageRequest,
|
|
23
|
+
} from '#package/types/index.js'
|
|
24
|
+
import { Logger, LogLevel, createSublogger, LogLevels } from '#package/lib/logger.js'
|
|
25
|
+
import { resolveModelFileLocation } from '#package/lib/resolveModelFileLocation.js'
|
|
26
|
+
import { validateModelOptions } from '#package/lib/validateModelOptions.js'
|
|
27
|
+
import { getCacheDirPath } from '#package/lib/getCacheDirPath.js'
|
|
28
|
+
|
|
29
|
+
/**
 * Options accepted by the `ModelServer` constructor and `startModelServer`.
 */
export interface ModelServerOptions {
  /** Model configurations, keyed by the model id used to address them. */
  models: Record<string, ModelOptions>
  /**
   * Custom engines, keyed by the name that a model's `engine` field refers to.
   * Only engines referenced by at least one model are registered.
   */
  engines?: Record<string, ModelEngine>
  /** Concurrency setting forwarded to the model pool. Defaults to 1. */
  concurrency?: number
  /**
   * Base cache directory. Models are cached in its `models` subdirectory.
   * When omitted, the path returned by `getCacheDirPath('models')` is used.
   */
  cachePath?: string
  /** Logger instance or log level, handed to `createSublogger`. */
  log?: Logger | LogLevel
}
|
|
36
|
+
|
|
37
|
+
/**
 * Creates a `ModelServer` and kicks off its startup.
 *
 * The server is returned synchronously; `start()` is intentionally not awaited
 * here, so the returned server may not be fully initialized yet and a startup
 * failure is not observable through this function's return value.
 *
 * @param options Server configuration.
 * @returns The newly created server instance.
 */
export function startModelServer(options: ModelServerOptions) {
  const modelServer = new ModelServer(options)
  // Fire-and-forget: startup continues in the background.
  void modelServer.start()
  return modelServer
}
|
|
42
|
+
|
|
43
|
+
/**
 * Ties together the model store (model files / preparation state), the
 * model pool (instances and request queueing) and the engines used to run
 * the configured models.
 */
export class ModelServer {
  /** Pool that hands out model instances for incoming requests. */
  pool: ModelPool
  /** Store holding the configured models and their preparation status. */
  store: ModelStore
  /**
   * Engines by name. Custom engines referenced by a model are registered in
   * the constructor; built-in engines are imported lazily in `start()`.
   */
  engines: Record<string, ModelEngine> = {}
  /** Logger created from `options.log`. */
  log: Logger

  /**
   * Applies defaults to every configured model, verifies that each model's
   * engine exists and creates the store and the pool.
   *
   * @param options Server configuration.
   * @throws Error if a model references an engine that is neither built-in
   *   nor provided via `options.engines`.
   */
  constructor(options: ModelServerOptions) {
    this.log = createSublogger(options.log)
    let modelsCachePath = getCacheDirPath('models')
    if (options.cachePath) {
      modelsCachePath = path.join(options.cachePath, 'models')
    }

    const modelsWithDefaults: Record<string, ModelConfigBase> = {}
    const usedEngines: Array<{ model: string; engine: string }> = []
    for (const modelId in options.models) {
      const modelOptions = options.models[modelId]
      const isBuiltIn = builtInEngineNames.includes(modelOptions.engine)
      if (isBuiltIn) {
        const builtInModelOptions = modelOptions as BuiltInModelOptions
        // can validate and resolve location of model files if a built-in engine is used
        validateModelOptions(modelId, builtInModelOptions)
        modelsWithDefaults[modelId] = {
          id: modelId,
          minInstances: 0,
          maxInstances: 1,
          modelsCachePath,
          location: resolveModelFileLocation({
            url: builtInModelOptions.url,
            filePath: builtInModelOptions.location,
            modelsCachePath,
          }),
          // user-supplied options are spread last so they win over the defaults above
          ...builtInModelOptions,
        }
      } else {
        const customEngineOptions = modelOptions as CustomEngineModelOptions
        modelsWithDefaults[modelId] = {
          id: modelId,
          minInstances: 0,
          maxInstances: 1,
          modelsCachePath,
          ...customEngineOptions,
        }
      }
      usedEngines.push({
        model: modelId,
        engine: modelOptions.engine,
      })
    }

    // make sure every referenced engine exists and register the custom ones in use
    const customEngines = Object.keys(options.engines ?? {})
    for (const ref of usedEngines) {
      const isBuiltIn = builtInEngineNames.includes(ref.engine)
      const isCustom = customEngines.includes(ref.engine)
      if (!isBuiltIn && !isCustom) {
        throw new Error(`Engine "${ref.engine}" used by model "${ref.model}" does not exist`)
      }
      if (isCustom) {
        this.engines[ref.engine] = options.engines![ref.engine]
      }
    }

    this.store = new ModelStore({
      log: this.log,
      // TODO expose this? or remove it?
      // prepareConcurrency: 2,
      models: modelsWithDefaults,
      modelsCachePath,
    })
    this.pool = new ModelPool(
      {
        log: this.log,
        concurrency: options.concurrency ?? 1,
        models: modelsWithDefaults,
      },
      this.prepareInstance.bind(this),
    )
  }

  /**
   * Reports whether a model with the given id is configured in the pool.
   *
   * Uses an own-property check so that names inherited from
   * `Object.prototype` (e.g. "constructor", "toString") are not mistaken
   * for configured models.
   */
  modelExists(modelId: string) {
    return Object.prototype.hasOwnProperty.call(this.pool.config.models, modelId)
  }

  /**
   * Starts the registered custom engines, imports the built-in engines that
   * are used by at least one model, then initializes the store and the pool.
   */
  async start() {
    const engineStartPromises: unknown[] = []
    // call start on custom engines that provide it
    for (const engine of Object.values(this.engines)) {
      if (engine.start) {
        engineStartPromises.push(engine.start(this))
      }
    }
    // import built-in engines, skipping the ones no model uses
    for (const key of builtInEngineNames) {
      const isUsed = Object.keys(this.store.models).some(
        (modelId) => this.store.models[modelId].engine === key,
      )
      if (!isUsed) {
        continue
      }
      engineStartPromises.push(this.importBuiltInEngine(key))
    }
    await Promise.all(engineStartPromises)
    await Promise.all([this.store.init(this.engines), this.pool.init(this.engines)])
  }

  /** Dynamically imports a built-in engine module and registers it under `key`. */
  private async importBuiltInEngine(key: string): Promise<void> {
    const engine = await import(`./engines/${key}/engine.js`)
    this.engines[key] = engine
  }

  /**
   * Clears the pool queue and disposes the store and the pool.
   * Errors thrown while disposing the pool are logged, not rethrown.
   */
  async stop() {
    this.log(LogLevels.info, 'Stopping model server')
    this.pool.queue.clear()
    this.store.dispose()
    // need to make sure all tasks are canceled, waiting for idle can make stop hang
    // await this.pool.queue.onIdle() // would wait until all completions are done
    try {
      await this.pool.dispose() // might cause abort errors when there are still running tasks
    } catch (err) {
      this.log(LogLevels.error, 'Error while stopping model server', err)
    }

    this.log(LogLevels.debug, 'Model server stopped')
  }

  /** Requests a model instance from the pool; forwards to `pool.requestInstance`. */
  async requestInstance(request: IncomingRequest, signal?: AbortSignal) {
    return this.pool.requestInstance(request, signal)
  }

  /**
   * Makes sure the model backing `instance` has been prepared by the store.
   * Gets called by the pool right before a new instance is created.
   *
   * - status `unloaded`: triggers preparation and waits for it.
   * - status `preparing`: waits for the store's prepare queue to report
   *   completion for this model and rejects with an Error if the model did
   *   not end up `ready`.
   * - any other status: returns immediately.
   */
  private async prepareInstance(instance: ModelInstance, signal?: AbortSignal) {
    const model = instance.config
    const modelStoreStatus = this.store.models[model.id].status
    if (modelStoreStatus === 'unloaded') {
      await this.store.prepareModel(model.id, signal)
      return
    }
    if (modelStoreStatus === 'preparing') {
      await new Promise<void>((resolve, reject) => {
        const onCompleted = (storeModel: StoredModel) => {
          if (storeModel.id !== model.id) {
            // completion event of a different model, keep waiting
            return
          }
          this.store.prepareQueue.off('completed', onCompleted)
          if (storeModel.status === 'ready') {
            resolve()
          } else {
            reject(
              new Error(`Model "${model.id}" failed to prepare (status: ${storeModel.status})`),
            )
          }
        }
        this.store.prepareQueue.on('completed', onCompleted)
      })
    }
  }

  /**
   * Runs a chat completion on a pooled instance and returns its result.
   * The instance lock is released whether the task succeeds or fails.
   */
  async processChatCompletionTask(args: ChatCompletionRequest, options?: CompletionProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processChatCompletionTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /**
   * Runs a text completion on a pooled instance and returns its result.
   * The instance lock is released whether the task succeeds or fails.
   */
  async processTextCompletionTask(args: TextCompletionRequest, options?: CompletionProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processTextCompletionTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /**
   * Runs an embedding task on a pooled instance and returns its result.
   * The instance lock is released whether the task succeeds or fails.
   */
  async processEmbeddingTask(args: EmbeddingRequest, options?: ProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processEmbeddingTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /**
   * Runs an image-to-text task on a pooled instance and returns its result.
   * The instance lock is released whether the task succeeds or fails.
   */
  async processImageToTextTask(args: ImageToTextRequest, options?: ProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processImageToTextTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /**
   * Runs a speech-to-text task on a pooled instance and returns its result.
   * The instance lock is released whether the task succeeds or fails.
   */
  async processSpeechToTextTask(args: SpeechToTextRequest, options?: SpeechToTextProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processSpeechToTextTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /**
   * Runs a text-to-image task on a pooled instance and returns its result.
   * The instance lock is released whether the task succeeds or fails.
   */
  async processTextToImageTask(args: TextToImageRequest, options?: ProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processTextToImageTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /**
   * Runs an image-to-image task on a pooled instance and returns its result.
   * The instance lock is released whether the task succeeds or fails.
   */
  async processImageToImageTask(args: ImageToImageRequest, options?: ProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processImageToImageTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /** Returns the combined status of the pool and the store. */
  getStatus() {
    const poolStatus = this.pool.getStatus()
    const storeStatus = this.store.getStatus()
    return {
      pool: poolStatus,
      store: storeStatus,
    }
  }
}
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
import type { AddressInfo } from 'node:net'
|
|
2
|
+
import { format as formatURL } from 'node:url'
|
|
3
|
+
import { ModelHTTPServer, ModelHTTPServerOptions } from '#package/http.js'
|
|
4
|
+
import { ChatWithVisionEngine } from '#package/experiments/ChatWithVision.js'
|
|
5
|
+
import { VoiceFunctionCallEngine } from '#package/experiments/VoiceFunctionCall.js'
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
Florence2ForConditionalGeneration,
|
|
9
|
+
WhisperForConditionalGeneration,
|
|
10
|
+
CLIPTextModelWithProjection,
|
|
11
|
+
CLIPVisionModelWithProjection,
|
|
12
|
+
AutoModelForCausalLM,
|
|
13
|
+
} from '@huggingface/transformers'
|
|
14
|
+
|
|
15
|
+
// Currently only used for debugging. Do not use.
|
|
16
|
+
const serverOptions: ModelHTTPServerOptions = {
|
|
17
|
+
listen: {
|
|
18
|
+
port: 3000,
|
|
19
|
+
},
|
|
20
|
+
log: 'debug',
|
|
21
|
+
concurrency: 2,
|
|
22
|
+
engines: {
|
|
23
|
+
// 'chat-with-vision': new ChatWithVisionEngine({
|
|
24
|
+
// imageToTextModel: 'florence2',
|
|
25
|
+
// chatModel: 'llama3-8b',
|
|
26
|
+
// }),
|
|
27
|
+
// 'voice-function-calling': new VoiceFunctionCallEngine({
|
|
28
|
+
// speechToTextModel: 'whisper-base',
|
|
29
|
+
// chatModel: 'functionary',
|
|
30
|
+
// }),
|
|
31
|
+
},
|
|
32
|
+
models: {
|
|
33
|
+
// 'sciphi-triplex': {
|
|
34
|
+
// url: 'https://huggingface.co/SciPhi/Triplex/blob/main/quantized_model-Q4_K_M.gguf',
|
|
35
|
+
// sha256: '6f8f6f1fca005640a1282dd0bd12512dedf22957d0c2135ba5e71583d33754fc',
|
|
36
|
+
// engine: 'node-llama-cpp',
|
|
37
|
+
// task: 'text-completion',
|
|
38
|
+
// },
|
|
39
|
+
// 'lite-mistral': {
|
|
40
|
+
// url: 'https://huggingface.co/bartowski/Lite-Mistral-150M-v2-Instruct-GGUF/resolve/main/Lite-Mistral-150M-v2-Instruct-Q8_0.gguf',
|
|
41
|
+
// sha256: 'b369c9b1ac20b66b2f94117d5cdc71d029a47a33948cefef9fe104615dcddfbd',
|
|
42
|
+
// engine: 'node-llama-cpp',
|
|
43
|
+
// task: 'text-completion',
|
|
44
|
+
// prepare: 'async',
|
|
45
|
+
// },
|
|
46
|
+
// 'gemma-9b': {
|
|
47
|
+
// url: 'https://huggingface.co/bartowski/gemma-2-9b-it-GGUF/blob/main/gemma-2-9b-it-Q4_K_M.gguf',
|
|
48
|
+
// sha256:
|
|
49
|
+
// '13b2a7b4115bbd0900162edcebe476da1ba1fc24e718e8b40d32f6e300f56dfe',
|
|
50
|
+
// engine: 'node-llama-cpp',
|
|
51
|
+
// task: 'text-completion',
|
|
52
|
+
// prepare: 'async',
|
|
53
|
+
// },
|
|
54
|
+
// 'llama3.1-8b': {
|
|
55
|
+
// url: 'https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/blob/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf',
|
|
56
|
+
// sha256:
|
|
57
|
+
// '2a4ca64e02e7126436cfdb066dd7311f2486eb487191910d3d000fde13826a4d',
|
|
58
|
+
// engine: 'node-llama-cpp',
|
|
59
|
+
// task: 'text-completion',
|
|
60
|
+
// prepare: 'async',
|
|
61
|
+
// },
|
|
62
|
+
// 'dolphin-nemo-12b': {
|
|
63
|
+
// url: 'https://huggingface.co/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b-gguf/blob/main/dolphin-2.9.3-mistral-nemo-Q4_K_M.gguf',
|
|
64
|
+
// sha256: '09f9114e06d88b791e322586cf28a844d2d0a3876d04d6deffe2dfb26616dd83',
|
|
65
|
+
// engine: 'node-llama-cpp',
|
|
66
|
+
// task: 'text-completion',
|
|
67
|
+
// prepare: 'async',
|
|
68
|
+
// },
|
|
69
|
+
// 'phi3-mini-4k': {
|
|
70
|
+
// task: 'text-completion',
|
|
71
|
+
// url: 'https://gpt4all.io/models/gguf/Phi-3-mini-4k-instruct.Q4_0.gguf',
|
|
72
|
+
// engine: 'gpt4all',
|
|
73
|
+
// maxInstances: 2,
|
|
74
|
+
// prepare: 'async',
|
|
75
|
+
// },
|
|
76
|
+
// 'mxbai-embed-large-v1': {
|
|
77
|
+
// url: 'https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1',
|
|
78
|
+
// engine: 'transformers-js',
|
|
79
|
+
// task: 'embedding',
|
|
80
|
+
// prepare: 'blocking',
|
|
81
|
+
// device: {
|
|
82
|
+
// gpu: false,
|
|
83
|
+
// },
|
|
84
|
+
// },
|
|
85
|
+
// 'jina-clip-v1': {
|
|
86
|
+
// url: 'https://huggingface.co/jinaai/jina-clip-v1',
|
|
87
|
+
// engine: 'transformers-js',
|
|
88
|
+
// task: 'embedding',
|
|
89
|
+
// textModel: {
|
|
90
|
+
// modelClass: CLIPTextModelWithProjection,
|
|
91
|
+
// },
|
|
92
|
+
// visionModel: {
|
|
93
|
+
// processor: {
|
|
94
|
+
// url: 'https://huggingface.co/Xenova/clip-vit-base-patch32',
|
|
95
|
+
// // url: 'https://huggingface.co/Xenova/vit-base-patch16-224-in21k',
|
|
96
|
+
// },
|
|
97
|
+
// modelClass: CLIPVisionModelWithProjection,
|
|
98
|
+
// },
|
|
99
|
+
// prepare: 'blocking',
|
|
100
|
+
// device: {
|
|
101
|
+
// gpu: false,
|
|
102
|
+
// },
|
|
103
|
+
// },
|
|
104
|
+
// 'florence2-large': {
|
|
105
|
+
// url: 'https://huggingface.co/onnx-community/Florence-2-large-ft',
|
|
106
|
+
// engine: 'transformers-js',
|
|
107
|
+
// task: 'image-to-text',
|
|
108
|
+
// prepare: 'blocking',
|
|
109
|
+
// visionModel: {
|
|
110
|
+
// modelClass: Florence2ForConditionalGeneration,
|
|
111
|
+
// dtype: {
|
|
112
|
+
// embed_tokens: 'fp16',
|
|
113
|
+
// vision_encoder: 'fp32',
|
|
114
|
+
// encoder_model: 'fp16',
|
|
115
|
+
// decoder_model_merged: 'q4',
|
|
116
|
+
// },
|
|
117
|
+
// },
|
|
118
|
+
// device: {
|
|
119
|
+
// gpu: false,
|
|
120
|
+
// },
|
|
121
|
+
// },
|
|
122
|
+
// 'whisper-base': {
|
|
123
|
+
// url: 'https://huggingface.co/onnx-community/whisper-base',
|
|
124
|
+
// engine: 'transformers-js',
|
|
125
|
+
// task: 'speech-to-text',
|
|
126
|
+
// prepare: 'async',
|
|
127
|
+
// minInstances: 1,
|
|
128
|
+
// speechModel: {
|
|
129
|
+
// modelClass: WhisperForConditionalGeneration,
|
|
130
|
+
// dtype: {
|
|
131
|
+
// encoder_model: 'fp32', // 'fp16' works too
|
|
132
|
+
// decoder_model_merged: 'q4', // or 'fp32' ('fp16' is broken)
|
|
133
|
+
// },
|
|
134
|
+
// },
|
|
135
|
+
// device: {
|
|
136
|
+
// gpu: false,
|
|
137
|
+
// },
|
|
138
|
+
// },
|
|
139
|
+
// 'mistral-nemo-12b': {
|
|
140
|
+
// 'url': 'https://huggingface.co/mradermacher/Mistral-Nemo-Instruct-2407-GGUF/blob/main/Mistral-Nemo-Instruct-2407.Q4_K_M.gguf',
|
|
141
|
+
// 'sha256': '1ac4b6cdf0eeb1e2145f0097c6fd0a75df541e143f226a8ff25c8ae0e8dfff6f',
|
|
142
|
+
// 'engine': 'node-llama-cpp',
|
|
143
|
+
// 'task': 'text-completion',
|
|
144
|
+
// 'prepare': 'async',
|
|
145
|
+
// },
|
|
146
|
+
// 'phi-3.5-mini': {
|
|
147
|
+
// url: 'https://huggingface.co/bartowski/Phi-3.5-mini-instruct-GGUF/blob/main/Phi-3.5-mini-instruct-Q4_K_M.gguf',
|
|
148
|
+
// sha256:
|
|
149
|
+
// 'e4165e3a71af97f1b4820da61079826d8752a2088e313af0c7d346796c38eff5',
|
|
150
|
+
// engine: 'node-llama-cpp',
|
|
151
|
+
// task: 'text-completion',
|
|
152
|
+
// prepare: 'async',
|
|
153
|
+
// minInstances: 1,
|
|
154
|
+
// device: {
|
|
155
|
+
// gpu: 'vulkan',
|
|
156
|
+
// },
|
|
157
|
+
// },
|
|
158
|
+
// 'falcon-mamba-7b': {
|
|
159
|
+
// url: 'https://huggingface.co/mradermacher/falcon-mamba-7b-instruct-GGUF/blob/main/falcon-mamba-7b-instruct.Q4_K_M.gguf',
|
|
160
|
+
// sha256: 'f3357486034d89dd91fcefdb91bb1dfadfe0fd2969349a8a404e59d2bd3ad1b8',
|
|
161
|
+
// engine: 'node-llama-cpp',
|
|
162
|
+
// task: 'text-completion',
|
|
163
|
+
// prepare: 'async',
|
|
164
|
+
// },
|
|
165
|
+
// 'florence2-large': {
|
|
166
|
+
// url: 'https://huggingface.co/onnx-community/Florence-2-large-ft',
|
|
167
|
+
// engine: 'transformers-js',
|
|
168
|
+
// task: 'image-to-text',
|
|
169
|
+
// minInstances: 1,
|
|
170
|
+
// visionModel: {
|
|
171
|
+
// modelClass: Florence2ForConditionalGeneration,
|
|
172
|
+
// dtype: {
|
|
173
|
+
// embed_tokens: 'fp16',
|
|
174
|
+
// vision_encoder: 'fp32',
|
|
175
|
+
// encoder_model: 'fp16',
|
|
176
|
+
// decoder_model_merged: 'q4',
|
|
177
|
+
// },
|
|
178
|
+
// },
|
|
179
|
+
// device: {
|
|
180
|
+
// gpu: false,
|
|
181
|
+
// },
|
|
182
|
+
// },
|
|
183
|
+
// 'mxbai-embed-large-v1': {
|
|
184
|
+
// url: 'https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1',
|
|
185
|
+
// engine: 'transformers-js',
|
|
186
|
+
// task: 'embedding',
|
|
187
|
+
// prepare: 'blocking',
|
|
188
|
+
// device: {
|
|
189
|
+
// gpu: false,
|
|
190
|
+
// },
|
|
191
|
+
// },
|
|
192
|
+
// 'functionary-3.2-small': {
|
|
193
|
+
// url: 'https://huggingface.co/meetkai/functionary-small-v3.2-GGUF/blob/main/functionary-small-v3.2.Q4_0.gguf',
|
|
194
|
+
// sha256: 'c0afdbbffa498a8490dea3401e34034ac0f2c6e337646513a7dbc04fcef1c3a4',
|
|
195
|
+
// engine: 'node-llama-cpp',
|
|
196
|
+
// task: 'text-completion',
|
|
197
|
+
// prepare: 'blocking',
|
|
198
|
+
// },
|
|
199
|
+
// 'flux-schnell': {
|
|
200
|
+
// url: 'https://huggingface.co/leejet/FLUX.1-schnell-gguf/resolve/main/flux1-schnell-q4_0.gguf',
|
|
201
|
+
// task: 'text-to-image',
|
|
202
|
+
// sha256: '4f30741d2bfc786c92934ce925fcb0a43df3441e76504b797c3d5d5f0878fa6f',
|
|
203
|
+
// engine: 'stable-diffusion-cpp',
|
|
204
|
+
// prepare: 'blocking',
|
|
205
|
+
// diffusionModel: true,
|
|
206
|
+
// samplingMethod: 'euler_a',
|
|
207
|
+
// vae: {
|
|
208
|
+
// url: 'https://huggingface.co/second-state/FLUX.1-schnell-GGUF/blob/main/ae.safetensors',
|
|
209
|
+
// },
|
|
210
|
+
// clipL: {
|
|
211
|
+
// url: 'https://huggingface.co/second-state/FLUX.1-schnell-GGUF/blob/main/clip_l.safetensors',
|
|
212
|
+
// },
|
|
213
|
+
// t5xxl: {
|
|
214
|
+
// // url: 'https://huggingface.co/second-state/FLUX.1-schnell-GGUF/blob/main/t5xxl_fp16.safetensors',
|
|
215
|
+
// url: 'https://huggingface.co/second-state/FLUX.1-schnell-GGUF/blob/main/t5xxl-Q8_0.gguf',
|
|
216
|
+
// },
|
|
217
|
+
// },
|
|
218
|
+
// 'sd-3.5-turbo': {
|
|
219
|
+
// url: 'https://huggingface.co/stduhpf/SD3.5-Large-Turbo-GGUF-mixed-sdcpp/blob/main/legacy/sd3.5_large_turbo-q4_0.gguf',
|
|
220
|
+
// sha256: '52495d9c4356065a1378a93c9556a9eb465e10014ba9ce364512674267405bb2',
|
|
221
|
+
// engine: 'stable-diffusion-cpp',
|
|
222
|
+
// task: 'text-to-image',
|
|
223
|
+
// prepare: 'blocking',
|
|
224
|
+
// samplingMethod: 'euler',
|
|
225
|
+
// clipG: {
|
|
226
|
+
// url: 'https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_g.safetensors',
|
|
227
|
+
// sha256: 'ec310df2af79c318e24d20511b601a591ca8cd4f1fce1d8dff822a356bcdb1f4',
|
|
228
|
+
// },
|
|
229
|
+
// clipL: {
|
|
230
|
+
// url: 'https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_l.safetensors',
|
|
231
|
+
// sha256: '660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd',
|
|
232
|
+
// },
|
|
233
|
+
// t5xxl: {
|
|
234
|
+
// url: 'https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/t5xxl_fp8_e4m3fn.safetensors',
|
|
235
|
+
// sha256: '7d330da4816157540d6bb7838bf63a0f02f573fc48ca4d8de34bb0cbfd514f09',
|
|
236
|
+
// },
|
|
237
|
+
// },
|
|
238
|
+
// 'trocr-printed': {
|
|
239
|
+
// url: 'https://huggingface.co/Xenova/trocr-small-printed',
|
|
240
|
+
// engine: 'transformers-js',
|
|
241
|
+
// task: 'image-to-text',
|
|
242
|
+
// prepare: 'blocking',
|
|
243
|
+
// minInstances: 1,
|
|
244
|
+
// // textModel: {
|
|
245
|
+
// // modelClass: TrOCRPreTrainedModel,
|
|
246
|
+
// // processorClass: DeiTFeatureExtractor,
|
|
247
|
+
// // },
|
|
248
|
+
// device: {
|
|
249
|
+
// gpu: false,
|
|
250
|
+
// },
|
|
251
|
+
// },
|
|
252
|
+
// 'sdxl-turbo': {
|
|
253
|
+
// url: 'https://huggingface.co/stabilityai/sdxl-turbo/blob/main/sd_xl_turbo_1.0_fp16.safetensors',
|
|
254
|
+
// sha256:
|
|
255
|
+
// 'e869ac7d6942cb327d68d5ed83a40447aadf20e0c3358d98b2cc9e270db0da26',
|
|
256
|
+
// engine: 'stable-diffusion-cpp',
|
|
257
|
+
// task: 'image-to-image',
|
|
258
|
+
// prepare: 'blocking',
|
|
259
|
+
// samplingMethod: 'euler',
|
|
260
|
+
// vae: {
|
|
261
|
+
// url: 'https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl.vae.safetensors',
|
|
262
|
+
// sha256:
|
|
263
|
+
// '235745af8d86bf4a4c1b5b4f529868b37019a10f7c0b2e79ad0abca3a22bc6e1',
|
|
264
|
+
// },
|
|
265
|
+
// },
|
|
266
|
+
// 'flux-light': {
|
|
267
|
+
// url: 'https://huggingface.co/city96/flux.1-lite-8B-alpha-gguf/blob/main/flux.1-lite-8B-alpha-Q8_0.gguf',
|
|
268
|
+
// sha256:
|
|
269
|
+
// 'efc598d62123f2fdfd682948f533fee081f7fb1295b14d002ac1e66cae5f01a5',
|
|
270
|
+
// engine: 'stable-diffusion-cpp',
|
|
271
|
+
// task: 'image-to-image',
|
|
272
|
+
// prepare: 'blocking',
|
|
273
|
+
// },
|
|
274
|
+
// 'sd-3-medium': {
|
|
275
|
+
// url: 'https://huggingface.co/second-state/stable-diffusion-3-medium-GGUF/blob/main/sd3-medium-Q8_0.gguf',
|
|
276
|
+
// sha256: '7e34dfeb71f8cdbc8338677b63a444897cf4c5692ab4c1d98f04cbba6751885a',
|
|
277
|
+
// engine: 'stable-diffusion-cpp',
|
|
278
|
+
// task: 'text-to-image',
|
|
279
|
+
// prepare: 'async',
|
|
280
|
+
// },
|
|
281
|
+
// 'sd-1.5': {
|
|
282
|
+
// url: 'https://huggingface.co/second-state/stable-diffusion-v1-5-GGUF/blob/main/stable-diffusion-v1-5-pruned-emaonly-f32.gguf',
|
|
283
|
+
// sha256: '52c7ca39d8d48d6f44fa4ff2c44569f3c924d92311108cb38492958350d48ff8',
|
|
284
|
+
// engine: 'stable-diffusion-cpp',
|
|
285
|
+
// task: 'text-to-image',
|
|
286
|
+
// prepare: 'async',
|
|
287
|
+
// },
|
|
288
|
+
// 'llama-3.2-3b': {
|
|
289
|
+
// url: 'https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/blob/main/Llama-3.2-3B-Instruct-Q6_K_L.gguf',
|
|
290
|
+
// sha256:
|
|
291
|
+
// 'c542b14ec07b8b3cb8d777e1a68ee5aabb964167719466d4c685c29fcfd04900',
|
|
292
|
+
// engine: 'node-llama-cpp',
|
|
293
|
+
// task: 'text-completion',
|
|
294
|
+
// prepare: 'blocking',
|
|
295
|
+
// },
|
|
296
|
+
},
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
/**
 * Starts the model HTTP server built from `serverOptions` and logs where it
 * can be reached.
 *
 * Wildcard bind addresses are reported as `localhost` so the printed URL can
 * be opened directly.
 *
 * @throws Error if the server reports no bound address after `start()`.
 */
async function main() {
  const server = new ModelHTTPServer(serverOptions)
  await server.start()

  // NOTE(review): assumes `httpServer` is a Node http/net server, whose
  // address() may be an AddressInfo object, a string (pipe / unix socket),
  // or null (not listening). Narrow instead of asserting the object shape.
  const bound = server.httpServer.address()
  if (bound === null || bound === undefined) {
    throw new Error('Server did not report a listening address')
  }
  if (typeof bound === 'string') {
    // Pipe / socket listeners have no host or port to build a URL from.
    console.log(`Server listening at ${bound}`)
    return
  }

  const { address, port } = bound as AddressInfo
  // '' and '::' (IPv6) and '0.0.0.0' (IPv4) all mean "every interface";
  // none of them is a useful host to show the user.
  const isWildcard = address === '' || address === '::' || address === '0.0.0.0'
  const hostname = isWildcard ? 'localhost' : address
  const url = formatURL({
    protocol: 'http',
    hostname,
    port,
    pathname: '/',
  })
  console.log(`Server listening at ${url}`)
}
|
|
312
|
+
|
|
313
|
+
// Kick off the server; if startup rejects, print the error and terminate
// the process with exit code 1.
main().catch(function onStartupFailure(failure: Error) {
  console.error(failure)
  process.exit(1)
})

// Log promise rejections that nothing else handled.
process.on('unhandledRejection', function onUnhandledRejection(reason) {
  console.error('Unhandled rejection:', reason)
})
|