inference-server 1.0.0-beta.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. package/README.md +216 -0
  2. package/dist/api/openai/enums.d.ts +4 -0
  3. package/dist/api/openai/enums.js +17 -0
  4. package/dist/api/openai/enums.js.map +1 -0
  5. package/dist/api/openai/handlers/chat.d.ts +3 -0
  6. package/dist/api/openai/handlers/chat.js +358 -0
  7. package/dist/api/openai/handlers/chat.js.map +1 -0
  8. package/dist/api/openai/handlers/completions.d.ts +3 -0
  9. package/dist/api/openai/handlers/completions.js +169 -0
  10. package/dist/api/openai/handlers/completions.js.map +1 -0
  11. package/dist/api/openai/handlers/embeddings.d.ts +3 -0
  12. package/dist/api/openai/handlers/embeddings.js +74 -0
  13. package/dist/api/openai/handlers/embeddings.js.map +1 -0
  14. package/dist/api/openai/handlers/images.d.ts +0 -0
  15. package/dist/api/openai/handlers/images.js +4 -0
  16. package/dist/api/openai/handlers/images.js.map +1 -0
  17. package/dist/api/openai/handlers/models.d.ts +3 -0
  18. package/dist/api/openai/handlers/models.js +23 -0
  19. package/dist/api/openai/handlers/models.js.map +1 -0
  20. package/dist/api/openai/handlers/transcription.d.ts +0 -0
  21. package/dist/api/openai/handlers/transcription.js +4 -0
  22. package/dist/api/openai/handlers/transcription.js.map +1 -0
  23. package/dist/api/openai/index.d.ts +7 -0
  24. package/dist/api/openai/index.js +14 -0
  25. package/dist/api/openai/index.js.map +1 -0
  26. package/dist/api/parseJSONRequestBody.d.ts +2 -0
  27. package/dist/api/parseJSONRequestBody.js +24 -0
  28. package/dist/api/parseJSONRequestBody.js.map +1 -0
  29. package/dist/api/v1/index.d.ts +2 -0
  30. package/dist/api/v1/index.js +29 -0
  31. package/dist/api/v1/index.js.map +1 -0
  32. package/dist/cli.d.ts +1 -0
  33. package/dist/cli.js +10 -0
  34. package/dist/cli.js.map +1 -0
  35. package/dist/engines/gpt4all/engine.d.ts +34 -0
  36. package/dist/engines/gpt4all/engine.js +357 -0
  37. package/dist/engines/gpt4all/engine.js.map +1 -0
  38. package/dist/engines/gpt4all/util.d.ts +3 -0
  39. package/dist/engines/gpt4all/util.js +29 -0
  40. package/dist/engines/gpt4all/util.js.map +1 -0
  41. package/dist/engines/index.d.ts +19 -0
  42. package/dist/engines/index.js +21 -0
  43. package/dist/engines/index.js.map +1 -0
  44. package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
  45. package/dist/engines/node-llama-cpp/engine.js +666 -0
  46. package/dist/engines/node-llama-cpp/engine.js.map +1 -0
  47. package/dist/engines/node-llama-cpp/types.d.ts +13 -0
  48. package/dist/engines/node-llama-cpp/types.js +2 -0
  49. package/dist/engines/node-llama-cpp/types.js.map +1 -0
  50. package/dist/engines/node-llama-cpp/util.d.ts +15 -0
  51. package/dist/engines/node-llama-cpp/util.js +84 -0
  52. package/dist/engines/node-llama-cpp/util.js.map +1 -0
  53. package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
  54. package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
  55. package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
  56. package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
  57. package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
  58. package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
  59. package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
  60. package/dist/engines/stable-diffusion-cpp/types.js +2 -0
  61. package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
  62. package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
  63. package/dist/engines/stable-diffusion-cpp/util.js +55 -0
  64. package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
  65. package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
  66. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
  67. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
  68. package/dist/engines/transformers-js/engine.d.ts +37 -0
  69. package/dist/engines/transformers-js/engine.js +538 -0
  70. package/dist/engines/transformers-js/engine.js.map +1 -0
  71. package/dist/engines/transformers-js/types.d.ts +7 -0
  72. package/dist/engines/transformers-js/types.js +2 -0
  73. package/dist/engines/transformers-js/types.js.map +1 -0
  74. package/dist/engines/transformers-js/util.d.ts +7 -0
  75. package/dist/engines/transformers-js/util.js +36 -0
  76. package/dist/engines/transformers-js/util.js.map +1 -0
  77. package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
  78. package/dist/engines/transformers-js/validateModelFiles.js +133 -0
  79. package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
  80. package/dist/experiments/ChatWithVision.d.ts +11 -0
  81. package/dist/experiments/ChatWithVision.js +91 -0
  82. package/dist/experiments/ChatWithVision.js.map +1 -0
  83. package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
  84. package/dist/experiments/StableDiffPromptGenerator.js +4 -0
  85. package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
  86. package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
  87. package/dist/experiments/VoiceFunctionCall.js +51 -0
  88. package/dist/experiments/VoiceFunctionCall.js.map +1 -0
  89. package/dist/http.d.ts +19 -0
  90. package/dist/http.js +54 -0
  91. package/dist/http.js.map +1 -0
  92. package/dist/index.d.ts +7 -0
  93. package/dist/index.js +8 -0
  94. package/dist/index.js.map +1 -0
  95. package/dist/instance.d.ts +88 -0
  96. package/dist/instance.js +594 -0
  97. package/dist/instance.js.map +1 -0
  98. package/dist/lib/acquireFileLock.d.ts +7 -0
  99. package/dist/lib/acquireFileLock.js +38 -0
  100. package/dist/lib/acquireFileLock.js.map +1 -0
  101. package/dist/lib/calculateContextIdentity.d.ts +7 -0
  102. package/dist/lib/calculateContextIdentity.js +39 -0
  103. package/dist/lib/calculateContextIdentity.js.map +1 -0
  104. package/dist/lib/calculateFileChecksum.d.ts +1 -0
  105. package/dist/lib/calculateFileChecksum.js +16 -0
  106. package/dist/lib/calculateFileChecksum.js.map +1 -0
  107. package/dist/lib/copyDirectory.d.ts +6 -0
  108. package/dist/lib/copyDirectory.js +27 -0
  109. package/dist/lib/copyDirectory.js.map +1 -0
  110. package/dist/lib/decodeAudio.d.ts +1 -0
  111. package/dist/lib/decodeAudio.js +26 -0
  112. package/dist/lib/decodeAudio.js.map +1 -0
  113. package/dist/lib/downloadModelFile.d.ts +10 -0
  114. package/dist/lib/downloadModelFile.js +58 -0
  115. package/dist/lib/downloadModelFile.js.map +1 -0
  116. package/dist/lib/flattenMessageTextContent.d.ts +2 -0
  117. package/dist/lib/flattenMessageTextContent.js +11 -0
  118. package/dist/lib/flattenMessageTextContent.js.map +1 -0
  119. package/dist/lib/getCacheDirPath.d.ts +12 -0
  120. package/dist/lib/getCacheDirPath.js +31 -0
  121. package/dist/lib/getCacheDirPath.js.map +1 -0
  122. package/dist/lib/loadImage.d.ts +12 -0
  123. package/dist/lib/loadImage.js +30 -0
  124. package/dist/lib/loadImage.js.map +1 -0
  125. package/dist/lib/logger.d.ts +12 -0
  126. package/dist/lib/logger.js +98 -0
  127. package/dist/lib/logger.js.map +1 -0
  128. package/dist/lib/math.d.ts +7 -0
  129. package/dist/lib/math.js +30 -0
  130. package/dist/lib/math.js.map +1 -0
  131. package/dist/lib/resolveModelFileLocation.d.ts +15 -0
  132. package/dist/lib/resolveModelFileLocation.js +41 -0
  133. package/dist/lib/resolveModelFileLocation.js.map +1 -0
  134. package/dist/lib/util.d.ts +7 -0
  135. package/dist/lib/util.js +61 -0
  136. package/dist/lib/util.js.map +1 -0
  137. package/dist/lib/validateModelFile.d.ts +9 -0
  138. package/dist/lib/validateModelFile.js +62 -0
  139. package/dist/lib/validateModelFile.js.map +1 -0
  140. package/dist/lib/validateModelOptions.d.ts +3 -0
  141. package/dist/lib/validateModelOptions.js +23 -0
  142. package/dist/lib/validateModelOptions.js.map +1 -0
  143. package/dist/pool.d.ts +61 -0
  144. package/dist/pool.js +512 -0
  145. package/dist/pool.js.map +1 -0
  146. package/dist/server.d.ts +59 -0
  147. package/dist/server.js +221 -0
  148. package/dist/server.js.map +1 -0
  149. package/dist/standalone.d.ts +1 -0
  150. package/dist/standalone.js +306 -0
  151. package/dist/standalone.js.map +1 -0
  152. package/dist/store.d.ts +60 -0
  153. package/dist/store.js +203 -0
  154. package/dist/store.js.map +1 -0
  155. package/dist/types/completions.d.ts +57 -0
  156. package/dist/types/completions.js +2 -0
  157. package/dist/types/completions.js.map +1 -0
  158. package/dist/types/index.d.ts +326 -0
  159. package/dist/types/index.js +2 -0
  160. package/dist/types/index.js.map +1 -0
  161. package/docs/engines.md +28 -0
  162. package/docs/gpu.md +72 -0
  163. package/docs/http-api.md +147 -0
  164. package/examples/all-options.js +108 -0
  165. package/examples/chat-cli.js +56 -0
  166. package/examples/chat-server.js +65 -0
  167. package/examples/concurrency.js +70 -0
  168. package/examples/express.js +70 -0
  169. package/examples/pool.js +91 -0
  170. package/package.json +113 -0
  171. package/src/api/openai/enums.ts +20 -0
  172. package/src/api/openai/handlers/chat.ts +408 -0
  173. package/src/api/openai/handlers/completions.ts +196 -0
  174. package/src/api/openai/handlers/embeddings.ts +92 -0
  175. package/src/api/openai/handlers/images.ts +3 -0
  176. package/src/api/openai/handlers/models.ts +33 -0
  177. package/src/api/openai/handlers/transcription.ts +2 -0
  178. package/src/api/openai/index.ts +16 -0
  179. package/src/api/parseJSONRequestBody.ts +26 -0
  180. package/src/api/v1/DRAFT.md +16 -0
  181. package/src/api/v1/index.ts +37 -0
  182. package/src/cli.ts +9 -0
  183. package/src/engines/gpt4all/engine.ts +441 -0
  184. package/src/engines/gpt4all/util.ts +31 -0
  185. package/src/engines/index.ts +28 -0
  186. package/src/engines/node-llama-cpp/engine.ts +811 -0
  187. package/src/engines/node-llama-cpp/types.ts +17 -0
  188. package/src/engines/node-llama-cpp/util.ts +126 -0
  189. package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
  190. package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
  191. package/src/engines/stable-diffusion-cpp/types.ts +54 -0
  192. package/src/engines/stable-diffusion-cpp/util.ts +58 -0
  193. package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
  194. package/src/engines/transformers-js/engine.ts +659 -0
  195. package/src/engines/transformers-js/types.ts +25 -0
  196. package/src/engines/transformers-js/util.ts +40 -0
  197. package/src/engines/transformers-js/validateModelFiles.ts +168 -0
  198. package/src/experiments/ChatWithVision.ts +103 -0
  199. package/src/experiments/StableDiffPromptGenerator.ts +2 -0
  200. package/src/experiments/VoiceFunctionCall.ts +71 -0
  201. package/src/http.ts +72 -0
  202. package/src/index.ts +7 -0
  203. package/src/instance.ts +723 -0
  204. package/src/lib/acquireFileLock.ts +38 -0
  205. package/src/lib/calculateContextIdentity.ts +53 -0
  206. package/src/lib/calculateFileChecksum.ts +18 -0
  207. package/src/lib/copyDirectory.ts +29 -0
  208. package/src/lib/decodeAudio.ts +39 -0
  209. package/src/lib/downloadModelFile.ts +70 -0
  210. package/src/lib/flattenMessageTextContent.ts +19 -0
  211. package/src/lib/getCacheDirPath.ts +34 -0
  212. package/src/lib/loadImage.ts +46 -0
  213. package/src/lib/logger.ts +112 -0
  214. package/src/lib/math.ts +31 -0
  215. package/src/lib/resolveModelFileLocation.ts +49 -0
  216. package/src/lib/util.ts +75 -0
  217. package/src/lib/validateModelFile.ts +71 -0
  218. package/src/lib/validateModelOptions.ts +31 -0
  219. package/src/pool.ts +651 -0
  220. package/src/server.ts +270 -0
  221. package/src/standalone.ts +320 -0
  222. package/src/store.ts +278 -0
  223. package/src/types/completions.ts +86 -0
  224. package/src/types/index.ts +488 -0
  225. package/tsconfig.json +29 -0
  226. package/tsconfig.release.json +11 -0
  227. package/vitest.config.ts +18 -0
package/src/server.ts ADDED
@@ -0,0 +1,270 @@
1
+ import path from 'node:path'
2
+ import { builtInEngineNames } from '#package/engines/index.js'
3
+ import { ModelPool } from '#package/pool.js'
4
+ import { ModelInstance } from '#package/instance.js'
5
+ import { ModelStore, StoredModel } from '#package/store.js'
6
+ import {
7
+ ModelOptions,
8
+ IncomingRequest,
9
+ CompletionProcessingOptions,
10
+ ChatCompletionRequest,
11
+ EmbeddingRequest,
12
+ ProcessingOptions,
13
+ TextCompletionRequest,
14
+ ModelEngine,
15
+ ImageToTextRequest,
16
+ SpeechToTextRequest,
17
+ SpeechToTextProcessingOptions,
18
+ BuiltInModelOptions,
19
+ CustomEngineModelOptions,
20
+ ModelConfigBase,
21
+ TextToImageRequest,
22
+ ImageToImageRequest,
23
+ } from '#package/types/index.js'
24
+ import { Logger, LogLevel, createSublogger, LogLevels } from '#package/lib/logger.js'
25
+ import { resolveModelFileLocation } from '#package/lib/resolveModelFileLocation.js'
26
+ import { validateModelOptions } from '#package/lib/validateModelOptions.js'
27
+ import { getCacheDirPath } from '#package/lib/getCacheDirPath.js'
28
+
29
/**
 * Options accepted by the `ModelServer` constructor and by `startModelServer`.
 */
export interface ModelServerOptions {
  /** Model configurations, keyed by model id. */
  models: Record<string, ModelOptions>
  /** Custom engines, keyed by the name a model's `engine` option refers to. */
  engines?: Record<string, ModelEngine>
  /** Forwarded to the model pool; the server uses 1 when this is omitted. */
  concurrency?: number
  /** Base cache directory; model files are kept in its `models` subdirectory. */
  cachePath?: string
  /** Logger or log level, handed to `createSublogger`. */
  log?: Logger | LogLevel
}
36
+
37
/**
 * Creates a `ModelServer` from `options` and triggers its startup.
 *
 * `start()` is asynchronous and is not awaited here: the server is returned
 * immediately while startup may still be in progress, and a failing startup
 * is not reported through this call.
 *
 * @param options - Server configuration, passed to the `ModelServer` constructor.
 * @returns The newly created server.
 */
export function startModelServer(options: ModelServerOptions) {
  const modelServer = new ModelServer(options)
  // `void` marks the startup promise as deliberately left floating.
  void modelServer.start()
  return modelServer
}
42
+
43
/**
 * Combines a `ModelStore` and a `ModelPool` that are built from the same model
 * configuration, and exposes one `process*Task` method per supported task type.
 */
export class ModelServer {
  pool: ModelPool
  store: ModelStore
  engines: Record<string, ModelEngine> = {}
  log: Logger

  /**
   * Applies defaults to every configured model, checks that each model's engine
   * is either built-in or provided via `options.engines`, and creates the store
   * and the pool. Nothing is started here; see `start()`.
   *
   * @throws Error if a model references an engine that is neither built-in nor
   * present in `options.engines`.
   */
  constructor(options: ModelServerOptions) {
    this.log = createSublogger(options.log)
    let modelsCachePath = getCacheDirPath('models')
    if (options.cachePath) {
      modelsCachePath = path.join(options.cachePath, 'models')
    }

    const modelsWithDefaults: Record<string, ModelConfigBase> = {}
    const usedEngines: Array<{ model: string; engine: string }> = []
    for (const modelId in options.models) {
      const modelOptions = options.models[modelId]
      const isBuiltIn = builtInEngineNames.includes(modelOptions.engine)
      if (isBuiltIn) {
        const builtInModelOptions = modelOptions as BuiltInModelOptions
        // can validate and resolve location of model files if a built-in engine is used
        validateModelOptions(modelId, builtInModelOptions)
        modelsWithDefaults[modelId] = {
          id: modelId,
          minInstances: 0,
          maxInstances: 1,
          modelsCachePath,
          location: resolveModelFileLocation({
            url: builtInModelOptions.url,
            filePath: builtInModelOptions.location,
            modelsCachePath,
          }),
          // NOTE(review): the spread comes last, so a `location` key present on the
          // model options replaces the resolved location above - confirm this is intended.
          ...builtInModelOptions,
        }
      } else {
        const customEngineOptions = modelOptions as CustomEngineModelOptions
        modelsWithDefaults[modelId] = {
          id: modelId,
          minInstances: 0,
          maxInstances: 1,
          modelsCachePath,
          ...customEngineOptions,
        }
      }
      usedEngines.push({
        model: modelId,
        engine: modelOptions.engine,
      })
    }

    // Only custom engines that a model actually references are registered.
    const providedEngines = options.engines ?? {}
    const customEngineNames = Object.keys(providedEngines)
    for (const ref of usedEngines) {
      const isBuiltIn = builtInEngineNames.includes(ref.engine)
      const isCustom = customEngineNames.includes(ref.engine)
      if (!isBuiltIn && !isCustom) {
        throw new Error(`Engine "${ref.engine}" used by model "${ref.model}" does not exist`)
      }
      if (isCustom) {
        this.engines[ref.engine] = providedEngines[ref.engine]
      }
    }

    this.store = new ModelStore({
      log: this.log,
      // TODO expose this? or remove it?
      // prepareConcurrency: 2,
      models: modelsWithDefaults,
      modelsCachePath,
    })
    this.pool = new ModelPool(
      {
        log: this.log,
        concurrency: options.concurrency ?? 1,
        models: modelsWithDefaults,
      },
      this.prepareInstance.bind(this),
    )
  }

  /** Returns true if `modelId` is part of the pool's model configuration. */
  modelExists(modelId: string) {
    return !!this.pool.config.models[modelId]
  }

  /**
   * Runs the `start` hook of every registered custom engine, imports the
   * built-in engines that at least one configured model uses, then initializes
   * the store and the pool with the resulting engine map.
   */
  async start() {
    const engineStartPromises: unknown[] = []
    // call the optional start hook on custom engines
    for (const engine of Object.values(this.engines)) {
      if (engine.start) {
        engineStartPromises.push(engine.start(this))
      }
    }

    // A plain async function replaces the former `new Promise(async ...)` wrapper;
    // an import failure rejects the returned promise directly.
    const importBuiltInEngine = async (key: string) => {
      const engine = await import(`./engines/${key}/engine.js`)
      this.engines[key] = engine
      return { key, engine }
    }

    // import built-in engines, skipping the ones no model refers to
    for (const key of builtInEngineNames) {
      const isUsed = Object.values(this.store.models).some((model) => model.engine === key)
      if (!isUsed) {
        continue
      }
      engineStartPromises.push(importBuiltInEngine(key))
    }

    await Promise.all(engineStartPromises)
    await Promise.all([this.store.init(this.engines), this.pool.init(this.engines)])
  }

  /**
   * Clears the pool's queue, disposes the store and then the pool.
   * Errors thrown while disposing the pool are logged, not rethrown.
   */
  async stop() {
    this.log(LogLevels.info, 'Stopping model server')
    this.pool.queue.clear()
    this.store.dispose()
    // need to make sure all tasks are canceled, waiting for idle can make stop hang
    // await this.pool.queue.onIdle() // would wait until all completions are done
    try {
      await this.pool.dispose() // might cause abort errors when there are still running tasks
    } catch (err) {
      this.log(LogLevels.error, 'Error while stopping model server', err)
    }

    this.log(LogLevels.debug, 'Model server stopped')
  }

  /** Delegates to the pool; resolves with a lock exposing `instance` and `release()`. */
  async requestInstance(request: IncomingRequest, signal?: AbortSignal) {
    return this.pool.requestInstance(request, signal)
  }

  /**
   * Gets called by the pool right before a new instance is created.
   *
   * If the store reports the model as `unloaded`, preparation is triggered and
   * awaited. If it is already `preparing`, this waits for the matching
   * `completed` event of the store's prepare queue.
   *
   * @throws Error if the awaited preparation completes with a status other than `ready`.
   */
  private async prepareInstance(instance: ModelInstance, signal?: AbortSignal) {
    const model = instance.config
    // The status is read once, so at most one of the two branches below runs.
    const modelStoreStatus = this.store.models[model.id].status
    if (modelStoreStatus === 'unloaded') {
      await this.store.prepareModel(model.id, signal)
    }
    if (modelStoreStatus === 'preparing') {
      // NOTE(review): `signal` is not observed while waiting here - confirm whether
      // an abort should cancel the wait.
      await new Promise<void>((resolve, reject) => {
        const onCompleted = (storeModel: StoredModel) => {
          if (storeModel.id !== model.id) {
            return
          }
          this.store.prepareQueue.off('completed', onCompleted)
          if (storeModel.status === 'ready') {
            resolve()
          } else {
            // Reject with a real Error so callers get a message and a stack.
            reject(new Error(`Model "${model.id}" failed to prepare (status: ${storeModel.status})`))
          }
        }
        this.store.prepareQueue.on('completed', onCompleted)
      })
    }
  }

  /**
   * Requests an instance, runs the chat completion on it and returns the result.
   * The instance lock is released even if the task fails.
   */
  async processChatCompletionTask(args: ChatCompletionRequest, options?: CompletionProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processChatCompletionTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /**
   * Requests an instance, runs the text completion on it and returns the result.
   * The instance lock is released even if the task fails.
   */
  async processTextCompletionTask(args: TextCompletionRequest, options?: CompletionProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processTextCompletionTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /**
   * Requests an instance, runs the embedding task on it and returns the result.
   * The instance lock is released even if the task fails.
   */
  async processEmbeddingTask(args: EmbeddingRequest, options?: ProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processEmbeddingTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /**
   * Requests an instance, runs the image-to-text task on it and returns the result.
   * The instance lock is released even if the task fails.
   */
  async processImageToTextTask(args: ImageToTextRequest, options?: ProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processImageToTextTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /**
   * Requests an instance, runs the speech-to-text task on it and returns the result.
   * The instance lock is released even if the task fails.
   */
  async processSpeechToTextTask(args: SpeechToTextRequest, options?: SpeechToTextProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processSpeechToTextTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /**
   * Requests an instance, runs the text-to-image task on it and returns the result.
   * The instance lock is released even if the task fails.
   */
  async processTextToImageTask(args: TextToImageRequest, options?: ProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processTextToImageTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /**
   * Requests an instance, runs the image-to-image task on it and returns the result.
   * The instance lock is released even if the task fails.
   */
  async processImageToImageTask(args: ImageToImageRequest, options?: ProcessingOptions) {
    const lock = await this.requestInstance(args)
    try {
      const task = lock.instance.processImageToImageTask(args, options)
      return await task.result
    } finally {
      await lock.release()
    }
  }

  /** Returns the current status of the pool and the store. */
  getStatus() {
    const poolStatus = this.pool.getStatus()
    const storeStatus = this.store.getStatus()
    return {
      pool: poolStatus,
      store: storeStatus,
    }
  }
}
@@ -0,0 +1,320 @@
1
+ import type { AddressInfo } from 'node:net'
2
+ import { format as formatURL } from 'node:url'
3
+ import { ModelHTTPServer, ModelHTTPServerOptions } from '#package/http.js'
4
+ import { ChatWithVisionEngine } from '#package/experiments/ChatWithVision.js'
5
+ import { VoiceFunctionCallEngine } from '#package/experiments/VoiceFunctionCall.js'
6
+
7
+ import {
8
+ Florence2ForConditionalGeneration,
9
+ WhisperForConditionalGeneration,
10
+ CLIPTextModelWithProjection,
11
+ CLIPVisionModelWithProjection,
12
+ AutoModelForCausalLM,
13
+ } from '@huggingface/transformers'
14
+
15
// Currently only used for debugging. Do not use.
// The active configuration registers no engines and no models; the commented-out
// entries below are sample configurations that can be enabled while debugging.
const serverOptions: ModelHTTPServerOptions = {
  listen: { port: 3000 },
  log: 'debug',
  concurrency: 2,
  engines: {
    // 'chat-with-vision': new ChatWithVisionEngine({
    //   imageToTextModel: 'florence2',
    //   chatModel: 'llama3-8b',
    // }),
    // 'voice-function-calling': new VoiceFunctionCallEngine({
    //   speechToTextModel: 'whisper-base',
    //   chatModel: 'functionary',
    // }),
  },
  models: {
    // 'sciphi-triplex': {
    //   url: 'https://huggingface.co/SciPhi/Triplex/blob/main/quantized_model-Q4_K_M.gguf',
    //   sha256: '6f8f6f1fca005640a1282dd0bd12512dedf22957d0c2135ba5e71583d33754fc',
    //   engine: 'node-llama-cpp',
    //   task: 'text-completion',
    // },
    // 'lite-mistral': {
    //   url: 'https://huggingface.co/bartowski/Lite-Mistral-150M-v2-Instruct-GGUF/resolve/main/Lite-Mistral-150M-v2-Instruct-Q8_0.gguf',
    //   sha256: 'b369c9b1ac20b66b2f94117d5cdc71d029a47a33948cefef9fe104615dcddfbd',
    //   engine: 'node-llama-cpp',
    //   task: 'text-completion',
    //   prepare: 'async',
    // },
    // 'gemma-9b': {
    //   url: 'https://huggingface.co/bartowski/gemma-2-9b-it-GGUF/blob/main/gemma-2-9b-it-Q4_K_M.gguf',
    //   sha256:
    //     '13b2a7b4115bbd0900162edcebe476da1ba1fc24e718e8b40d32f6e300f56dfe',
    //   engine: 'node-llama-cpp',
    //   task: 'text-completion',
    //   prepare: 'async',
    // },
    // 'llama3.1-8b': {
    //   url: 'https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/blob/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf',
    //   sha256:
    //     '2a4ca64e02e7126436cfdb066dd7311f2486eb487191910d3d000fde13826a4d',
    //   engine: 'node-llama-cpp',
    //   task: 'text-completion',
    //   prepare: 'async',
    // },
    // 'dolphin-nemo-12b': {
    //   url: 'https://huggingface.co/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b-gguf/blob/main/dolphin-2.9.3-mistral-nemo-Q4_K_M.gguf',
    //   sha256: '09f9114e06d88b791e322586cf28a844d2d0a3876d04d6deffe2dfb26616dd83',
    //   engine: 'node-llama-cpp',
    //   task: 'text-completion',
    //   prepare: 'async',
    // },
    // 'phi3-mini-4k': {
    //   task: 'text-completion',
    //   url: 'https://gpt4all.io/models/gguf/Phi-3-mini-4k-instruct.Q4_0.gguf',
    //   engine: 'gpt4all',
    //   maxInstances: 2,
    //   prepare: 'async',
    // },
    // 'mxbai-embed-large-v1': {
    //   url: 'https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1',
    //   engine: 'transformers-js',
    //   task: 'embedding',
    //   prepare: 'blocking',
    //   device: {
    //     gpu: false,
    //   },
    // },
    // 'jina-clip-v1': {
    //   url: 'https://huggingface.co/jinaai/jina-clip-v1',
    //   engine: 'transformers-js',
    //   task: 'embedding',
    //   textModel: {
    //     modelClass: CLIPTextModelWithProjection,
    //   },
    //   visionModel: {
    //     processor: {
    //       url: 'https://huggingface.co/Xenova/clip-vit-base-patch32',
    //       // url: 'https://huggingface.co/Xenova/vit-base-patch16-224-in21k',
    //     },
    //     modelClass: CLIPVisionModelWithProjection,
    //   },
    //   prepare: 'blocking',
    //   device: {
    //     gpu: false,
    //   },
    // },
    // 'florence2-large': {
    //   url: 'https://huggingface.co/onnx-community/Florence-2-large-ft',
    //   engine: 'transformers-js',
    //   task: 'image-to-text',
    //   prepare: 'blocking',
    //   visionModel: {
    //     modelClass: Florence2ForConditionalGeneration,
    //     dtype: {
    //       embed_tokens: 'fp16',
    //       vision_encoder: 'fp32',
    //       encoder_model: 'fp16',
    //       decoder_model_merged: 'q4',
    //     },
    //   },
    //   device: {
    //     gpu: false,
    //   },
    // },
    // 'whisper-base': {
    //   url: 'https://huggingface.co/onnx-community/whisper-base',
    //   engine: 'transformers-js',
    //   task: 'speech-to-text',
    //   prepare: 'async',
    //   minInstances: 1,
    //   speechModel: {
    //     modelClass: WhisperForConditionalGeneration,
    //     dtype: {
    //       encoder_model: 'fp32', // 'fp16' works too
    //       decoder_model_merged: 'q4', // or 'fp32' ('fp16' is broken)
    //     },
    //   },
    //   device: {
    //     gpu: false,
    //   },
    // },
    // 'mistral-nemo-12b': {
    //   'url': 'https://huggingface.co/mradermacher/Mistral-Nemo-Instruct-2407-GGUF/blob/main/Mistral-Nemo-Instruct-2407.Q4_K_M.gguf',
    //   'sha256': '1ac4b6cdf0eeb1e2145f0097c6fd0a75df541e143f226a8ff25c8ae0e8dfff6f',
    //   'engine': 'node-llama-cpp',
    //   'task': 'text-completion',
    //   'prepare': 'async',
    // },
    // 'phi-3.5-mini': {
    //   url: 'https://huggingface.co/bartowski/Phi-3.5-mini-instruct-GGUF/blob/main/Phi-3.5-mini-instruct-Q4_K_M.gguf',
    //   sha256:
    //     'e4165e3a71af97f1b4820da61079826d8752a2088e313af0c7d346796c38eff5',
    //   engine: 'node-llama-cpp',
    //   task: 'text-completion',
    //   prepare: 'async',
    //   minInstances: 1,
    //   device: {
    //     gpu: 'vulkan',
    //   },
    // },
    // 'falcon-mamba-7b': {
    //   url: 'https://huggingface.co/mradermacher/falcon-mamba-7b-instruct-GGUF/blob/main/falcon-mamba-7b-instruct.Q4_K_M.gguf',
    //   sha256: 'f3357486034d89dd91fcefdb91bb1dfadfe0fd2969349a8a404e59d2bd3ad1b8',
    //   engine: 'node-llama-cpp',
    //   task: 'text-completion',
    //   prepare: 'async',
    // },
    // 'florence2-large': {
    //   url: 'https://huggingface.co/onnx-community/Florence-2-large-ft',
    //   engine: 'transformers-js',
    //   task: 'image-to-text',
    //   minInstances: 1,
    //   visionModel: {
    //     modelClass: Florence2ForConditionalGeneration,
    //     dtype: {
    //       embed_tokens: 'fp16',
    //       vision_encoder: 'fp32',
    //       encoder_model: 'fp16',
    //       decoder_model_merged: 'q4',
    //     },
    //   },
    //   device: {
    //     gpu: false,
    //   },
    // },
    // 'mxbai-embed-large-v1': {
    //   url: 'https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1',
    //   engine: 'transformers-js',
    //   task: 'embedding',
    //   prepare: 'blocking',
    //   device: {
    //     gpu: false,
    //   },
    // },
    // 'functionary-3.2-small': {
    //   url: 'https://huggingface.co/meetkai/functionary-small-v3.2-GGUF/blob/main/functionary-small-v3.2.Q4_0.gguf',
    //   sha256: 'c0afdbbffa498a8490dea3401e34034ac0f2c6e337646513a7dbc04fcef1c3a4',
    //   engine: 'node-llama-cpp',
    //   task: 'text-completion',
    //   prepare: 'blocking',
    // },
    // 'flux-schnell': {
    //   url: 'https://huggingface.co/leejet/FLUX.1-schnell-gguf/resolve/main/flux1-schnell-q4_0.gguf',
    //   task: 'text-to-image',
    //   sha256: '4f30741d2bfc786c92934ce925fcb0a43df3441e76504b797c3d5d5f0878fa6f',
    //   engine: 'stable-diffusion-cpp',
    //   prepare: 'blocking',
    //   diffusionModel: true,
    //   samplingMethod: 'euler_a',
    //   vae: {
    //     url: 'https://huggingface.co/second-state/FLUX.1-schnell-GGUF/blob/main/ae.safetensors',
    //   },
    //   clipL: {
    //     url: 'https://huggingface.co/second-state/FLUX.1-schnell-GGUF/blob/main/clip_l.safetensors',
    //   },
    //   t5xxl: {
    //     // url: 'https://huggingface.co/second-state/FLUX.1-schnell-GGUF/blob/main/t5xxl_fp16.safetensors',
    //     url: 'https://huggingface.co/second-state/FLUX.1-schnell-GGUF/blob/main/t5xxl-Q8_0.gguf',
    //   },
    // },
    // 'sd-3.5-turbo': {
    //   url: 'https://huggingface.co/stduhpf/SD3.5-Large-Turbo-GGUF-mixed-sdcpp/blob/main/legacy/sd3.5_large_turbo-q4_0.gguf',
    //   sha256: '52495d9c4356065a1378a93c9556a9eb465e10014ba9ce364512674267405bb2',
    //   engine: 'stable-diffusion-cpp',
    //   task: 'text-to-image',
    //   prepare: 'blocking',
    //   samplingMethod: 'euler',
    //   clipG: {
    //     url: 'https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_g.safetensors',
    //     sha256: 'ec310df2af79c318e24d20511b601a591ca8cd4f1fce1d8dff822a356bcdb1f4',
    //   },
    //   clipL: {
    //     url: 'https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_l.safetensors',
    //     sha256: '660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd',
    //   },
    //   t5xxl: {
    //     url: 'https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/t5xxl_fp8_e4m3fn.safetensors',
    //     sha256: '7d330da4816157540d6bb7838bf63a0f02f573fc48ca4d8de34bb0cbfd514f09',
    //   },
    // },
    // 'trocr-printed': {
    //   url: 'https://huggingface.co/Xenova/trocr-small-printed',
    //   engine: 'transformers-js',
    //   task: 'image-to-text',
    //   prepare: 'blocking',
    //   minInstances: 1,
    //   // textModel: {
    //   //   modelClass: TrOCRPreTrainedModel,
    //   //   processorClass: DeiTFeatureExtractor,
    //   // },
    //   device: {
    //     gpu: false,
    //   },
    // },
    // 'sdxl-turbo': {
    //   url: 'https://huggingface.co/stabilityai/sdxl-turbo/blob/main/sd_xl_turbo_1.0_fp16.safetensors',
    //   sha256:
    //     'e869ac7d6942cb327d68d5ed83a40447aadf20e0c3358d98b2cc9e270db0da26',
    //   engine: 'stable-diffusion-cpp',
    //   task: 'image-to-image',
    //   prepare: 'blocking',
    //   samplingMethod: 'euler',
    //   vae: {
    //     url: 'https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl.vae.safetensors',
    //     sha256:
    //       '235745af8d86bf4a4c1b5b4f529868b37019a10f7c0b2e79ad0abca3a22bc6e1',
    //   },
    // },
    // 'flux-light': {
    //   url: 'https://huggingface.co/city96/flux.1-lite-8B-alpha-gguf/blob/main/flux.1-lite-8B-alpha-Q8_0.gguf',
    //   sha256:
    //     'efc598d62123f2fdfd682948f533fee081f7fb1295b14d002ac1e66cae5f01a5',
    //   engine: 'stable-diffusion-cpp',
    //   task: 'image-to-image',
    //   prepare: 'blocking',
    // },
    // 'sd-3-medium': {
    //   url: 'https://huggingface.co/second-state/stable-diffusion-3-medium-GGUF/blob/main/sd3-medium-Q8_0.gguf',
    //   sha256: '7e34dfeb71f8cdbc8338677b63a444897cf4c5692ab4c1d98f04cbba6751885a',
    //   engine: 'stable-diffusion-cpp',
    //   task: 'text-to-image',
    //   prepare: 'async',
    // },
    // 'sd-1.5': {
    //   url: 'https://huggingface.co/second-state/stable-diffusion-v1-5-GGUF/blob/main/stable-diffusion-v1-5-pruned-emaonly-f32.gguf',
    //   sha256: '52c7ca39d8d48d6f44fa4ff2c44569f3c924d92311108cb38492958350d48ff8',
    //   engine: 'stable-diffusion-cpp',
    //   task: 'text-to-image',
    //   prepare: 'async',
    // },
    // 'llama-3.2-3b': {
    //   url: 'https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/blob/main/Llama-3.2-3B-Instruct-Q6_K_L.gguf',
    //   sha256:
    //     'c542b14ec07b8b3cb8d777e1a68ee5aabb964167719466d4c685c29fcfd04900',
    //   engine: 'node-llama-cpp',
    //   task: 'text-completion',
    //   prepare: 'blocking',
    // },
  },
}
298
+
299
/**
 * Starts a `ModelHTTPServer` configured with `serverOptions` and prints the
 * URL it is listening on.
 */
async function main() {
  const httpModelServer = new ModelHTTPServer(serverOptions)
  await httpModelServer.start()
  const bound = httpModelServer.httpServer.address() as AddressInfo
  // An empty address or the IPv6 wildcard `::` is displayed as localhost.
  const isUnspecified = bound.address === '' || bound.address === '::'
  const url = formatURL({
    protocol: 'http',
    hostname: isUnspecified ? 'localhost' : bound.address,
    port: bound.port,
    pathname: '/',
  })
  console.log(`Server listening at ${url}`)
}
312
+
313
// Startup failed: print the error and exit with a non-zero status code.
const exitOnStartupFailure = (err: Error) => {
  console.error(err)
  process.exit(1)
}

// Log any promise rejection that was left unhandled.
const reportUnhandledRejection = (reason: unknown) => {
  console.error('Unhandled rejection:', reason)
}

main().catch(exitOnStartupFailure)

process.on('unhandledRejection', reportUnhandledRejection)