inference-server 1.0.0-beta.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227):
  1. package/README.md +216 -0
  2. package/dist/api/openai/enums.d.ts +4 -0
  3. package/dist/api/openai/enums.js +17 -0
  4. package/dist/api/openai/enums.js.map +1 -0
  5. package/dist/api/openai/handlers/chat.d.ts +3 -0
  6. package/dist/api/openai/handlers/chat.js +358 -0
  7. package/dist/api/openai/handlers/chat.js.map +1 -0
  8. package/dist/api/openai/handlers/completions.d.ts +3 -0
  9. package/dist/api/openai/handlers/completions.js +169 -0
  10. package/dist/api/openai/handlers/completions.js.map +1 -0
  11. package/dist/api/openai/handlers/embeddings.d.ts +3 -0
  12. package/dist/api/openai/handlers/embeddings.js +74 -0
  13. package/dist/api/openai/handlers/embeddings.js.map +1 -0
  14. package/dist/api/openai/handlers/images.d.ts +0 -0
  15. package/dist/api/openai/handlers/images.js +4 -0
  16. package/dist/api/openai/handlers/images.js.map +1 -0
  17. package/dist/api/openai/handlers/models.d.ts +3 -0
  18. package/dist/api/openai/handlers/models.js +23 -0
  19. package/dist/api/openai/handlers/models.js.map +1 -0
  20. package/dist/api/openai/handlers/transcription.d.ts +0 -0
  21. package/dist/api/openai/handlers/transcription.js +4 -0
  22. package/dist/api/openai/handlers/transcription.js.map +1 -0
  23. package/dist/api/openai/index.d.ts +7 -0
  24. package/dist/api/openai/index.js +14 -0
  25. package/dist/api/openai/index.js.map +1 -0
  26. package/dist/api/parseJSONRequestBody.d.ts +2 -0
  27. package/dist/api/parseJSONRequestBody.js +24 -0
  28. package/dist/api/parseJSONRequestBody.js.map +1 -0
  29. package/dist/api/v1/index.d.ts +2 -0
  30. package/dist/api/v1/index.js +29 -0
  31. package/dist/api/v1/index.js.map +1 -0
  32. package/dist/cli.d.ts +1 -0
  33. package/dist/cli.js +10 -0
  34. package/dist/cli.js.map +1 -0
  35. package/dist/engines/gpt4all/engine.d.ts +34 -0
  36. package/dist/engines/gpt4all/engine.js +357 -0
  37. package/dist/engines/gpt4all/engine.js.map +1 -0
  38. package/dist/engines/gpt4all/util.d.ts +3 -0
  39. package/dist/engines/gpt4all/util.js +29 -0
  40. package/dist/engines/gpt4all/util.js.map +1 -0
  41. package/dist/engines/index.d.ts +19 -0
  42. package/dist/engines/index.js +21 -0
  43. package/dist/engines/index.js.map +1 -0
  44. package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
  45. package/dist/engines/node-llama-cpp/engine.js +666 -0
  46. package/dist/engines/node-llama-cpp/engine.js.map +1 -0
  47. package/dist/engines/node-llama-cpp/types.d.ts +13 -0
  48. package/dist/engines/node-llama-cpp/types.js +2 -0
  49. package/dist/engines/node-llama-cpp/types.js.map +1 -0
  50. package/dist/engines/node-llama-cpp/util.d.ts +15 -0
  51. package/dist/engines/node-llama-cpp/util.js +84 -0
  52. package/dist/engines/node-llama-cpp/util.js.map +1 -0
  53. package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
  54. package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
  55. package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
  56. package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
  57. package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
  58. package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
  59. package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
  60. package/dist/engines/stable-diffusion-cpp/types.js +2 -0
  61. package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
  62. package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
  63. package/dist/engines/stable-diffusion-cpp/util.js +55 -0
  64. package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
  65. package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
  66. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
  67. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
  68. package/dist/engines/transformers-js/engine.d.ts +37 -0
  69. package/dist/engines/transformers-js/engine.js +538 -0
  70. package/dist/engines/transformers-js/engine.js.map +1 -0
  71. package/dist/engines/transformers-js/types.d.ts +7 -0
  72. package/dist/engines/transformers-js/types.js +2 -0
  73. package/dist/engines/transformers-js/types.js.map +1 -0
  74. package/dist/engines/transformers-js/util.d.ts +7 -0
  75. package/dist/engines/transformers-js/util.js +36 -0
  76. package/dist/engines/transformers-js/util.js.map +1 -0
  77. package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
  78. package/dist/engines/transformers-js/validateModelFiles.js +133 -0
  79. package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
  80. package/dist/experiments/ChatWithVision.d.ts +11 -0
  81. package/dist/experiments/ChatWithVision.js +91 -0
  82. package/dist/experiments/ChatWithVision.js.map +1 -0
  83. package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
  84. package/dist/experiments/StableDiffPromptGenerator.js +4 -0
  85. package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
  86. package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
  87. package/dist/experiments/VoiceFunctionCall.js +51 -0
  88. package/dist/experiments/VoiceFunctionCall.js.map +1 -0
  89. package/dist/http.d.ts +19 -0
  90. package/dist/http.js +54 -0
  91. package/dist/http.js.map +1 -0
  92. package/dist/index.d.ts +7 -0
  93. package/dist/index.js +8 -0
  94. package/dist/index.js.map +1 -0
  95. package/dist/instance.d.ts +88 -0
  96. package/dist/instance.js +594 -0
  97. package/dist/instance.js.map +1 -0
  98. package/dist/lib/acquireFileLock.d.ts +7 -0
  99. package/dist/lib/acquireFileLock.js +38 -0
  100. package/dist/lib/acquireFileLock.js.map +1 -0
  101. package/dist/lib/calculateContextIdentity.d.ts +7 -0
  102. package/dist/lib/calculateContextIdentity.js +39 -0
  103. package/dist/lib/calculateContextIdentity.js.map +1 -0
  104. package/dist/lib/calculateFileChecksum.d.ts +1 -0
  105. package/dist/lib/calculateFileChecksum.js +16 -0
  106. package/dist/lib/calculateFileChecksum.js.map +1 -0
  107. package/dist/lib/copyDirectory.d.ts +6 -0
  108. package/dist/lib/copyDirectory.js +27 -0
  109. package/dist/lib/copyDirectory.js.map +1 -0
  110. package/dist/lib/decodeAudio.d.ts +1 -0
  111. package/dist/lib/decodeAudio.js +26 -0
  112. package/dist/lib/decodeAudio.js.map +1 -0
  113. package/dist/lib/downloadModelFile.d.ts +10 -0
  114. package/dist/lib/downloadModelFile.js +58 -0
  115. package/dist/lib/downloadModelFile.js.map +1 -0
  116. package/dist/lib/flattenMessageTextContent.d.ts +2 -0
  117. package/dist/lib/flattenMessageTextContent.js +11 -0
  118. package/dist/lib/flattenMessageTextContent.js.map +1 -0
  119. package/dist/lib/getCacheDirPath.d.ts +12 -0
  120. package/dist/lib/getCacheDirPath.js +31 -0
  121. package/dist/lib/getCacheDirPath.js.map +1 -0
  122. package/dist/lib/loadImage.d.ts +12 -0
  123. package/dist/lib/loadImage.js +30 -0
  124. package/dist/lib/loadImage.js.map +1 -0
  125. package/dist/lib/logger.d.ts +12 -0
  126. package/dist/lib/logger.js +98 -0
  127. package/dist/lib/logger.js.map +1 -0
  128. package/dist/lib/math.d.ts +7 -0
  129. package/dist/lib/math.js +30 -0
  130. package/dist/lib/math.js.map +1 -0
  131. package/dist/lib/resolveModelFileLocation.d.ts +15 -0
  132. package/dist/lib/resolveModelFileLocation.js +41 -0
  133. package/dist/lib/resolveModelFileLocation.js.map +1 -0
  134. package/dist/lib/util.d.ts +7 -0
  135. package/dist/lib/util.js +61 -0
  136. package/dist/lib/util.js.map +1 -0
  137. package/dist/lib/validateModelFile.d.ts +9 -0
  138. package/dist/lib/validateModelFile.js +62 -0
  139. package/dist/lib/validateModelFile.js.map +1 -0
  140. package/dist/lib/validateModelOptions.d.ts +3 -0
  141. package/dist/lib/validateModelOptions.js +23 -0
  142. package/dist/lib/validateModelOptions.js.map +1 -0
  143. package/dist/pool.d.ts +61 -0
  144. package/dist/pool.js +512 -0
  145. package/dist/pool.js.map +1 -0
  146. package/dist/server.d.ts +59 -0
  147. package/dist/server.js +221 -0
  148. package/dist/server.js.map +1 -0
  149. package/dist/standalone.d.ts +1 -0
  150. package/dist/standalone.js +306 -0
  151. package/dist/standalone.js.map +1 -0
  152. package/dist/store.d.ts +60 -0
  153. package/dist/store.js +203 -0
  154. package/dist/store.js.map +1 -0
  155. package/dist/types/completions.d.ts +57 -0
  156. package/dist/types/completions.js +2 -0
  157. package/dist/types/completions.js.map +1 -0
  158. package/dist/types/index.d.ts +326 -0
  159. package/dist/types/index.js +2 -0
  160. package/dist/types/index.js.map +1 -0
  161. package/docs/engines.md +28 -0
  162. package/docs/gpu.md +72 -0
  163. package/docs/http-api.md +147 -0
  164. package/examples/all-options.js +108 -0
  165. package/examples/chat-cli.js +56 -0
  166. package/examples/chat-server.js +65 -0
  167. package/examples/concurrency.js +70 -0
  168. package/examples/express.js +70 -0
  169. package/examples/pool.js +91 -0
  170. package/package.json +113 -0
  171. package/src/api/openai/enums.ts +20 -0
  172. package/src/api/openai/handlers/chat.ts +408 -0
  173. package/src/api/openai/handlers/completions.ts +196 -0
  174. package/src/api/openai/handlers/embeddings.ts +92 -0
  175. package/src/api/openai/handlers/images.ts +3 -0
  176. package/src/api/openai/handlers/models.ts +33 -0
  177. package/src/api/openai/handlers/transcription.ts +2 -0
  178. package/src/api/openai/index.ts +16 -0
  179. package/src/api/parseJSONRequestBody.ts +26 -0
  180. package/src/api/v1/DRAFT.md +16 -0
  181. package/src/api/v1/index.ts +37 -0
  182. package/src/cli.ts +9 -0
  183. package/src/engines/gpt4all/engine.ts +441 -0
  184. package/src/engines/gpt4all/util.ts +31 -0
  185. package/src/engines/index.ts +28 -0
  186. package/src/engines/node-llama-cpp/engine.ts +811 -0
  187. package/src/engines/node-llama-cpp/types.ts +17 -0
  188. package/src/engines/node-llama-cpp/util.ts +126 -0
  189. package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
  190. package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
  191. package/src/engines/stable-diffusion-cpp/types.ts +54 -0
  192. package/src/engines/stable-diffusion-cpp/util.ts +58 -0
  193. package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
  194. package/src/engines/transformers-js/engine.ts +659 -0
  195. package/src/engines/transformers-js/types.ts +25 -0
  196. package/src/engines/transformers-js/util.ts +40 -0
  197. package/src/engines/transformers-js/validateModelFiles.ts +168 -0
  198. package/src/experiments/ChatWithVision.ts +103 -0
  199. package/src/experiments/StableDiffPromptGenerator.ts +2 -0
  200. package/src/experiments/VoiceFunctionCall.ts +71 -0
  201. package/src/http.ts +72 -0
  202. package/src/index.ts +7 -0
  203. package/src/instance.ts +723 -0
  204. package/src/lib/acquireFileLock.ts +38 -0
  205. package/src/lib/calculateContextIdentity.ts +53 -0
  206. package/src/lib/calculateFileChecksum.ts +18 -0
  207. package/src/lib/copyDirectory.ts +29 -0
  208. package/src/lib/decodeAudio.ts +39 -0
  209. package/src/lib/downloadModelFile.ts +70 -0
  210. package/src/lib/flattenMessageTextContent.ts +19 -0
  211. package/src/lib/getCacheDirPath.ts +34 -0
  212. package/src/lib/loadImage.ts +46 -0
  213. package/src/lib/logger.ts +112 -0
  214. package/src/lib/math.ts +31 -0
  215. package/src/lib/resolveModelFileLocation.ts +49 -0
  216. package/src/lib/util.ts +75 -0
  217. package/src/lib/validateModelFile.ts +71 -0
  218. package/src/lib/validateModelOptions.ts +31 -0
  219. package/src/pool.ts +651 -0
  220. package/src/server.ts +270 -0
  221. package/src/standalone.ts +320 -0
  222. package/src/store.ts +278 -0
  223. package/src/types/completions.ts +86 -0
  224. package/src/types/index.ts +488 -0
  225. package/tsconfig.json +29 -0
  226. package/tsconfig.release.json +11 -0
  227. package/vitest.config.ts +18 -0
@@ -0,0 +1,723 @@
1
+ import crypto from 'node:crypto'
2
+ import { customAlphabet } from 'nanoid'
3
+ import {
4
+ TextCompletionRequest,
5
+ ChatCompletionRequest,
6
+ ModelEngine,
7
+ ModelConfig,
8
+ ModelInstanceRequest,
9
+ CompletionProcessingOptions,
10
+ EmbeddingRequest,
11
+ ImageToTextRequest,
12
+ ProcessingOptions,
13
+ SpeechToTextRequest,
14
+ SpeechToTextProcessingOptions,
15
+ EngineChatCompletionResult,
16
+ EngineTextCompletionResult,
17
+ TextToImageRequest,
18
+ ImageToImageRequest,
19
+ } from '#package/types/index.js'
20
+ import { calculateContextIdentity } from '#package/lib/calculateContextIdentity.js'
21
+ import {
22
+ LogLevels,
23
+ Logger,
24
+ createLogger,
25
+ withLogMeta,
26
+ } from '#package/lib/logger.js'
27
+ import { elapsedMillis, mergeAbortSignals } from '#package/lib/util.js'
28
+
29
// Characters that instance and task id suffixes are drawn from (ASCII letters and digits).
const idAlphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'

// Random id generator over idAlphabet, configured with a default length of 8.
const generateId = customAlphabet(idAlphabet, 8)

/** Lifecycle states a ModelInstance moves through. */
type ModelInstanceStatus =
  | 'idle'
  | 'busy'
  | 'error'
  | 'loading'
  | 'preparing'

/**
 * Constructor options for ModelInstance: the model configuration plus
 * instance-level settings.
 */
interface ModelInstanceOptions extends ModelConfig {
  /** Whether this instance is allowed to use the GPU (logged as `hasGpuLock`). */
  gpu: boolean
  /** Logger to derive the instance logger from; a warn-level logger is created when omitted. */
  log?: Logger
}
39
+
40
/**
 * One instance of a model, bound to the engine that creates, runs and
 * disposes it.
 *
 * The class tracks the instance status, the identity of the context it
 * currently holds (used to match follow-up requests), and the request that
 * currently has it locked. Every `process*Task` method returns a task handle
 * of the shape `{ id, model, createdAt, cancel, result }`, where `result` is
 * a promise for the engine's output and `cancel` aborts the task.
 */
export class ModelInstance<TEngineState = unknown> {
  id: string
  status: ModelInstanceStatus
  modelId: string
  config: ModelConfig
  fingerprint: string
  createdAt: Date
  lastUsed: number = 0
  gpu: boolean
  ttl: number
  log: Logger

  private engine: ModelEngine
  private contextIdentity?: string
  private needsContextReset: boolean = false
  private engineInstance?: TEngineState | unknown
  private currentRequest?: ModelInstanceRequest | null
  private shutdownController: AbortController

  /**
   * @param engine Engine implementation used to create, run and dispose the instance.
   * @param options Model configuration plus `log` (optional logger) and `gpu`
   *   (whether this instance may use the GPU). `ttl` defaults to 300 when unset.
   */
  constructor(
    engine: ModelEngine,
    { log, gpu, ...options }: ModelInstanceOptions,
  ) {
    // modelId must be assigned first: the instance id is derived from it.
    this.modelId = options.id
    this.id = this.generateInstanceId()
    this.engine = engine
    this.config = options
    this.gpu = gpu
    this.ttl = options.ttl ?? 300
    this.status = 'preparing'
    this.createdAt = new Date()
    this.log = withLogMeta(log ?? createLogger(LogLevels.warn), {
      instance: this.id,
    })
    this.shutdownController = new AbortController()

    // TODO to implement this properly we should only include what changes the "behavior" of the model
    this.fingerprint = crypto
      .createHash('sha1')
      .update(JSON.stringify(options))
      .digest('hex')
    this.log(LogLevels.info, 'Initializing new instance', {
      model: this.modelId,
      engine: this.config.engine,
      device: this.config.device,
      hasGpuLock: this.gpu,
    })
  }

  /** Builds the instance id: `<modelId>:<8 random characters>`. */
  private generateInstanceId() {
    return this.modelId + ':' + generateId(8)
  }

  /** Builds a task id: `<instanceId>-<8 random characters>`. */
  private generateTaskId() {
    return this.id + '-' + generateId(8)
  }

  /**
   * Creates the engine instance and moves the status from `loading` to `idle`.
   *
   * The load is aborted when either `signal` or the instance's shutdown
   * signal (triggered by `dispose()`) fires. On failure the status becomes
   * `error` and the error is rethrown.
   *
   * @param signal Optional caller-provided abort signal.
   * @throws Error if the instance is already loaded.
   */
  async load(signal?: AbortSignal) {
    if (this.engineInstance) {
      throw new Error('Instance is already loaded')
    }
    this.status = 'loading'
    const loadBegin = process.hrtime.bigint()
    const abortSignal = mergeAbortSignals([
      this.shutdownController.signal,
      signal,
    ])
    try {
      this.engineInstance = await this.engine.createInstance(
        {
          log: withLogMeta(this.log, {
            instance: this.id,
          }),
          config: {
            ...this.config,
            device: {
              ...this.config.device,
              // Without the GPU flag the engine is told not to use the GPU,
              // regardless of what the config asks for.
              gpu: this.gpu ? this.config.device?.gpu : false,
            },
          },
        },
        abortSignal,
      )
      this.status = 'idle'
      if (this.config.initialMessages?.length) {
        this.contextIdentity = calculateContextIdentity({
          messages: this.config.initialMessages,
        })
      }
      // A configured prefix takes precedence over initialMessages.
      if (this.config.prefix) {
        this.contextIdentity = calculateContextIdentity({
          text: this.config.prefix,
        })
      }
      this.log(LogLevels.debug, 'Instance loaded', {
        elapsed: elapsedMillis(loadBegin),
      })
    } catch (error: any) {
      this.status = 'error'
      this.log(LogLevels.error, 'Failed to load instance:', {
        error,
      })
      throw error
    }
  }

  /**
   * Shuts the instance down.
   *
   * The shutdown signal is aborted unconditionally so that an in-flight
   * `load()` (which has not assigned the engine instance yet) and any running
   * task observing the signal are cancelled as well. The engine instance, if
   * one exists, is then handed to the engine for disposal.
   *
   * @returns The engine's disposal result, or a resolved promise when nothing was loaded.
   */
  dispose() {
    this.status = 'busy'
    this.shutdownController.abort()
    if (!this.engineInstance) {
      return Promise.resolve()
    }
    return this.engine.disposeInstance(this.engineInstance)
  }

  /**
   * Reserves the instance for `request` and marks it busy.
   *
   * @throws Error if the instance is not idle.
   */
  lock(request: ModelInstanceRequest) {
    if (this.status !== 'idle') {
      throw new Error(`Cannot lock: Instance ${this.id} is not idle`)
    }
    this.currentRequest = request
    this.status = 'busy'
  }

  /** Releases the instance: status back to idle, current request cleared. */
  unlock() {
    this.status = 'idle'
    this.currentRequest = null
  }

  /** Flags the instance so the next chat/text completion resets its context. */
  resetContext() {
    this.needsContextReset = true
  }

  /** Returns the identity of the context currently held, if any. */
  getContextStateIdentity() {
    return this.contextIdentity
  }

  /** Whether a context identity is currently recorded. */
  hasContextState() {
    return this.contextIdentity !== undefined
  }

  /**
   * Checks whether `request` continues the context this instance holds.
   *
   * The incoming identity is computed from the request's messages (with the
   * last message dropped) or from its prompt, and matches when it starts
   * with the stored identity (which includes being equal to it).
   */
  matchesContextState(request: ModelInstanceRequest) {
    if (!this.contextIdentity) {
      return false
    }
    let incomingContextIdentity = ''
    if ('messages' in request && request.messages?.length) {
      incomingContextIdentity = calculateContextIdentity({
        messages: request.messages,
        dropLastMessage: true,
      })
    } else if ('prompt' in request && request.prompt) {
      incomingContextIdentity = calculateContextIdentity({
        text: request.prompt,
      })
    }

    if (!incomingContextIdentity) {
      return false
    }

    return incomingContextIdentity.startsWith(this.contextIdentity)
  }

  /**
   * Whether this instance can serve `request`: the model id must match, and
   * when the config demands a GPU (any truthy `device.gpu` other than
   * `'auto'`) the instance must have the GPU flag.
   */
  matchesRequirements(request: ModelInstanceRequest) {
    const requiresGpu =
      !!this.config.device?.gpu && this.config.device?.gpu !== 'auto'
    const modelMatches = this.modelId === request.model
    const gpuMatches = requiresGpu ? this.gpu : true
    return modelMatches && gpuMatches
  }

  /**
   * Builds the abort plumbing for one task.
   *
   * The combined `signal` fires on explicit cancel, on instance shutdown, on
   * the caller's `args.signal`, and (when `args.timeout` is truthy) once the
   * timeout elapses. `timeoutSignal` and `cancelSignal` are exposed
   * separately so the outcome can be attributed to the right cause.
   *
   * @returns `cancel` (abort and clear the timer), `complete` (clear the timer
   *   only) and the three signals.
   */
  private createTaskController(args: {
    timeout?: number
    signal?: AbortSignal
  }) {
    const cancelController = new AbortController()
    const timeoutController = new AbortController()
    const abortSignals = [
      cancelController.signal,
      this.shutdownController.signal,
    ]
    if (args.signal) {
      abortSignals.push(args.signal)
    }
    let timeout: NodeJS.Timeout | undefined
    if (args.timeout) {
      timeout = setTimeout(() => {
        timeoutController.abort('timeout')
      }, args.timeout)
      abortSignals.push(timeoutController.signal)
    }
    return {
      cancel: () => {
        cancelController.abort('cancel')
        if (timeout) {
          clearTimeout(timeout)
        }
      },
      complete: () => {
        if (timeout) {
          clearTimeout(timeout)
        }
      },
      signal: mergeAbortSignals(abortSignals),
      timeoutSignal: timeoutController.signal,
      cancelSignal: cancelController.signal,
    }
  }

  /**
   * Shared task setup: stamps `lastUsed`, allocates a task id, derives a
   * task-scoped logger and creates the abort controller.
   *
   * The request sequence is only log metadata, so it is read defensively
   * instead of asserting that the instance is locked.
   */
  private beginTask(options?: { timeout?: number; signal?: AbortSignal }) {
    this.lastUsed = Date.now()
    const id = this.generateTaskId()
    const log = withLogMeta(this.log, {
      sequence: this.currentRequest?.sequence,
      task: id,
    })
    const controller = this.createTaskController({
      timeout: options?.timeout,
      signal: options?.signal,
    })
    return { id, log, controller }
  }

  /**
   * Consumes the pending context-reset flag. When it was set, the stored
   * context identity is dropped and `true` is returned so the engine can be
   * told to reset.
   */
  private consumeContextReset() {
    if (!this.needsContextReset) {
      return false
    }
    this.contextIdentity = undefined
    this.needsContextReset = false
    return true
  }

  /**
   * Attributes an aborted task to its cause. Timeout wins over cancel;
   * `undefined` means neither of the two signals fired.
   */
  private getAbortReason(controller: {
    timeoutSignal: AbortSignal
    cancelSignal: AbortSignal
  }) {
    if (controller.timeoutSignal.aborted) {
      return 'timeout' as const
    }
    if (controller.cancelSignal.aborted) {
      return 'cancel' as const
    }
    return undefined
  }

  /**
   * Wraps an already started engine promise with the logging and cleanup
   * shared by all non-completion tasks and builds the task handle.
   *
   * The timeout timer is cleared in `finally`, so it is released on failure
   * as well as on success.
   *
   * @param label Task name used in the log messages (e.g. `Embedding`).
   * @param task Result of `beginTask()`.
   * @param startedAt `process.hrtime.bigint()` taken right before the engine call.
   * @param pending The engine's promise.
   */
  private trackTask<TResult>(
    label: string,
    task: {
      id: string
      log: Logger
      controller: {
        cancel: () => void
        complete: () => void
        timeoutSignal: AbortSignal
      }
    },
    startedAt: bigint,
    pending: Promise<TResult>,
  ) {
    const { id, log, controller } = task
    const result = pending
      .then((value) => {
        const timeElapsed = elapsedMillis(startedAt)
        if (controller.timeoutSignal.aborted) {
          log(LogLevels.warn, `${label} task timed out`)
        }
        log(LogLevels.verbose, `${label} task done`, {
          elapsed: timeElapsed,
        })
        return value
      })
      .catch((error) => {
        log(LogLevels.error, 'Task failed - ', {
          error,
        })
        throw error
      })
      .finally(() => {
        controller.complete()
      })

    return {
      id,
      model: this.modelId,
      createdAt: new Date(),
      cancel: controller.cancel,
      result,
    }
  }

  /**
   * Starts a chat completion on the engine.
   *
   * On success the stored context identity is updated to the request
   * messages plus the generated message. If the engine rejects with an
   * `AbortError`, `result` resolves to an empty assistant message whose
   * `finishReason` is `timeout`, `cancel` or `abort`; other errors are
   * logged and rethrown.
   *
   * @throws Error if the engine has no chat completion support or `request.messages` is empty.
   */
  processChatCompletionTask(
    request: ChatCompletionRequest,
    options?: CompletionProcessingOptions,
  ) {
    if (!('processChatCompletionTask' in this.engine)) {
      throw new Error(
        `Engine "${this.config.engine}" does not implement chat completions`,
      )
    }
    if (!request.messages?.length) {
      throw new Error('Messages are required for chat completions')
    }
    const { id, log: taskLogger, controller } = this.beginTask(options)
    // checking if this instance has been flagged for reset
    const resetContext = this.consumeContextReset()
    // start completion processing
    taskLogger(LogLevels.verbose, 'Creating chat completion')
    const taskBegin = process.hrtime.bigint()
    const completionPromise = this.engine.processChatCompletionTask!(
      {
        request,
        resetContext,
        config: this.config,
        log: taskLogger,
        onChunk: options?.onChunk,
      },
      this.engineInstance,
      controller.signal,
    )
      .then((result) => {
        // The engine may resolve with a partial result after an abort.
        const abortReason = this.getAbortReason(controller)
        if (abortReason) {
          result.finishReason = abortReason
        }
        this.contextIdentity = calculateContextIdentity({
          messages: [...request.messages, result.message],
        })
        return result
      })
      .catch((error) => {
        if (error?.name === 'AbortError') {
          const emptyResponse: EngineChatCompletionResult = {
            finishReason: this.getAbortReason(controller) ?? 'abort',
            message: {
              role: 'assistant',
              content: '',
            },
            promptTokens: 0,
            completionTokens: 0,
            contextTokens: 0,
          }
          return emptyResponse
        }
        taskLogger(LogLevels.error, 'Error while processing task - ', {
          error,
        })
        throw error
      })
      .finally(() => {
        const elapsedTime = elapsedMillis(taskBegin)
        controller.complete()
        taskLogger(LogLevels.info, 'Chat completion task done', {
          elapsed: elapsedTime,
        })
      })
    return {
      id,
      model: this.modelId,
      createdAt: new Date(),
      result: completionPromise,
      cancel: controller.cancel,
    }
  }

  /**
   * Starts a text completion on the engine.
   *
   * On success the stored context identity is updated to the prompt plus the
   * generated text. If the engine rejects with an `AbortError`, `result`
   * resolves to an empty completion whose `finishReason` is `timeout`,
   * `cancel` or `abort`; other errors are logged and rethrown.
   *
   * @throws Error if the engine has no text completion support or `request.prompt` is empty.
   */
  processTextCompletionTask(
    request: TextCompletionRequest,
    options?: CompletionProcessingOptions,
  ) {
    if (!('processTextCompletionTask' in this.engine)) {
      throw new Error(
        `Engine "${this.config.engine}" does not implement text completion`,
      )
    }
    if (!request.prompt) {
      throw new Error('Prompt is required for text completion')
    }
    const { id, log: taskLogger, controller } = this.beginTask(options)
    taskLogger(LogLevels.verbose, 'Creating text completion task')
    // pass on resetContext if this instance has been flagged for reset
    const resetContext = this.consumeContextReset()
    const taskBegin = process.hrtime.bigint()
    const completionPromise = this.engine.processTextCompletionTask!(
      {
        request,
        config: this.config,
        resetContext,
        log: taskLogger,
        onChunk: options?.onChunk,
      },
      this.engineInstance,
      controller.signal,
    )
      .then((result) => {
        // The engine may resolve with a partial result after an abort.
        const abortReason = this.getAbortReason(controller)
        if (abortReason) {
          result.finishReason = abortReason
        }
        this.contextIdentity = calculateContextIdentity({
          text: request.prompt + result.text,
        })
        return result
      })
      .catch((error) => {
        if (error?.name === 'AbortError') {
          const emptyResponse: EngineTextCompletionResult = {
            finishReason: this.getAbortReason(controller) ?? 'abort',
            text: '',
            promptTokens: 0,
            completionTokens: 0,
            contextTokens: 0,
          }
          return emptyResponse
        }
        taskLogger(LogLevels.error, 'Error while processing task - ', {
          error,
        })
        throw error
      })
      .finally(() => {
        const elapsedTime = elapsedMillis(taskBegin)
        controller.complete()
        taskLogger(LogLevels.info, 'Text completion task done', {
          elapsed: elapsedTime,
        })
      })
    return {
      id,
      model: this.modelId,
      createdAt: new Date(),
      cancel: controller.cancel,
      result: completionPromise,
    }
  }

  /**
   * Starts an embedding task on the engine.
   *
   * @throws Error if the engine has no embedding support or `request.input` is empty.
   */
  processEmbeddingTask(request: EmbeddingRequest, options?: ProcessingOptions) {
    if (!('processEmbeddingTask' in this.engine)) {
      throw new Error(
        `Engine "${this.config.engine}" does not implement embedding`,
      )
    }
    if (!request.input) {
      throw new Error('Input is required for embedding')
    }
    const task = this.beginTask(options)
    task.log(LogLevels.verbose, 'Creating embedding task')
    const taskBegin = process.hrtime.bigint()
    const pending = this.engine.processEmbeddingTask!(
      {
        request,
        config: this.config,
        log: task.log,
      },
      this.engineInstance,
      task.controller.signal,
    )
    return this.trackTask('Embedding', task, taskBegin, pending)
  }

  /**
   * Starts an image-to-text task on the engine.
   *
   * @throws Error if the engine has no image-to-text support.
   */
  processImageToTextTask(
    request: ImageToTextRequest,
    options?: ProcessingOptions,
  ) {
    if (!('processImageToTextTask' in this.engine)) {
      throw new Error(
        `Engine "${this.config.engine}" does not implement image to text`,
      )
    }
    const task = this.beginTask(options)
    const taskBegin = process.hrtime.bigint()
    const pending = this.engine.processImageToTextTask!(
      {
        request,
        config: this.config,
        log: task.log,
      },
      this.engineInstance,
      task.controller.signal,
    )
    return this.trackTask('ImageToText', task, taskBegin, pending)
  }

  /**
   * Starts an image-to-image task on the engine.
   *
   * @throws Error if the engine has no image-to-image support.
   */
  processImageToImageTask(
    request: ImageToImageRequest,
    options?: ProcessingOptions,
  ) {
    if (!('processImageToImageTask' in this.engine)) {
      throw new Error(
        `Engine "${this.config.engine}" does not implement image to image`,
      )
    }
    const task = this.beginTask(options)
    const taskBegin = process.hrtime.bigint()
    const pending = this.engine.processImageToImageTask!(
      {
        request,
        config: this.config,
        log: task.log,
      },
      this.engineInstance,
      task.controller.signal,
    )
    return this.trackTask('ImageToImage', task, taskBegin, pending)
  }

  /**
   * Starts a speech-to-text task on the engine.
   *
   * @throws Error if the engine has no speech-to-text support.
   */
  processSpeechToTextTask(
    request: SpeechToTextRequest,
    options?: SpeechToTextProcessingOptions,
  ) {
    if (!('processSpeechToTextTask' in this.engine)) {
      throw new Error(
        `Engine "${this.config.engine}" does not implement speech to text`,
      )
    }
    const task = this.beginTask(options)
    const taskBegin = process.hrtime.bigint()
    const pending = this.engine.processSpeechToTextTask!(
      {
        request,
        config: this.config,
        log: task.log,
      },
      this.engineInstance,
      task.controller.signal,
    )
    return this.trackTask('SpeechToText', task, taskBegin, pending)
  }

  /**
   * Starts a text-to-image task on the engine.
   *
   * @throws Error if the engine has no text-to-image support.
   */
  processTextToImageTask(
    request: TextToImageRequest,
    options?: ProcessingOptions,
  ) {
    if (!('processTextToImageTask' in this.engine)) {
      throw new Error(
        `Engine "${this.config.engine}" does not implement text to image`,
      )
    }
    const task = this.beginTask(options)
    const taskBegin = process.hrtime.bigint()
    const pending = this.engine.processTextToImageTask!(
      {
        request,
        config: this.config,
        log: task.log,
      },
      this.engineInstance,
      task.controller.signal,
    )
    return this.trackTask('TextToImage', task, taskBegin, pending)
  }
}