inference-server 1.0.0-beta.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +216 -0
- package/dist/api/openai/enums.d.ts +4 -0
- package/dist/api/openai/enums.js +17 -0
- package/dist/api/openai/enums.js.map +1 -0
- package/dist/api/openai/handlers/chat.d.ts +3 -0
- package/dist/api/openai/handlers/chat.js +358 -0
- package/dist/api/openai/handlers/chat.js.map +1 -0
- package/dist/api/openai/handlers/completions.d.ts +3 -0
- package/dist/api/openai/handlers/completions.js +169 -0
- package/dist/api/openai/handlers/completions.js.map +1 -0
- package/dist/api/openai/handlers/embeddings.d.ts +3 -0
- package/dist/api/openai/handlers/embeddings.js +74 -0
- package/dist/api/openai/handlers/embeddings.js.map +1 -0
- package/dist/api/openai/handlers/images.d.ts +0 -0
- package/dist/api/openai/handlers/images.js +4 -0
- package/dist/api/openai/handlers/images.js.map +1 -0
- package/dist/api/openai/handlers/models.d.ts +3 -0
- package/dist/api/openai/handlers/models.js +23 -0
- package/dist/api/openai/handlers/models.js.map +1 -0
- package/dist/api/openai/handlers/transcription.d.ts +0 -0
- package/dist/api/openai/handlers/transcription.js +4 -0
- package/dist/api/openai/handlers/transcription.js.map +1 -0
- package/dist/api/openai/index.d.ts +7 -0
- package/dist/api/openai/index.js +14 -0
- package/dist/api/openai/index.js.map +1 -0
- package/dist/api/parseJSONRequestBody.d.ts +2 -0
- package/dist/api/parseJSONRequestBody.js +24 -0
- package/dist/api/parseJSONRequestBody.js.map +1 -0
- package/dist/api/v1/index.d.ts +2 -0
- package/dist/api/v1/index.js +29 -0
- package/dist/api/v1/index.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +10 -0
- package/dist/cli.js.map +1 -0
- package/dist/engines/gpt4all/engine.d.ts +34 -0
- package/dist/engines/gpt4all/engine.js +357 -0
- package/dist/engines/gpt4all/engine.js.map +1 -0
- package/dist/engines/gpt4all/util.d.ts +3 -0
- package/dist/engines/gpt4all/util.js +29 -0
- package/dist/engines/gpt4all/util.js.map +1 -0
- package/dist/engines/index.d.ts +19 -0
- package/dist/engines/index.js +21 -0
- package/dist/engines/index.js.map +1 -0
- package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
- package/dist/engines/node-llama-cpp/engine.js +666 -0
- package/dist/engines/node-llama-cpp/engine.js.map +1 -0
- package/dist/engines/node-llama-cpp/types.d.ts +13 -0
- package/dist/engines/node-llama-cpp/types.js +2 -0
- package/dist/engines/node-llama-cpp/types.js.map +1 -0
- package/dist/engines/node-llama-cpp/util.d.ts +15 -0
- package/dist/engines/node-llama-cpp/util.js +84 -0
- package/dist/engines/node-llama-cpp/util.js.map +1 -0
- package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
- package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
- package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
- package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
- package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
- package/dist/engines/stable-diffusion-cpp/types.js +2 -0
- package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
- package/dist/engines/stable-diffusion-cpp/util.js +55 -0
- package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
- package/dist/engines/transformers-js/engine.d.ts +37 -0
- package/dist/engines/transformers-js/engine.js +538 -0
- package/dist/engines/transformers-js/engine.js.map +1 -0
- package/dist/engines/transformers-js/types.d.ts +7 -0
- package/dist/engines/transformers-js/types.js +2 -0
- package/dist/engines/transformers-js/types.js.map +1 -0
- package/dist/engines/transformers-js/util.d.ts +7 -0
- package/dist/engines/transformers-js/util.js +36 -0
- package/dist/engines/transformers-js/util.js.map +1 -0
- package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
- package/dist/engines/transformers-js/validateModelFiles.js +133 -0
- package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
- package/dist/experiments/ChatWithVision.d.ts +11 -0
- package/dist/experiments/ChatWithVision.js +91 -0
- package/dist/experiments/ChatWithVision.js.map +1 -0
- package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
- package/dist/experiments/StableDiffPromptGenerator.js +4 -0
- package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
- package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
- package/dist/experiments/VoiceFunctionCall.js +51 -0
- package/dist/experiments/VoiceFunctionCall.js.map +1 -0
- package/dist/http.d.ts +19 -0
- package/dist/http.js +54 -0
- package/dist/http.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -0
- package/dist/instance.d.ts +88 -0
- package/dist/instance.js +594 -0
- package/dist/instance.js.map +1 -0
- package/dist/lib/acquireFileLock.d.ts +7 -0
- package/dist/lib/acquireFileLock.js +38 -0
- package/dist/lib/acquireFileLock.js.map +1 -0
- package/dist/lib/calculateContextIdentity.d.ts +7 -0
- package/dist/lib/calculateContextIdentity.js +39 -0
- package/dist/lib/calculateContextIdentity.js.map +1 -0
- package/dist/lib/calculateFileChecksum.d.ts +1 -0
- package/dist/lib/calculateFileChecksum.js +16 -0
- package/dist/lib/calculateFileChecksum.js.map +1 -0
- package/dist/lib/copyDirectory.d.ts +6 -0
- package/dist/lib/copyDirectory.js +27 -0
- package/dist/lib/copyDirectory.js.map +1 -0
- package/dist/lib/decodeAudio.d.ts +1 -0
- package/dist/lib/decodeAudio.js +26 -0
- package/dist/lib/decodeAudio.js.map +1 -0
- package/dist/lib/downloadModelFile.d.ts +10 -0
- package/dist/lib/downloadModelFile.js +58 -0
- package/dist/lib/downloadModelFile.js.map +1 -0
- package/dist/lib/flattenMessageTextContent.d.ts +2 -0
- package/dist/lib/flattenMessageTextContent.js +11 -0
- package/dist/lib/flattenMessageTextContent.js.map +1 -0
- package/dist/lib/getCacheDirPath.d.ts +12 -0
- package/dist/lib/getCacheDirPath.js +31 -0
- package/dist/lib/getCacheDirPath.js.map +1 -0
- package/dist/lib/loadImage.d.ts +12 -0
- package/dist/lib/loadImage.js +30 -0
- package/dist/lib/loadImage.js.map +1 -0
- package/dist/lib/logger.d.ts +12 -0
- package/dist/lib/logger.js +98 -0
- package/dist/lib/logger.js.map +1 -0
- package/dist/lib/math.d.ts +7 -0
- package/dist/lib/math.js +30 -0
- package/dist/lib/math.js.map +1 -0
- package/dist/lib/resolveModelFileLocation.d.ts +15 -0
- package/dist/lib/resolveModelFileLocation.js +41 -0
- package/dist/lib/resolveModelFileLocation.js.map +1 -0
- package/dist/lib/util.d.ts +7 -0
- package/dist/lib/util.js +61 -0
- package/dist/lib/util.js.map +1 -0
- package/dist/lib/validateModelFile.d.ts +9 -0
- package/dist/lib/validateModelFile.js +62 -0
- package/dist/lib/validateModelFile.js.map +1 -0
- package/dist/lib/validateModelOptions.d.ts +3 -0
- package/dist/lib/validateModelOptions.js +23 -0
- package/dist/lib/validateModelOptions.js.map +1 -0
- package/dist/pool.d.ts +61 -0
- package/dist/pool.js +512 -0
- package/dist/pool.js.map +1 -0
- package/dist/server.d.ts +59 -0
- package/dist/server.js +221 -0
- package/dist/server.js.map +1 -0
- package/dist/standalone.d.ts +1 -0
- package/dist/standalone.js +306 -0
- package/dist/standalone.js.map +1 -0
- package/dist/store.d.ts +60 -0
- package/dist/store.js +203 -0
- package/dist/store.js.map +1 -0
- package/dist/types/completions.d.ts +57 -0
- package/dist/types/completions.js +2 -0
- package/dist/types/completions.js.map +1 -0
- package/dist/types/index.d.ts +326 -0
- package/dist/types/index.js +2 -0
- package/dist/types/index.js.map +1 -0
- package/docs/engines.md +28 -0
- package/docs/gpu.md +72 -0
- package/docs/http-api.md +147 -0
- package/examples/all-options.js +108 -0
- package/examples/chat-cli.js +56 -0
- package/examples/chat-server.js +65 -0
- package/examples/concurrency.js +70 -0
- package/examples/express.js +70 -0
- package/examples/pool.js +91 -0
- package/package.json +113 -0
- package/src/api/openai/enums.ts +20 -0
- package/src/api/openai/handlers/chat.ts +408 -0
- package/src/api/openai/handlers/completions.ts +196 -0
- package/src/api/openai/handlers/embeddings.ts +92 -0
- package/src/api/openai/handlers/images.ts +3 -0
- package/src/api/openai/handlers/models.ts +33 -0
- package/src/api/openai/handlers/transcription.ts +2 -0
- package/src/api/openai/index.ts +16 -0
- package/src/api/parseJSONRequestBody.ts +26 -0
- package/src/api/v1/DRAFT.md +16 -0
- package/src/api/v1/index.ts +37 -0
- package/src/cli.ts +9 -0
- package/src/engines/gpt4all/engine.ts +441 -0
- package/src/engines/gpt4all/util.ts +31 -0
- package/src/engines/index.ts +28 -0
- package/src/engines/node-llama-cpp/engine.ts +811 -0
- package/src/engines/node-llama-cpp/types.ts +17 -0
- package/src/engines/node-llama-cpp/util.ts +126 -0
- package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
- package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
- package/src/engines/stable-diffusion-cpp/types.ts +54 -0
- package/src/engines/stable-diffusion-cpp/util.ts +58 -0
- package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
- package/src/engines/transformers-js/engine.ts +659 -0
- package/src/engines/transformers-js/types.ts +25 -0
- package/src/engines/transformers-js/util.ts +40 -0
- package/src/engines/transformers-js/validateModelFiles.ts +168 -0
- package/src/experiments/ChatWithVision.ts +103 -0
- package/src/experiments/StableDiffPromptGenerator.ts +2 -0
- package/src/experiments/VoiceFunctionCall.ts +71 -0
- package/src/http.ts +72 -0
- package/src/index.ts +7 -0
- package/src/instance.ts +723 -0
- package/src/lib/acquireFileLock.ts +38 -0
- package/src/lib/calculateContextIdentity.ts +53 -0
- package/src/lib/calculateFileChecksum.ts +18 -0
- package/src/lib/copyDirectory.ts +29 -0
- package/src/lib/decodeAudio.ts +39 -0
- package/src/lib/downloadModelFile.ts +70 -0
- package/src/lib/flattenMessageTextContent.ts +19 -0
- package/src/lib/getCacheDirPath.ts +34 -0
- package/src/lib/loadImage.ts +46 -0
- package/src/lib/logger.ts +112 -0
- package/src/lib/math.ts +31 -0
- package/src/lib/resolveModelFileLocation.ts +49 -0
- package/src/lib/util.ts +75 -0
- package/src/lib/validateModelFile.ts +71 -0
- package/src/lib/validateModelOptions.ts +31 -0
- package/src/pool.ts +651 -0
- package/src/server.ts +270 -0
- package/src/standalone.ts +320 -0
- package/src/store.ts +278 -0
- package/src/types/completions.ts +86 -0
- package/src/types/index.ts +488 -0
- package/tsconfig.json +29 -0
- package/tsconfig.release.json +11 -0
- package/vitest.config.ts +18 -0
package/src/instance.ts
ADDED
|
@@ -0,0 +1,723 @@
|
|
|
1
|
+
import crypto from 'node:crypto'
|
|
2
|
+
import { customAlphabet } from 'nanoid'
|
|
3
|
+
import {
|
|
4
|
+
TextCompletionRequest,
|
|
5
|
+
ChatCompletionRequest,
|
|
6
|
+
ModelEngine,
|
|
7
|
+
ModelConfig,
|
|
8
|
+
ModelInstanceRequest,
|
|
9
|
+
CompletionProcessingOptions,
|
|
10
|
+
EmbeddingRequest,
|
|
11
|
+
ImageToTextRequest,
|
|
12
|
+
ProcessingOptions,
|
|
13
|
+
SpeechToTextRequest,
|
|
14
|
+
SpeechToTextProcessingOptions,
|
|
15
|
+
EngineChatCompletionResult,
|
|
16
|
+
EngineTextCompletionResult,
|
|
17
|
+
TextToImageRequest,
|
|
18
|
+
ImageToImageRequest,
|
|
19
|
+
} from '#package/types/index.js'
|
|
20
|
+
import { calculateContextIdentity } from '#package/lib/calculateContextIdentity.js'
|
|
21
|
+
import {
|
|
22
|
+
LogLevels,
|
|
23
|
+
Logger,
|
|
24
|
+
createLogger,
|
|
25
|
+
withLogMeta,
|
|
26
|
+
} from '#package/lib/logger.js'
|
|
27
|
+
import { elapsedMillis, mergeAbortSignals } from '#package/lib/util.js'
|
|
28
|
+
|
|
29
|
+
// Characters allowed in generated id suffixes: ASCII letters and digits only.
const idAlphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
// Default number of characters produced per generated suffix.
const idLength = 8
// Suffix generator used for both instance ids and task ids.
const generateId = customAlphabet(idAlphabet, idLength)
|
|
32
|
+
|
|
33
|
+
// Lifecycle states of a ModelInstance. Within this file: 'preparing' is set by
// the constructor, 'loading' / 'idle' / 'error' by load(), 'busy' by lock() and
// dispose(), and 'idle' again by unlock().
type ModelInstanceStatus = 'idle' | 'busy' | 'error' | 'loading' | 'preparing'
|
|
34
|
+
|
|
35
|
+
/**
 * Constructor options for a ModelInstance: the full model configuration plus
 * two instance-level settings that are split off before the configuration is
 * stored.
 */
interface ModelInstanceOptions extends ModelConfig {
  /** Logger to attach instance metadata to; a warn-level logger is created when omitted. */
  log?: Logger
  /** Whether this instance may use the GPU (logged as `hasGpuLock` on construction). */
  gpu: boolean
}
|
|
39
|
+
|
|
40
|
+
/**
 * One engine-backed instance of a configured model.
 *
 * The class owns the opaque engine state returned by `engine.createInstance`,
 * tracks a coarse lifecycle status, remembers an identity of the context the
 * engine last processed (so callers can check whether a follow-up request
 * continues it), and wraps every engine task with cancellation / timeout
 * handling and logging.
 */
export class ModelInstance<TEngineState = unknown> {
  /** Instance id: the model id, a colon, and a generated 8-character suffix. */
  id: string
  /** Current lifecycle status; starts as 'preparing'. */
  status: ModelInstanceStatus
  /** Id of the model configuration this instance was created from. */
  modelId: string
  /** Model configuration (constructor options without `log` and `gpu`). */
  config: ModelConfig
  /** SHA-1 hex digest of the JSON-serialized configuration. */
  fingerprint: string
  /** Time the instance object was constructed. */
  createdAt: Date
  /** `Date.now()` of the most recent task start; 0 until a task is processed. */
  lastUsed: number = 0
  /** Whether this instance is allowed to use the GPU. */
  gpu: boolean
  /** Taken from `config.ttl`, defaulting to 300 (unit is defined by the consumer of this field). */
  ttl: number
  /** Logger carrying this instance's id as metadata. */
  log: Logger

  private engine: ModelEngine
  private contextIdentity?: string
  private needsContextReset: boolean = false
  private engineInstance?: TEngineState | unknown
  private currentRequest?: ModelInstanceRequest | null
  private shutdownController: AbortController

  /**
   * @param engine Engine implementation used to create, run and dispose the model.
   * @param options Model configuration plus the optional logger and GPU flag.
   */
  constructor(
    engine: ModelEngine,
    { log, gpu, ...options }: ModelInstanceOptions,
  ) {
    // modelId must be assigned first: the instance id is derived from it.
    this.modelId = options.id
    this.id = this.generateInstanceId()
    this.engine = engine
    this.config = options
    this.gpu = gpu
    this.ttl = options.ttl ?? 300
    this.status = 'preparing'
    this.createdAt = new Date()
    this.shutdownController = new AbortController()

    const baseLogger = log ?? createLogger(LogLevels.warn)
    this.log = withLogMeta(baseLogger, {
      instance: this.id,
    })

    // TODO to implement this properly we should only include what changes the "behavior" of the model
    const hash = crypto.createHash('sha1')
    hash.update(JSON.stringify(options))
    this.fingerprint = hash.digest('hex')

    this.log(LogLevels.info, 'Initializing new instance', {
      model: this.modelId,
      engine: this.config.engine,
      device: this.config.device,
      hasGpuLock: this.gpu,
    })
  }

  /** Builds the instance id from the model id and a generated suffix. */
  private generateInstanceId() {
    return this.modelId + ':' + generateId(8)
  }

  /** Builds a task id from the instance id and a generated suffix. */
  private generateTaskId() {
    return this.id + '-' + generateId(8)
  }

  /**
   * Creates the engine instance and moves the status from 'loading' to 'idle'.
   *
   * The engine receives a signal that aborts when either `signal` aborts or
   * the instance is disposed. When the configuration has `initialMessages` or
   * a `prefix`, the context identity is seeded from them; if both are set the
   * prefix identity wins because it is assigned last.
   *
   * @param signal Optional caller-provided abort signal.
   * @throws Error if an engine instance already exists; engine errors are
   * logged, set the status to 'error', and are rethrown.
   */
  async load(signal?: AbortSignal) {
    if (this.engineInstance) {
      throw new Error('Instance is already loaded')
    }
    this.status = 'loading'
    const loadBegin = process.hrtime.bigint()
    const abortSignal = mergeAbortSignals([
      this.shutdownController.signal,
      signal,
    ])
    try {
      // Only pass the configured GPU setting through when this instance
      // actually holds GPU permission; otherwise force it off.
      const effectiveGpu = this.gpu ? this.config.device?.gpu : false
      this.engineInstance = await this.engine.createInstance(
        {
          log: withLogMeta(this.log, {
            instance: this.id,
          }),
          config: {
            ...this.config,
            device: {
              ...this.config.device,
              gpu: effectiveGpu,
            },
          },
        },
        abortSignal,
      )
      this.status = 'idle'
      if (this.config.initialMessages?.length) {
        this.contextIdentity = calculateContextIdentity({
          messages: this.config.initialMessages,
        })
      }
      if (this.config.prefix) {
        this.contextIdentity = calculateContextIdentity({
          text: this.config.prefix,
        })
      }
      this.log(LogLevels.debug, 'Instance loaded', {
        elapsed: elapsedMillis(loadBegin),
      })
    } catch (error: any) {
      this.status = 'error'
      this.log(LogLevels.error, 'Failed to load instance:', {
        error,
      })
      throw error
    }
  }

  /**
   * Marks the instance busy, aborts everything tied to the shutdown signal and
   * disposes the engine instance if one exists.
   *
   * The shutdown signal is aborted even when no engine instance has been
   * created yet, so that a `load()` still in flight is cancelled instead of
   * producing an engine instance nobody will dispose.
   *
   * @returns A resolved promise when nothing was loaded, otherwise whatever
   * `engine.disposeInstance` returns.
   */
  dispose() {
    this.status = 'busy'
    this.shutdownController.abort()
    if (!this.engineInstance) {
      return Promise.resolve()
    }
    return this.engine.disposeInstance(this.engineInstance)
  }

  /**
   * Reserves the instance for `request` and marks it busy.
   * @throws Error if the instance is not currently idle.
   */
  lock(request: ModelInstanceRequest) {
    if (this.status !== 'idle') {
      throw new Error(`Cannot lock: Instance ${this.id} is not idle`)
    }
    this.currentRequest = request
    this.status = 'busy'
  }

  /** Releases the instance: status becomes 'idle' and the current request is cleared. */
  unlock() {
    this.status = 'idle'
    this.currentRequest = null
  }

  /** Flags the instance so the next chat / text completion runs with `resetContext: true`. */
  resetContext() {
    this.needsContextReset = true
  }

  /** Returns the identity of the currently tracked context, if any. */
  getContextStateIdentity() {
    return this.contextIdentity
  }

  /** True when a context identity is being tracked. */
  hasContextState() {
    return this.contextIdentity !== undefined
  }

  /**
   * Checks whether `request` continues the context this instance holds.
   *
   * The incoming identity is computed from `request.messages` (with
   * `dropLastMessage: true`) or, failing that, from `request.prompt`. The
   * request matches when that identity starts with the stored identity
   * (which includes being equal to it).
   */
  matchesContextState(request: ModelInstanceRequest) {
    const storedIdentity = this.contextIdentity
    if (!storedIdentity) {
      return false
    }

    let incomingIdentity = ''
    if ('messages' in request && request.messages?.length) {
      incomingIdentity = calculateContextIdentity({
        messages: request.messages,
        dropLastMessage: true,
      })
    } else if ('prompt' in request && request.prompt) {
      incomingIdentity = calculateContextIdentity({
        text: request.prompt,
      })
    }

    if (!incomingIdentity) {
      return false
    }
    return incomingIdentity.startsWith(storedIdentity)
  }

  /**
   * Checks whether this instance can serve `request`: the model id must match,
   * and if the configuration demands a GPU (a truthy `device.gpu` other than
   * 'auto') the instance must have GPU permission.
   */
  matchesRequirements(request: ModelInstanceRequest) {
    const gpuSetting = this.config.device?.gpu
    const requiresGpu = !!gpuSetting && gpuSetting !== 'auto'
    const modelMatches = this.modelId === request.model
    return modelMatches && (!requiresGpu || this.gpu)
  }

  /**
   * Creates the abort plumbing for one task.
   *
   * The returned `signal` aborts on explicit cancel, on instance shutdown, on
   * the caller's `signal`, and (when a non-zero `timeout` in milliseconds is
   * given) on timeout. `timeoutSignal` and `cancelSignal` let callers tell
   * which of those happened. `complete` and `cancel` both clear the timer.
   */
  private createTaskController(args: {
    timeout?: number
    signal?: AbortSignal
  }) {
    const cancelController = new AbortController()
    const timeoutController = new AbortController()
    const abortSignals = [
      cancelController.signal,
      this.shutdownController.signal,
    ]
    if (args.signal) {
      abortSignals.push(args.signal)
    }

    let timer: NodeJS.Timeout | undefined
    if (args.timeout) {
      timer = setTimeout(() => {
        timeoutController.abort('timeout')
      }, args.timeout)
      abortSignals.push(timeoutController.signal)
    }
    const clearTimer = () => {
      if (timer) {
        clearTimeout(timer)
      }
    }

    return {
      cancel: () => {
        cancelController.abort('cancel')
        clearTimer()
      },
      complete: () => {
        clearTimer()
      },
      signal: mergeAbortSignals(abortSignals),
      timeoutSignal: timeoutController.signal,
      cancelSignal: cancelController.signal,
    }
  }

  /** Throws unless the engine object exposes `method`; `capability` completes the error message. */
  private requireEngineMethod(method: string, capability: string) {
    if (!(method in this.engine)) {
      throw new Error(
        `Engine "${this.config.engine}" does not implement ${capability}`,
      )
    }
  }

  /**
   * Shared task setup: stamps `lastUsed`, generates the task id, derives a
   * task logger and creates the task controller. Expects the instance to be
   * locked, since it reads the current request's sequence number.
   */
  private beginTask(options?: { timeout?: number; signal?: AbortSignal }) {
    this.lastUsed = Date.now()
    const id = this.generateTaskId()
    const taskLogger = withLogMeta(this.log, {
      sequence: this.currentRequest!.sequence,
      task: id,
    })
    const controller = this.createTaskController({
      timeout: options?.timeout,
      signal: options?.signal,
    })
    return { id, taskLogger, controller }
  }

  /**
   * Returns true (once) if the instance was flagged via resetContext(),
   * clearing both the flag and the tracked context identity.
   */
  private consumeContextReset() {
    if (!this.needsContextReset) {
      return false
    }
    this.contextIdentity = undefined
    this.needsContextReset = false
    return true
  }

  /** Maps an aborted controller to its finish reason; timeout takes precedence over cancel. */
  private abortReason(controller: {
    timeoutSignal: AbortSignal
    cancelSignal: AbortSignal
  }): 'timeout' | 'cancel' | undefined {
    if (controller.timeoutSignal.aborted) {
      return 'timeout'
    }
    if (controller.cancelSignal.aborted) {
      return 'cancel'
    }
    return undefined
  }

  /**
   * Attaches logging and cleanup to a non-completion engine task.
   *
   * The timeout timer is cleared in `finally`, so it is released on failure
   * as well as on success.
   */
  private settleTask<T>(
    pending: Promise<T>,
    label: string,
    taskBegin: bigint,
    taskLogger: Logger,
    controller: { complete: () => void; timeoutSignal: AbortSignal },
  ): Promise<T> {
    return pending
      .then((result) => {
        const timeElapsed = elapsedMillis(taskBegin)
        if (controller.timeoutSignal.aborted) {
          taskLogger(LogLevels.warn, `${label} task timed out`)
        }
        taskLogger(LogLevels.verbose, `${label} task done`, {
          elapsed: timeElapsed,
        })
        return result
      })
      .catch((error) => {
        taskLogger(LogLevels.error, 'Task failed - ', {
          error,
        })
        throw error
      })
      .finally(() => {
        controller.complete()
      })
  }

  /**
   * Starts a chat completion on the engine.
   *
   * On success the tracked context identity becomes the request messages plus
   * the returned message. If the engine rejects with an `AbortError`, the
   * promise resolves with an empty assistant message whose `finishReason` is
   * 'timeout', 'cancel' or 'abort'; other errors are logged and rethrown.
   *
   * @throws Error if the engine lacks chat completion support or
   * `request.messages` is empty.
   * @returns Task descriptor with `id`, `model`, `createdAt`, the `result`
   * promise and a `cancel` function.
   */
  processChatCompletionTask(
    request: ChatCompletionRequest,
    options?: CompletionProcessingOptions,
  ) {
    this.requireEngineMethod('processChatCompletionTask', 'chat completions')
    if (!request.messages?.length) {
      throw new Error('Messages are required for chat completions')
    }
    const { id, taskLogger, controller } = this.beginTask(options)
    // checking if this instance has been flagged for reset
    const resetContext = this.consumeContextReset()
    // start completion processing
    taskLogger(LogLevels.verbose, 'Creating chat completion')
    const taskBegin = process.hrtime.bigint()
    const completionPromise = this.engine.processChatCompletionTask!(
      {
        request,
        resetContext,
        config: this.config,
        log: taskLogger,
        onChunk: options?.onChunk,
      },
      this.engineInstance,
      controller.signal,
    )
      .then((result) => {
        const reason = this.abortReason(controller)
        if (reason) {
          result.finishReason = reason
        }
        this.contextIdentity = calculateContextIdentity({
          messages: [...request.messages, result.message],
        })
        return result
      })
      .catch((error) => {
        if (error.name === 'AbortError') {
          const emptyResponse: EngineChatCompletionResult = {
            finishReason: this.abortReason(controller) ?? 'abort',
            message: {
              role: 'assistant',
              content: '',
            },
            promptTokens: 0,
            completionTokens: 0,
            contextTokens: 0,
          }
          return emptyResponse
        }
        taskLogger(LogLevels.error, 'Error while processing task - ', {
          error,
        })
        throw error
      })
      .finally(() => {
        const elapsedTime = elapsedMillis(taskBegin)
        controller.complete()
        taskLogger(LogLevels.info, 'Chat completion task done', {
          elapsed: elapsedTime,
        })
      })
    return {
      id,
      model: this.modelId,
      createdAt: new Date(),
      result: completionPromise,
      cancel: controller.cancel,
    }
  }

  /**
   * Starts a text completion on the engine.
   *
   * On success the tracked context identity becomes the prompt plus the
   * generated text. Abort handling mirrors processChatCompletionTask, with an
   * empty `text` in the fallback result.
   *
   * @throws Error if the engine lacks text completion support or
   * `request.prompt` is empty.
   */
  processTextCompletionTask(
    request: TextCompletionRequest,
    options?: CompletionProcessingOptions,
  ) {
    this.requireEngineMethod('processTextCompletionTask', 'text completion')
    if (!request.prompt) {
      throw new Error('Prompt is required for text completion')
    }
    const { id, taskLogger, controller } = this.beginTask(options)
    taskLogger(LogLevels.verbose, 'Creating text completion task')
    // pass on resetContext if this instance has been flagged for reset
    const resetContext = this.consumeContextReset()
    const taskBegin = process.hrtime.bigint()
    const completionPromise = this.engine.processTextCompletionTask!(
      {
        request,
        config: this.config,
        resetContext,
        log: taskLogger,
        onChunk: options?.onChunk,
      },
      this.engineInstance,
      controller.signal,
    )
      .then((result) => {
        const reason = this.abortReason(controller)
        if (reason) {
          result.finishReason = reason
        }
        this.contextIdentity = calculateContextIdentity({
          text: request.prompt + result.text,
        })
        return result
      })
      .catch((error) => {
        if (error.name === 'AbortError') {
          const emptyResponse: EngineTextCompletionResult = {
            finishReason: this.abortReason(controller) ?? 'abort',
            text: '',
            promptTokens: 0,
            completionTokens: 0,
            contextTokens: 0,
          }
          return emptyResponse
        }
        taskLogger(LogLevels.error, 'Error while processing task - ', {
          error,
        })
        throw error
      })
      .finally(() => {
        const elapsedTime = elapsedMillis(taskBegin)
        controller.complete()
        taskLogger(LogLevels.info, 'Text completion task done', {
          elapsed: elapsedTime,
        })
      })
    return {
      id,
      model: this.modelId,
      createdAt: new Date(),
      cancel: controller.cancel,
      result: completionPromise,
    }
  }

  /**
   * Starts an embedding task on the engine.
   * @throws Error if the engine lacks embedding support or `request.input` is empty.
   */
  processEmbeddingTask(request: EmbeddingRequest, options?: ProcessingOptions) {
    this.requireEngineMethod('processEmbeddingTask', 'embedding')
    if (!request.input) {
      throw new Error('Input is required for embedding')
    }
    const { id, taskLogger, controller } = this.beginTask(options)
    taskLogger(LogLevels.verbose, 'Creating embedding task')
    const taskBegin = process.hrtime.bigint()
    const result = this.settleTask(
      this.engine.processEmbeddingTask!(
        {
          request,
          config: this.config,
          log: taskLogger,
        },
        this.engineInstance,
        controller.signal,
      ),
      'Embedding',
      taskBegin,
      taskLogger,
      controller,
    )

    return {
      id,
      model: this.modelId,
      createdAt: new Date(),
      cancel: controller.cancel,
      result,
    }
  }

  /**
   * Starts an image-to-text task on the engine.
   * @throws Error if the engine lacks image-to-text support.
   */
  processImageToTextTask(
    request: ImageToTextRequest,
    options?: ProcessingOptions,
  ) {
    this.requireEngineMethod('processImageToTextTask', 'image to text')
    const { id, taskLogger, controller } = this.beginTask(options)
    const taskBegin = process.hrtime.bigint()
    const result = this.settleTask(
      this.engine.processImageToTextTask!(
        {
          request,
          config: this.config,
          log: taskLogger,
        },
        this.engineInstance,
        controller.signal,
      ),
      'ImageToText',
      taskBegin,
      taskLogger,
      controller,
    )

    return {
      id,
      model: this.modelId,
      createdAt: new Date(),
      cancel: controller.cancel,
      result,
    }
  }

  /**
   * Starts an image-to-image task on the engine.
   * @throws Error if the engine lacks image-to-image support.
   */
  processImageToImageTask(
    request: ImageToImageRequest,
    options?: ProcessingOptions,
  ) {
    this.requireEngineMethod('processImageToImageTask', 'image to image')
    const { id, taskLogger, controller } = this.beginTask(options)
    const taskBegin = process.hrtime.bigint()
    const result = this.settleTask(
      this.engine.processImageToImageTask!(
        {
          request,
          config: this.config,
          log: taskLogger,
        },
        this.engineInstance,
        controller.signal,
      ),
      'ImageToImage',
      taskBegin,
      taskLogger,
      controller,
    )

    return {
      id,
      model: this.modelId,
      createdAt: new Date(),
      cancel: controller.cancel,
      result,
    }
  }

  /**
   * Starts a speech-to-text task on the engine.
   * @throws Error if the engine lacks speech-to-text support.
   */
  processSpeechToTextTask(
    request: SpeechToTextRequest,
    options?: SpeechToTextProcessingOptions,
  ) {
    this.requireEngineMethod('processSpeechToTextTask', 'speech to text')
    const { id, taskLogger, controller } = this.beginTask(options)
    const taskBegin = process.hrtime.bigint()
    const result = this.settleTask(
      this.engine.processSpeechToTextTask!(
        {
          request,
          config: this.config,
          log: taskLogger,
        },
        this.engineInstance,
        controller.signal,
      ),
      'SpeechToText',
      taskBegin,
      taskLogger,
      controller,
    )

    return {
      id,
      model: this.modelId,
      createdAt: new Date(),
      cancel: controller.cancel,
      result,
    }
  }

  /**
   * Starts a text-to-image task on the engine.
   * @throws Error if the engine lacks text-to-image support.
   */
  processTextToImageTask(
    request: TextToImageRequest,
    options?: ProcessingOptions,
  ) {
    this.requireEngineMethod('processTextToImageTask', 'text to image')
    const { id, taskLogger, controller } = this.beginTask(options)
    const taskBegin = process.hrtime.bigint()
    const result = this.settleTask(
      this.engine.processTextToImageTask!(
        {
          request,
          config: this.config,
          log: taskLogger,
        },
        this.engineInstance,
        controller.signal,
      ),
      'TextToImage',
      taskBegin,
      taskLogger,
      controller,
    )

    return {
      id,
      model: this.modelId,
      createdAt: new Date(),
      cancel: controller.cancel,
      result,
    }
  }
}
|