inference-server 1.0.0-beta.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +216 -0
- package/dist/api/openai/enums.d.ts +4 -0
- package/dist/api/openai/enums.js +17 -0
- package/dist/api/openai/enums.js.map +1 -0
- package/dist/api/openai/handlers/chat.d.ts +3 -0
- package/dist/api/openai/handlers/chat.js +358 -0
- package/dist/api/openai/handlers/chat.js.map +1 -0
- package/dist/api/openai/handlers/completions.d.ts +3 -0
- package/dist/api/openai/handlers/completions.js +169 -0
- package/dist/api/openai/handlers/completions.js.map +1 -0
- package/dist/api/openai/handlers/embeddings.d.ts +3 -0
- package/dist/api/openai/handlers/embeddings.js +74 -0
- package/dist/api/openai/handlers/embeddings.js.map +1 -0
- package/dist/api/openai/handlers/images.d.ts +0 -0
- package/dist/api/openai/handlers/images.js +4 -0
- package/dist/api/openai/handlers/images.js.map +1 -0
- package/dist/api/openai/handlers/models.d.ts +3 -0
- package/dist/api/openai/handlers/models.js +23 -0
- package/dist/api/openai/handlers/models.js.map +1 -0
- package/dist/api/openai/handlers/transcription.d.ts +0 -0
- package/dist/api/openai/handlers/transcription.js +4 -0
- package/dist/api/openai/handlers/transcription.js.map +1 -0
- package/dist/api/openai/index.d.ts +7 -0
- package/dist/api/openai/index.js +14 -0
- package/dist/api/openai/index.js.map +1 -0
- package/dist/api/parseJSONRequestBody.d.ts +2 -0
- package/dist/api/parseJSONRequestBody.js +24 -0
- package/dist/api/parseJSONRequestBody.js.map +1 -0
- package/dist/api/v1/index.d.ts +2 -0
- package/dist/api/v1/index.js +29 -0
- package/dist/api/v1/index.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +10 -0
- package/dist/cli.js.map +1 -0
- package/dist/engines/gpt4all/engine.d.ts +34 -0
- package/dist/engines/gpt4all/engine.js +357 -0
- package/dist/engines/gpt4all/engine.js.map +1 -0
- package/dist/engines/gpt4all/util.d.ts +3 -0
- package/dist/engines/gpt4all/util.js +29 -0
- package/dist/engines/gpt4all/util.js.map +1 -0
- package/dist/engines/index.d.ts +19 -0
- package/dist/engines/index.js +21 -0
- package/dist/engines/index.js.map +1 -0
- package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
- package/dist/engines/node-llama-cpp/engine.js +666 -0
- package/dist/engines/node-llama-cpp/engine.js.map +1 -0
- package/dist/engines/node-llama-cpp/types.d.ts +13 -0
- package/dist/engines/node-llama-cpp/types.js +2 -0
- package/dist/engines/node-llama-cpp/types.js.map +1 -0
- package/dist/engines/node-llama-cpp/util.d.ts +15 -0
- package/dist/engines/node-llama-cpp/util.js +84 -0
- package/dist/engines/node-llama-cpp/util.js.map +1 -0
- package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
- package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
- package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
- package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
- package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
- package/dist/engines/stable-diffusion-cpp/types.js +2 -0
- package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
- package/dist/engines/stable-diffusion-cpp/util.js +55 -0
- package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
- package/dist/engines/transformers-js/engine.d.ts +37 -0
- package/dist/engines/transformers-js/engine.js +538 -0
- package/dist/engines/transformers-js/engine.js.map +1 -0
- package/dist/engines/transformers-js/types.d.ts +7 -0
- package/dist/engines/transformers-js/types.js +2 -0
- package/dist/engines/transformers-js/types.js.map +1 -0
- package/dist/engines/transformers-js/util.d.ts +7 -0
- package/dist/engines/transformers-js/util.js +36 -0
- package/dist/engines/transformers-js/util.js.map +1 -0
- package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
- package/dist/engines/transformers-js/validateModelFiles.js +133 -0
- package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
- package/dist/experiments/ChatWithVision.d.ts +11 -0
- package/dist/experiments/ChatWithVision.js +91 -0
- package/dist/experiments/ChatWithVision.js.map +1 -0
- package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
- package/dist/experiments/StableDiffPromptGenerator.js +4 -0
- package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
- package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
- package/dist/experiments/VoiceFunctionCall.js +51 -0
- package/dist/experiments/VoiceFunctionCall.js.map +1 -0
- package/dist/http.d.ts +19 -0
- package/dist/http.js +54 -0
- package/dist/http.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -0
- package/dist/instance.d.ts +88 -0
- package/dist/instance.js +594 -0
- package/dist/instance.js.map +1 -0
- package/dist/lib/acquireFileLock.d.ts +7 -0
- package/dist/lib/acquireFileLock.js +38 -0
- package/dist/lib/acquireFileLock.js.map +1 -0
- package/dist/lib/calculateContextIdentity.d.ts +7 -0
- package/dist/lib/calculateContextIdentity.js +39 -0
- package/dist/lib/calculateContextIdentity.js.map +1 -0
- package/dist/lib/calculateFileChecksum.d.ts +1 -0
- package/dist/lib/calculateFileChecksum.js +16 -0
- package/dist/lib/calculateFileChecksum.js.map +1 -0
- package/dist/lib/copyDirectory.d.ts +6 -0
- package/dist/lib/copyDirectory.js +27 -0
- package/dist/lib/copyDirectory.js.map +1 -0
- package/dist/lib/decodeAudio.d.ts +1 -0
- package/dist/lib/decodeAudio.js +26 -0
- package/dist/lib/decodeAudio.js.map +1 -0
- package/dist/lib/downloadModelFile.d.ts +10 -0
- package/dist/lib/downloadModelFile.js +58 -0
- package/dist/lib/downloadModelFile.js.map +1 -0
- package/dist/lib/flattenMessageTextContent.d.ts +2 -0
- package/dist/lib/flattenMessageTextContent.js +11 -0
- package/dist/lib/flattenMessageTextContent.js.map +1 -0
- package/dist/lib/getCacheDirPath.d.ts +12 -0
- package/dist/lib/getCacheDirPath.js +31 -0
- package/dist/lib/getCacheDirPath.js.map +1 -0
- package/dist/lib/loadImage.d.ts +12 -0
- package/dist/lib/loadImage.js +30 -0
- package/dist/lib/loadImage.js.map +1 -0
- package/dist/lib/logger.d.ts +12 -0
- package/dist/lib/logger.js +98 -0
- package/dist/lib/logger.js.map +1 -0
- package/dist/lib/math.d.ts +7 -0
- package/dist/lib/math.js +30 -0
- package/dist/lib/math.js.map +1 -0
- package/dist/lib/resolveModelFileLocation.d.ts +15 -0
- package/dist/lib/resolveModelFileLocation.js +41 -0
- package/dist/lib/resolveModelFileLocation.js.map +1 -0
- package/dist/lib/util.d.ts +7 -0
- package/dist/lib/util.js +61 -0
- package/dist/lib/util.js.map +1 -0
- package/dist/lib/validateModelFile.d.ts +9 -0
- package/dist/lib/validateModelFile.js +62 -0
- package/dist/lib/validateModelFile.js.map +1 -0
- package/dist/lib/validateModelOptions.d.ts +3 -0
- package/dist/lib/validateModelOptions.js +23 -0
- package/dist/lib/validateModelOptions.js.map +1 -0
- package/dist/pool.d.ts +61 -0
- package/dist/pool.js +512 -0
- package/dist/pool.js.map +1 -0
- package/dist/server.d.ts +59 -0
- package/dist/server.js +221 -0
- package/dist/server.js.map +1 -0
- package/dist/standalone.d.ts +1 -0
- package/dist/standalone.js +306 -0
- package/dist/standalone.js.map +1 -0
- package/dist/store.d.ts +60 -0
- package/dist/store.js +203 -0
- package/dist/store.js.map +1 -0
- package/dist/types/completions.d.ts +57 -0
- package/dist/types/completions.js +2 -0
- package/dist/types/completions.js.map +1 -0
- package/dist/types/index.d.ts +326 -0
- package/dist/types/index.js +2 -0
- package/dist/types/index.js.map +1 -0
- package/docs/engines.md +28 -0
- package/docs/gpu.md +72 -0
- package/docs/http-api.md +147 -0
- package/examples/all-options.js +108 -0
- package/examples/chat-cli.js +56 -0
- package/examples/chat-server.js +65 -0
- package/examples/concurrency.js +70 -0
- package/examples/express.js +70 -0
- package/examples/pool.js +91 -0
- package/package.json +113 -0
- package/src/api/openai/enums.ts +20 -0
- package/src/api/openai/handlers/chat.ts +408 -0
- package/src/api/openai/handlers/completions.ts +196 -0
- package/src/api/openai/handlers/embeddings.ts +92 -0
- package/src/api/openai/handlers/images.ts +3 -0
- package/src/api/openai/handlers/models.ts +33 -0
- package/src/api/openai/handlers/transcription.ts +2 -0
- package/src/api/openai/index.ts +16 -0
- package/src/api/parseJSONRequestBody.ts +26 -0
- package/src/api/v1/DRAFT.md +16 -0
- package/src/api/v1/index.ts +37 -0
- package/src/cli.ts +9 -0
- package/src/engines/gpt4all/engine.ts +441 -0
- package/src/engines/gpt4all/util.ts +31 -0
- package/src/engines/index.ts +28 -0
- package/src/engines/node-llama-cpp/engine.ts +811 -0
- package/src/engines/node-llama-cpp/types.ts +17 -0
- package/src/engines/node-llama-cpp/util.ts +126 -0
- package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
- package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
- package/src/engines/stable-diffusion-cpp/types.ts +54 -0
- package/src/engines/stable-diffusion-cpp/util.ts +58 -0
- package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
- package/src/engines/transformers-js/engine.ts +659 -0
- package/src/engines/transformers-js/types.ts +25 -0
- package/src/engines/transformers-js/util.ts +40 -0
- package/src/engines/transformers-js/validateModelFiles.ts +168 -0
- package/src/experiments/ChatWithVision.ts +103 -0
- package/src/experiments/StableDiffPromptGenerator.ts +2 -0
- package/src/experiments/VoiceFunctionCall.ts +71 -0
- package/src/http.ts +72 -0
- package/src/index.ts +7 -0
- package/src/instance.ts +723 -0
- package/src/lib/acquireFileLock.ts +38 -0
- package/src/lib/calculateContextIdentity.ts +53 -0
- package/src/lib/calculateFileChecksum.ts +18 -0
- package/src/lib/copyDirectory.ts +29 -0
- package/src/lib/decodeAudio.ts +39 -0
- package/src/lib/downloadModelFile.ts +70 -0
- package/src/lib/flattenMessageTextContent.ts +19 -0
- package/src/lib/getCacheDirPath.ts +34 -0
- package/src/lib/loadImage.ts +46 -0
- package/src/lib/logger.ts +112 -0
- package/src/lib/math.ts +31 -0
- package/src/lib/resolveModelFileLocation.ts +49 -0
- package/src/lib/util.ts +75 -0
- package/src/lib/validateModelFile.ts +71 -0
- package/src/lib/validateModelOptions.ts +31 -0
- package/src/pool.ts +651 -0
- package/src/server.ts +270 -0
- package/src/standalone.ts +320 -0
- package/src/store.ts +278 -0
- package/src/types/completions.ts +86 -0
- package/src/types/index.ts +488 -0
- package/tsconfig.json +29 -0
- package/tsconfig.release.json +11 -0
- package/vitest.config.ts +18 -0
|
@@ -0,0 +1,488 @@
|
|
|
1
|
+
import type { SomeJSONSchema } from 'ajv/dist/types/json-schema'
|
|
2
|
+
import type { Sharp } from 'sharp'
|
|
3
|
+
import type { BuiltInEngineName } from '#package/engines/index.js'
|
|
4
|
+
import type { Logger } from '#package/lib/logger.js'
|
|
5
|
+
import type { ModelPool } from '#package/pool.js'
|
|
6
|
+
import type { ModelStore } from '#package/store.js'
|
|
7
|
+
import {
|
|
8
|
+
AssistantMessage,
|
|
9
|
+
ChatMessage,
|
|
10
|
+
CompletionFinishReason,
|
|
11
|
+
TextCompletionParams,
|
|
12
|
+
ToolDefinition,
|
|
13
|
+
} from '#package/types/completions.js'
|
|
14
|
+
import type { ContextShiftStrategy } from '#package/engines/node-llama-cpp/types.js'
|
|
15
|
+
import type {
|
|
16
|
+
StableDiffusionWeightType,
|
|
17
|
+
StableDiffusionSamplingMethod,
|
|
18
|
+
StableDiffusionSchedule,
|
|
19
|
+
} from '#package/engines/stable-diffusion-cpp/types.js'
|
|
20
|
+
import type {
|
|
21
|
+
TransformersJsModelClass,
|
|
22
|
+
TransformersJsTokenizerClass,
|
|
23
|
+
TransformersJsProcessorClass,
|
|
24
|
+
TransformersJsDataType,
|
|
25
|
+
} from '#package/engines/transformers-js/types.js'
|
|
26
|
+
export * from '#package/types/completions.js'
|
|
27
|
+
|
|
28
|
+
/** Names of the built-in task kinds a model can be configured for. */
export type ModelTaskType =
  | 'embedding'
  | 'text-completion'
  | 'text-to-image'
  | 'image-to-image'
  | 'image-to-text'
  | 'speech-to-text'
|
|
35
|
+
|
|
36
|
+
/**
 * Options common to every model definition.
 *
 * `engine` and `task` accept the known literal names as well as any other
 * string (the `(string & {})` arm).
 */
export interface ModelOptionsBase {
  task: ModelTaskType | (string & {})
  engine: BuiltInEngineName | (string & {})
  /** Optional location string for the model. */
  location?: string
  /** Optional preparation mode: one of the three listed literals. */
  prepare?: 'blocking' | 'async' | 'on-demand'
  /** Optional lower bound on the instance count. */
  minInstances?: number
  /** Optional upper bound on the instance count. */
  maxInstances?: number
}
|
|
44
|
+
|
|
45
|
+
/**
 * Base options for models served by a built-in engine.
 *
 * Narrows `engine` and `task` from the base interface to the built-in
 * names only, and adds an optional `url`.
 */
export interface BuiltInModelOptionsBase extends ModelOptionsBase {
  task: ModelTaskType
  engine: BuiltInEngineName
  /** Optional URL for the model. */
  url?: string
  /** Optional location string for the model. */
  location?: string
}
|
|
51
|
+
|
|
52
|
+
/**
 * Model options with an identifier attached.
 *
 * Unlike the options interface it extends, the instance bounds are
 * required here, and a models cache path is added.
 */
export interface ModelConfigBase extends ModelOptionsBase {
  id: string
  modelsCachePath: string
  minInstances: number
  maxInstances: number
}
|
|
58
|
+
|
|
59
|
+
/**
 * Model configuration: `ModelConfigBase` plus optional source, context and
 * device settings.
 *
 * `minInstances` / `maxInstances` are inherited from `ModelConfigBase` and
 * are not redeclared here.
 */
export interface ModelConfig extends ModelConfigBase {
  engine: BuiltInEngineName | (string & {})
  task: ModelTaskType | (string & {})
  /** Optional URL for the model. */
  url?: string
  /** Optional location string for the model. */
  location?: string
  /** Optional time-to-live value (unit not specified in this file). */
  ttl?: number
  /** Optional prefix string. */
  prefix?: string
  /** Optional list of initial chat messages. */
  initialMessages?: ChatMessage[]
  /** Optional device settings; only `gpu` is declared at this level. */
  device?: {
    gpu?: boolean | 'auto' | (string & {})
  }
}
|
|
76
|
+
|
|
77
|
+
// export interface ChatModelConfig extends ModelConfig {
|
|
78
|
+
// initialMessages?: ChatMessage[]
|
|
79
|
+
// }
|
|
80
|
+
|
|
81
|
+
/** One chunk of completion output: a text fragment and its token ids. */
export interface CompletionChunk {
  text: string
  tokens: number[]
}
|
|
85
|
+
|
|
86
|
+
/** Options shared by the processing-option interfaces in this file. */
export interface ProcessingOptions {
  /** Optional abort signal. */
  signal?: AbortSignal
  /** Optional timeout (unit not specified in this file). */
  timeout?: number
}
|
|
90
|
+
|
|
91
|
+
/** An image: a `Sharp` handle together with its dimensions and channel count. */
export interface Image {
  handle: Sharp
  channels: 1 | 2 | 3 | 4
  width: number
  height: number
}
|
|
97
|
+
|
|
98
|
+
/** Processing options for completions, adding an optional chunk callback. */
export interface CompletionProcessingOptions extends ProcessingOptions {
  /** Optional callback that receives a `CompletionChunk`. */
  onChunk?: (chunk: CompletionChunk) => void
}
|
|
101
|
+
|
|
102
|
+
/** Processing options for speech-to-text, adding an optional chunk callback. */
export interface SpeechToTextProcessingOptions extends ProcessingOptions {
  /** Optional callback that receives an object carrying a `text` string. */
  onChunk?: (chunk: { text: string }) => void
}
|
|
105
|
+
|
|
106
|
+
/**
 * Context object passed to engine functions.
 *
 * @typeParam TModelConfig - Type of `config`; defaults to `ModelConfig`.
 * @typeParam TModelMeta - Type of the optional `meta`; defaults to `unknown`.
 */
export interface EngineContext<TModelConfig = ModelConfig, TModelMeta = unknown> {
  log: Logger
  config: TModelConfig
  meta?: TModelMeta
}
|
|
114
|
+
|
|
115
|
+
/**
 * Fields shared by text and chat completion requests: the completion
 * parameters plus a model name and an optional `stream` flag.
 */
export interface TextCompletionRequestBase extends TextCompletionParams {
  /** Optional streaming flag. */
  stream?: boolean
  model: string
}
|
|
119
|
+
|
|
120
|
+
/** Text completion request: the shared base fields plus an optional prompt. */
export interface TextCompletionRequest extends TextCompletionRequestBase {
  /** Optional prompt string. */
  prompt?: string
}
|
|
123
|
+
|
|
124
|
+
/**
 * Chat completion request: the shared base fields plus the message list and
 * optional grammar / tool definitions.
 */
export interface ChatCompletionRequest extends TextCompletionRequestBase {
  messages: ChatMessage[]
  /** Optional tool definitions, keyed by string. */
  tools?: Record<string, ToolDefinition>
  /** Optional grammar, given as a string. */
  grammar?: string
}
|
|
129
|
+
|
|
130
|
+
/** Embedding input tagged `'text'`, carrying string content. */
export interface TextEmbeddingInput {
  content: string
  type: 'text'
}
|
|
134
|
+
|
|
135
|
+
/** Embedding input tagged `'image'`, carrying `Image` content. */
export interface ImageEmbeddingInput {
  content: Image
  type: 'image'
}
|
|
139
|
+
|
|
140
|
+
/** A single embedding input: a plain string or one of the tagged input objects. */
export type EmbeddingInput =
  | string
  | TextEmbeddingInput
  | ImageEmbeddingInput
|
|
141
|
+
|
|
142
|
+
/** Embedding request: a model name and one input or an array of inputs. */
export interface EmbeddingRequest {
  model: string
  input: EmbeddingInput | EmbeddingInput[]
  /** Optional pooling mode: `'cls'` or `'mean'`. */
  pooling?: 'cls' | 'mean'
  /** Optional dimensions value. */
  dimensions?: number
}
|
|
148
|
+
|
|
149
|
+
/** Image-to-text request: a model name and an image, with optional prompt and token limit. */
export interface ImageToTextRequest {
  model: string
  image: Image
  /** Optional token limit. */
  maxTokens?: number
  /** Optional prompt string. */
  prompt?: string
}
|
|
155
|
+
|
|
156
|
+
/**
 * Optional parameters shared by the text-to-image and image-to-image
 * requests. Every field is optional; this file does not state value ranges
 * or defaults.
 */
export interface StableDiffusionRequest {
  samplingMethod?: StableDiffusionSamplingMethod
  negativePrompt?: string
  sampleSteps?: number
  batchCount?: number
  cfgScale?: number
  guidance?: number
  strength?: number
  styleRatio?: number
  controlStrength?: number
}
|
|
167
|
+
|
|
168
|
+
/**
 * Text-to-image request: the shared stable-diffusion parameters plus a model
 * name, a required prompt, and optional size and seed.
 */
export interface TextToImageRequest extends StableDiffusionRequest {
  prompt: string
  model: string
  seed?: number
  width?: number
  height?: number
}
|
|
175
|
+
|
|
176
|
+
/**
 * Image-to-image request: the shared stable-diffusion parameters plus a model
 * name, a source image, a required prompt, and optional size and seed.
 */
export interface ImageToImageRequest extends StableDiffusionRequest {
  prompt: string
  model: string
  image: Image
  seed?: number
  width?: number
  height?: number
}
|
|
184
|
+
|
|
185
|
+
/**
 * Speech-to-text request. Only `model` is required; `url` and `file` are both
 * optional strings at the type level.
 */
export interface SpeechToTextRequest {
  model: string
  file?: string
  url?: string
  prompt?: string
  language?: string
  maxTokens?: number
}
|
|
193
|
+
|
|
194
|
+
/** Metadata attached to a request: a sequence number and an abort controller. */
export interface ModelRequestMeta {
  abortController: AbortController
  sequence: number
}
|
|
198
|
+
/**
 * Union of the request payloads declared in this file.
 *
 * Includes the text-to-image and image-to-image requests so the union covers
 * every task for which `ModelEngine` declares a `process*Task` handler.
 */
export type IncomingRequest =
  | TextCompletionRequest
  | ChatCompletionRequest
  | EmbeddingRequest
  | ImageToTextRequest
  | TextToImageRequest
  | ImageToImageRequest
  | SpeechToTextRequest
|
|
204
|
+
/** An incoming request combined with its request metadata. */
export type ModelInstanceRequest =
  ModelRequestMeta & IncomingRequest
|
|
205
|
+
|
|
206
|
+
/**
 * Arguments for an engine's text completion handler: the engine context plus
 * the request and optional streaming / reset controls.
 *
 * @typeParam TModelConfig - Type of `config`; defaults to `unknown`.
 * @typeParam TModelMeta - Type of `meta`; defaults to `unknown`.
 */
export interface EngineTextCompletionArgs<TModelConfig = unknown, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  request: TextCompletionRequest
  /** Optional context-reset flag. */
  resetContext?: boolean
  /** Optional callback that receives a `CompletionChunk`. */
  onChunk?: (chunk: CompletionChunk) => void
}
|
|
214
|
+
|
|
215
|
+
/**
 * Arguments for an engine's chat completion handler: the engine context plus
 * the request and optional streaming / reset controls.
 *
 * @typeParam TModelConfig - Type of `config`; defaults to `unknown`.
 * @typeParam TModelMeta - Type of `meta`; defaults to `unknown`.
 */
export interface EngineChatCompletionArgs<TModelConfig = unknown, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  request: ChatCompletionRequest
  /** Optional context-reset flag. */
  resetContext?: boolean
  /** Optional callback that receives a `CompletionChunk`. */
  onChunk?: (chunk: CompletionChunk) => void
}
|
|
223
|
+
|
|
224
|
+
/**
 * Arguments for an engine's embedding handler: the engine context plus the
 * embedding request.
 *
 * @typeParam TModelConfig - Type of `config`; defaults to `unknown`.
 * @typeParam TModelMeta - Type of `meta`; defaults to `unknown`.
 */
export interface EngineEmbeddingArgs<TModelConfig = unknown, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  request: EmbeddingRequest
}
|
|
230
|
+
|
|
231
|
+
/**
 * Arguments for an engine's image-to-text handler: the engine context plus
 * the image-to-text request.
 *
 * @typeParam TModelConfig - Type of `config`; defaults to `unknown`.
 * @typeParam TModelMeta - Type of `meta`; defaults to `unknown`.
 */
export interface EngineImageToTextArgs<TModelConfig = unknown, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  request: ImageToTextRequest
}
|
|
237
|
+
|
|
238
|
+
/**
 * Arguments for an engine's text-to-image handler: the engine context plus
 * the text-to-image request.
 *
 * @typeParam TModelConfig - Type of `config`; defaults to `unknown`.
 * @typeParam TModelMeta - Type of `meta`; defaults to `unknown`.
 */
export interface EngineTextToImageArgs<TModelConfig = unknown, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  request: TextToImageRequest
}
|
|
244
|
+
|
|
245
|
+
/**
 * Arguments for an engine's image-to-image handler: the engine context plus
 * the image-to-image request.
 *
 * @typeParam TModelConfig - Type of `config`; defaults to `unknown`.
 * @typeParam TModelMeta - Type of `meta`; defaults to `unknown`.
 */
export interface EngineImageToImageArgs<TModelConfig = unknown, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  request: ImageToImageRequest
}
|
|
251
|
+
|
|
252
|
+
/**
 * Arguments for an engine's speech-to-text handler: the engine context plus
 * the request and an optional chunk callback.
 *
 * @typeParam TModelConfig - Type of `config`; defaults to `unknown`.
 * @typeParam TModelMeta - Type of `meta`; defaults to `unknown`.
 */
export interface EngineSpeechToTextArgs<TModelConfig = unknown, TModelMeta = unknown>
  extends EngineContext<TModelConfig, TModelMeta> {
  /** Optional callback that receives an object carrying a `text` string. */
  onChunk?: (chunk: { text: string }) => void
  request: SpeechToTextRequest
}
|
|
259
|
+
|
|
260
|
+
/** Download progress for one file: loaded and total byte counts. */
export interface FileDownloadProgress {
  file: string
  totalBytes: number
  loadedBytes: number
}
|
|
265
|
+
|
|
266
|
+
/** Argument passed to an engine's optional `start` hook: the model store and pool. */
export interface EngineStartContext {
  store: ModelStore
  pool: ModelPool
}
|
|
270
|
+
|
|
271
|
+
/**
 * Contract an engine implements.
 *
 * `prepareModel`, `createInstance` and `disposeInstance` are required. The
 * `start` hook, the `autoGpu` flag and every `process*Task` handler are
 * optional.
 *
 * @typeParam TInstance - Instance type returned by `createInstance`.
 * @typeParam TModelConfig - Model config type; must extend `ModelConfig`.
 * @typeParam TModelMeta - Metadata type returned by `prepareModel`.
 */
export interface ModelEngine<
  TInstance = unknown,
  TModelConfig extends ModelConfig = ModelConfig,
  TModelMeta = unknown,
> {
  /** Optional flag; its effect is defined by the code that consumes it. */
  autoGpu?: boolean

  // --- Lifecycle ---------------------------------------------------------

  /** Optional hook that receives the pool and store. */
  start?: (ctx: EngineStartContext) => Promise<void>

  /** Resolves to the model metadata; may report file download progress. */
  prepareModel: (
    ctx: EngineContext<TModelConfig, TModelMeta>,
    onProgress?: (progress: FileDownloadProgress) => void,
    signal?: AbortSignal,
  ) => Promise<TModelMeta>

  /** Resolves to a new instance for the given context. */
  createInstance: (
    ctx: EngineContext<TModelConfig, TModelMeta>,
    signal?: AbortSignal,
  ) => Promise<TInstance>

  /** Disposes an instance previously produced by `createInstance`. */
  disposeInstance: (instance: TInstance) => Promise<void>

  // --- Text tasks --------------------------------------------------------

  processChatCompletionTask?: (
    args: EngineChatCompletionArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineChatCompletionResult>

  processTextCompletionTask?: (
    args: EngineTextCompletionArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineTextCompletionResult>

  processEmbeddingTask?: (
    args: EngineEmbeddingArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineEmbeddingResult>

  // --- Image and audio tasks ---------------------------------------------

  processImageToTextTask?: (
    args: EngineImageToTextArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineImageToTextResult>

  processSpeechToTextTask?: (
    args: EngineSpeechToTextArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineSpeechToTextResult>

  processTextToImageTask?: (
    args: EngineTextToImageArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineTextToImageResult>

  processImageToImageTask?: (
    args: EngineImageToImageArgs<TModelConfig, TModelMeta>,
    instance: TInstance,
    signal?: AbortSignal,
  ) => Promise<EngineImageToImageResult>
}
|
|
324
|
+
|
|
325
|
+
/** Task-specific options for embedding models; only pins the task tag. */
interface EmbeddingModelOptions {
  task: 'embedding'
}
|
|
328
|
+
|
|
329
|
+
/** A text-completion grammar: either a string or a JSON schema object. */
export type TextCompletionGrammar =
  | string
  | SomeJSONSchema
|
|
330
|
+
|
|
331
|
+
/**
 * Task-specific options for text-completion models. Everything except the
 * task tag is optional.
 */
interface TextCompletionModelOptions {
  task: 'text-completion'
  /** Optional context size. */
  contextSize?: number
  /** Optional batch size. */
  batchSize?: number
  /** Optional prefix string. */
  prefix?: string
  /** Optional list of initial chat messages. */
  initialMessages?: ChatMessage[]
  /** Optional default completion parameters. */
  completionDefaults?: TextCompletionParams
  /** Optional grammars, keyed by string. */
  grammars?: Record<string, TextCompletionGrammar>
}
|
|
340
|
+
|
|
341
|
+
/**
 * Options common to both `node-llama-cpp` model kinds (text completion and
 * embedding).
 */
interface LlamaCppModelOptionsBase extends BuiltInModelOptionsBase {
  engine: 'node-llama-cpp'
  task: 'text-completion' | 'embedding'
  /** Optional SHA-256 string for the model file. */
  sha256?: string
  /** Optional batch size. */
  batchSize?: number
  /** Optional context shift strategy. */
  contextShiftStrategy?: ContextShiftStrategy
  /** Optional device settings. */
  device?: {
    gpu?: boolean | 'auto' | (string & {})
    gpuLayers?: number
    cpuThreads?: number
    memLock?: boolean
  }
  /** Optional tool settings; `definitions` is required when `tools` is given. */
  tools?: {
    definitions: Record<string, ToolDefinition>
    includeParamsDocumentation?: boolean
    parallelism?: number
  }
}
|
|
359
|
+
|
|
360
|
+
/** `node-llama-cpp` model options with the task pinned to `'embedding'`. */
interface LlamaCppEmbeddingModelOptions
  extends LlamaCppModelOptionsBase, EmbeddingModelOptions {
  task: 'embedding'
}
|
|
365
|
+
|
|
366
|
+
/** `node-llama-cpp` model options with the task pinned to `'text-completion'`. */
export interface LlamaCppTextCompletionModelOptions
  extends LlamaCppModelOptionsBase, TextCompletionModelOptions {
  task: 'text-completion'
}
|
|
371
|
+
|
|
372
|
+
/** Options common to both `gpt4all` model kinds (text completion and embedding). */
interface GPT4AllModelOptions extends BuiltInModelOptionsBase {
  engine: 'gpt4all'
  task: 'text-completion' | 'embedding'
  /** Optional MD5 string for the model file. */
  md5?: string
  /** Optional device settings. */
  device?: {
    gpu?: boolean | 'auto' | (string & {})
    gpuLayers?: number
    cpuThreads?: number
  }
}
|
|
382
|
+
|
|
383
|
+
/** `gpt4all` options intersected with the text-completion task options. */
type GPT4AllTextCompletionModelOptions = TextCompletionModelOptions & GPT4AllModelOptions
|
|
385
|
+
|
|
386
|
+
/** `gpt4all` options intersected with the embedding task options. */
type GPT4AllEmbeddingModelOptions =
  GPT4AllModelOptions & EmbeddingModelOptions
|
|
387
|
+
|
|
388
|
+
/**
 * Description of one `transformers-js` sub-model. All fields are optional.
 */
export interface TransformersJsModel {
  modelClass?: TransformersJsModelClass
  tokenizerClass?: TransformersJsTokenizerClass
  processorClass?: TransformersJsProcessorClass
  /** Optional processor source, given as a URL and/or a file string. */
  processor?: {
    url?: string
    file?: string
  }
  /** Either a single data type or a string-keyed record of data types. */
  dtype?: Record<string, TransformersJsDataType> | TransformersJsDataType
}
|
|
398
|
+
|
|
399
|
+
/**
 * Options for models using the `transformers-js` engine, with optional text,
 * vision and speech sub-model descriptions.
 */
interface TransformersJsModelOptions extends BuiltInModelOptionsBase {
  engine: 'transformers-js'
  task: 'image-to-text' | 'speech-to-text' | 'text-completion' | 'embedding'
  speechModel?: TransformersJsModel
  visionModel?: TransformersJsModel
  textModel?: TransformersJsModel
  /** Optional device settings; only `gpu` is declared for this engine. */
  device?: {
    gpu?: boolean | 'auto' | (string & {})
  }
}
|
|
409
|
+
|
|
410
|
+
/** Source of one model file: optional URL, file string and SHA-256 string. */
export interface ModelFileSource {
  file?: string
  url?: string
  sha256?: string
}
|
|
415
|
+
|
|
416
|
+
/**
 * Options for models using the `stable-diffusion-cpp` engine. Auxiliary
 * model files are each described by an optional `ModelFileSource`.
 */
interface StableDiffusionModelOptions extends BuiltInModelOptionsBase {
  engine: 'stable-diffusion-cpp'
  task: 'image-to-text' | 'text-to-image' | 'image-to-image'
  /** Optional URL for the model. */
  url?: string
  /** Optional SHA-256 string for the model file. */
  sha256?: string
  /** Optional boolean flag. */
  diffusionModel?: boolean

  // Optional auxiliary model files.
  clipL?: ModelFileSource
  clipG?: ModelFileSource
  t5xxl?: ModelFileSource
  vae?: ModelFileSource
  taesd?: ModelFileSource
  controlNet?: ModelFileSource
  loras?: ModelFileSource[]

  // Optional engine settings.
  weightType?: StableDiffusionWeightType
  samplingMethod?: StableDiffusionSamplingMethod
  schedule?: StableDiffusionSchedule
}
|
|
433
|
+
|
|
434
|
+
/** Options for models served by a custom engine; adds nothing to the base options. */
export interface CustomEngineModelOptions extends ModelOptionsBase {
}
|
|
435
|
+
|
|
436
|
+
/** Union of the option types for all built-in engines. */
export type BuiltInModelOptions =
  // node-llama-cpp
  | LlamaCppTextCompletionModelOptions
  | LlamaCppEmbeddingModelOptions
  // gpt4all
  | GPT4AllTextCompletionModelOptions
  | GPT4AllEmbeddingModelOptions
  // transformers-js
  | TransformersJsModelOptions
  // stable-diffusion-cpp
  | StableDiffusionModelOptions
|
|
443
|
+
|
|
444
|
+
/** Any accepted model options: built-in engine options or custom engine options. */
export type ModelOptions =
  | BuiltInModelOptions
  | CustomEngineModelOptions
|
|
445
|
+
|
|
446
|
+
/** Result of an embedding task: the vectors and an input token count. */
export interface EngineEmbeddingResult {
  inputTokens: number
  embeddings: Float32Array[]
}
|
|
450
|
+
|
|
451
|
+
/** An engine chat completion result with an id and a model name added. */
export interface ChatCompletionResult extends EngineChatCompletionResult {
  model: string
  id: string
}
|
|
455
|
+
|
|
456
|
+
/**
 * Result of a chat completion task: the assistant message, the finish reason
 * and three token counts.
 */
export interface EngineChatCompletionResult {
  message: AssistantMessage
  finishReason: CompletionFinishReason
  contextTokens: number
  promptTokens: number
  completionTokens: number
}
|
|
463
|
+
|
|
464
|
+
/**
 * Result of a text completion task: the generated text, an optional finish
 * reason and three token counts.
 */
export interface EngineTextCompletionResult {
  text: string
  /** Optional here, unlike in `EngineChatCompletionResult`. */
  finishReason?: CompletionFinishReason
  contextTokens: number
  promptTokens: number
  completionTokens: number
}
|
|
471
|
+
|
|
472
|
+
/** Result of an image-to-text task: a single text string. */
export interface EngineImageToTextResult {
  text: string
}
|
|
475
|
+
|
|
476
|
+
/** Result of a text-to-image task: a seed and the list of images. */
export interface EngineTextToImageResult {
  seed: number
  images: Image[]
}
|
|
480
|
+
|
|
481
|
+
/** Result of an image-to-image task: a seed and the list of images. */
export interface EngineImageToImageResult {
  seed: number
  images: Image[]
}
|
|
485
|
+
|
|
486
|
+
/** Result of a speech-to-text task: a single text string. */
export interface EngineSpeechToTextResult {
  text: string
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "es2022",
|
|
4
|
+
"module": "es2022",
|
|
5
|
+
"sourceMap": true,
|
|
6
|
+
"declaration": true,
|
|
7
|
+
"stripInternal": true,
|
|
8
|
+
"strict": true,
|
|
9
|
+
"noImplicitAny": true,
|
|
10
|
+
"noImplicitReturns": true,
|
|
11
|
+
"noImplicitThis": true,
|
|
12
|
+
"noImplicitOverride": true,
|
|
13
|
+
"strictNullChecks": true,
|
|
14
|
+
"noFallthroughCasesInSwitch": true,
|
|
15
|
+
"removeComments": false,
|
|
16
|
+
"isolatedModules": true,
|
|
17
|
+
"moduleResolution": "node",
|
|
18
|
+
"lib": ["es2022"],
|
|
19
|
+
"resolveJsonModule": false,
|
|
20
|
+
"esModuleInterop": true,
|
|
21
|
+
"forceConsistentCasingInFileNames": true,
|
|
22
|
+
"allowSyntheticDefaultImports": true,
|
|
23
|
+
"skipLibCheck": true,
|
|
24
|
+
"paths": {
|
|
25
|
+
"#package/*": ["./src/*"]
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"include": ["src/**/*"]
|
|
29
|
+
}
|
package/vitest.config.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { defineConfig } from 'vitest/config'
|
|
2
|
+
|
|
3
|
+
/**
 * Vitest configuration: run tests in a single forked worker, one at a time,
 * with a 60 s test timeout and a 120 s hook timeout.
 */
export default defineConfig({
  test: {
    pool: 'forks',
    maxWorkers: 1,
    minWorkers: 1,
    maxConcurrency: 1,
    testTimeout: 60000,
    hookTimeout: 120000,
    poolOptions: {
      // `pool` is 'forks', so the per-pool limits have to sit under `forks`
      // to apply to the pool that is actually selected.
      forks: {
        minForks: 1,
        maxForks: 1,
      },
      // Kept so the same limits still hold if the pool is switched to
      // 'threads' (for example from the command line).
      threads: {
        minThreads: 1,
        maxThreads: 1,
      },
    },
  },
})
|