inference-server 1.0.0-beta.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. package/README.md +216 -0
  2. package/dist/api/openai/enums.d.ts +4 -0
  3. package/dist/api/openai/enums.js +17 -0
  4. package/dist/api/openai/enums.js.map +1 -0
  5. package/dist/api/openai/handlers/chat.d.ts +3 -0
  6. package/dist/api/openai/handlers/chat.js +358 -0
  7. package/dist/api/openai/handlers/chat.js.map +1 -0
  8. package/dist/api/openai/handlers/completions.d.ts +3 -0
  9. package/dist/api/openai/handlers/completions.js +169 -0
  10. package/dist/api/openai/handlers/completions.js.map +1 -0
  11. package/dist/api/openai/handlers/embeddings.d.ts +3 -0
  12. package/dist/api/openai/handlers/embeddings.js +74 -0
  13. package/dist/api/openai/handlers/embeddings.js.map +1 -0
  14. package/dist/api/openai/handlers/images.d.ts +0 -0
  15. package/dist/api/openai/handlers/images.js +4 -0
  16. package/dist/api/openai/handlers/images.js.map +1 -0
  17. package/dist/api/openai/handlers/models.d.ts +3 -0
  18. package/dist/api/openai/handlers/models.js +23 -0
  19. package/dist/api/openai/handlers/models.js.map +1 -0
  20. package/dist/api/openai/handlers/transcription.d.ts +0 -0
  21. package/dist/api/openai/handlers/transcription.js +4 -0
  22. package/dist/api/openai/handlers/transcription.js.map +1 -0
  23. package/dist/api/openai/index.d.ts +7 -0
  24. package/dist/api/openai/index.js +14 -0
  25. package/dist/api/openai/index.js.map +1 -0
  26. package/dist/api/parseJSONRequestBody.d.ts +2 -0
  27. package/dist/api/parseJSONRequestBody.js +24 -0
  28. package/dist/api/parseJSONRequestBody.js.map +1 -0
  29. package/dist/api/v1/index.d.ts +2 -0
  30. package/dist/api/v1/index.js +29 -0
  31. package/dist/api/v1/index.js.map +1 -0
  32. package/dist/cli.d.ts +1 -0
  33. package/dist/cli.js +10 -0
  34. package/dist/cli.js.map +1 -0
  35. package/dist/engines/gpt4all/engine.d.ts +34 -0
  36. package/dist/engines/gpt4all/engine.js +357 -0
  37. package/dist/engines/gpt4all/engine.js.map +1 -0
  38. package/dist/engines/gpt4all/util.d.ts +3 -0
  39. package/dist/engines/gpt4all/util.js +29 -0
  40. package/dist/engines/gpt4all/util.js.map +1 -0
  41. package/dist/engines/index.d.ts +19 -0
  42. package/dist/engines/index.js +21 -0
  43. package/dist/engines/index.js.map +1 -0
  44. package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
  45. package/dist/engines/node-llama-cpp/engine.js +666 -0
  46. package/dist/engines/node-llama-cpp/engine.js.map +1 -0
  47. package/dist/engines/node-llama-cpp/types.d.ts +13 -0
  48. package/dist/engines/node-llama-cpp/types.js +2 -0
  49. package/dist/engines/node-llama-cpp/types.js.map +1 -0
  50. package/dist/engines/node-llama-cpp/util.d.ts +15 -0
  51. package/dist/engines/node-llama-cpp/util.js +84 -0
  52. package/dist/engines/node-llama-cpp/util.js.map +1 -0
  53. package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
  54. package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
  55. package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
  56. package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
  57. package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
  58. package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
  59. package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
  60. package/dist/engines/stable-diffusion-cpp/types.js +2 -0
  61. package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
  62. package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
  63. package/dist/engines/stable-diffusion-cpp/util.js +55 -0
  64. package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
  65. package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
  66. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
  67. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
  68. package/dist/engines/transformers-js/engine.d.ts +37 -0
  69. package/dist/engines/transformers-js/engine.js +538 -0
  70. package/dist/engines/transformers-js/engine.js.map +1 -0
  71. package/dist/engines/transformers-js/types.d.ts +7 -0
  72. package/dist/engines/transformers-js/types.js +2 -0
  73. package/dist/engines/transformers-js/types.js.map +1 -0
  74. package/dist/engines/transformers-js/util.d.ts +7 -0
  75. package/dist/engines/transformers-js/util.js +36 -0
  76. package/dist/engines/transformers-js/util.js.map +1 -0
  77. package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
  78. package/dist/engines/transformers-js/validateModelFiles.js +133 -0
  79. package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
  80. package/dist/experiments/ChatWithVision.d.ts +11 -0
  81. package/dist/experiments/ChatWithVision.js +91 -0
  82. package/dist/experiments/ChatWithVision.js.map +1 -0
  83. package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
  84. package/dist/experiments/StableDiffPromptGenerator.js +4 -0
  85. package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
  86. package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
  87. package/dist/experiments/VoiceFunctionCall.js +51 -0
  88. package/dist/experiments/VoiceFunctionCall.js.map +1 -0
  89. package/dist/http.d.ts +19 -0
  90. package/dist/http.js +54 -0
  91. package/dist/http.js.map +1 -0
  92. package/dist/index.d.ts +7 -0
  93. package/dist/index.js +8 -0
  94. package/dist/index.js.map +1 -0
  95. package/dist/instance.d.ts +88 -0
  96. package/dist/instance.js +594 -0
  97. package/dist/instance.js.map +1 -0
  98. package/dist/lib/acquireFileLock.d.ts +7 -0
  99. package/dist/lib/acquireFileLock.js +38 -0
  100. package/dist/lib/acquireFileLock.js.map +1 -0
  101. package/dist/lib/calculateContextIdentity.d.ts +7 -0
  102. package/dist/lib/calculateContextIdentity.js +39 -0
  103. package/dist/lib/calculateContextIdentity.js.map +1 -0
  104. package/dist/lib/calculateFileChecksum.d.ts +1 -0
  105. package/dist/lib/calculateFileChecksum.js +16 -0
  106. package/dist/lib/calculateFileChecksum.js.map +1 -0
  107. package/dist/lib/copyDirectory.d.ts +6 -0
  108. package/dist/lib/copyDirectory.js +27 -0
  109. package/dist/lib/copyDirectory.js.map +1 -0
  110. package/dist/lib/decodeAudio.d.ts +1 -0
  111. package/dist/lib/decodeAudio.js +26 -0
  112. package/dist/lib/decodeAudio.js.map +1 -0
  113. package/dist/lib/downloadModelFile.d.ts +10 -0
  114. package/dist/lib/downloadModelFile.js +58 -0
  115. package/dist/lib/downloadModelFile.js.map +1 -0
  116. package/dist/lib/flattenMessageTextContent.d.ts +2 -0
  117. package/dist/lib/flattenMessageTextContent.js +11 -0
  118. package/dist/lib/flattenMessageTextContent.js.map +1 -0
  119. package/dist/lib/getCacheDirPath.d.ts +12 -0
  120. package/dist/lib/getCacheDirPath.js +31 -0
  121. package/dist/lib/getCacheDirPath.js.map +1 -0
  122. package/dist/lib/loadImage.d.ts +12 -0
  123. package/dist/lib/loadImage.js +30 -0
  124. package/dist/lib/loadImage.js.map +1 -0
  125. package/dist/lib/logger.d.ts +12 -0
  126. package/dist/lib/logger.js +98 -0
  127. package/dist/lib/logger.js.map +1 -0
  128. package/dist/lib/math.d.ts +7 -0
  129. package/dist/lib/math.js +30 -0
  130. package/dist/lib/math.js.map +1 -0
  131. package/dist/lib/resolveModelFileLocation.d.ts +15 -0
  132. package/dist/lib/resolveModelFileLocation.js +41 -0
  133. package/dist/lib/resolveModelFileLocation.js.map +1 -0
  134. package/dist/lib/util.d.ts +7 -0
  135. package/dist/lib/util.js +61 -0
  136. package/dist/lib/util.js.map +1 -0
  137. package/dist/lib/validateModelFile.d.ts +9 -0
  138. package/dist/lib/validateModelFile.js +62 -0
  139. package/dist/lib/validateModelFile.js.map +1 -0
  140. package/dist/lib/validateModelOptions.d.ts +3 -0
  141. package/dist/lib/validateModelOptions.js +23 -0
  142. package/dist/lib/validateModelOptions.js.map +1 -0
  143. package/dist/pool.d.ts +61 -0
  144. package/dist/pool.js +512 -0
  145. package/dist/pool.js.map +1 -0
  146. package/dist/server.d.ts +59 -0
  147. package/dist/server.js +221 -0
  148. package/dist/server.js.map +1 -0
  149. package/dist/standalone.d.ts +1 -0
  150. package/dist/standalone.js +306 -0
  151. package/dist/standalone.js.map +1 -0
  152. package/dist/store.d.ts +60 -0
  153. package/dist/store.js +203 -0
  154. package/dist/store.js.map +1 -0
  155. package/dist/types/completions.d.ts +57 -0
  156. package/dist/types/completions.js +2 -0
  157. package/dist/types/completions.js.map +1 -0
  158. package/dist/types/index.d.ts +326 -0
  159. package/dist/types/index.js +2 -0
  160. package/dist/types/index.js.map +1 -0
  161. package/docs/engines.md +28 -0
  162. package/docs/gpu.md +72 -0
  163. package/docs/http-api.md +147 -0
  164. package/examples/all-options.js +108 -0
  165. package/examples/chat-cli.js +56 -0
  166. package/examples/chat-server.js +65 -0
  167. package/examples/concurrency.js +70 -0
  168. package/examples/express.js +70 -0
  169. package/examples/pool.js +91 -0
  170. package/package.json +113 -0
  171. package/src/api/openai/enums.ts +20 -0
  172. package/src/api/openai/handlers/chat.ts +408 -0
  173. package/src/api/openai/handlers/completions.ts +196 -0
  174. package/src/api/openai/handlers/embeddings.ts +92 -0
  175. package/src/api/openai/handlers/images.ts +3 -0
  176. package/src/api/openai/handlers/models.ts +33 -0
  177. package/src/api/openai/handlers/transcription.ts +2 -0
  178. package/src/api/openai/index.ts +16 -0
  179. package/src/api/parseJSONRequestBody.ts +26 -0
  180. package/src/api/v1/DRAFT.md +16 -0
  181. package/src/api/v1/index.ts +37 -0
  182. package/src/cli.ts +9 -0
  183. package/src/engines/gpt4all/engine.ts +441 -0
  184. package/src/engines/gpt4all/util.ts +31 -0
  185. package/src/engines/index.ts +28 -0
  186. package/src/engines/node-llama-cpp/engine.ts +811 -0
  187. package/src/engines/node-llama-cpp/types.ts +17 -0
  188. package/src/engines/node-llama-cpp/util.ts +126 -0
  189. package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
  190. package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
  191. package/src/engines/stable-diffusion-cpp/types.ts +54 -0
  192. package/src/engines/stable-diffusion-cpp/util.ts +58 -0
  193. package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
  194. package/src/engines/transformers-js/engine.ts +659 -0
  195. package/src/engines/transformers-js/types.ts +25 -0
  196. package/src/engines/transformers-js/util.ts +40 -0
  197. package/src/engines/transformers-js/validateModelFiles.ts +168 -0
  198. package/src/experiments/ChatWithVision.ts +103 -0
  199. package/src/experiments/StableDiffPromptGenerator.ts +2 -0
  200. package/src/experiments/VoiceFunctionCall.ts +71 -0
  201. package/src/http.ts +72 -0
  202. package/src/index.ts +7 -0
  203. package/src/instance.ts +723 -0
  204. package/src/lib/acquireFileLock.ts +38 -0
  205. package/src/lib/calculateContextIdentity.ts +53 -0
  206. package/src/lib/calculateFileChecksum.ts +18 -0
  207. package/src/lib/copyDirectory.ts +29 -0
  208. package/src/lib/decodeAudio.ts +39 -0
  209. package/src/lib/downloadModelFile.ts +70 -0
  210. package/src/lib/flattenMessageTextContent.ts +19 -0
  211. package/src/lib/getCacheDirPath.ts +34 -0
  212. package/src/lib/loadImage.ts +46 -0
  213. package/src/lib/logger.ts +112 -0
  214. package/src/lib/math.ts +31 -0
  215. package/src/lib/resolveModelFileLocation.ts +49 -0
  216. package/src/lib/util.ts +75 -0
  217. package/src/lib/validateModelFile.ts +71 -0
  218. package/src/lib/validateModelOptions.ts +31 -0
  219. package/src/pool.ts +651 -0
  220. package/src/server.ts +270 -0
  221. package/src/standalone.ts +320 -0
  222. package/src/store.ts +278 -0
  223. package/src/types/completions.ts +86 -0
  224. package/src/types/index.ts +488 -0
  225. package/tsconfig.json +29 -0
  226. package/tsconfig.release.json +11 -0
  227. package/vitest.config.ts +18 -0
@@ -0,0 +1,488 @@
1
+ import type { SomeJSONSchema } from 'ajv/dist/types/json-schema'
2
+ import type { Sharp } from 'sharp'
3
+ import type { BuiltInEngineName } from '#package/engines/index.js'
4
+ import type { Logger } from '#package/lib/logger.js'
5
+ import type { ModelPool } from '#package/pool.js'
6
+ import type { ModelStore } from '#package/store.js'
7
+ import {
8
+ AssistantMessage,
9
+ ChatMessage,
10
+ CompletionFinishReason,
11
+ TextCompletionParams,
12
+ ToolDefinition,
13
+ } from '#package/types/completions.js'
14
+ import type { ContextShiftStrategy } from '#package/engines/node-llama-cpp/types.js'
15
+ import type {
16
+ StableDiffusionWeightType,
17
+ StableDiffusionSamplingMethod,
18
+ StableDiffusionSchedule,
19
+ } from '#package/engines/stable-diffusion-cpp/types.js'
20
+ import type {
21
+ TransformersJsModelClass,
22
+ TransformersJsTokenizerClass,
23
+ TransformersJsProcessorClass,
24
+ TransformersJsDataType,
25
+ } from '#package/engines/transformers-js/types.js'
26
+ export * from '#package/types/completions.js'
27
+
28
+ export type ModelTaskType = // Task identifiers used by the `task` field of the option/config types in this file.
29
+ | 'text-completion'
30
+ | 'embedding'
31
+ | 'image-to-text'
32
+ | 'image-to-image'
33
+ | 'text-to-image'
34
+ | 'speech-to-text'
35
+
36
+ export interface ModelOptionsBase { // Options common to every model entry, built-in or custom engine.
37
+ engine: BuiltInEngineName | (string & {}) // any string is accepted; `(string & {})` keeps the union from collapsing to plain `string`
38
+ task: ModelTaskType | (string & {}) // same open-union pattern as `engine`
39
+ prepare?: 'blocking' | 'async' | 'on-demand' // NOTE(review): the meaning of each mode is defined by the consumer, not in this file
40
+ minInstances?: number
41
+ maxInstances?: number
42
+ location?: string
43
+ }
44
+
45
+ export interface BuiltInModelOptionsBase extends ModelOptionsBase { // Narrows `engine` and `task` to the built-in unions and adds `url`.
46
+ engine: BuiltInEngineName
47
+ task: ModelTaskType
48
+ url?: string
49
+ location?: string // redeclared with the same type as the inherited ModelOptionsBase.location
50
+ }
51
+
52
+ export interface ModelConfigBase extends ModelOptionsBase { // ModelOptionsBase plus `id` and `modelsCachePath`.
53
+ id: string
54
+ minInstances: number // required here; optional on ModelOptionsBase
55
+ maxInstances: number // required here; optional on ModelOptionsBase
56
+ modelsCachePath: string
57
+ }
58
+
59
+ export interface ModelConfig extends ModelConfigBase { // Default config type for EngineContext and the constraint on ModelEngine's TModelConfig.
60
+ url?: string
61
+ location?: string // same type as the inherited ModelOptionsBase.location
62
+ task: ModelTaskType | (string & {}) // same type as inherited
63
+ engine: BuiltInEngineName | (string & {}) // same type as inherited
64
+ // minInstances: number
65
+ // maxInstances: number
66
+ ttl?: number // NOTE(review): unit is not stated in this file — confirm
67
+ prefix?: string
68
+ initialMessages?: ChatMessage[]
69
+ device?: {
70
+ gpu?: boolean | 'auto' | (string & {}) // boolean, 'auto', or any other string
71
+ // gpuLayers?: number
72
+ // cpuThreads?: number
73
+ // memLock?: boolean
74
+ }
75
+ }
76
+
77
+ // export interface ChatModelConfig extends ModelConfig {
78
+ // initialMessages?: ChatMessage[]
79
+ // }
80
+
81
+ export interface CompletionChunk { // Argument type of the `onChunk` callbacks on the completion option/args types.
82
+ tokens: number[]
83
+ text: string
84
+ }
85
+
86
+ export interface ProcessingOptions { // Base for the per-task processing option types declared below.
87
+ timeout?: number // NOTE(review): unit is not stated in this file — confirm
88
+ signal?: AbortSignal
89
+ }
90
+
91
+ export interface Image { // Image value used by the request/result types: a Sharp handle plus its dimensions.
92
+ handle: Sharp // `Sharp` type imported from the 'sharp' package
93
+ width: number
94
+ height: number
95
+ channels: 1 | 2 | 3 | 4 // restricted to these four literal values
96
+ }
97
+
98
+ export interface CompletionProcessingOptions extends ProcessingOptions { // ProcessingOptions plus an optional chunk callback.
99
+ onChunk?: (chunk: CompletionChunk) => void
100
+ }
101
+
102
+ export interface SpeechToTextProcessingOptions extends ProcessingOptions { // ProcessingOptions plus an optional chunk callback.
102
+ onChunk?: (chunk: { text: string }) => void // chunk carries only `text`, unlike CompletionChunk which also has `tokens`
103
+ }
105
+
106
+ export interface EngineContext< // Base of every Engine*Args type; also the `ctx` parameter of ModelEngine.prepareModel/createInstance.
107
+ TModelConfig = ModelConfig,
108
+ TModelMeta = unknown,
109
+ > {
110
+ config: TModelConfig
111
+ meta?: TModelMeta // optional; same type that ModelEngine.prepareModel resolves to
112
+ log: Logger
113
+ }
114
+
115
+ export interface TextCompletionRequestBase extends TextCompletionParams { // Shared base of TextCompletionRequest and ChatCompletionRequest.
116
+ model: string
117
+ stream?: boolean
118
+ }
119
+
120
+ export interface TextCompletionRequest extends TextCompletionRequestBase { // Base request plus an optional `prompt`.
121
+ prompt?: string
122
+ }
123
+
124
+ export interface ChatCompletionRequest extends TextCompletionRequestBase { // Base request plus required `messages`.
125
+ messages: ChatMessage[]
126
+ grammar?: string // a plain string here; TextCompletionGrammar (string | JSON schema) is used for model-level `grammars`
127
+ tools?: Record<string, ToolDefinition>
128
+ }
129
+
130
+ export interface TextEmbeddingInput { // Tagged text member of the EmbeddingInput union.
131
+ type: 'text' // discriminant
132
+ content: string
133
+ }
134
+
135
+ export interface ImageEmbeddingInput { // Tagged image member of the EmbeddingInput union.
136
+ type: 'image' // discriminant
137
+ content: Image
138
+ }
139
+
140
+ export type EmbeddingInput = TextEmbeddingInput | ImageEmbeddingInput | string // a bare string is accepted alongside the two tagged object forms
141
+
142
+ export interface EmbeddingRequest { // Request type carried by EngineEmbeddingArgs.
143
+ model: string
144
+ input: EmbeddingInput | EmbeddingInput[] // a single input or an array of inputs
145
+ dimensions?: number
146
+ pooling?: 'cls' | 'mean'
147
+ }
148
+
149
+ export interface ImageToTextRequest { // Request type carried by EngineImageToTextArgs.
150
+ model: string
151
+ image: Image
152
+ prompt?: string
153
+ maxTokens?: number
154
+ }
155
+
156
+ export interface StableDiffusionRequest { // Optional parameters shared by TextToImageRequest and ImageToImageRequest.
157
+ negativePrompt?: string
158
+ guidance?: number
159
+ styleRatio?: number
160
+ strength?: number
161
+ sampleSteps?: number
162
+ batchCount?: number
163
+ samplingMethod?: StableDiffusionSamplingMethod // the same type is also accepted on StableDiffusionModelOptions
164
+ cfgScale?: number
165
+ controlStrength?: number
166
+ }
167
+
168
+ export interface TextToImageRequest extends StableDiffusionRequest { // Request type carried by EngineTextToImageArgs.
169
+ model: string
170
+ prompt: string
171
+ width?: number
172
+ height?: number
173
+ seed?: number
174
+ }
175
+
176
+ export interface ImageToImageRequest extends StableDiffusionRequest { // Same fields as TextToImageRequest plus a required `image`.
177
+ model: string
178
+ image: Image
179
+ prompt: string
180
+ width?: number
181
+ height?: number
182
+ seed?: number
183
+ }
184
+
185
+ export interface SpeechToTextRequest { // Request type carried by EngineSpeechToTextArgs.
186
+ model: string
187
+ url?: string // `url` and `file` are both optional; NOTE(review): presumably alternative audio sources — confirm
188
+ file?: string
189
+ language?: string
190
+ prompt?: string
191
+ maxTokens?: number
192
+ }
193
+
194
+ export interface ModelRequestMeta { // Bookkeeping fields intersected onto a request by ModelInstanceRequest.
195
+ sequence: number
196
+ abortController: AbortController
197
+ }
198
+ export type IncomingRequest = // NOTE(review): TextToImageRequest and ImageToImageRequest are not members, although ModelEngine declares hooks for them — confirm this is intended
199
+ | TextCompletionRequest
200
+ | ChatCompletionRequest
201
+ | EmbeddingRequest
202
+ | ImageToTextRequest
203
+ | SpeechToTextRequest
204
+ export type ModelInstanceRequest = ModelRequestMeta & IncomingRequest // any IncomingRequest member with `sequence` and `abortController` added
205
+
206
+ export interface EngineTextCompletionArgs< // `args` parameter of ModelEngine.processTextCompletionTask.
207
+ TModelConfig = unknown, // defaults to `unknown` here, whereas EngineContext defaults it to ModelConfig
208
+ TModelMeta = unknown,
209
+ > extends EngineContext<TModelConfig, TModelMeta> {
210
+ onChunk?: (chunk: CompletionChunk) => void
211
+ resetContext?: boolean
212
+ request: TextCompletionRequest
213
+ }
214
+
215
+ export interface EngineChatCompletionArgs< // `args` parameter of ModelEngine.processChatCompletionTask.
216
+ TModelConfig = unknown, // defaults to `unknown` here, whereas EngineContext defaults it to ModelConfig
217
+ TModelMeta = unknown,
218
+ > extends EngineContext<TModelConfig, TModelMeta> {
219
+ onChunk?: (chunk: CompletionChunk) => void
220
+ resetContext?: boolean
221
+ request: ChatCompletionRequest
222
+ }
223
+
224
+ export interface EngineEmbeddingArgs< // `args` parameter of ModelEngine.processEmbeddingTask.
225
+ TModelConfig = unknown,
226
+ TModelMeta = unknown,
227
+ > extends EngineContext<TModelConfig, TModelMeta> {
228
+ request: EmbeddingRequest
229
+ }
230
+
231
+ export interface EngineImageToTextArgs< // `args` parameter of ModelEngine.processImageToTextTask.
232
+ TModelConfig = unknown,
233
+ TModelMeta = unknown,
234
+ > extends EngineContext<TModelConfig, TModelMeta> {
235
+ request: ImageToTextRequest
236
+ }
237
+
238
+ export interface EngineTextToImageArgs< // `args` parameter of ModelEngine.processTextToImageTask.
239
+ TModelConfig = unknown,
240
+ TModelMeta = unknown,
241
+ > extends EngineContext<TModelConfig, TModelMeta> {
242
+ request: TextToImageRequest
243
+ }
244
+
245
+ export interface EngineImageToImageArgs< // `args` parameter of ModelEngine.processImageToImageTask.
246
+ TModelConfig = unknown,
247
+ TModelMeta = unknown,
248
+ > extends EngineContext<TModelConfig, TModelMeta> {
249
+ request: ImageToImageRequest
250
+ }
251
+
252
+ export interface EngineSpeechToTextArgs< // `args` parameter of ModelEngine.processSpeechToTextTask.
253
+ TModelConfig = unknown,
254
+ TModelMeta = unknown,
255
+ > extends EngineContext<TModelConfig, TModelMeta> {
256
+ request: SpeechToTextRequest
257
+ onChunk?: (chunk: { text: string }) => void // same callback shape as SpeechToTextProcessingOptions.onChunk
258
+ }
259
+
260
+ export interface FileDownloadProgress { // Argument of the `onProgress` callback accepted by ModelEngine.prepareModel.
261
+ file: string
262
+ loadedBytes: number
263
+ totalBytes: number
264
+ }
265
+
266
+ export interface EngineStartContext { // Argument of the optional ModelEngine.start hook.
267
+ pool: ModelPool
268
+ store: ModelStore
269
+ }
270
+
271
+ export interface ModelEngine< // Contract an engine implements: three required lifecycle functions plus optional `start` and per-task hooks.
272
+ TInstance = unknown, // whatever createInstance resolves to; passed back to disposeInstance and every process*Task hook
273
+ TModelConfig extends ModelConfig = ModelConfig,
274
+ TModelMeta = unknown, // whatever prepareModel resolves to; matches EngineContext.meta
275
+ > {
276
+ autoGpu?: boolean // NOTE(review): meaning is not defined in this file — confirm against the engine implementations
277
+ start?: (ctx: EngineStartContext) => Promise<void> // optional hook receiving the pool and the store
278
+ prepareModel: ( // required
279
+ ctx: EngineContext<TModelConfig, TModelMeta>,
280
+ onProgress?: (progress: FileDownloadProgress) => void,
281
+ signal?: AbortSignal,
282
+ ) => Promise<TModelMeta>
283
+ createInstance: ( // required
284
+ ctx: EngineContext<TModelConfig, TModelMeta>,
285
+ signal?: AbortSignal,
286
+ ) => Promise<TInstance>
287
+ disposeInstance: (instance: TInstance) => Promise<void> // required
288
+ processChatCompletionTask?: ( // this and every process*Task hook below is optional and takes (args, instance, signal?)
289
+ args: EngineChatCompletionArgs<TModelConfig, TModelMeta>,
290
+ instance: TInstance,
291
+ signal?: AbortSignal,
292
+ ) => Promise<EngineChatCompletionResult>
293
+ processTextCompletionTask?: (
294
+ args: EngineTextCompletionArgs<TModelConfig, TModelMeta>,
295
+ instance: TInstance,
296
+ signal?: AbortSignal,
297
+ ) => Promise<EngineTextCompletionResult>
298
+ processEmbeddingTask?: (
299
+ args: EngineEmbeddingArgs<TModelConfig, TModelMeta>,
300
+ instance: TInstance,
301
+ signal?: AbortSignal,
302
+ ) => Promise<EngineEmbeddingResult>
303
+ processImageToTextTask?: (
304
+ args: EngineImageToTextArgs<TModelConfig, TModelMeta>,
305
+ instance: TInstance,
306
+ signal?: AbortSignal,
307
+ ) => Promise<EngineImageToTextResult>
308
+ processSpeechToTextTask?: (
309
+ args: EngineSpeechToTextArgs<TModelConfig, TModelMeta>,
310
+ instance: TInstance,
311
+ signal?: AbortSignal,
312
+ ) => Promise<EngineSpeechToTextResult>
313
+ processTextToImageTask?: (
314
+ args: EngineTextToImageArgs<TModelConfig, TModelMeta>,
315
+ instance: TInstance,
316
+ signal?: AbortSignal,
317
+ ) => Promise<EngineTextToImageResult>
318
+ processImageToImageTask?: (
319
+ args: EngineImageToImageArgs<TModelConfig, TModelMeta>,
320
+ instance: TInstance,
321
+ signal?: AbortSignal,
322
+ ) => Promise<EngineImageToImageResult>
323
+ }
324
+
325
+ interface EmbeddingModelOptions { // Not exported; task tag mixed into the per-engine embedding option types.
326
+ task: 'embedding'
327
+ }
328
+
329
+ export type TextCompletionGrammar = string | SomeJSONSchema // a string or an ajv JSON-schema object
330
+
331
+ interface TextCompletionModelOptions { // Not exported; text-completion options mixed into the per-engine option types.
332
+ task: 'text-completion'
333
+ contextSize?: number
334
+ grammars?: Record<string, TextCompletionGrammar> // string-keyed map of grammars
335
+ completionDefaults?: TextCompletionParams
336
+ initialMessages?: ChatMessage[]
337
+ prefix?: string
338
+ batchSize?: number
339
+ }
340
+
341
+ interface LlamaCppModelOptionsBase extends BuiltInModelOptionsBase { // Not exported; options shared by both node-llama-cpp option types.
342
+ engine: 'node-llama-cpp'
343
+ task: 'text-completion' | 'embedding'
344
+ sha256?: string // NOTE(review): presumably the expected checksum of the model file — confirm
345
+ batchSize?: number
346
+ contextShiftStrategy?: ContextShiftStrategy
347
+ tools?: {
348
+ definitions: Record<string, ToolDefinition> // required whenever `tools` is present
349
+ includeParamsDocumentation?: boolean
350
+ parallelism?: number
351
+ }
352
+ device?: {
353
+ gpu?: boolean | 'auto' | (string & {})
354
+ gpuLayers?: number
355
+ cpuThreads?: number
356
+ memLock?: boolean
357
+ }
358
+ }
359
+
360
+ interface LlamaCppEmbeddingModelOptions // Not exported; node-llama-cpp options for the embedding task.
361
+ extends LlamaCppModelOptionsBase,
362
+ EmbeddingModelOptions {
363
+ task: 'embedding' // redeclared because the two bases declare different `task` types
364
+ }
365
+
366
+ export interface LlamaCppTextCompletionModelOptions // node-llama-cpp options for the text-completion task.
367
+ extends LlamaCppModelOptionsBase,
368
+ TextCompletionModelOptions {
369
+ task: 'text-completion' // redeclared because the two bases declare different `task` types
370
+ }
371
+
372
+ interface GPT4AllModelOptions extends BuiltInModelOptionsBase { // Not exported; options shared by both gpt4all option types.
373
+ engine: 'gpt4all'
374
+ task: 'text-completion' | 'embedding'
375
+ md5?: string // NOTE(review): presumably the expected checksum of the model file — confirm
376
+ device?: {
377
+ gpu?: boolean | 'auto' | (string & {})
378
+ gpuLayers?: number
379
+ cpuThreads?: number
380
+ }
381
+ }
382
+
383
+ type GPT4AllTextCompletionModelOptions = TextCompletionModelOptions & // the intersection leaves 'text-completion' as the only `task` value
384
+ GPT4AllModelOptions
385
+
386
+ type GPT4AllEmbeddingModelOptions = GPT4AllModelOptions & EmbeddingModelOptions // the intersection leaves 'embedding' as the only `task` value
387
+
388
+ export interface TransformersJsModel { // Per-model settings used for textModel/visionModel/speechModel on TransformersJsModelOptions.
389
+ processor?: {
390
+ url?: string
391
+ file?: string
392
+ }
393
+ processorClass?: TransformersJsProcessorClass
394
+ tokenizerClass?: TransformersJsTokenizerClass
395
+ modelClass?: TransformersJsModelClass
396
+ dtype?: Record<string, TransformersJsDataType> | TransformersJsDataType // a single data type or a string-keyed map of data types
397
+ }
398
+
399
+ interface TransformersJsModelOptions extends BuiltInModelOptionsBase { // Not exported; options for the transformers-js engine.
400
+ engine: 'transformers-js'
401
+ task: 'image-to-text' | 'speech-to-text' | 'text-completion' | 'embedding'
402
+ textModel?: TransformersJsModel
403
+ visionModel?: TransformersJsModel
404
+ speechModel?: TransformersJsModel
405
+ device?: {
406
+ gpu?: boolean | 'auto' | (string & {})
407
+ }
408
+ }
409
+
410
+ export interface ModelFileSource { // Type of the auxiliary file entries (vae, clipL, loras, ...) on StableDiffusionModelOptions.
411
+ url?: string // all three fields are optional
412
+ file?: string
413
+ sha256?: string
414
+ }
415
+
416
+ interface StableDiffusionModelOptions extends BuiltInModelOptionsBase { // Not exported; options for the stable-diffusion-cpp engine.
417
+ engine: 'stable-diffusion-cpp'
418
+ task: 'image-to-text' | 'text-to-image' | 'image-to-image' // NOTE(review): 'image-to-text' looks out of place beside the two image-generation tasks — confirm
419
+ sha256?: string
420
+ url?: string // redeclared with the same type as the inherited BuiltInModelOptionsBase.url
421
+ diffusionModel?: boolean
422
+ vae?: ModelFileSource
423
+ clipL?: ModelFileSource
424
+ clipG?: ModelFileSource
425
+ t5xxl?: ModelFileSource
426
+ taesd?: ModelFileSource
427
+ controlNet?: ModelFileSource
428
+ samplingMethod?: StableDiffusionSamplingMethod // the same type is also accepted per request on StableDiffusionRequest
429
+ weightType?: StableDiffusionWeightType
430
+ schedule?: StableDiffusionSchedule
431
+ loras?: ModelFileSource[]
432
+ }
433
+
434
+ export interface CustomEngineModelOptions extends ModelOptionsBase {} // adds no members; a named alias of ModelOptionsBase for custom engines
435
+
436
+ export type BuiltInModelOptions = // Union of the option types of the four built-in engines.
437
+ | LlamaCppTextCompletionModelOptions
438
+ | LlamaCppEmbeddingModelOptions
439
+ | GPT4AllTextCompletionModelOptions
440
+ | GPT4AllEmbeddingModelOptions
441
+ | TransformersJsModelOptions
442
+ | StableDiffusionModelOptions
443
+
444
+ export type ModelOptions = BuiltInModelOptions | CustomEngineModelOptions // options for a built-in engine or for a custom one
445
+
446
+ export interface EngineEmbeddingResult { // Resolved value of ModelEngine.processEmbeddingTask.
447
+ embeddings: Float32Array[]
448
+ inputTokens: number
449
+ }
450
+
451
+ export interface ChatCompletionResult extends EngineChatCompletionResult { // Engine-level chat result plus `id` and `model`.
452
+ id: string
453
+ model: string
454
+ }
455
+
456
+ export interface EngineChatCompletionResult { // Resolved value of ModelEngine.processChatCompletionTask.
457
+ message: AssistantMessage
458
+ finishReason: CompletionFinishReason // required here; optional on EngineTextCompletionResult
459
+ promptTokens: number
460
+ completionTokens: number
461
+ contextTokens: number
462
+ }
463
+
464
+ export interface EngineTextCompletionResult { // Resolved value of ModelEngine.processTextCompletionTask.
465
+ text: string
466
+ finishReason?: CompletionFinishReason // optional here; required on EngineChatCompletionResult
467
+ promptTokens: number
468
+ completionTokens: number
469
+ contextTokens: number
470
+ }
471
+
472
+ export interface EngineImageToTextResult { // Resolved value of ModelEngine.processImageToTextTask.
473
+ text: string
474
+ }
475
+
476
+ export interface EngineTextToImageResult { // Resolved value of ModelEngine.processTextToImageTask.
477
+ images: Image[]
478
+ seed: number
479
+ }
480
+
481
+ export interface EngineImageToImageResult { // Resolved value of ModelEngine.processImageToImageTask; same shape as EngineTextToImageResult.
482
+ images: Image[]
483
+ seed: number
484
+ }
485
+
486
+ export interface EngineSpeechToTextResult { // Resolved value of ModelEngine.processSpeechToTextTask.
487
+ text: string
488
+ }
package/tsconfig.json ADDED
@@ -0,0 +1,29 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "es2022",
4
+ "module": "es2022",
5
+ "sourceMap": true,
6
+ "declaration": true,
7
+ "stripInternal": true,
8
+ "strict": true,
9
+ "noImplicitAny": true,
10
+ "noImplicitReturns": true,
11
+ "noImplicitThis": true,
12
+ "noImplicitOverride": true,
13
+ "strictNullChecks": true,
14
+ "noFallthroughCasesInSwitch": true,
15
+ "removeComments": false,
16
+ "isolatedModules": true,
17
+ "moduleResolution": "node",
18
+ "lib": ["es2022"],
19
+ "resolveJsonModule": false,
20
+ "esModuleInterop": true,
21
+ "forceConsistentCasingInFileNames": true,
22
+ "allowSyntheticDefaultImports": true,
23
+ "skipLibCheck": true,
24
+ "paths": {
25
+ "#package/*": ["./src/*"]
26
+ }
27
+ },
28
+ "include": ["src/**/*"]
29
+ }
@@ -0,0 +1,11 @@
1
+ {
2
+ "extends": "./tsconfig.json",
3
+ "compilerOptions": {
4
+ "rootDir": "src",
5
+ "outDir": "dist",
6
+ "noEmit": false
7
+ },
8
+ "include": [
9
+ "src/**/*"
10
+ ]
11
+ }
@@ -0,0 +1,18 @@
1
+ import { defineConfig } from 'vitest/config'
2
+
3
+ export default defineConfig({ // Vitest config: one forked worker, one test at a time, with long test/hook timeouts.
4
+ test: {
5
+ pool: 'forks',
6
+ maxWorkers: 1,
7
+ minWorkers: 1,
8
+ maxConcurrency: 1,
9
+ testTimeout: 60000,
10
+ hookTimeout: 120000,
11
+ poolOptions: {
12
+ forks: { // keyed by the active pool ('forks'); the limits were previously placed under `threads`, the key for a different pool
13
+ minForks: 1,
14
+ maxForks: 1,
15
+ },
16
+ },
17
+ },
18
+ })