inference-server 1.0.0-beta.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. package/README.md +216 -0
  2. package/dist/api/openai/enums.d.ts +4 -0
  3. package/dist/api/openai/enums.js +17 -0
  4. package/dist/api/openai/enums.js.map +1 -0
  5. package/dist/api/openai/handlers/chat.d.ts +3 -0
  6. package/dist/api/openai/handlers/chat.js +358 -0
  7. package/dist/api/openai/handlers/chat.js.map +1 -0
  8. package/dist/api/openai/handlers/completions.d.ts +3 -0
  9. package/dist/api/openai/handlers/completions.js +169 -0
  10. package/dist/api/openai/handlers/completions.js.map +1 -0
  11. package/dist/api/openai/handlers/embeddings.d.ts +3 -0
  12. package/dist/api/openai/handlers/embeddings.js +74 -0
  13. package/dist/api/openai/handlers/embeddings.js.map +1 -0
  14. package/dist/api/openai/handlers/images.d.ts +0 -0
  15. package/dist/api/openai/handlers/images.js +4 -0
  16. package/dist/api/openai/handlers/images.js.map +1 -0
  17. package/dist/api/openai/handlers/models.d.ts +3 -0
  18. package/dist/api/openai/handlers/models.js +23 -0
  19. package/dist/api/openai/handlers/models.js.map +1 -0
  20. package/dist/api/openai/handlers/transcription.d.ts +0 -0
  21. package/dist/api/openai/handlers/transcription.js +4 -0
  22. package/dist/api/openai/handlers/transcription.js.map +1 -0
  23. package/dist/api/openai/index.d.ts +7 -0
  24. package/dist/api/openai/index.js +14 -0
  25. package/dist/api/openai/index.js.map +1 -0
  26. package/dist/api/parseJSONRequestBody.d.ts +2 -0
  27. package/dist/api/parseJSONRequestBody.js +24 -0
  28. package/dist/api/parseJSONRequestBody.js.map +1 -0
  29. package/dist/api/v1/index.d.ts +2 -0
  30. package/dist/api/v1/index.js +29 -0
  31. package/dist/api/v1/index.js.map +1 -0
  32. package/dist/cli.d.ts +1 -0
  33. package/dist/cli.js +10 -0
  34. package/dist/cli.js.map +1 -0
  35. package/dist/engines/gpt4all/engine.d.ts +34 -0
  36. package/dist/engines/gpt4all/engine.js +357 -0
  37. package/dist/engines/gpt4all/engine.js.map +1 -0
  38. package/dist/engines/gpt4all/util.d.ts +3 -0
  39. package/dist/engines/gpt4all/util.js +29 -0
  40. package/dist/engines/gpt4all/util.js.map +1 -0
  41. package/dist/engines/index.d.ts +19 -0
  42. package/dist/engines/index.js +21 -0
  43. package/dist/engines/index.js.map +1 -0
  44. package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
  45. package/dist/engines/node-llama-cpp/engine.js +666 -0
  46. package/dist/engines/node-llama-cpp/engine.js.map +1 -0
  47. package/dist/engines/node-llama-cpp/types.d.ts +13 -0
  48. package/dist/engines/node-llama-cpp/types.js +2 -0
  49. package/dist/engines/node-llama-cpp/types.js.map +1 -0
  50. package/dist/engines/node-llama-cpp/util.d.ts +15 -0
  51. package/dist/engines/node-llama-cpp/util.js +84 -0
  52. package/dist/engines/node-llama-cpp/util.js.map +1 -0
  53. package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
  54. package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
  55. package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
  56. package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
  57. package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
  58. package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
  59. package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
  60. package/dist/engines/stable-diffusion-cpp/types.js +2 -0
  61. package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
  62. package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
  63. package/dist/engines/stable-diffusion-cpp/util.js +55 -0
  64. package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
  65. package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
  66. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
  67. package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
  68. package/dist/engines/transformers-js/engine.d.ts +37 -0
  69. package/dist/engines/transformers-js/engine.js +538 -0
  70. package/dist/engines/transformers-js/engine.js.map +1 -0
  71. package/dist/engines/transformers-js/types.d.ts +7 -0
  72. package/dist/engines/transformers-js/types.js +2 -0
  73. package/dist/engines/transformers-js/types.js.map +1 -0
  74. package/dist/engines/transformers-js/util.d.ts +7 -0
  75. package/dist/engines/transformers-js/util.js +36 -0
  76. package/dist/engines/transformers-js/util.js.map +1 -0
  77. package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
  78. package/dist/engines/transformers-js/validateModelFiles.js +133 -0
  79. package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
  80. package/dist/experiments/ChatWithVision.d.ts +11 -0
  81. package/dist/experiments/ChatWithVision.js +91 -0
  82. package/dist/experiments/ChatWithVision.js.map +1 -0
  83. package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
  84. package/dist/experiments/StableDiffPromptGenerator.js +4 -0
  85. package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
  86. package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
  87. package/dist/experiments/VoiceFunctionCall.js +51 -0
  88. package/dist/experiments/VoiceFunctionCall.js.map +1 -0
  89. package/dist/http.d.ts +19 -0
  90. package/dist/http.js +54 -0
  91. package/dist/http.js.map +1 -0
  92. package/dist/index.d.ts +7 -0
  93. package/dist/index.js +8 -0
  94. package/dist/index.js.map +1 -0
  95. package/dist/instance.d.ts +88 -0
  96. package/dist/instance.js +594 -0
  97. package/dist/instance.js.map +1 -0
  98. package/dist/lib/acquireFileLock.d.ts +7 -0
  99. package/dist/lib/acquireFileLock.js +38 -0
  100. package/dist/lib/acquireFileLock.js.map +1 -0
  101. package/dist/lib/calculateContextIdentity.d.ts +7 -0
  102. package/dist/lib/calculateContextIdentity.js +39 -0
  103. package/dist/lib/calculateContextIdentity.js.map +1 -0
  104. package/dist/lib/calculateFileChecksum.d.ts +1 -0
  105. package/dist/lib/calculateFileChecksum.js +16 -0
  106. package/dist/lib/calculateFileChecksum.js.map +1 -0
  107. package/dist/lib/copyDirectory.d.ts +6 -0
  108. package/dist/lib/copyDirectory.js +27 -0
  109. package/dist/lib/copyDirectory.js.map +1 -0
  110. package/dist/lib/decodeAudio.d.ts +1 -0
  111. package/dist/lib/decodeAudio.js +26 -0
  112. package/dist/lib/decodeAudio.js.map +1 -0
  113. package/dist/lib/downloadModelFile.d.ts +10 -0
  114. package/dist/lib/downloadModelFile.js +58 -0
  115. package/dist/lib/downloadModelFile.js.map +1 -0
  116. package/dist/lib/flattenMessageTextContent.d.ts +2 -0
  117. package/dist/lib/flattenMessageTextContent.js +11 -0
  118. package/dist/lib/flattenMessageTextContent.js.map +1 -0
  119. package/dist/lib/getCacheDirPath.d.ts +12 -0
  120. package/dist/lib/getCacheDirPath.js +31 -0
  121. package/dist/lib/getCacheDirPath.js.map +1 -0
  122. package/dist/lib/loadImage.d.ts +12 -0
  123. package/dist/lib/loadImage.js +30 -0
  124. package/dist/lib/loadImage.js.map +1 -0
  125. package/dist/lib/logger.d.ts +12 -0
  126. package/dist/lib/logger.js +98 -0
  127. package/dist/lib/logger.js.map +1 -0
  128. package/dist/lib/math.d.ts +7 -0
  129. package/dist/lib/math.js +30 -0
  130. package/dist/lib/math.js.map +1 -0
  131. package/dist/lib/resolveModelFileLocation.d.ts +15 -0
  132. package/dist/lib/resolveModelFileLocation.js +41 -0
  133. package/dist/lib/resolveModelFileLocation.js.map +1 -0
  134. package/dist/lib/util.d.ts +7 -0
  135. package/dist/lib/util.js +61 -0
  136. package/dist/lib/util.js.map +1 -0
  137. package/dist/lib/validateModelFile.d.ts +9 -0
  138. package/dist/lib/validateModelFile.js +62 -0
  139. package/dist/lib/validateModelFile.js.map +1 -0
  140. package/dist/lib/validateModelOptions.d.ts +3 -0
  141. package/dist/lib/validateModelOptions.js +23 -0
  142. package/dist/lib/validateModelOptions.js.map +1 -0
  143. package/dist/pool.d.ts +61 -0
  144. package/dist/pool.js +512 -0
  145. package/dist/pool.js.map +1 -0
  146. package/dist/server.d.ts +59 -0
  147. package/dist/server.js +221 -0
  148. package/dist/server.js.map +1 -0
  149. package/dist/standalone.d.ts +1 -0
  150. package/dist/standalone.js +306 -0
  151. package/dist/standalone.js.map +1 -0
  152. package/dist/store.d.ts +60 -0
  153. package/dist/store.js +203 -0
  154. package/dist/store.js.map +1 -0
  155. package/dist/types/completions.d.ts +57 -0
  156. package/dist/types/completions.js +2 -0
  157. package/dist/types/completions.js.map +1 -0
  158. package/dist/types/index.d.ts +326 -0
  159. package/dist/types/index.js +2 -0
  160. package/dist/types/index.js.map +1 -0
  161. package/docs/engines.md +28 -0
  162. package/docs/gpu.md +72 -0
  163. package/docs/http-api.md +147 -0
  164. package/examples/all-options.js +108 -0
  165. package/examples/chat-cli.js +56 -0
  166. package/examples/chat-server.js +65 -0
  167. package/examples/concurrency.js +70 -0
  168. package/examples/express.js +70 -0
  169. package/examples/pool.js +91 -0
  170. package/package.json +113 -0
  171. package/src/api/openai/enums.ts +20 -0
  172. package/src/api/openai/handlers/chat.ts +408 -0
  173. package/src/api/openai/handlers/completions.ts +196 -0
  174. package/src/api/openai/handlers/embeddings.ts +92 -0
  175. package/src/api/openai/handlers/images.ts +3 -0
  176. package/src/api/openai/handlers/models.ts +33 -0
  177. package/src/api/openai/handlers/transcription.ts +2 -0
  178. package/src/api/openai/index.ts +16 -0
  179. package/src/api/parseJSONRequestBody.ts +26 -0
  180. package/src/api/v1/DRAFT.md +16 -0
  181. package/src/api/v1/index.ts +37 -0
  182. package/src/cli.ts +9 -0
  183. package/src/engines/gpt4all/engine.ts +441 -0
  184. package/src/engines/gpt4all/util.ts +31 -0
  185. package/src/engines/index.ts +28 -0
  186. package/src/engines/node-llama-cpp/engine.ts +811 -0
  187. package/src/engines/node-llama-cpp/types.ts +17 -0
  188. package/src/engines/node-llama-cpp/util.ts +126 -0
  189. package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
  190. package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
  191. package/src/engines/stable-diffusion-cpp/types.ts +54 -0
  192. package/src/engines/stable-diffusion-cpp/util.ts +58 -0
  193. package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
  194. package/src/engines/transformers-js/engine.ts +659 -0
  195. package/src/engines/transformers-js/types.ts +25 -0
  196. package/src/engines/transformers-js/util.ts +40 -0
  197. package/src/engines/transformers-js/validateModelFiles.ts +168 -0
  198. package/src/experiments/ChatWithVision.ts +103 -0
  199. package/src/experiments/StableDiffPromptGenerator.ts +2 -0
  200. package/src/experiments/VoiceFunctionCall.ts +71 -0
  201. package/src/http.ts +72 -0
  202. package/src/index.ts +7 -0
  203. package/src/instance.ts +723 -0
  204. package/src/lib/acquireFileLock.ts +38 -0
  205. package/src/lib/calculateContextIdentity.ts +53 -0
  206. package/src/lib/calculateFileChecksum.ts +18 -0
  207. package/src/lib/copyDirectory.ts +29 -0
  208. package/src/lib/decodeAudio.ts +39 -0
  209. package/src/lib/downloadModelFile.ts +70 -0
  210. package/src/lib/flattenMessageTextContent.ts +19 -0
  211. package/src/lib/getCacheDirPath.ts +34 -0
  212. package/src/lib/loadImage.ts +46 -0
  213. package/src/lib/logger.ts +112 -0
  214. package/src/lib/math.ts +31 -0
  215. package/src/lib/resolveModelFileLocation.ts +49 -0
  216. package/src/lib/util.ts +75 -0
  217. package/src/lib/validateModelFile.ts +71 -0
  218. package/src/lib/validateModelOptions.ts +31 -0
  219. package/src/pool.ts +651 -0
  220. package/src/server.ts +270 -0
  221. package/src/standalone.ts +320 -0
  222. package/src/store.ts +278 -0
  223. package/src/types/completions.ts +86 -0
  224. package/src/types/index.ts +488 -0
  225. package/tsconfig.json +29 -0
  226. package/tsconfig.release.json +11 -0
  227. package/vitest.config.ts +18 -0
@@ -0,0 +1,40 @@
1
/**
 * Probes a remote resource with an HTTP HEAD request.
 *
 * @param url - Address of the remote file to check.
 * @returns The `ok` flag of the response, or `false` when the request itself
 * fails (the failure is logged via `console.error`).
 */
export async function remoteFileExists(url: string): Promise<boolean> {
  let exists = false
  try {
    const headResponse = await fetch(url, { method: 'HEAD' })
    exists = headResponse.ok
  } catch (cause) {
    console.error('Error checking remote file:', cause)
  }
  return exists
}
10
+
11
/** Repository coordinates extracted from a hub model URL. */
interface HuggingfaceModelInfo {
  /** `org/name`, or just the single leading path segment when the URL has no second segment. */
  modelId: string
  /** Fifth `/`-separated piece of the URL path (e.g. the `<branch>` in `/org/name/tree/<branch>`), `main` when absent. */
  branch: string
}

/**
 * Extracts the model id and branch from a hub model URL, like
 * `https://huggingface.co/jinaai/jina-clip-v1` or
 * `https://huggingface.co/jinaai/jina-clip-v1/tree/some-branch`.
 *
 * @param url - Absolute URL of the model repository.
 * @returns The model id and the branch (defaulting to `main`).
 * @throws TypeError when `url` cannot be parsed by the `URL` constructor.
 * @throws Error when the URL path contains no repository segment at all.
 */
export function parseHuggingfaceModelIdAndBranch(url: string): HuggingfaceModelInfo {
  const { pathname } = new URL(url)
  // pathname always starts with '/', so the first split element is the empty string.
  const [, repoOrg, repoName, , branchSegment] = pathname.split('/')
  if (!repoOrg) {
    // Previously this produced the bogus id "undefined/undefined".
    throw new Error(`Unable to determine model id from url: ${url}`)
  }
  return {
    // URLs with a single path segment (no organization) must not yield "name/undefined".
    modelId: repoName ? `${repoOrg}/${repoName}` : repoOrg,
    branch: branchSegment || 'main',
  }
}
@@ -0,0 +1,168 @@
1
+ import fs from 'node:fs'
2
+ import { AutoModel, AutoProcessor, AutoTokenizer } from '@huggingface/transformers'
3
+ import { TransformersJsModel } from '#package/types/index.js'
4
+ import { resolveModelFileLocation } from '#package/lib/resolveModelFileLocation.js'
5
+ import { TransformersJsModelConfig } from './engine.js'
6
+ import { parseHuggingfaceModelIdAndBranch, remoteFileExists } from './util.js'
7
+
8
/**
 * Loads the model from local files only and disposes it again, to confirm the
 * files on disk are usable.
 *
 * @param modelOpts - Model options; `modelClass` (default `AutoModel`) and `dtype` (default `'fp32'`) are read.
 * @param config - Engine model config; `device.gpu` selects `'gpu'` over `'cpu'`.
 * @param modelPath - Path handed to `from_pretrained`.
 * @returns A failure description when loading or disposing throws, otherwise `undefined`.
 */
async function validateModel(
  modelOpts: TransformersJsModel,
  config: TransformersJsModelConfig,
  modelPath: string,
): Promise<string | undefined> {
  const ModelClass = modelOpts.modelClass ?? AutoModel
  const targetDevice = config.device?.gpu ? 'gpu' : 'cpu'
  let failure: string | undefined
  try {
    const loadedModel = await ModelClass.from_pretrained(modelPath, {
      local_files_only: true,
      device: targetDevice,
      dtype: modelOpts.dtype || 'fp32',
    })
    await loadedModel.dispose()
  } catch (reason) {
    failure = `Failed to load model (${reason})`
  }
  return failure
}
27
+
28
/**
 * Loads the tokenizer from local files only, to confirm the files on disk are usable.
 *
 * @param modelOpts - Model options; `tokenizerClass` (default `AutoTokenizer`) is read.
 * @param config - Unused here; kept so all component validators share one signature.
 * @param modelPath - Path handed to `from_pretrained`.
 * @returns A failure description when loading throws, otherwise `undefined`.
 */
async function validateTokenizer(
  modelOpts: TransformersJsModel,
  config: TransformersJsModelConfig,
  modelPath: string,
): Promise<string | undefined> {
  const TokenizerClass = modelOpts.tokenizerClass ?? AutoTokenizer
  let failure: string | undefined
  try {
    await TokenizerClass.from_pretrained(modelPath, { local_files_only: true })
  } catch (reason) {
    failure = `Failed to load tokenizer (${reason})`
  }
  return failure
}
43
+
44
/**
 * Loads the processor from local files only, when one is expected to exist.
 *
 * Which location is checked:
 * - `modelOpts.processor` set: the location resolved from its `url` / `file`.
 * - otherwise, `modelOpts.processorClass` set: `modelPath`.
 * - otherwise, `config.url` set: `modelPath`, but only if the remote repository
 *   exposes `processor_config.json` or `preprocessor_config.json`.
 * - otherwise nothing is validated.
 *
 * @param modelOpts - Model options; `processor` and `processorClass` (default `AutoProcessor`) are read.
 * @param config - Engine model config; `modelsCachePath` and `url` are read.
 * @param modelPath - Path of the model directory.
 * @returns A failure description when any step throws, otherwise `undefined`.
 */
async function validateProcessor(
  modelOpts: TransformersJsModel,
  config: TransformersJsModelConfig,
  modelPath: string,
): Promise<string | undefined> {
  const processorClass = modelOpts.processorClass ?? AutoProcessor
  try {
    if (modelOpts.processor) {
      const processorPath = resolveModelFileLocation({
        url: modelOpts.processor.url,
        filePath: modelOpts.processor.file,
        modelsCachePath: config.modelsCachePath,
      })
      await processorClass.from_pretrained(processorPath, {
        local_files_only: true,
      })
    } else if (modelOpts.processorClass) {
      await processorClass.from_pretrained(modelPath, {
        local_files_only: true,
      })
    } else if (config.url) {
      const { branch } = parseHuggingfaceModelIdAndBranch(config.url)
      // Build the probe URLs from the repository root. Appending "/blob/<branch>/…"
      // to a URL that already carries a branch (".../org/repo/tree/<branch>") or a
      // trailing slash would produce a path that never exists.
      const hubUrl = new URL(config.url)
      const pathSegments = hubUrl.pathname.split('/')
      // The parser reads the branch from index 4; when it is present, the
      // repository root is the first three pieces ('', org, repo).
      const repoPath = pathSegments[4]
        ? pathSegments.slice(0, 3).join('/')
        : hubUrl.pathname.replace(/\/+$/, '')
      const repoUrl = `${hubUrl.origin}${repoPath}`
      const [hasProcessor, hasPreprocessor] = await Promise.all([
        remoteFileExists(`${repoUrl}/blob/${branch}/processor_config.json`),
        remoteFileExists(`${repoUrl}/blob/${branch}/preprocessor_config.json`),
      ])
      if (hasProcessor || hasPreprocessor) {
        await processorClass.from_pretrained(modelPath, {
          local_files_only: true,
        })
      }
    }
  } catch (error) {
    return `Failed to load processor (${error})`
  }
  return undefined
}
83
+
84
/** Failure descriptions for the individual components of one model; a key is absent when that component validated. */
interface ComponentValidationErrors {
  /** Set when loading the model failed. */
  model?: string
  /** Set when loading the tokenizer failed. */
  tokenizer?: string
  /** Set when loading the processor failed. */
  processor?: string
}

/** Component failures grouped by the configured model they belong to. */
interface ModelValidationErrors {
  textModel?: ComponentValidationErrors
  visionModel?: ComponentValidationErrors
  speechModel?: ComponentValidationErrors
}

/** Describes why validating the model files failed. */
export interface ModelValidationResult {
  /** Summary of the failure. */
  message: string
  /** Per-model component failures, when the failure came from loading components. */
  errors?: ModelValidationErrors
}
100
+
101
/**
 * Validates that the files at `config.location` can be loaded for every
 * configured model (text, vision, speech). When none of the three is
 * configured, a text model with empty options is validated instead.
 *
 * @param config - Engine model config describing the location and the models.
 * @returns `undefined` when everything loaded; otherwise a result carrying a
 * message and, for component failures, the errors grouped per model.
 */
export async function validateModelFiles(
  config: TransformersJsModelConfig,
): Promise<ModelValidationResult | undefined> {
  if (!fs.existsSync(config.location)) {
    return { message: `model directory does not exist: ${config.location}` }
  }

  // The loaders are handed a directory path that ends with a slash.
  const modelPath = config.location.endsWith('/') ? config.location : `${config.location}/`

  // Runs the three component checks concurrently and keeps only the failures.
  const collectComponentErrors = async (
    modelOpts: TransformersJsModel,
  ): Promise<ComponentValidationErrors> => {
    const [modelError, tokenizerError, processorError] = await Promise.all([
      validateModel(modelOpts, config, modelPath),
      validateTokenizer(modelOpts, config, modelPath),
      validateProcessor(modelOpts, config, modelPath),
    ])
    const componentErrors: ComponentValidationErrors = {}
    if (modelError) componentErrors.model = modelError
    if (tokenizerError) componentErrors.tokenizer = tokenizerError
    if (processorError) componentErrors.processor = processorError
    return componentErrors
  }

  const anyModelConfigured = Boolean(config.textModel || config.visionModel || config.speechModel)
  const pending: Array<[keyof ModelValidationErrors, Promise<ComponentValidationErrors>]> = []
  if (config.textModel || !anyModelConfigured) {
    pending.push(['textModel', collectComponentErrors(config.textModel || {})])
  }
  if (config.visionModel) {
    pending.push(['visionModel', collectComponentErrors(config.visionModel)])
  }
  if (config.speechModel) {
    pending.push(['speechModel', collectComponentErrors(config.speechModel)])
  }

  const settled = await Promise.all(
    pending.map(async ([modelKey, errorsPromise]) => [modelKey, await errorsPromise] as const),
  )

  const validationErrors: ModelValidationErrors = {}
  for (const [modelKey, componentErrors] of settled) {
    if (Object.keys(componentErrors).length) {
      validationErrors[modelKey] = componentErrors
    }
  }

  if (Object.keys(validationErrors).length === 0) {
    return undefined
  }
  return {
    message: 'Failed to validate model components',
    errors: validationErrors,
  }
}
@@ -0,0 +1,103 @@
1
+
2
+ import { EngineChatCompletionArgs, ImageToTextRequest, ModelEngine } from '#package/types/index.js'
3
+ import { CustomEngine } from '#package/engines/index.js'
4
+
5
+ // an experimental engine that replaces images with their descriptions before passing them to a chat model
6
+
7
/**
 * Experimental engine that replaces image content parts with text descriptions
 * (produced by an image-to-text model) before passing the conversation to a chat model.
 */
export class ChatWithVisionEngine extends CustomEngine implements ModelEngine {
  /** Model id requested from the pool to describe images. */
  imageToTextModel: string
  /** Model id requested from the pool to run the chat completion. */
  chatModel: string

  constructor({ imageToTextModel, chatModel }: { imageToTextModel: string; chatModel: string }) {
    super()
    this.imageToTextModel = imageToTextModel
    this.chatModel = chatModel
  }

  /**
   * Describes every `image` content part, substitutes it with a text part
   * (`User uploaded image: <description>`) and runs the chat completion on the result.
   *
   * The caller's messages and their content arrays are not modified; affected
   * messages are copied before being changed. Both pooled instances are
   * released even when a task fails.
   *
   * @param args - Chat completion arguments; `request` and `onChunk` are read.
   * @returns The result of the chat model's completion task.
   */
  async processChatCompletionTask(args: EngineChatCompletionArgs) {
    const messagesWithImageDescriptions = [...args.request.messages]
    const imageTextResults = await this.describeImages(messagesWithImageDescriptions)
    console.debug('Image text results', imageTextResults)

    for (const { text, messageIndex, contentPartIndex } of imageTextResults) {
      const message = messagesWithImageDescriptions[messageIndex]
      if (!Array.isArray(message.content)) {
        continue
      }
      // Copy before replacing so the content array owned by the caller stays untouched.
      const content = [...message.content]
      // @ts-ignore -- the text part may not fit the content-part type of this particular message
      content[contentPartIndex] = { type: 'text', text: `User uploaded image: ${text}` }
      // @ts-ignore -- the message is rebuilt from itself; its union type cannot express that
      messagesWithImageDescriptions[messageIndex] = { ...message, content }
    }

    const chatRequest = {
      ...args.request,
      messages: messagesWithImageDescriptions,
      model: this.chatModel,
    }
    const chatModel = await this.pool.requestInstance(chatRequest)
    try {
      const task = chatModel.instance.processChatCompletionTask(chatRequest, {
        onChunk: (chunk) => {
          if (args.onChunk) args.onChunk(chunk)
        },
      })
      return await task.result
    } finally {
      chatModel.release()
    }
  }

  /**
   * Runs one image-to-text task per `image` content part found in `messages`.
   *
   * @param messages - Messages to scan; they are only read.
   * @returns One entry per image with its description and position.
   */
  private async describeImages(messages: EngineChatCompletionArgs['request']['messages']) {
    const imageToTextModel = await this.pool.requestInstance({
      model: this.imageToTextModel,
    })
    try {
      const pendingDescriptions = []
      for (let m = 0; m < messages.length; m++) {
        const message = messages[m]
        if (!Array.isArray(message.content)) {
          continue
        }
        for (let p = 0; p < message.content.length; p++) {
          const contentPart = message.content[p]
          if (contentPart.type !== 'image') {
            continue
          }
          // Florence2 prompt doc
          // task prompts without inputs:
          //   <OCR>: "What is the text in the image?"
          //   <OCR_WITH_REGION>: "What is the text in the image, with regions?"
          //   <CAPTION>: "What does the image describe?"
          //   <DETAILED_CAPTION>: "Describe in detail what is shown in the image."
          //   <MORE_DETAILED_CAPTION>: "Describe with a paragraph what is shown in the image."
          //   <OD>: "Locate the objects with category name in the image."
          //   <DENSE_REGION_CAPTION>: "Locate the objects in the image, with their descriptions."
          //   <REGION_PROPOSAL>: "Locate the region proposals in the image."
          // task prompts with input:
          //   <CAPTION_TO_PHRASE_GROUNDING>: "Locate the phrases in the caption: {input}"
          //   <REFERRING_EXPRESSION_SEGMENTATION>: "Locate {input} in the image with mask"
          //   <REGION_TO_SEGMENTATION>: "What is the polygon mask of region {input}"
          //   <OPEN_VOCABULARY_DETECTION>: "Locate {input} in the image."
          //   <REGION_TO_CATEGORY>: "What is the region {input}?"
          //   <REGION_TO_DESCRIPTION>: "What does the region {input} describe?"
          //   <REGION_TO_OCR>: "What text is in the region {input}?"
          const describeImage = async () => {
            const task = imageToTextModel.instance.processImageToTextTask({
              model: this.imageToTextModel,
              image: contentPart.image,
              prompt: 'What does the image describe?',
            })
            const result = await task.result
            return {
              text: result.text,
              messageIndex: m,
              contentPartIndex: p,
            }
          }
          pendingDescriptions.push(describeImage())
        }
      }
      // Let every task settle before the instance is released, then surface the
      // first failure (if any) through Promise.all.
      await Promise.allSettled(pendingDescriptions)
      return await Promise.all(pendingDescriptions)
    } finally {
      imageToTextModel.release()
    }
  }
}
@@ -0,0 +1,2 @@
1
+ // https://huggingface.co/Gustavosta/MagicPrompt-Stable-Diffusion
2
+ // https://huggingface.co/QuantFactory/MagicPrompt-Stable-Diffusion-GGUF
@@ -0,0 +1,71 @@
1
+ import {
2
+ EngineSpeechToTextArgs,
3
+ ModelEngine,
4
+ ToolDefinition,
5
+ } from '#package/types/index.js'
6
+ import { CustomEngine } from '#package/engines/index.js'
7
+
8
/** Constructor arguments of {@link VoiceFunctionCallEngine}. */
type EngineArgs = {
  /** Model id requested from the pool for transcription. */
  speechToTextModel: string
  /** Model id requested from the pool for the chat completion. */
  chatModel: string
  /** Tool definitions passed to the chat model with every request. */
  tools: Record<string, ToolDefinition>
}

// an experimental engine that forwards a transcription to a (function calling) chat model

export class VoiceFunctionCallEngine
  extends CustomEngine
  implements ModelEngine
{
  speechToTextModel: string
  chatModel: string
  tools: Record<string, ToolDefinition>

  constructor({ speechToTextModel, chatModel, tools }: EngineArgs) {
    super()
    this.speechToTextModel = speechToTextModel
    this.chatModel = chatModel
    this.tools = tools
  }

  /**
   * Transcribes the request with the speech-to-text model.
   * The pooled instance is released even when the task fails.
   *
   * @param args - Speech-to-text arguments; `request` is forwarded with the model id replaced.
   * @returns The transcribed text.
   */
  async createTranscription(args: EngineSpeechToTextArgs) {
    const speechToTextModel = await this.pool.requestInstance({
      model: this.speechToTextModel,
    })
    try {
      const transcriptionTask = speechToTextModel.instance.processSpeechToTextTask({
        ...args.request,
        model: this.speechToTextModel,
      })
      const transcription = await transcriptionTask.result
      return transcription.text
    } finally {
      speechToTextModel.release()
    }
  }

  /**
   * Transcribes the request and sends the transcription as a single user
   * message to the chat model, together with the configured tools.
   *
   * The chat instance is requested while the transcription runs. It is always
   * released, including when the transcription fails after the instance was acquired.
   *
   * @param args - Speech-to-text arguments; `onChunk` is handed to the chat task.
   * @returns An object whose `text` is the content of the chat response message.
   */
  async processSpeechToTextTask(args: EngineSpeechToTextArgs) {
    // allSettled (rather than all) so a chat instance acquired alongside a
    // failing transcription can still be handed back to the pool.
    const [transcriptionOutcome, chatModelOutcome] = await Promise.allSettled([
      this.createTranscription(args),
      this.pool.requestInstance({
        model: this.chatModel,
      }),
    ])
    if (transcriptionOutcome.status === 'rejected') {
      if (chatModelOutcome.status === 'fulfilled') {
        chatModelOutcome.value.release()
      }
      throw transcriptionOutcome.reason
    }
    if (chatModelOutcome.status === 'rejected') {
      throw chatModelOutcome.reason
    }

    const transcription = transcriptionOutcome.value
    const chatModel = chatModelOutcome.value
    try {
      const chatTask = chatModel.instance.processChatCompletionTask({
        model: this.chatModel,
        tools: this.tools,
        messages: [
          {
            role: 'user',
            content: transcription,
          },
        ],
      }, {
        onChunk: args.onChunk,
      })
      const chatResponse = await chatTask.result
      return {
        text: chatResponse.message.content,
      }
    } finally {
      chatModel.release()
    }
  }
}
package/src/http.ts ADDED
@@ -0,0 +1,72 @@
1
+ import http from 'node:http'
2
+ import { ListenOptions } from 'node:net'
3
+ import express from 'express'
4
+ import cors from 'cors'
5
+ import { createOpenAIRequestHandlers } from '#package/api/openai/index.js'
6
+ import { createAPIMiddleware } from '#package/api/v1/index.js'
7
+ import { LogLevel } from '#package/lib/logger.js'
8
+ import { ModelServer, ModelServerOptions, startModelServer } from '#package/server.js'
9
+
10
/**
 * Builds an express router exposing the OpenAI-style endpoints
 * (`/v1/models`, `/v1/completions`, `/v1/chat/completions`, `/v1/embeddings`).
 *
 * @param modelServer - Server the request handlers are created for.
 * @returns The configured router.
 */
export function createOpenAIMiddleware(modelServer: ModelServer) {
  const handlers = createOpenAIRequestHandlers(modelServer)
  const openaiRouter = express.Router()
  openaiRouter.get('/v1/models', handlers.models)
  openaiRouter.post('/v1/completions', handlers.completions)
  openaiRouter.post('/v1/chat/completions', handlers.chatCompletions)
  openaiRouter.post('/v1/embeddings', handlers.embeddings)
  return openaiRouter
}
19
+
20
/**
 * Builds the top-level express router: `GET /` answers with the model server
 * status as JSON, `/openai` mounts the OpenAI-style endpoints and `/llama`
 * mounts the API middleware.
 *
 * @param modelServer - Server backing all routes.
 * @returns The configured router.
 */
export function createExpressMiddleware(modelServer: ModelServer) {
  const rootRouter = express.Router()
  rootRouter.get('/', (req, res) => {
    const status = modelServer.getStatus()
    res.json(status)
  })
  const openaiMiddleware = createOpenAIMiddleware(modelServer)
  rootRouter.use('/openai', openaiMiddleware)
  const apiMiddleware = createAPIMiddleware(modelServer)
  rootRouter.use('/llama', apiMiddleware)
  return rootRouter
}
29
+
30
/** Options accepted by `ModelHTTPServer`: the model server options plus HTTP specifics. */
export interface ModelHTTPServerOptions extends ModelServerOptions {
  /** Options handed to the HTTP server's `listen`; `{ port: 3000 }` is used when omitted. */
  listen?: ListenOptions
  /** Log level; passed on to the model server together with the remaining options. */
  logLevel?: LogLevel
}
34
+
35
/**
 * Couples a `ModelServer` with a node HTTP server that serves the express
 * middleware (CORS enabled, JSON bodies up to 50mb).
 */
export class ModelHTTPServer {
  /** Underlying node HTTP server; created in the constructor, bound in `start()`. */
  httpServer: http.Server
  /** Model server created from the options (everything except `listen`). */
  modelServer: ModelServer
  /** Options passed to `httpServer.listen`; defaults to `{ port: 3000 }`. */
  listenOptions: ListenOptions

  constructor(options: ModelHTTPServerOptions) {
    const { listen, ...modelServerOpts } = options
    this.modelServer = new ModelServer(modelServerOpts)
    this.listenOptions = listen ?? { port: 3000 }
    const app = express()
    app.use(
      cors(),
      express.json({ limit: '50mb' }),
      createExpressMiddleware(this.modelServer),
    )

    app.set('json spaces', 2)
    this.httpServer = http.createServer(app)
  }

  /**
   * Starts the model server, then binds the HTTP server.
   *
   * @returns A promise that resolves once the HTTP server is listening and
   * rejects when binding fails (for example when the port is already in use).
   */
  async start() {
    await this.modelServer.start()
    await new Promise<void>((resolve, reject) => {
      // Without an 'error' listener a failed listen() would surface as an
      // unhandled 'error' event instead of rejecting this promise.
      const handleListenError = (error: Error) => {
        this.httpServer.off('listening', handleListening)
        reject(error)
      }
      const handleListening = () => {
        this.httpServer.off('error', handleListenError)
        resolve()
      }
      this.httpServer.once('error', handleListenError)
      this.httpServer.once('listening', handleListening)
      this.httpServer.listen(this.listenOptions)
    })
  }

  /**
   * Asks the HTTP server to close (without waiting for it) and stops the model server.
   */
  async stop() {
    this.httpServer.close()
    await this.modelServer.stop()
  }
}
67
+
68
/**
 * Creates a `ModelHTTPServer` from the given options and starts it.
 *
 * @param options - Model server options plus the HTTP `listen` options.
 * @returns The server instance, after `start()` has completed.
 */
export async function startHTTPServer(options: ModelHTTPServerOptions) {
  const httpServer = new ModelHTTPServer(options)
  await httpServer.start()
  return httpServer
}
package/src/index.ts ADDED
@@ -0,0 +1,7 @@
1
+ export * from './api/openai/index.js'
2
+ export * from './types/index.js'
3
+ export * from './pool.js'
4
+ export * from './instance.js'
5
+ export * from './store.js'
6
+ export * from './server.js'
7
+ export * from './http.js'