inference-server 1.0.0-beta.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +216 -0
- package/dist/api/openai/enums.d.ts +4 -0
- package/dist/api/openai/enums.js +17 -0
- package/dist/api/openai/enums.js.map +1 -0
- package/dist/api/openai/handlers/chat.d.ts +3 -0
- package/dist/api/openai/handlers/chat.js +358 -0
- package/dist/api/openai/handlers/chat.js.map +1 -0
- package/dist/api/openai/handlers/completions.d.ts +3 -0
- package/dist/api/openai/handlers/completions.js +169 -0
- package/dist/api/openai/handlers/completions.js.map +1 -0
- package/dist/api/openai/handlers/embeddings.d.ts +3 -0
- package/dist/api/openai/handlers/embeddings.js +74 -0
- package/dist/api/openai/handlers/embeddings.js.map +1 -0
- package/dist/api/openai/handlers/images.d.ts +0 -0
- package/dist/api/openai/handlers/images.js +4 -0
- package/dist/api/openai/handlers/images.js.map +1 -0
- package/dist/api/openai/handlers/models.d.ts +3 -0
- package/dist/api/openai/handlers/models.js +23 -0
- package/dist/api/openai/handlers/models.js.map +1 -0
- package/dist/api/openai/handlers/transcription.d.ts +0 -0
- package/dist/api/openai/handlers/transcription.js +4 -0
- package/dist/api/openai/handlers/transcription.js.map +1 -0
- package/dist/api/openai/index.d.ts +7 -0
- package/dist/api/openai/index.js +14 -0
- package/dist/api/openai/index.js.map +1 -0
- package/dist/api/parseJSONRequestBody.d.ts +2 -0
- package/dist/api/parseJSONRequestBody.js +24 -0
- package/dist/api/parseJSONRequestBody.js.map +1 -0
- package/dist/api/v1/index.d.ts +2 -0
- package/dist/api/v1/index.js +29 -0
- package/dist/api/v1/index.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +10 -0
- package/dist/cli.js.map +1 -0
- package/dist/engines/gpt4all/engine.d.ts +34 -0
- package/dist/engines/gpt4all/engine.js +357 -0
- package/dist/engines/gpt4all/engine.js.map +1 -0
- package/dist/engines/gpt4all/util.d.ts +3 -0
- package/dist/engines/gpt4all/util.js +29 -0
- package/dist/engines/gpt4all/util.js.map +1 -0
- package/dist/engines/index.d.ts +19 -0
- package/dist/engines/index.js +21 -0
- package/dist/engines/index.js.map +1 -0
- package/dist/engines/node-llama-cpp/engine.d.ts +49 -0
- package/dist/engines/node-llama-cpp/engine.js +666 -0
- package/dist/engines/node-llama-cpp/engine.js.map +1 -0
- package/dist/engines/node-llama-cpp/types.d.ts +13 -0
- package/dist/engines/node-llama-cpp/types.js +2 -0
- package/dist/engines/node-llama-cpp/types.js.map +1 -0
- package/dist/engines/node-llama-cpp/util.d.ts +15 -0
- package/dist/engines/node-llama-cpp/util.js +84 -0
- package/dist/engines/node-llama-cpp/util.js.map +1 -0
- package/dist/engines/node-llama-cpp/validateModelFile.d.ts +8 -0
- package/dist/engines/node-llama-cpp/validateModelFile.js +36 -0
- package/dist/engines/node-llama-cpp/validateModelFile.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/engine.d.ts +90 -0
- package/dist/engines/stable-diffusion-cpp/engine.js +294 -0
- package/dist/engines/stable-diffusion-cpp/engine.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/types.d.ts +3 -0
- package/dist/engines/stable-diffusion-cpp/types.js +2 -0
- package/dist/engines/stable-diffusion-cpp/types.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/util.d.ts +4 -0
- package/dist/engines/stable-diffusion-cpp/util.js +55 -0
- package/dist/engines/stable-diffusion-cpp/util.js.map +1 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.d.ts +19 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.js +91 -0
- package/dist/engines/stable-diffusion-cpp/validateModelFiles.js.map +1 -0
- package/dist/engines/transformers-js/engine.d.ts +37 -0
- package/dist/engines/transformers-js/engine.js +538 -0
- package/dist/engines/transformers-js/engine.js.map +1 -0
- package/dist/engines/transformers-js/types.d.ts +7 -0
- package/dist/engines/transformers-js/types.js +2 -0
- package/dist/engines/transformers-js/types.js.map +1 -0
- package/dist/engines/transformers-js/util.d.ts +7 -0
- package/dist/engines/transformers-js/util.js +36 -0
- package/dist/engines/transformers-js/util.js.map +1 -0
- package/dist/engines/transformers-js/validateModelFiles.d.ts +17 -0
- package/dist/engines/transformers-js/validateModelFiles.js +133 -0
- package/dist/engines/transformers-js/validateModelFiles.js.map +1 -0
- package/dist/experiments/ChatWithVision.d.ts +11 -0
- package/dist/experiments/ChatWithVision.js +91 -0
- package/dist/experiments/ChatWithVision.js.map +1 -0
- package/dist/experiments/StableDiffPromptGenerator.d.ts +0 -0
- package/dist/experiments/StableDiffPromptGenerator.js +4 -0
- package/dist/experiments/StableDiffPromptGenerator.js.map +1 -0
- package/dist/experiments/VoiceFunctionCall.d.ts +18 -0
- package/dist/experiments/VoiceFunctionCall.js +51 -0
- package/dist/experiments/VoiceFunctionCall.js.map +1 -0
- package/dist/http.d.ts +19 -0
- package/dist/http.js +54 -0
- package/dist/http.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -0
- package/dist/instance.d.ts +88 -0
- package/dist/instance.js +594 -0
- package/dist/instance.js.map +1 -0
- package/dist/lib/acquireFileLock.d.ts +7 -0
- package/dist/lib/acquireFileLock.js +38 -0
- package/dist/lib/acquireFileLock.js.map +1 -0
- package/dist/lib/calculateContextIdentity.d.ts +7 -0
- package/dist/lib/calculateContextIdentity.js +39 -0
- package/dist/lib/calculateContextIdentity.js.map +1 -0
- package/dist/lib/calculateFileChecksum.d.ts +1 -0
- package/dist/lib/calculateFileChecksum.js +16 -0
- package/dist/lib/calculateFileChecksum.js.map +1 -0
- package/dist/lib/copyDirectory.d.ts +6 -0
- package/dist/lib/copyDirectory.js +27 -0
- package/dist/lib/copyDirectory.js.map +1 -0
- package/dist/lib/decodeAudio.d.ts +1 -0
- package/dist/lib/decodeAudio.js +26 -0
- package/dist/lib/decodeAudio.js.map +1 -0
- package/dist/lib/downloadModelFile.d.ts +10 -0
- package/dist/lib/downloadModelFile.js +58 -0
- package/dist/lib/downloadModelFile.js.map +1 -0
- package/dist/lib/flattenMessageTextContent.d.ts +2 -0
- package/dist/lib/flattenMessageTextContent.js +11 -0
- package/dist/lib/flattenMessageTextContent.js.map +1 -0
- package/dist/lib/getCacheDirPath.d.ts +12 -0
- package/dist/lib/getCacheDirPath.js +31 -0
- package/dist/lib/getCacheDirPath.js.map +1 -0
- package/dist/lib/loadImage.d.ts +12 -0
- package/dist/lib/loadImage.js +30 -0
- package/dist/lib/loadImage.js.map +1 -0
- package/dist/lib/logger.d.ts +12 -0
- package/dist/lib/logger.js +98 -0
- package/dist/lib/logger.js.map +1 -0
- package/dist/lib/math.d.ts +7 -0
- package/dist/lib/math.js +30 -0
- package/dist/lib/math.js.map +1 -0
- package/dist/lib/resolveModelFileLocation.d.ts +15 -0
- package/dist/lib/resolveModelFileLocation.js +41 -0
- package/dist/lib/resolveModelFileLocation.js.map +1 -0
- package/dist/lib/util.d.ts +7 -0
- package/dist/lib/util.js +61 -0
- package/dist/lib/util.js.map +1 -0
- package/dist/lib/validateModelFile.d.ts +9 -0
- package/dist/lib/validateModelFile.js +62 -0
- package/dist/lib/validateModelFile.js.map +1 -0
- package/dist/lib/validateModelOptions.d.ts +3 -0
- package/dist/lib/validateModelOptions.js +23 -0
- package/dist/lib/validateModelOptions.js.map +1 -0
- package/dist/pool.d.ts +61 -0
- package/dist/pool.js +512 -0
- package/dist/pool.js.map +1 -0
- package/dist/server.d.ts +59 -0
- package/dist/server.js +221 -0
- package/dist/server.js.map +1 -0
- package/dist/standalone.d.ts +1 -0
- package/dist/standalone.js +306 -0
- package/dist/standalone.js.map +1 -0
- package/dist/store.d.ts +60 -0
- package/dist/store.js +203 -0
- package/dist/store.js.map +1 -0
- package/dist/types/completions.d.ts +57 -0
- package/dist/types/completions.js +2 -0
- package/dist/types/completions.js.map +1 -0
- package/dist/types/index.d.ts +326 -0
- package/dist/types/index.js +2 -0
- package/dist/types/index.js.map +1 -0
- package/docs/engines.md +28 -0
- package/docs/gpu.md +72 -0
- package/docs/http-api.md +147 -0
- package/examples/all-options.js +108 -0
- package/examples/chat-cli.js +56 -0
- package/examples/chat-server.js +65 -0
- package/examples/concurrency.js +70 -0
- package/examples/express.js +70 -0
- package/examples/pool.js +91 -0
- package/package.json +113 -0
- package/src/api/openai/enums.ts +20 -0
- package/src/api/openai/handlers/chat.ts +408 -0
- package/src/api/openai/handlers/completions.ts +196 -0
- package/src/api/openai/handlers/embeddings.ts +92 -0
- package/src/api/openai/handlers/images.ts +3 -0
- package/src/api/openai/handlers/models.ts +33 -0
- package/src/api/openai/handlers/transcription.ts +2 -0
- package/src/api/openai/index.ts +16 -0
- package/src/api/parseJSONRequestBody.ts +26 -0
- package/src/api/v1/DRAFT.md +16 -0
- package/src/api/v1/index.ts +37 -0
- package/src/cli.ts +9 -0
- package/src/engines/gpt4all/engine.ts +441 -0
- package/src/engines/gpt4all/util.ts +31 -0
- package/src/engines/index.ts +28 -0
- package/src/engines/node-llama-cpp/engine.ts +811 -0
- package/src/engines/node-llama-cpp/types.ts +17 -0
- package/src/engines/node-llama-cpp/util.ts +126 -0
- package/src/engines/node-llama-cpp/validateModelFile.ts +46 -0
- package/src/engines/stable-diffusion-cpp/engine.ts +369 -0
- package/src/engines/stable-diffusion-cpp/types.ts +54 -0
- package/src/engines/stable-diffusion-cpp/util.ts +58 -0
- package/src/engines/stable-diffusion-cpp/validateModelFiles.ts +119 -0
- package/src/engines/transformers-js/engine.ts +659 -0
- package/src/engines/transformers-js/types.ts +25 -0
- package/src/engines/transformers-js/util.ts +40 -0
- package/src/engines/transformers-js/validateModelFiles.ts +168 -0
- package/src/experiments/ChatWithVision.ts +103 -0
- package/src/experiments/StableDiffPromptGenerator.ts +2 -0
- package/src/experiments/VoiceFunctionCall.ts +71 -0
- package/src/http.ts +72 -0
- package/src/index.ts +7 -0
- package/src/instance.ts +723 -0
- package/src/lib/acquireFileLock.ts +38 -0
- package/src/lib/calculateContextIdentity.ts +53 -0
- package/src/lib/calculateFileChecksum.ts +18 -0
- package/src/lib/copyDirectory.ts +29 -0
- package/src/lib/decodeAudio.ts +39 -0
- package/src/lib/downloadModelFile.ts +70 -0
- package/src/lib/flattenMessageTextContent.ts +19 -0
- package/src/lib/getCacheDirPath.ts +34 -0
- package/src/lib/loadImage.ts +46 -0
- package/src/lib/logger.ts +112 -0
- package/src/lib/math.ts +31 -0
- package/src/lib/resolveModelFileLocation.ts +49 -0
- package/src/lib/util.ts +75 -0
- package/src/lib/validateModelFile.ts +71 -0
- package/src/lib/validateModelOptions.ts +31 -0
- package/src/pool.ts +651 -0
- package/src/server.ts +270 -0
- package/src/standalone.ts +320 -0
- package/src/store.ts +278 -0
- package/src/types/completions.ts +86 -0
- package/src/types/index.ts +488 -0
- package/tsconfig.json +29 -0
- package/tsconfig.release.json +11 -0
- package/vitest.config.ts +18 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
export async function remoteFileExists(url: string): Promise<boolean> {
|
|
2
|
+
try {
|
|
3
|
+
const response = await fetch(url, { method: 'HEAD' })
|
|
4
|
+
return response.ok
|
|
5
|
+
} catch (error) {
|
|
6
|
+
console.error('Error checking remote file:', error)
|
|
7
|
+
return false
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
interface HuggingfaceModelInfo {
|
|
12
|
+
modelId: string
|
|
13
|
+
branch: string
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
export function parseHuggingfaceModelIdAndBranch(url: string): HuggingfaceModelInfo {
|
|
17
|
+
// url to the hub model, like https://huggingface.co/jinaai/jina-clip-v1
|
|
18
|
+
const parsedUrl = new URL(url)
|
|
19
|
+
const urlSegments = parsedUrl.pathname.split('/')
|
|
20
|
+
const repoOrg = urlSegments[1]
|
|
21
|
+
const repoName = urlSegments[2]
|
|
22
|
+
const branch = urlSegments[4] || 'main'
|
|
23
|
+
return {
|
|
24
|
+
modelId: `${repoOrg}/${repoName}`,
|
|
25
|
+
branch,
|
|
26
|
+
}
|
|
27
|
+
// if (filePath) { // path to the cached model, like /path/to/huggingface/jinaai/jina-clip-v1-main
|
|
28
|
+
// const filePathSegments = filePath.split('/')
|
|
29
|
+
// const modelDir = filePathSegments[filePathSegments.length - 1]
|
|
30
|
+
// const branch = modelDir.split('-').pop() || 'main'
|
|
31
|
+
// const repoName = modelDir.replace(new RegExp(`-${branch}$`), '')
|
|
32
|
+
// const repoOrg = filePathSegments[filePathSegments.length - 2]
|
|
33
|
+
// const modelId = `${repoOrg}/${repoName}`
|
|
34
|
+
// return {
|
|
35
|
+
// modelId,
|
|
36
|
+
// branch,
|
|
37
|
+
// }
|
|
38
|
+
// }
|
|
39
|
+
// throw new Error('Either url or filePath must be provided')
|
|
40
|
+
}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import fs from 'node:fs'
|
|
2
|
+
import { AutoModel, AutoProcessor, AutoTokenizer } from '@huggingface/transformers'
|
|
3
|
+
import { TransformersJsModel } from '#package/types/index.js'
|
|
4
|
+
import { resolveModelFileLocation } from '#package/lib/resolveModelFileLocation.js'
|
|
5
|
+
import { TransformersJsModelConfig } from './engine.js'
|
|
6
|
+
import { parseHuggingfaceModelIdAndBranch, remoteFileExists } from './util.js'
|
|
7
|
+
|
|
8
|
+
async function validateModel(
|
|
9
|
+
modelOpts: TransformersJsModel,
|
|
10
|
+
config: TransformersJsModelConfig,
|
|
11
|
+
modelPath: string,
|
|
12
|
+
): Promise<string | undefined> {
|
|
13
|
+
const modelClass = modelOpts.modelClass ?? AutoModel
|
|
14
|
+
const device = config.device?.gpu ? 'gpu' : 'cpu'
|
|
15
|
+
try {
|
|
16
|
+
const model = await modelClass.from_pretrained(modelPath, {
|
|
17
|
+
local_files_only: true,
|
|
18
|
+
device: device,
|
|
19
|
+
dtype: modelOpts.dtype || 'fp32',
|
|
20
|
+
})
|
|
21
|
+
await model.dispose()
|
|
22
|
+
} catch (error) {
|
|
23
|
+
return `Failed to load model (${error})`
|
|
24
|
+
}
|
|
25
|
+
return undefined
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
async function validateTokenizer(
|
|
29
|
+
modelOpts: TransformersJsModel,
|
|
30
|
+
config: TransformersJsModelConfig,
|
|
31
|
+
modelPath: string,
|
|
32
|
+
): Promise<string | undefined> {
|
|
33
|
+
const tokenizerClass = modelOpts.tokenizerClass ?? AutoTokenizer
|
|
34
|
+
try {
|
|
35
|
+
await tokenizerClass.from_pretrained(modelPath, {
|
|
36
|
+
local_files_only: true,
|
|
37
|
+
})
|
|
38
|
+
} catch (error) {
|
|
39
|
+
return `Failed to load tokenizer (${error})`
|
|
40
|
+
}
|
|
41
|
+
return undefined
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Verify the (pre)processor can be constructed from local files.
// Returns an error message string on failure, undefined on success.
// Three cases:
//   1. an explicit processor file/url is configured -> load from its resolved location
//   2. a custom processorClass is configured        -> load from the model directory
//   3. otherwise, probe the remote repo for a processor config and only
//      attempt the load when one exists (avoids spurious failures for
//      models that ship no processor at all)
async function validateProcessor(
  modelOpts: TransformersJsModel,
  config: TransformersJsModelConfig,
  modelPath: string,
): Promise<string | undefined> {
  const processorClass = modelOpts.processorClass ?? AutoProcessor
  try {
    if (modelOpts.processor) {
      // Case 1: dedicated processor location configured on the model options.
      const processorPath = resolveModelFileLocation({
        url: modelOpts.processor.url,
        filePath: modelOpts.processor.file,
        modelsCachePath: config.modelsCachePath,
      })
      await processorClass.from_pretrained(processorPath, {
        local_files_only: true,
      })
    } else {
      if (modelOpts.processorClass) {
        // Case 2: custom class, load from the model directory itself.
        await processorClass.from_pretrained(modelPath, {
          local_files_only: true,
        })
      } else if (config.url) {
        // Case 3: check the hub for processor_config.json / preprocessor_config.json
        // on the configured branch before attempting a local load.
        const { branch } = parseHuggingfaceModelIdAndBranch(config.url)
        const [hasProcessor, hasPreprocessor] = await Promise.all([
          remoteFileExists(`${config.url}/blob/${branch}/processor_config.json`),
          remoteFileExists(`${config.url}/blob/${branch}/preprocessor_config.json`),
        ])
        if (hasProcessor || hasPreprocessor) {
          await processorClass.from_pretrained(modelPath, {
            local_files_only: true,
          })
        }
      }
    }
  } catch (error) {
    return `Failed to load processor (${error})`
  }
  return undefined
}
|
|
83
|
+
|
|
84
|
+
// Per-component error messages produced by the validate* helpers.
// A key is present only when loading that component failed.
interface ComponentValidationErrors {
  model?: string
  tokenizer?: string
  processor?: string
}

// Component errors grouped by the modality they belong to; a key is present
// only when that modality was validated and produced at least one error.
interface ModelValidationErrors {
  textModel?: ComponentValidationErrors
  visionModel?: ComponentValidationErrors
  speechModel?: ComponentValidationErrors
}

// Failure result returned by validateModelFiles (undefined means success).
export interface ModelValidationResult {
  message: string
  errors?: ModelValidationErrors
}
|
|
100
|
+
|
|
101
|
+
export async function validateModelFiles(
|
|
102
|
+
config: TransformersJsModelConfig,
|
|
103
|
+
): Promise<ModelValidationResult | undefined> {
|
|
104
|
+
if (!fs.existsSync(config.location)) {
|
|
105
|
+
return {
|
|
106
|
+
message: `model directory does not exist: ${config.location}`,
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
let modelPath = config.location
|
|
111
|
+
if (!modelPath.endsWith('/')) {
|
|
112
|
+
modelPath += '/'
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
const validateModelComponents = async (modelOpts: TransformersJsModel) => {
|
|
116
|
+
const componentValidationPromises = [
|
|
117
|
+
validateModel(modelOpts, config, modelPath),
|
|
118
|
+
validateTokenizer(modelOpts, config, modelPath),
|
|
119
|
+
validateProcessor(modelOpts, config, modelPath),
|
|
120
|
+
]
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
// if (modelOpts.processor) {
|
|
124
|
+
// componentValidationPromises.push(validateProcessor(modelOpts, config, modelPath))
|
|
125
|
+
// }
|
|
126
|
+
const [model, tokenizer, processor] = await Promise.all(componentValidationPromises)
|
|
127
|
+
const result: ComponentValidationErrors = {}
|
|
128
|
+
if (model) result.model = model
|
|
129
|
+
if (tokenizer) result.tokenizer = tokenizer
|
|
130
|
+
if (processor) result.processor = processor
|
|
131
|
+
return result
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
const modelValidationPromises: any = {}
|
|
135
|
+
const noModelConfigured = !config.textModel && !config.visionModel && !config.speechModel
|
|
136
|
+
if (config.textModel || noModelConfigured) {
|
|
137
|
+
modelValidationPromises.textModel = validateModelComponents(config.textModel || {})
|
|
138
|
+
}
|
|
139
|
+
if (config.visionModel) {
|
|
140
|
+
modelValidationPromises.visionModel = validateModelComponents(config.visionModel)
|
|
141
|
+
}
|
|
142
|
+
if (config.speechModel) {
|
|
143
|
+
modelValidationPromises.speechModel = validateModelComponents(config.speechModel)
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
await Promise.all(Object.values(modelValidationPromises))
|
|
147
|
+
const validationErrors: ModelValidationErrors = {}
|
|
148
|
+
const textModelErrors = await modelValidationPromises.textModel
|
|
149
|
+
if (textModelErrors && Object.keys(textModelErrors).length) {
|
|
150
|
+
validationErrors.textModel = textModelErrors
|
|
151
|
+
}
|
|
152
|
+
const visionModelErrors = await modelValidationPromises.visionModel
|
|
153
|
+
if (visionModelErrors && Object.keys(visionModelErrors).length) {
|
|
154
|
+
validationErrors.visionModel = visionModelErrors
|
|
155
|
+
}
|
|
156
|
+
const speechModelErrors = await modelValidationPromises.speechModel
|
|
157
|
+
if (speechModelErrors && Object.keys(speechModelErrors).length) {
|
|
158
|
+
validationErrors.speechModel = speechModelErrors
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
if (Object.keys(validationErrors).length > 0) {
|
|
162
|
+
return {
|
|
163
|
+
message: 'Failed to validate model components',
|
|
164
|
+
errors: validationErrors,
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
return undefined
|
|
168
|
+
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
|
|
2
|
+
import { EngineChatCompletionArgs, ImageToTextRequest, ModelEngine } from '#package/types/index.js'
|
|
3
|
+
import { CustomEngine } from '#package/engines/index.js'
|
|
4
|
+
|
|
5
|
+
// an experimental engine that replaces images with their descriptions before passing them to a chat model
|
|
6
|
+
|
|
7
|
+
export class ChatWithVisionEngine extends CustomEngine implements ModelEngine {
|
|
8
|
+
imageToTextModel: string
|
|
9
|
+
chatModel: string
|
|
10
|
+
|
|
11
|
+
constructor({ imageToTextModel, chatModel }: { imageToTextModel: string, chatModel: string }) {
|
|
12
|
+
super()
|
|
13
|
+
this.imageToTextModel = imageToTextModel
|
|
14
|
+
this.chatModel = chatModel
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
async processChatCompletionTask (
|
|
18
|
+
args: EngineChatCompletionArgs,
|
|
19
|
+
) {
|
|
20
|
+
|
|
21
|
+
const imageTextPromises: any[] = []
|
|
22
|
+
const imageToTextModel = await this.pool.requestInstance({
|
|
23
|
+
model: this.imageToTextModel,
|
|
24
|
+
})
|
|
25
|
+
|
|
26
|
+
const messagesWithImageDescriptions = [... args.request.messages]
|
|
27
|
+
|
|
28
|
+
for (let m = 0; m < messagesWithImageDescriptions.length; m++) {
|
|
29
|
+
const message = messagesWithImageDescriptions[m]
|
|
30
|
+
if (!Array.isArray(message.content)) {
|
|
31
|
+
continue
|
|
32
|
+
}
|
|
33
|
+
for (let p = 0; p < message.content.length; p++) {
|
|
34
|
+
const contentPart = message.content[p]
|
|
35
|
+
if (contentPart.type !== 'image') {
|
|
36
|
+
continue
|
|
37
|
+
}
|
|
38
|
+
imageTextPromises.push(new Promise(async (resolve, reject) => {
|
|
39
|
+
// Florence2 prompt doc
|
|
40
|
+
// "task_prompts_without_inputs": {
|
|
41
|
+
// "<OCR>": "What is the text in the image?",
|
|
42
|
+
// "<OCR_WITH_REGION>": "What is the text in the image, with regions?",
|
|
43
|
+
// "<CAPTION>": "What does the image describe?",
|
|
44
|
+
// "<DETAILED_CAPTION>": "Describe in detail what is shown in the image.",
|
|
45
|
+
// "<MORE_DETAILED_CAPTION>": "Describe with a paragraph what is shown in the image.",
|
|
46
|
+
// "<OD>": "Locate the objects with category name in the image.",
|
|
47
|
+
// "<DENSE_REGION_CAPTION>": "Locate the objects in the image, with their descriptions.",
|
|
48
|
+
// "<REGION_PROPOSAL>": "Locate the region proposals in the image."
|
|
49
|
+
// },
|
|
50
|
+
// "task_prompts_with_input": {
|
|
51
|
+
// "<CAPTION_TO_PHRASE_GROUNDING>": "Locate the phrases in the caption: {input}",
|
|
52
|
+
// "<REFERRING_EXPRESSION_SEGMENTATION>": "Locate {input} in the image with mask",
|
|
53
|
+
// "<REGION_TO_SEGMENTATION>": "What is the polygon mask of region {input}",
|
|
54
|
+
// "<OPEN_VOCABULARY_DETECTION>": "Locate {input} in the image.",
|
|
55
|
+
// "<REGION_TO_CATEGORY>": "What is the region {input}?",
|
|
56
|
+
// "<REGION_TO_DESCRIPTION>": "What does the region {input} describe?",
|
|
57
|
+
// "<REGION_TO_OCR>": "What text is in the region {input}?"
|
|
58
|
+
// }
|
|
59
|
+
// const imageData = await fetch(contentPart.image.url).then((res) => res.arrayBuffer())
|
|
60
|
+
const task = imageToTextModel.instance.processImageToTextTask({
|
|
61
|
+
model: this.imageToTextModel,
|
|
62
|
+
// url: contentPart.url,
|
|
63
|
+
image: contentPart.image,
|
|
64
|
+
prompt: 'What does the image describe?',
|
|
65
|
+
})
|
|
66
|
+
const result = await task.result
|
|
67
|
+
resolve({
|
|
68
|
+
text: result.text,
|
|
69
|
+
messageIndex: m,
|
|
70
|
+
contentPartIndex: p,
|
|
71
|
+
})
|
|
72
|
+
}))
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
const imageTextResults = await Promise.all(imageTextPromises)
|
|
77
|
+
imageToTextModel.release()
|
|
78
|
+
console.debug('Image text results', imageTextResults)
|
|
79
|
+
|
|
80
|
+
for (const imageTextResult of imageTextResults) {
|
|
81
|
+
const { text, messageIndex, contentPartIndex } = imageTextResult
|
|
82
|
+
const message = messagesWithImageDescriptions[messageIndex]
|
|
83
|
+
// if ('type' in message.content[contentPartIndex]) {
|
|
84
|
+
// message.content[contentPartIndex].type = 'text'
|
|
85
|
+
// @ts-ignore
|
|
86
|
+
message.content[contentPartIndex] = {
|
|
87
|
+
type: 'text',
|
|
88
|
+
text: `User uploaded image: ${text}`,
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
const chatRequest = { ...args.request, messages: messagesWithImageDescriptions, model: this.chatModel }
|
|
93
|
+
const chatModel = await this.pool.requestInstance(chatRequest)
|
|
94
|
+
const task = chatModel.instance.processChatCompletionTask(chatRequest, {
|
|
95
|
+
onChunk: (chunk) => {
|
|
96
|
+
if (args.onChunk) args.onChunk(chunk)
|
|
97
|
+
},
|
|
98
|
+
})
|
|
99
|
+
const result = await task.result
|
|
100
|
+
chatModel.release()
|
|
101
|
+
return result
|
|
102
|
+
}
|
|
103
|
+
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import {
|
|
2
|
+
EngineSpeechToTextArgs,
|
|
3
|
+
ModelEngine,
|
|
4
|
+
ToolDefinition,
|
|
5
|
+
} from '#package/types/index.js'
|
|
6
|
+
import { CustomEngine } from '#package/engines/index.js'
|
|
7
|
+
|
|
8
|
+
// Constructor arguments for VoiceFunctionCallEngine.
type EngineArgs = {
  speechToTextModel: string // model id used for transcription
  chatModel: string // model id used for the follow-up chat completion
  tools: Record<string, ToolDefinition> // tool definitions passed to the chat model
}
|
|
13
|
+
|
|
14
|
+
// an experimental engine that forwards a transcription to a (function calling) chat model
|
|
15
|
+
|
|
16
|
+
export class VoiceFunctionCallEngine
|
|
17
|
+
extends CustomEngine
|
|
18
|
+
implements ModelEngine
|
|
19
|
+
{
|
|
20
|
+
speechToTextModel: string
|
|
21
|
+
chatModel: string
|
|
22
|
+
tools: Record<string, ToolDefinition>
|
|
23
|
+
|
|
24
|
+
constructor({ speechToTextModel, chatModel, tools }: EngineArgs) {
|
|
25
|
+
super()
|
|
26
|
+
this.speechToTextModel = speechToTextModel
|
|
27
|
+
this.chatModel = chatModel
|
|
28
|
+
this.tools = tools
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
async createTranscription(args: EngineSpeechToTextArgs) {
|
|
32
|
+
const speechToTextModel = await this.pool.requestInstance({
|
|
33
|
+
model: this.speechToTextModel,
|
|
34
|
+
})
|
|
35
|
+
const transcriptionTask = speechToTextModel.instance.processSpeechToTextTask(
|
|
36
|
+
{
|
|
37
|
+
...args.request,
|
|
38
|
+
model: this.speechToTextModel,
|
|
39
|
+
},
|
|
40
|
+
)
|
|
41
|
+
const transcription = await transcriptionTask.result
|
|
42
|
+
speechToTextModel.release()
|
|
43
|
+
return transcription.text
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
async processSpeechToTextTask(args: EngineSpeechToTextArgs) {
|
|
47
|
+
const [transcription, chatModel] = await Promise.all([
|
|
48
|
+
this.createTranscription(args),
|
|
49
|
+
this.pool.requestInstance({
|
|
50
|
+
model: this.chatModel,
|
|
51
|
+
}),
|
|
52
|
+
])
|
|
53
|
+
const chatTask = chatModel.instance.processChatCompletionTask({
|
|
54
|
+
model: this.chatModel,
|
|
55
|
+
tools: this.tools,
|
|
56
|
+
messages: [
|
|
57
|
+
{
|
|
58
|
+
role: 'user',
|
|
59
|
+
content: transcription,
|
|
60
|
+
},
|
|
61
|
+
],
|
|
62
|
+
}, {
|
|
63
|
+
onChunk: args.onChunk,
|
|
64
|
+
})
|
|
65
|
+
const chatResponse = await chatTask.result
|
|
66
|
+
chatModel.release()
|
|
67
|
+
return {
|
|
68
|
+
text: chatResponse.message.content,
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
package/src/http.ts
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import http from 'node:http'
|
|
2
|
+
import { ListenOptions } from 'node:net'
|
|
3
|
+
import express from 'express'
|
|
4
|
+
import cors from 'cors'
|
|
5
|
+
import { createOpenAIRequestHandlers } from '#package/api/openai/index.js'
|
|
6
|
+
import { createAPIMiddleware } from '#package/api/v1/index.js'
|
|
7
|
+
import { LogLevel } from '#package/lib/logger.js'
|
|
8
|
+
import { ModelServer, ModelServerOptions, startModelServer } from '#package/server.js'
|
|
9
|
+
|
|
10
|
+
export function createOpenAIMiddleware(modelServer: ModelServer) {
|
|
11
|
+
const router = express.Router()
|
|
12
|
+
const requestHandlers = createOpenAIRequestHandlers(modelServer)
|
|
13
|
+
router.get('/v1/models', requestHandlers.models)
|
|
14
|
+
router.post('/v1/completions', requestHandlers.completions)
|
|
15
|
+
router.post('/v1/chat/completions', requestHandlers.chatCompletions)
|
|
16
|
+
router.post('/v1/embeddings', requestHandlers.embeddings)
|
|
17
|
+
return router
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export function createExpressMiddleware(modelServer: ModelServer) {
|
|
21
|
+
const router = express.Router()
|
|
22
|
+
router.get('/', (req, res) => {
|
|
23
|
+
res.json(modelServer.getStatus())
|
|
24
|
+
})
|
|
25
|
+
router.use('/openai', createOpenAIMiddleware(modelServer))
|
|
26
|
+
router.use('/llama', createAPIMiddleware(modelServer))
|
|
27
|
+
return router
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// Options for ModelHTTPServer: everything ModelServer accepts, plus where
// the HTTP listener should bind.
export interface ModelHTTPServerOptions extends ModelServerOptions {
  listen?: ListenOptions // net listen options; defaults to { port: 3000 }
  logLevel?: LogLevel
}
|
|
34
|
+
|
|
35
|
+
export class ModelHTTPServer {
|
|
36
|
+
httpServer: http.Server
|
|
37
|
+
modelServer: ModelServer
|
|
38
|
+
listenOptions: ListenOptions
|
|
39
|
+
|
|
40
|
+
constructor(options: ModelHTTPServerOptions) {
|
|
41
|
+
const { listen, ...modelServerOpts } = options
|
|
42
|
+
this.modelServer = new ModelServer(modelServerOpts)
|
|
43
|
+
this.listenOptions = listen ?? { port: 3000 }
|
|
44
|
+
const app = express()
|
|
45
|
+
app.use(
|
|
46
|
+
cors(),
|
|
47
|
+
express.json({ limit: '50mb' }),
|
|
48
|
+
createExpressMiddleware(this.modelServer),
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
app.set('json spaces', 2)
|
|
52
|
+
this.httpServer = http.createServer(app)
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
async start() {
|
|
56
|
+
await this.modelServer.start()
|
|
57
|
+
await new Promise<void>((resolve) => {
|
|
58
|
+
this.httpServer.listen(this.listenOptions, resolve)
|
|
59
|
+
})
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
async stop() {
|
|
63
|
+
this.httpServer.close()
|
|
64
|
+
await this.modelServer.stop()
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
export async function startHTTPServer(options: ModelHTTPServerOptions) {
|
|
69
|
+
const server = new ModelHTTPServer(options)
|
|
70
|
+
await server.start()
|
|
71
|
+
return server
|
|
72
|
+
}
|