@chenchaolong/plugin-vllm 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,68 @@
+ # Xpert Plugin: vLLM
+
+ ## Overview
+
+ `@chenchaolong/plugin-vllm` provides a model adapter for connecting vLLM inference services to the [XpertAI](https://github.com/xpert-ai/xpert) platform. The plugin communicates with vLLM clusters via an OpenAI-compatible API, enabling agents to invoke conversational models, embedding models, vision-enhanced models, and reranking models within a unified XpertAI agentic workflow.
+
+ ## Core Features
+
+ - Provides the `VLLMPlugin` NestJS module, which automatically registers model providers, lifecycle logging, and configuration validation logic.
+ - Wraps vLLM's conversational/inference capabilities as XpertAI's `LargeLanguageModel` via `VLLMLargeLanguageModel`, supporting function calling, streaming output, and agent token statistics.
+ - Exposes `VLLMTextEmbeddingModel`, reusing LangChain's `OpenAIEmbeddings` to generate vector representations for knowledge base retrieval.
+ - Integrates `VLLMRerankModel`, leveraging the OpenAI-compatible rerank API to improve retrieval result ranking.
+ - Supports declaring capabilities such as vision, function calling, and streaming mode in plugin metadata, allowing flexible configuration of different vLLM deployments in the console.
+
+ ## Installation
+
+ ```bash
+ npm install @chenchaolong/plugin-vllm
+ ```
+
+ > **Peer Dependencies**: The host project must also provide libraries such as `@xpert-ai/plugin-sdk`, `@nestjs/common`, `@metad/contracts`, `@langchain/openai`, `lodash-es`, `chalk`, and `zod`. Refer to `package.json` for version requirements.
+
+ ## Enabling in XpertAI
+
+ 1. Add the plugin package to your system dependencies and ensure it is resolvable by Node.js.
+ 2. Before starting the service, declare the plugin in your environment variables:
+ ```bash
+ PLUGINS=@chenchaolong/plugin-vllm
+ ```
+ 3. Add a new model provider in the XpertAI admin interface or configuration file, and select `vllm`.
+
+ ## Credentials & Model Configuration
+
+ The form fields defined in `vllm.yaml` cover common deployment scenarios:
+
+ | Field | Description |
+ | --- | --- |
+ | `api_key` | vLLM service access token (leave blank if the service does not require authentication). |
+ | `endpoint_url` | Required. The base URL of the vLLM OpenAI-compatible API, e.g., `https://vllm.example.com/v1`. |
+ | `endpoint_model_name` | Specify explicitly if the model name on the server differs from the logical model name in XpertAI. |
+ | `mode` | Choose between the `chat` and `completion` inference modes. |
+ | `context_size` / `max_tokens_to_sample` | Control the context window and generation length. |
+ | `agent_though_support`, `function_calling_type`, `stream_function_calling`, `vision_support` | Indicate whether the model supports agent thought exposure, function/tool calling, streaming function calling, and multimodal input; these flags drive the capability hints shown in the UI. |
+ | `stream_mode_delimiter` | Customize the paragraph delimiter for streaming output. |
+
+ After saving the configuration, the plugin will call the `validateCredentials` method in the background, making a minimal request to the vLLM service to verify that the credentials are valid.
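For illustration, a filled-in credential payload might look like the sketch below. The field names follow the `VLLMModelCredentials` type declared in this package's `dist/types.d.ts`; every value is a placeholder for your own deployment.

```typescript
// Illustrative only: placeholder values for a hypothetical self-hosted vLLM deployment.
// The field names match the VLLMModelCredentials type shipped in dist/types.d.ts.
const credentials = {
  api_key: 'token-abc123',                     // leave empty if the server is unauthenticated
  endpoint_url: 'https://vllm.example.com/v1', // required: OpenAI-compatible base URL
  endpoint_model_name: 'qwen2.5-7b-instruct',  // hypothetical server-side model id
  mode: 'chat',
  context_size: 32768,
  max_tokens_to_sample: 4096,
  function_calling_type: 'tool_call',
  stream_function_calling: 'supported',
  stream_mode_delimiter: '\n\n'
};
```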
+
+ ## Model Capabilities
+
+ - **Conversational Models**: Uses `ChatOAICompatReasoningModel` to proxy the vLLM OpenAI API, supporting message history, function calling, and streaming output (see the credential-mapping sketch after this list).
+ - **Embedding Models**: Relies on LangChain's `OpenAIEmbeddings` for knowledge base vectorization and retrieval-augmented generation.
+ - **Reranking Models**: Wraps `OpenAICompatibleReranker` to semantically rerank retrieved results.
+ - **Vision Models**: If the vLLM inference service supports multimodal (text+image) input, enable `vision_support` in the configuration to declare multimodal capabilities to the frontend.
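As a rough sketch of how these capabilities are wired up, the snippet below shows how `toCredentialKwargs` (shipped in this package's `dist/types.js`) turns the credential fields into OpenAI-compatible client kwargs, including the `thinking` flag that is forwarded as `chat_template_kwargs.enable_thinking`. The import path and all values are illustrative.

```typescript
// Sketch based on dist/types.js in this package; import path and values are illustrative.
import { toCredentialKwargs } from './types.js';

const kwargs = toCredentialKwargs(
  {
    api_key: 'token-abc123',                    // placeholder token
    endpoint_url: 'https://vllm.example.com/v1',
    endpoint_model_name: 'qwen2.5-7b-instruct', // hypothetical server-side model id
    thinking: true                              // request the model's thinking/reasoning mode
  },
  'my-logical-model'
);

// kwargs.model === 'qwen2.5-7b-instruct'  (endpoint_model_name takes precedence over the logical name)
// kwargs.modelKwargs.chat_template_kwargs.enable_thinking === true
```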
+
+ ## Development & Debugging
+
+ From the repository root, enter the `xpertai/` directory and use Nx commands to build and test:
+
+ ```bash
+ npx nx build @chenchaolong/plugin-vllm
+ npx nx test @chenchaolong/plugin-vllm
+ ```
+
+ Build artifacts are written to `dist/` by default; Jest is configured in `jest.config.ts` for writing and running unit tests.
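A minimal unit-test sketch, assuming Jest is wired up through `jest.config.ts` as described above; the relative import path is illustrative, and the expectations follow the behavior of `toCredentialKwargs` in `dist/types.js` (trailing-slash stripping and the `no-key-required` fallback).

```typescript
// Minimal Jest sketch; adjust the import path to where the test file actually lives.
import { toCredentialKwargs } from '../src/types.js';

describe('toCredentialKwargs', () => {
  it('normalizes the endpoint URL and falls back to a placeholder API key', () => {
    const kwargs = toCredentialKwargs(
      { api_key: '', endpoint_url: 'https://vllm.example.com/v1/' },
      'qwen2.5-7b-instruct'
    );
    expect(kwargs.configuration.baseURL).toBe('https://vllm.example.com/v1');
    expect(kwargs.apiKey).toBe('no-key-required');
    expect(kwargs.model).toBe('qwen2.5-7b-instruct');
  });
});
```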
+
+ ## License
+
+ This project is licensed under the [AGPL-3.0 License](../../../LICENSE) found at the root of the repository.
package/dist/vllm-color.svg ADDED
@@ -0,0 +1 @@
+ <svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>vLLM</title><path d="M0 4.973h9.324V23L0 4.973z" fill="#FDB515"></path><path d="M13.986 4.351L22.378 0l-6.216 23H9.324l4.662-18.649z" fill="#30A2FF"></path></svg>
package/dist/i18n.d.ts ADDED
@@ -0,0 +1,4 @@
+ import { TOptions } from 'i18next';
+ export declare function translate(key: string, options?: TOptions): string;
+ export declare function initI18n(pluginDir: string): void;
+ //# sourceMappingURL=i18n.d.ts.map
package/dist/i18n.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"i18n.d.ts","sourceRoot":"","sources":["../src/i18n.ts"],"names":[],"mappings":"AACA,OAAO,EAAwB,QAAQ,EAAE,MAAM,SAAS,CAAC;AAGzD,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,QAAQ,UAIxD;AAED,wBAAgB,QAAQ,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAIhD"}
package/dist/i18n.js ADDED
@@ -0,0 +1,12 @@
+ import { createI18nInstance, RequestContext } from '@xpert-ai/plugin-sdk';
+ let i18nObject = null;
+ export function translate(key, options) {
+     options = options || {};
+     options.lng = options.lng || RequestContext.getLanguageCode();
+     return i18nObject?.t(key, options) || key;
+ }
+ export function initI18n(pluginDir) {
+     createI18nInstance(pluginDir).then((i18n) => {
+         i18nObject = i18n;
+     });
+ }
package/dist/index.d.ts ADDED
@@ -0,0 +1,6 @@
+ import type { XpertPlugin } from '@xpert-ai/plugin-sdk';
+ import { z } from 'zod';
+ declare const ConfigSchema: z.ZodObject<{}, "strip", z.ZodTypeAny, {}, {}>;
+ declare const plugin: XpertPlugin<z.infer<typeof ConfigSchema>>;
+ export default plugin;
+ //# sourceMappingURL=index.d.ts.map
package/dist/index.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAIxD,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAaxB,QAAA,MAAM,YAAY,gDAChB,CAAC;AAEH,QAAA,MAAM,MAAM,EAAE,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,YAAY,CAAC,CA4BrD,CAAC;AAEF,eAAe,MAAM,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,41 @@
+ import { readFileSync } from 'fs';
+ import { fileURLToPath } from 'url';
+ import { dirname, join } from 'path';
+ import { z } from 'zod';
+ import { SvgIcon } from './types.js';
+ import { VLLMPlugin } from './vllm.js';
+ import { initI18n } from './i18n.js';
+ const __filename = fileURLToPath(import.meta.url);
+ const __dirname = dirname(__filename);
+ const packageJson = JSON.parse(readFileSync(join(__dirname, '../package.json'), 'utf8'));
+ const ConfigSchema = z.object({});
+ const plugin = {
+     meta: {
+         name: packageJson.name,
+         version: packageJson.version,
+         category: 'model',
+         icon: {
+             type: 'svg',
+             value: SvgIcon
+         },
+         displayName: 'vLLM',
+         description: 'Provide connector for vLLM models',
+         keywords: ['vLLM', 'model'],
+         author: 'XpertAI',
+     },
+     config: {
+         schema: ConfigSchema,
+     },
+     register(ctx) {
+         ctx.logger.log('register VLLM plugin');
+         initI18n(join(__dirname, '../src'));
+         return { module: VLLMPlugin, global: true };
+     },
+     async onStart(ctx) {
+         ctx.logger.log('VLLM plugin started');
+     },
+     async onStop(ctx) {
+         ctx.logger.log('VLLM plugin stopped');
+     },
+ };
+ export default plugin;
package/dist/llm/llm.d.ts ADDED
@@ -0,0 +1,11 @@
+ import { ICopilotModel } from '@metad/contracts';
+ import { ChatOAICompatReasoningModel, LargeLanguageModel, TChatModelOptions } from '@xpert-ai/plugin-sdk';
+ import { VLLMProviderStrategy } from '../provider.strategy.js';
+ import { VLLMModelCredentials } from '../types.js';
+ export declare class VLLMLargeLanguageModel extends LargeLanguageModel {
+     #private;
+     constructor(modelProvider: VLLMProviderStrategy);
+     validateCredentials(model: string, credentials: VLLMModelCredentials): Promise<void>;
+     getChatModel(copilotModel: ICopilotModel, options?: TChatModelOptions): ChatOAICompatReasoningModel;
+ }
+ //# sourceMappingURL=llm.d.ts.map
package/dist/llm/llm.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../src/llm/llm.ts"],"names":[],"mappings":"AACA,OAAO,EAAmB,aAAa,EAAE,MAAM,kBAAkB,CAAA;AAEjE,OAAO,EACL,2BAA2B,EAG3B,kBAAkB,EAClB,iBAAiB,EAClB,MAAM,sBAAsB,CAAA;AAE7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAA;AAC9D,OAAO,EAAsB,oBAAoB,EAAE,MAAM,aAAa,CAAA;AAGtE,qBACa,sBAAuB,SAAQ,kBAAkB;;gBAGhD,aAAa,EAAE,oBAAoB;IAIzC,mBAAmB,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBjF,YAAY,CAAC,YAAY,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,iBAAiB;CA4B/E"}
package/dist/llm/llm.js ADDED
@@ -0,0 +1,62 @@
+ var _a;
+ var VLLMLargeLanguageModel_1;
+ import { __decorate, __metadata } from "tslib";
+ import { ChatOpenAI } from '@langchain/openai';
+ import { AiModelTypeEnum } from '@metad/contracts';
+ import { Injectable, Logger } from '@nestjs/common';
+ import { ChatOAICompatReasoningModel, CredentialsValidateFailedError, getErrorMessage, LargeLanguageModel } from '@xpert-ai/plugin-sdk';
+ import { isNil, omitBy } from 'lodash-es';
+ import { VLLMProviderStrategy } from '../provider.strategy.js';
+ import { toCredentialKwargs } from '../types.js';
+ import { translate } from '../i18n.js';
+ let VLLMLargeLanguageModel = VLLMLargeLanguageModel_1 = class VLLMLargeLanguageModel extends LargeLanguageModel {
+     #logger = new Logger(VLLMLargeLanguageModel_1.name);
+     constructor(modelProvider) {
+         super(modelProvider, AiModelTypeEnum.LLM);
+     }
+     async validateCredentials(model, credentials) {
+         try {
+             const chatModel = new ChatOpenAI({
+                 ...toCredentialKwargs(credentials, model),
+                 temperature: 0,
+                 maxTokens: 5
+             });
+             await chatModel.invoke([
+                 {
+                     role: 'human',
+                     content: `Hi`
+                 }
+             ]);
+         }
+         catch (err) {
+             throw new CredentialsValidateFailedError(getErrorMessage(err));
+         }
+     }
+     getChatModel(copilotModel, options) {
+         const { handleLLMTokens, modelProperties } = options ?? {};
+         const { copilot } = copilotModel;
+         if (!modelProperties) {
+             throw new Error(translate('Error.ModelCredentialsMissing', { model: copilotModel.model }));
+         }
+         const params = toCredentialKwargs(modelProperties, copilotModel.model);
+         const fields = omitBy({
+             ...params,
+             streaming: copilotModel.options?.['streaming'] ?? true,
+             // include token usage in the stream. this will include an additional chunk at the end of the stream with the token usage.
+             streamUsage: true
+         }, isNil);
+         return new ChatOAICompatReasoningModel({
+             ...fields,
+             verbose: options?.verbose,
+             callbacks: [
+                 ...this.createHandleUsageCallbacks(copilot, params.model, modelProperties, handleLLMTokens),
+                 this.createHandleLLMErrorCallbacks(fields, this.#logger)
+             ]
+         });
+     }
+ };
+ VLLMLargeLanguageModel = VLLMLargeLanguageModel_1 = __decorate([
+     Injectable(),
+     __metadata("design:paramtypes", [VLLMProviderStrategy])
+ ], VLLMLargeLanguageModel);
+ export { VLLMLargeLanguageModel };
package/dist/provider.strategy.d.ts ADDED
@@ -0,0 +1,10 @@
+ import { Logger } from '@nestjs/common';
+ import { ModelProvider } from '@xpert-ai/plugin-sdk';
+ import { VLLMModelCredentials } from './types.js';
+ export declare class VLLMProviderStrategy extends ModelProvider {
+     logger: Logger;
+     validateProviderCredentials(credentials: VLLMModelCredentials): Promise<void>;
+     getBaseUrl(credentials: VLLMModelCredentials): string;
+     getAuthorization(credentials: VLLMModelCredentials): string;
+ }
+ //# sourceMappingURL=provider.strategy.d.ts.map
package/dist/provider.strategy.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"provider.strategy.d.ts","sourceRoot":"","sources":["../src/provider.strategy.ts"],"names":[],"mappings":"AAAA,OAAO,EAAc,MAAM,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAA2B,aAAa,EAAE,MAAM,sBAAsB,CAAA;AAC7E,OAAO,EAAQ,oBAAoB,EAAE,MAAM,YAAY,CAAA;AAEvD,qBAEa,oBAAqB,SAAQ,aAAa;IAC5C,MAAM,SAAwC;IAExC,2BAA2B,CAAC,WAAW,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAI5F,UAAU,CAAC,WAAW,EAAE,oBAAoB,GAAG,MAAM;IAIrD,gBAAgB,CAAC,WAAW,EAAE,oBAAoB,GAAG,MAAM;CAG5D"}
package/dist/provider.strategy.js ADDED
@@ -0,0 +1,25 @@
+ var VLLMProviderStrategy_1;
+ import { __decorate } from "tslib";
+ import { Injectable, Logger } from '@nestjs/common';
+ import { AIModelProviderStrategy, ModelProvider } from '@xpert-ai/plugin-sdk';
+ import { VLLM } from './types.js';
+ let VLLMProviderStrategy = VLLMProviderStrategy_1 = class VLLMProviderStrategy extends ModelProvider {
+     constructor() {
+         super(...arguments);
+         this.logger = new Logger(VLLMProviderStrategy_1.name);
+     }
+     async validateProviderCredentials(credentials) {
+         // No validation needed for vLLM
+     }
+     getBaseUrl(credentials) {
+         return credentials.endpoint_url;
+     }
+     getAuthorization(credentials) {
+         return `Bearer ${credentials.api_key}`;
+     }
+ };
+ VLLMProviderStrategy = VLLMProviderStrategy_1 = __decorate([
+     Injectable(),
+     AIModelProviderStrategy(VLLM)
+ ], VLLMProviderStrategy);
+ export { VLLMProviderStrategy };
package/dist/rerank/rerank.d.ts ADDED
@@ -0,0 +1,9 @@
+ import { ICopilotModel } from '@metad/contracts';
+ import { IRerank, RerankModel, TChatModelOptions } from '@xpert-ai/plugin-sdk';
+ import { VLLMProviderStrategy } from '../provider.strategy.js';
+ export declare class VLLMRerankModel extends RerankModel {
+     constructor(modelProvider: VLLMProviderStrategy);
+     validateCredentials(model: string, credentials: Record<string, any>): Promise<void>;
+     getReranker(copilotModel: ICopilotModel, options?: TChatModelOptions): Promise<IRerank>;
+ }
+ //# sourceMappingURL=rerank.d.ts.map
package/dist/rerank/rerank.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"rerank.d.ts","sourceRoot":"","sources":["../../src/rerank/rerank.ts"],"names":[],"mappings":"AAAA,OAAO,EAAmB,aAAa,EAAE,MAAM,kBAAkB,CAAA;AAEjE,OAAO,EAEL,OAAO,EAEP,WAAW,EACX,iBAAiB,EAElB,MAAM,sBAAsB,CAAA;AAC7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAA;AAG9D,qBACa,eAAgB,SAAQ,WAAW;gBAClC,aAAa,EAAE,oBAAoB;IAIhC,mBAAmB,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC;IAcnE,WAAW,CAAC,YAAY,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,OAAO,CAAC;CAcvG"}
package/dist/rerank/rerank.js ADDED
@@ -0,0 +1,41 @@
+ import { __decorate, __metadata } from "tslib";
+ import { AiModelTypeEnum } from '@metad/contracts';
+ import { Injectable } from '@nestjs/common';
+ import { getErrorMessage, OpenAICompatibleReranker, RerankModel } from '@xpert-ai/plugin-sdk';
+ import { VLLMProviderStrategy } from '../provider.strategy.js';
+ import { translate } from '../i18n.js';
+ let VLLMRerankModel = class VLLMRerankModel extends RerankModel {
+     constructor(modelProvider) {
+         super(modelProvider, AiModelTypeEnum.RERANK);
+     }
+     async validateCredentials(model, credentials) {
+         const _credentials = credentials ?? {};
+         const reranker = new OpenAICompatibleReranker({
+             endpointUrl: _credentials.endpoint_url,
+             apiKey: _credentials.api_key,
+             endpointModelName: _credentials.endpoint_model_name
+         });
+         try {
+             await reranker.rerank([], 'test', { model });
+         }
+         catch (error) {
+             throw new Error(`Reranker credentials validation failed: ${getErrorMessage(error)}`);
+         }
+     }
+     async getReranker(copilotModel, options) {
+         const credentials = options?.modelProperties;
+         if (!credentials) {
+             throw new Error(translate('Error.ModelCredentialsMissing', { model: copilotModel.model }));
+         }
+         return new OpenAICompatibleReranker({
+             endpointUrl: credentials.endpoint_url,
+             apiKey: credentials.api_key,
+             endpointModelName: credentials.endpoint_model_name || copilotModel.model
+         });
+     }
+ };
+ VLLMRerankModel = __decorate([
+     Injectable(),
+     __metadata("design:paramtypes", [VLLMProviderStrategy])
+ ], VLLMRerankModel);
+ export { VLLMRerankModel };
package/dist/text-embedding/text-embedding.d.ts ADDED
@@ -0,0 +1,11 @@
+ import { OpenAIEmbeddings } from '@langchain/openai';
+ import { ICopilotModel } from '@metad/contracts';
+ import { VLLMModelCredentials } from '../types.js';
+ import { TChatModelOptions, TextEmbeddingModelManager } from '@xpert-ai/plugin-sdk';
+ import { VLLMProviderStrategy } from '../provider.strategy.js';
+ export declare class VLLMTextEmbeddingModel extends TextEmbeddingModelManager {
+     constructor(modelProvider: VLLMProviderStrategy);
+     getEmbeddingInstance(copilotModel: ICopilotModel, options?: TChatModelOptions): OpenAIEmbeddings;
+     validateCredentials(model: string, credentials: VLLMModelCredentials): Promise<void>;
+ }
+ //# sourceMappingURL=text-embedding.d.ts.map
package/dist/text-embedding/text-embedding.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"text-embedding.d.ts","sourceRoot":"","sources":["../../src/text-embedding/text-embedding.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAA;AACpD,OAAO,EAAmB,aAAa,EAAE,MAAM,kBAAkB,CAAA;AAEjE,OAAO,EAAsB,oBAAoB,EAAE,MAAM,aAAa,CAAA;AACtE,OAAO,EAAmD,iBAAiB,EAAE,yBAAyB,EAAE,MAAM,sBAAsB,CAAA;AACpI,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAA;AAG9D,qBACa,sBAAuB,SAAQ,yBAAyB;gBACxD,aAAa,EAAE,oBAAoB;IAI/C,oBAAoB,CAAC,YAAY,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,gBAAgB;IAqB1F,mBAAmB,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;CAe1F"}
package/dist/text-embedding/text-embedding.js ADDED
@@ -0,0 +1,48 @@
+ import { __decorate, __metadata } from "tslib";
+ import { OpenAIEmbeddings } from '@langchain/openai';
+ import { AiModelTypeEnum } from '@metad/contracts';
+ import { Injectable } from '@nestjs/common';
+ import { toCredentialKwargs } from '../types.js';
+ import { CredentialsValidateFailedError, getErrorMessage, TextEmbeddingModelManager } from '@xpert-ai/plugin-sdk';
+ import { VLLMProviderStrategy } from '../provider.strategy.js';
+ import { translate } from '../i18n.js';
+ let VLLMTextEmbeddingModel = class VLLMTextEmbeddingModel extends TextEmbeddingModelManager {
+     constructor(modelProvider) {
+         super(modelProvider, AiModelTypeEnum.TEXT_EMBEDDING);
+     }
+     getEmbeddingInstance(copilotModel, options) {
+         const { copilot } = copilotModel;
+         const { modelProvider } = copilot;
+         if (!options?.modelProperties) {
+             throw new Error(translate('Error.ModelCredentialsMissing', { model: copilotModel.model }));
+         }
+         const params = toCredentialKwargs({
+             ...(modelProvider.credentials ?? {}),
+             ...options.modelProperties,
+         }, copilotModel.model || copilotModel.copilot.copilotModel?.model);
+         return new OpenAIEmbeddings({
+             ...params,
+             // batchSize: 512, // Default value if omitted is 512. Max is 2048
+         });
+     }
+     async validateCredentials(model, credentials) {
+         try {
+             // transform credentials to kwargs for model instance
+             const params = toCredentialKwargs(credentials, model);
+             const embeddings = new OpenAIEmbeddings({
+                 ...params,
+                 // batchSize: 512, // Default value if omitted is 512. Max is 2048
+             });
+             // call embedding model
+             await embeddings.embedQuery('ping');
+         }
+         catch (ex) {
+             throw new CredentialsValidateFailedError(getErrorMessage(ex));
+         }
+     }
+ };
+ VLLMTextEmbeddingModel = __decorate([
+     Injectable(),
+     __metadata("design:paramtypes", [VLLMProviderStrategy])
+ ], VLLMTextEmbeddingModel);
+ export { VLLMTextEmbeddingModel };
package/dist/types.d.ts ADDED
@@ -0,0 +1,41 @@
+ import { ClientOptions } from "@langchain/openai";
+ export declare const VLLM = "vllm";
+ export declare const SvgIcon = "<svg height=\"1em\" style=\"flex:none;line-height:1\" viewBox=\"0 0 24 24\" width=\"1em\" xmlns=\"http://www.w3.org/2000/svg\"><title>vLLM</title><path d=\"M0 4.973h9.324V23L0 4.973z\" fill=\"#FDB515\"></path><path d=\"M13.986 4.351L22.378 0l-6.216 23H9.324l4.662-18.649z\" fill=\"#30A2FF\"></path></svg>";
+ export type VLLMModelCredentials = {
+     api_key: string;
+     endpoint_url?: string;
+     endpoint_model_name?: string;
+     mode?: 'completion' | 'chat';
+     context_size?: number;
+     max_tokens_to_sample?: number;
+     agent_though_support?: 'supported' | 'unsupported';
+     function_calling_type?: 'function_call' | 'tool_call' | 'no_call';
+     stream_function_calling?: 'supported' | 'unsupported';
+     vision_support?: 'supported' | 'unsupported';
+     stream_mode_delimiter?: string;
+     thinking?: boolean;
+ };
+ export declare function toCredentialKwargs(credentials: VLLMModelCredentials, model: string): {
+     modelKwargs: {};
+     configuration: ClientOptions;
+     temperature: number;
+     maxTokens?: number;
+     maxCompletionTokens?: number;
+     topP: number;
+     frequencyPenalty: number;
+     presencePenalty: number;
+     n: number;
+     logitBias?: Record<string, number>;
+     user?: string;
+     streaming: boolean;
+     streamUsage?: boolean;
+     modelName: string;
+     model: import("@langchain/openai").OpenAIChatModelId;
+     stop?: string[];
+     stopSequences?: string[];
+     timeout?: number;
+     openAIApiKey?: string;
+     apiKey?: string;
+     verbosity?: import("@langchain/openai").OpenAIVerbosityParam;
+ };
+ //# sourceMappingURL=types.d.ts.map
package/dist/types.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAmB,MAAM,mBAAmB,CAAC;AAEnE,eAAO,MAAM,IAAI,SAAS,CAAC;AAC3B,eAAO,MAAM,OAAO,qTAAmS,CAAA;AAEvT,MAAM,MAAM,oBAAoB,GAAG;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,MAAM,CAAC;IACzB,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,IAAI,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IAC7B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,oBAAoB,CAAC,EAAE,WAAW,GAAG,aAAa,CAAC;IACnD,qBAAqB,CAAC,EAAE,eAAe,GAAG,WAAW,GAAG,SAAS,CAAC;IAClE,uBAAuB,CAAC,EAAE,WAAW,GAAG,aAAa,CAAC;IACtD,cAAc,CAAC,EAAE,WAAW,GAAG,aAAa,CAAC;IAC7C,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,EAAE,OAAO,CAAC;CACnB,CAAA;AAGD,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,oBAAoB,EAAE,KAAK,EAAE,MAAM;;;;;;;;;;;;;;;;;;;;;;EAyBlF"}
package/dist/types.js ADDED
@@ -0,0 +1,25 @@
+ export const VLLM = 'vllm';
+ export const SvgIcon = `<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>vLLM</title><path d="M0 4.973h9.324V23L0 4.973z" fill="#FDB515"></path><path d="M13.986 4.351L22.378 0l-6.216 23H9.324l4.662-18.649z" fill="#30A2FF"></path></svg>`;
+ export function toCredentialKwargs(credentials, model) {
+     const credentialsKwargs = {
+         apiKey: credentials.api_key || 'no-key-required',
+         model: credentials.endpoint_model_name || model,
+     };
+     const configuration = {};
+     if (credentials.endpoint_url) {
+         const openaiApiBase = credentials.endpoint_url.replace(/\/$/, '');
+         configuration.baseURL = openaiApiBase;
+     }
+     // Handle thinking mode parameter
+     // Pass thinking parameter through modelKwargs for ChatOAICompatReasoningModel
+     const modelKwargs = {};
+     if (credentials.thinking != null) {
+         modelKwargs['chat_template_kwargs'] ??= {};
+         modelKwargs['chat_template_kwargs']['enable_thinking'] = !!credentials.thinking;
+     }
+     return {
+         ...credentialsKwargs,
+         modelKwargs,
+         configuration
+     };
+ }
package/dist/vllm.d.ts ADDED
@@ -0,0 +1,13 @@
+ import { IOnPluginBootstrap, IOnPluginDestroy } from '@xpert-ai/plugin-sdk';
+ export declare class VLLMPlugin implements IOnPluginBootstrap, IOnPluginDestroy {
+     private logEnabled;
+     /**
+      * Called when the plugin is being initialized.
+      */
+     onPluginBootstrap(): void | Promise<void>;
+     /**
+      * Called when the plugin is being destroyed.
+      */
+     onPluginDestroy(): void | Promise<void>;
+ }
+ //# sourceMappingURL=vllm.d.ts.map
package/dist/vllm.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"vllm.d.ts","sourceRoot":"","sources":["../src/vllm.ts"],"names":[],"mappings":"AAAA,OAAO,EAAqB,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAQ/F,qBAaa,UAAW,YAAW,kBAAkB,EAAE,gBAAgB;IAEtE,OAAO,CAAC,UAAU,CAAQ;IAE1B;;OAEG;IACH,iBAAiB,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAMzC;;OAEG;IACH,eAAe,IAAI,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;CAKvC"}
package/dist/vllm.js ADDED
@@ -0,0 +1,46 @@
+ var VLLMPlugin_1;
+ import { __decorate } from "tslib";
+ import { XpertServerPlugin } from '@xpert-ai/plugin-sdk';
+ import { ConfigModule } from '@nestjs/config';
+ import chalk from 'chalk';
+ import { VLLMProviderStrategy } from './provider.strategy.js';
+ import { VLLMLargeLanguageModel } from './llm/llm.js';
+ import { VLLMRerankModel } from './rerank/rerank.js';
+ import { VLLMTextEmbeddingModel } from './text-embedding/text-embedding.js';
+ let VLLMPlugin = VLLMPlugin_1 = class VLLMPlugin {
+     constructor() {
+         // We disable by default additional logging for each event to avoid cluttering the logs
+         this.logEnabled = true;
+     }
+     /**
+      * Called when the plugin is being initialized.
+      */
+     onPluginBootstrap() {
+         if (this.logEnabled) {
+             console.log(chalk.green(`${VLLMPlugin_1.name} is being bootstrapped...`));
+         }
+     }
+     /**
+      * Called when the plugin is being destroyed.
+      */
+     onPluginDestroy() {
+         if (this.logEnabled) {
+             console.log(chalk.green(`${VLLMPlugin_1.name} is being destroyed...`));
+         }
+     }
+ };
+ VLLMPlugin = VLLMPlugin_1 = __decorate([
+     XpertServerPlugin({
+         /**
+          * An array of modules that will be imported and registered with the plugin.
+          */
+         imports: [ConfigModule],
+         providers: [
+             VLLMProviderStrategy,
+             VLLMLargeLanguageModel,
+             VLLMRerankModel,
+             VLLMTextEmbeddingModel
+         ]
+     })
+ ], VLLMPlugin);
+ export { VLLMPlugin };
package/dist/vllm.yaml ADDED
@@ -0,0 +1,196 @@
+ provider: vllm
+ label:
+   en_US: Vllm
+ description:
+   en_US: Models provided by vllm with guided inference supported.
+   zh_Hans: Vllm openai guided 支持
+ icon_small:
+   en_US: vllm-color.svg
+ background: "#E5E7EB"
+ supported_model_types:
+   - llm
+   - text-embedding
+   - rerank
+ configurate_methods:
+   - customizable-model
+ model_credential_schema:
+   model:
+     label:
+       en_US: Model Name
+       zh_Hans: 模型名称
+     placeholder:
+       en_US: Enter your model name
+       zh_Hans: 输入模型名称
+   credential_form_schemas:
+     - variable: api_key
+       label:
+         en_US: API Key
+       type: secret-input
+       required: false
+       placeholder:
+         zh_Hans: 在此输入您的 API Key
+         en_US: Enter your API Key
+     - variable: endpoint_url
+       label:
+         zh_Hans: API endpoint URL
+         en_US: API endpoint URL
+       type: text-input
+       required: true
+       placeholder:
+         zh_Hans: Base URL, e.g. https://api.openai.com/v1
+         en_US: Base URL, e.g. https://api.openai.com/v1
+     - variable: endpoint_model_name
+       label:
+         zh_Hans: API endpoint中的模型名称
+         en_US: model name for API endpoint
+       type: text-input
+       required: false
+       placeholder:
+         zh_Hans: endpoint model name, e.g. chatgpt4.0
+         en_US: endpoint model name, e.g. chatgpt4.0
+     - variable: mode
+       show_on:
+         - variable: __model_type
+           value: llm
+       label:
+         en_US: Completion mode
+       type: select
+       required: false
+       default: chat
+       placeholder:
+         zh_Hans: 选择对话类型
+         en_US: Select completion mode
+       options:
+         - value: completion
+           label:
+             en_US: Completion
+             zh_Hans: 补全
+         - value: chat
+           label:
+             en_US: Chat
+             zh_Hans: 对话
+     - variable: context_size
+       label:
+         zh_Hans: 模型上下文长度
+         en_US: Model context size
+       required: true
+       show_on:
+         - variable: __model_type
+           value: llm
+       type: text-input
+       default: '4096'
+       placeholder:
+         zh_Hans: 在此输入您的模型上下文长度
+         en_US: Enter your Model context size
+     - variable: max_tokens_to_sample
+       label:
+         zh_Hans: 最大 token 上限
+         en_US: Upper bound for max tokens
+       show_on:
+         - variable: __model_type
+           value: llm
+       default: '4096'
+       type: text-input
+     - variable: agent_though_support
+       show_on:
+         - variable: __model_type
+           value: llm
+       label:
+         en_US: Agent Thought
+       type: select
+       required: false
+       default: not_supported
+       options:
+         - value: supported
+           label:
+             en_US: Support
+             zh_Hans: 支持
+         - value: not_supported
+           label:
+             en_US: Not Support
+             zh_Hans: 不支持
+     - variable: function_calling_type
+       show_on:
+         - variable: __model_type
+           value: llm
+       label:
+         en_US: Function calling
+       type: select
+       required: false
+       default: no_call
+       options:
+         - value: function_call
+           label:
+             en_US: Function Call
+             zh_Hans: Function Call
+         - value: tool_call
+           label:
+             en_US: Tool Call
+             zh_Hans: Tool Call
+         - value: no_call
+           label:
+             en_US: Not Support
+             zh_Hans: 不支持
+     - variable: stream_function_calling
+       show_on:
+         - variable: __model_type
+           value: llm
+       label:
+         en_US: Stream function calling
+       type: select
+       required: false
+       default: not_supported
+       options:
+         - value: supported
+           label:
+             en_US: Support
+             zh_Hans: 支持
+         - value: not_supported
+           label:
+             en_US: Not Support
+             zh_Hans: 不支持
+     - variable: vision_support
+       show_on:
+         - variable: __model_type
+           value: llm
+       label:
+         zh_Hans: Vision 支持
+         en_US: Vision Support
+       type: select
+       required: false
+       default: no_support
+       options:
+         - value: support
+           label:
+             en_US: Support
+             zh_Hans: 支持
+         - value: no_support
+           label:
+             en_US: Not Support
+             zh_Hans: 不支持
+     - variable: stream_mode_delimiter
+       label:
+         zh_Hans: 流模式返回结果的分隔符
+         en_US: Delimiter for streaming results
+       show_on:
+         - variable: __model_type
+           value: llm
+       default: '\n\n'
+       type: text-input
+     - variable: thinking
+       label:
+         zh_Hans: 思考模式
+         en_US: Thinking Mode
+       show_on:
+         - variable: __model_type
+           value: llm
+       type: boolean
+       default: false
+       help:
+         zh_Hans: 是否启用思考模式
+         en_US: Enable thinking mode
+ extra:
+   python:
+     provider_source: provider/vllm.py
+     model_sources:
+       - "models/llm/llm.py"
package/package.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "name": "@chenchaolong/plugin-vllm",
+   "version": "0.0.4",
+   "author": {
+     "name": "XpertAI",
+     "url": "https://xpertai.cn"
+   },
+   "license": "AGPL-3.0",
+   "repository": {
+     "type": "git",
+     "url": "https://github.com/xpert-ai/xpert-plugins.git"
+   },
+   "bugs": {
+     "url": "https://github.com/xpert-ai/xpert-plugins/issues"
+   },
+   "type": "module",
+   "main": "./dist/index.js",
+   "module": "./dist/index.js",
+   "types": "./dist/index.d.ts",
+   "exports": {
+     "./package.json": "./package.json",
+     ".": {
+       "@xpert-plugins-starter/source": "./src/index.ts",
+       "types": "./dist/index.d.ts",
+       "import": "./dist/index.js",
+       "default": "./dist/index.js"
+     }
+   },
+   "files": [
+     "dist",
+     "src/i18n",
+     "!**/*.tsbuildinfo"
+   ],
+   "scripts": {
+     "prepack": "node ./scripts/copy-assets.mjs"
+   },
+   "dependencies": {
+     "tslib": "^2.3.0"
+   },
+   "peerDependencies": {
+     "@langchain/openai": "0.6.9",
+     "@metad/contracts": "^3.6.1",
+     "@nestjs/common": "^11.1.6",
+     "@nestjs/config": "^4.0.2",
+     "@xpert-ai/plugin-sdk": "^3.6.3",
+     "i18next": "25.6.0",
+     "lodash-es": "4.17.21",
+     "chalk": "4.1.2",
+     "zod": "3.25.67"
+   }
+ }
@@ -0,0 +1,5 @@
+ {
+   "Error": {
+     "ModelCredentialsMissing": "Credentials for model '{{model}}' are missing or deleted"
+   }
+ }
@@ -0,0 +1,5 @@
+ {
+   "Error": {
+     "ModelCredentialsMissing": "Credentials for model '{{model}}' are missing or deleted"
+   }
+ }
@@ -0,0 +1,5 @@
+ {
+   "Error": {
+     "ModelCredentialsMissing": "模型‘{{model}}’的凭证丢失或已删除"
+   }
+ }