@chenchaolong/plugin-vllm 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/llm/llm.d.ts +1 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +36 -4
- package/package.json +1 -1
package/dist/llm/llm.d.ts
CHANGED
@@ -10,6 +10,7 @@ export declare class VLLMLargeLanguageModel extends LargeLanguageModel {
     /**
      * Generate model schema from credentials for customizable models
      * This method dynamically generates parameter rules including thinking mode
+     * Merges parent class parameter rules (streaming, temperature, etc.) with thinking mode
      */
     getCustomizableModelSchemaFromCredentials(model: string, credentials: Record<string, any>): AIModelEntity | null;
 }
package/dist/llm/llm.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../src/llm/llm.ts"],"names":[],"mappings":"AACA,OAAO,EACL,aAAa,EAGb,aAAa,EAId,MAAM,kBAAkB,CAAA;AAEzB,OAAO,EACL,2BAA2B,EAG3B,kBAAkB,EAClB,iBAAiB,EAClB,MAAM,sBAAsB,CAAA;AAE7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAA;AAC9D,OAAO,EAAsB,oBAAoB,EAAE,MAAM,aAAa,CAAA;AAGtE,qBACa,sBAAuB,SAAQ,kBAAkB;;gBAGhD,aAAa,EAAE,oBAAoB;IAIzC,mBAAmB,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBjF,YAAY,CAAC,YAAY,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,iBAAiB
+{"version":3,"file":"llm.d.ts","sourceRoot":"","sources":["../../src/llm/llm.ts"],"names":[],"mappings":"AACA,OAAO,EACL,aAAa,EAGb,aAAa,EAId,MAAM,kBAAkB,CAAA;AAEzB,OAAO,EACL,2BAA2B,EAG3B,kBAAkB,EAClB,iBAAiB,EAClB,MAAM,sBAAsB,CAAA;AAE7B,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAA;AAC9D,OAAO,EAAsB,oBAAoB,EAAE,MAAM,aAAa,CAAA;AAGtE,qBACa,sBAAuB,SAAQ,kBAAkB;;gBAGhD,aAAa,EAAE,oBAAoB;IAIzC,mBAAmB,CAAC,KAAK,EAAE,MAAM,EAAE,WAAW,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBjF,YAAY,CAAC,YAAY,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,iBAAiB;IA+C9E;;;;OAIG;IACM,yCAAyC,CAChD,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAC/B,aAAa,GAAG,IAAI;CA0FxB"}
package/dist/llm/llm.js
CHANGED
@@ -39,8 +39,24 @@ let VLLMLargeLanguageModel = VLLMLargeLanguageModel_1 = class VLLMLargeLanguageM
             throw new Error(translate('Error.ModelCredentialsMissing', { model: copilotModel.model }));
         }
         const params = toCredentialKwargs(modelProperties, copilotModel.model);
+        // Get thinking parameter from model options (runtime parameter)
+        // This takes priority over the default value in credentials
+        const modelOptions = copilotModel.options;
+        const thinking = modelOptions?.thinking ?? modelProperties?.thinking ?? false;
+        // Merge modelKwargs with thinking parameter
+        // Ensure chat_template_kwargs structure is correct for vLLM API
+        const existingModelKwargs = (params.modelKwargs || {});
+        const existingChatTemplateKwargs = existingModelKwargs.chat_template_kwargs || {};
+        const modelKwargs = {
+            ...existingModelKwargs,
+            chat_template_kwargs: {
+                ...existingChatTemplateKwargs,
+                enable_thinking: !!thinking
+            }
+        };
         const fields = omitBy({
             ...params,
+            modelKwargs,
             streaming: copilotModel.options?.['streaming'] ?? true,
             // include token usage in the stream. this will include an additional chunk at the end of the stream with the token usage.
             streamUsage: true
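This hunk threads a `thinking` flag (runtime option first, then the credential default) into `chat_template_kwargs.enable_thinking`, the key that vLLM-style OpenAI-compatible servers read to toggle reasoning output. Below is a minimal standalone sketch of that merge; `mergeThinkingKwargs` and the simplified parameter shapes are illustrative stand-ins, not the plugin's actual API.

// Standalone sketch of the kwargs merge performed in the hunk above.
type ModelKwargs = Record<string, unknown> & {
    chat_template_kwargs?: Record<string, unknown>
}

function mergeThinkingKwargs(
    params: { modelKwargs?: ModelKwargs },
    options?: { thinking?: boolean },
    credentialDefaults?: { thinking?: boolean }
): ModelKwargs {
    // Runtime option wins, then the credential default, then false
    const thinking = options?.thinking ?? credentialDefaults?.thinking ?? false
    const existing = params.modelKwargs ?? {}
    return {
        ...existing,
        // Preserve caller-supplied chat_template_kwargs; only set enable_thinking
        chat_template_kwargs: {
            ...(existing.chat_template_kwargs ?? {}),
            enable_thinking: !!thinking
        }
    }
}

// Caller-supplied keys survive; only enable_thinking is set or overridden:
console.log(mergeThinkingKwargs(
    { modelKwargs: { chat_template_kwargs: { custom_flag: 1 } } },
    { thinking: true }
))
// -> { chat_template_kwargs: { custom_flag: 1, enable_thinking: true } }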
@@ -57,12 +73,16 @@ let VLLMLargeLanguageModel = VLLMLargeLanguageModel_1 = class VLLMLargeLanguageM
     /**
      * Generate model schema from credentials for customizable models
      * This method dynamically generates parameter rules including thinking mode
+     * Merges parent class parameter rules (streaming, temperature, etc.) with thinking mode
      */
     getCustomizableModelSchemaFromCredentials(model, credentials) {
-
+        // Get parent class parameter rules (streaming and temperature)
+        // This ensures we include common parameters from the base class
+        const parentSchema = super.getCustomizableModelSchemaFromCredentials(model, credentials);
+        const parentRules = parentSchema?.parameter_rules || [];
         // Add thinking mode parameter
         // This parameter enables thinking mode for models deployed on vLLM and SGLang
-
+        const thinkingRule = {
             name: 'thinking',
             type: ParameterType.BOOLEAN,
             label: {
@@ -75,7 +95,13 @@ let VLLMLargeLanguageModel = VLLMLargeLanguageModel_1 = class VLLMLargeLanguageM
             },
             required: false,
             default: credentials['thinking'] ?? false
-            }
+        };
+        // Merge parent rules with thinking rule
+        // Filter out any duplicate rules by name to ensure thinking rule takes precedence
+        const rules = [
+            ...parentRules,
+            thinkingRule
+        ].filter((rule, index, self) => index === self.findIndex(r => r.name === rule.name));
         // Determine completion type from credentials
         let completionType = 'chat';
         if (credentials['mode']) {
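The hunk above appends `thinkingRule` after the parent rules and then drops duplicates by `name`. A minimal sketch of that dedup, with `ParameterRule` reduced to just a name field:

// Sketch of the name-based dedup used in the filter above.
interface ParameterRule {
    name: string
    [key: string]: unknown
}

function dedupeByName(rules: ParameterRule[]): ParameterRule[] {
    // findIndex returns the FIRST match, so the earliest rule with a
    // given name is the one that survives the filter
    return rules.filter(
        (rule, index, self) => index === self.findIndex((r) => r.name === rule.name)
    )
}

const parentRules: ParameterRule[] = [{ name: 'temperature' }, { name: 'streaming' }]
const thinkingRule: ParameterRule = { name: 'thinking' }
console.log(dedupeByName([...parentRules, thinkingRule]).map((r) => r.name))
// -> [ 'temperature', 'streaming', 'thinking' ]

Note that because the first occurrence wins, a parent rule that happened to share the name 'thinking' would be the entry kept, since it sits earlier in the merged array than the appended `thinkingRule`.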
@@ -120,7 +146,13 @@ let VLLMLargeLanguageModel = VLLMLargeLanguageModel_1 = class VLLMLargeLanguageM
                 [ModelPropertyKey.MODE]: completionType,
                 [ModelPropertyKey.CONTEXT_SIZE]: contextSize
             },
-            parameter_rules: rules
+            parameter_rules: rules,
+            pricing: parentSchema?.pricing || {
+                input: credentials['input_price'] ?? 0,
+                output: credentials['output_price'] ?? 0,
+                unit: credentials['unit'] ?? 0,
+                currency: credentials['currency'] ?? 'USD'
+            }
         };
     }
 };
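The final hunk also carries pricing into the generated schema, preferring the parent schema's pricing and otherwise deriving it from credentials. A sketch of that fallback, assuming a flat `PriceConfig` shape (the real pricing type on `AIModelEntity` may differ); the credential keys are the ones the diff reads:

// Sketch of the pricing fallback in the hunk above.
interface PriceConfig {
    input: number
    output: number
    unit: number
    currency: string
}

function pricingFromCredentials(
    credentials: Record<string, any>,
    parentPricing?: PriceConfig
): PriceConfig {
    // Prefer the parent schema's pricing, mirroring `parentSchema?.pricing || {...}`
    return parentPricing ?? {
        input: credentials['input_price'] ?? 0,
        output: credentials['output_price'] ?? 0,
        unit: credentials['unit'] ?? 0,
        currency: credentials['currency'] ?? 'USD'
    }
}

console.log(pricingFromCredentials({ input_price: 0.5, output_price: 1.5 }))
// -> { input: 0.5, output: 1.5, unit: 0, currency: 'USD' }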