@loonylabs/llm-middleware 2.30.1 → 2.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/middleware/services/llm/index.d.ts +1 -0
- package/dist/middleware/services/llm/index.d.ts.map +1 -1
- package/dist/middleware/services/llm/index.js +2 -0
- package/dist/middleware/services/llm/index.js.map +1 -1
- package/dist/middleware/services/llm/llm.service.d.ts +8 -0
- package/dist/middleware/services/llm/llm.service.d.ts.map +1 -1
- package/dist/middleware/services/llm/llm.service.js +32 -2
- package/dist/middleware/services/llm/llm.service.js.map +1 -1
- package/dist/middleware/services/llm/model-safety-profiles.d.ts +68 -0
- package/dist/middleware/services/llm/model-safety-profiles.d.ts.map +1 -0
- package/dist/middleware/services/llm/model-safety-profiles.js +70 -0
- package/dist/middleware/services/llm/model-safety-profiles.js.map +1 -0
- package/package.json +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/middleware/services/llm/index.ts"],"names":[],"mappings":"AACA,cAAc,eAAe,CAAC;AAG9B,cAAc,SAAS,CAAC;AAGxB,cAAc,aAAa,CAAC;AAG5B,cAAc,YAAY,CAAC;AAG3B,cAAc,SAAS,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/middleware/services/llm/index.ts"],"names":[],"mappings":"AACA,cAAc,eAAe,CAAC;AAG9B,cAAc,yBAAyB,CAAC;AAGxC,cAAc,SAAS,CAAC;AAGxB,cAAc,aAAa,CAAC;AAG5B,cAAc,YAAY,CAAC;AAG3B,cAAc,SAAS,CAAC"}
|
|
@@ -16,6 +16,8 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
17
|
// Main LLM Service
|
|
18
18
|
__exportStar(require("./llm.service"), exports);
|
|
19
|
+
// Per-model safety profiles (intrinsic operating envelopes, enforced in LLMService)
|
|
20
|
+
__exportStar(require("./model-safety-profiles"), exports);
|
|
19
21
|
// Types
|
|
20
22
|
__exportStar(require("./types"), exports);
|
|
21
23
|
// Providers
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/middleware/services/llm/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,mBAAmB;AACnB,gDAA8B;AAE9B,QAAQ;AACR,0CAAwB;AAExB,YAAY;AACZ,8CAA4B;AAE5B,qEAAqE;AACrE,6CAA2B;AAE3B,QAAQ;AACR,0CAAwB"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/middleware/services/llm/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,mBAAmB;AACnB,gDAA8B;AAE9B,oFAAoF;AACpF,0DAAwC;AAExC,QAAQ;AACR,0CAAwB;AAExB,YAAY;AACZ,8CAA4B;AAE5B,qEAAqE;AACrE,6CAA2B;AAE3B,QAAQ;AACR,0CAAwB"}
|
|
@@ -60,6 +60,14 @@ export declare class LLMService {
|
|
|
60
60
|
call(prompt: MultimodalContent, options?: CommonLLMOptions & {
|
|
61
61
|
provider?: LLMProvider;
|
|
62
62
|
}): Promise<CommonLLMResponse | null>;
|
|
63
|
+
/**
|
|
64
|
+
* Centrally enforce per-model safety envelopes (see model-safety-profiles.ts)
|
|
65
|
+
* for every provider: clamp reasoning_effort up to the model's floor and
|
|
66
|
+
* temperature down to its ceiling, so a fragile model (e.g. GLM-5.1) cannot be
|
|
67
|
+
* driven into degeneration regardless of which consumer / use case called it.
|
|
68
|
+
* Only ever moves values toward the safe envelope; logs whenever it fires.
|
|
69
|
+
*/
|
|
70
|
+
private applySafety;
|
|
63
71
|
/**
|
|
64
72
|
* Get list of available providers
|
|
65
73
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm.service.d.ts","sourceRoot":"","sources":["../../../../src/middleware/services/llm/llm.service.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,+BAA+B,CAAC;AAKhE,OAAO,EAAoB,sBAAsB,EAAE,MAAM,oBAAoB,CAAC;AAI9E,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAC;AAC3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"llm.service.d.ts","sourceRoot":"","sources":["../../../../src/middleware/services/llm/llm.service.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,+BAA+B,CAAC;AAKhE,OAAO,EAAoB,sBAAsB,EAAE,MAAM,oBAAoB,CAAC;AAI9E,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAC;AAC3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAI7D,MAAM,WAAW,iBAAiB;IAChC,wEAAwE;IACxE,cAAc,CAAC,EAAE,sBAAsB,CAAC;CACzC;AAED,qBAAa,UAAU;IACrB,OAAO,CAAC,SAAS,CAAoC;IACrD,OAAO,CAAC,eAAe,CAAmC;gBAE9C,OAAO,CAAC,EAAE,iBAAiB;IAavC;;OAEG;IACI,WAAW,CAAC,QAAQ,EAAE,WAAW,GAAG,eAAe;IAQ1D;;;;;;;;;;;;;;;;;;OAkBG;IACI,gBAAgB,CAAC,QAAQ,EAAE,WAAW,EAAE,QAAQ,EAAE,eAAe,GAAG,IAAI;IAI/E;;OAEG;IACI,kBAAkB,CAAC,QAAQ,EAAE,WAAW,GAAG,IAAI;IAOtD;;OAEG;IACI,kBAAkB,IAAI,WAAW;IAIxC;;;OAGG;IACU,qBAAqB,CAChC,UAAU,EAAE,iBAAiB,EAC7B,aAAa,EAAE,MAAM,EACrB,OAAO,GAAE,gBAAgB,GAAG;QAAE,QAAQ,CAAC,EAAE,WAAW,CAAA;KAAO,GAC1D,OAAO,CAAC,iBAAiB,GAAG,IAAI,CAAC;IAMpC;;;OAGG;IACU,IAAI,CACf,MAAM,EAAE,iBAAiB,EACzB,OAAO,GAAE,gBAAgB,GAAG;QAAE,QAAQ,CAAC,EAAE,WAAW,CAAA;KAAO,GAC1D,OAAO,CAAC,iBAAiB,GAAG,IAAI,CAAC;IAMpC;;;;;;OAMG;IACH,OAAO,CAAC,WAAW;IAsBnB;;OAEG;IACI,qBAAqB,IAAI,WAAW,EAAE;CAG9C;AAGD,eAAO,MAAM,UAAU,YAAmB,CAAC"}
|
|
@@ -14,6 +14,8 @@ const bedrock_provider_1 = require("./providers/bedrock-provider");
|
|
|
14
14
|
const azure_openai_provider_1 = require("./providers/azure-openai-provider");
|
|
15
15
|
const inceptron_provider_1 = require("./providers/inceptron-provider");
|
|
16
16
|
const types_1 = require("./types");
|
|
17
|
+
const model_safety_profiles_1 = require("./model-safety-profiles");
|
|
18
|
+
const logging_utils_1 = require("../../shared/utils/logging.utils");
|
|
17
19
|
class LLMService {
|
|
18
20
|
constructor(options) {
|
|
19
21
|
this.defaultProvider = types_1.LLMProvider.OLLAMA;
|
|
@@ -82,7 +84,7 @@ class LLMService {
|
|
|
82
84
|
async callWithSystemMessage(userPrompt, systemMessage, options = {}) {
|
|
83
85
|
const provider = options.provider || this.defaultProvider;
|
|
84
86
|
const providerInstance = this.getProvider(provider);
|
|
85
|
-
return providerInstance.callWithSystemMessage(userPrompt, systemMessage, options);
|
|
87
|
+
return providerInstance.callWithSystemMessage(userPrompt, systemMessage, this.applySafety(options));
|
|
86
88
|
}
|
|
87
89
|
/**
|
|
88
90
|
* Call an LLM with the default system message
|
|
@@ -91,7 +93,35 @@ class LLMService {
|
|
|
91
93
|
async call(prompt, options = {}) {
|
|
92
94
|
const provider = options.provider || this.defaultProvider;
|
|
93
95
|
const providerInstance = this.getProvider(provider);
|
|
94
|
-
return providerInstance.call(prompt, options);
|
|
96
|
+
return providerInstance.call(prompt, this.applySafety(options));
|
|
97
|
+
}
|
|
98
|
+
/**
|
|
99
|
+
* Centrally enforce per-model safety envelopes (see model-safety-profiles.ts)
|
|
100
|
+
* for every provider: clamp reasoning_effort up to the model's floor and
|
|
101
|
+
* temperature down to its ceiling, so a fragile model (e.g. GLM-5.1) cannot be
|
|
102
|
+
* driven into degeneration regardless of which consumer / use case called it.
|
|
103
|
+
* Only ever moves values toward the safe envelope; logs whenever it fires.
|
|
104
|
+
*/
|
|
105
|
+
applySafety(options) {
|
|
106
|
+
const safe = (0, model_safety_profiles_1.applyModelSafetyProfile)({
|
|
107
|
+
model: options.model,
|
|
108
|
+
temperature: options.temperature,
|
|
109
|
+
reasoningEffort: options.reasoningEffort,
|
|
110
|
+
});
|
|
111
|
+
if (!safe.clamped.reasoningEffort && !safe.clamped.temperature) {
|
|
112
|
+
return options;
|
|
113
|
+
}
|
|
114
|
+
logging_utils_1.logger.warn('Applied model safety profile (params clamped to safe envelope)', {
|
|
115
|
+
context: 'LLMService',
|
|
116
|
+
metadata: {
|
|
117
|
+
model: options.model,
|
|
118
|
+
profile: safe.profile?.match,
|
|
119
|
+
reasoningEffort: safe.clamped.reasoningEffort,
|
|
120
|
+
temperature: safe.clamped.temperature,
|
|
121
|
+
note: safe.profile?.note,
|
|
122
|
+
},
|
|
123
|
+
});
|
|
124
|
+
return { ...options, temperature: safe.temperature, reasoningEffort: safe.reasoningEffort };
|
|
95
125
|
}
|
|
96
126
|
/**
|
|
97
127
|
* Get list of available providers
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm.service.js","sourceRoot":"","sources":["../../../../src/middleware/services/llm/llm.service.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAGH,iEAA6D;AAC7D,uEAAmE;AACnE,iEAA6D;AAC7D,qEAAiE;AACjE,+CAA8E;AAC9E,mEAA+D;AAC/D,6EAAwE;AACxE,uEAAmE;AACnE,mCAA2E;
|
|
1
|
+
{"version":3,"file":"llm.service.js","sourceRoot":"","sources":["../../../../src/middleware/services/llm/llm.service.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAGH,iEAA6D;AAC7D,uEAAmE;AACnE,iEAA6D;AAC7D,qEAAiE;AACjE,+CAA8E;AAC9E,mEAA+D;AAC/D,6EAAwE;AACxE,uEAAmE;AACnE,mCAA2E;AAE3E,mEAAkE;AAClE,oEAA0D;AAO1D,MAAa,UAAU;IAIrB,YAAY,OAA2B;QAF/B,oBAAe,GAAgB,mBAAW,CAAC,MAAM,CAAC;QAGxD,IAAI,CAAC,SAAS,GAAG,IAAI,GAAG,EAAE,CAAC;QAC3B,iCAAiC;QACjC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,mBAAW,CAAC,MAAM,EAAE,IAAI,gCAAc,EAAE,CAAC,CAAC;QAC7D,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,mBAAW,CAAC,SAAS,EAAE,IAAI,sCAAiB,EAAE,CAAC,CAAC;QACnE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,mBAAW,CAAC,MAAM,EAAE,IAAI,gCAAc,EAAE,CAAC,CAAC;QAC7D,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,mBAAW,CAAC,QAAQ,EAAE,IAAI,oCAAgB,EAAE,CAAC,CAAC;QACjE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,mBAAW,CAAC,SAAS,EAAE,IAAI,yBAAgB,CAAC,OAAO,EAAE,cAAc,CAAC,CAAC,CAAC;QACzF,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,mBAAW,CAAC,OAAO,EAAE,IAAI,kCAAe,EAAE,CAAC,CAAC;QAC/D,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,mBAAW,CAAC,YAAY,EAAE,IAAI,2CAAmB,EAAE,CAAC,CAAC;QACxE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,mBAAW,CAAC,SAAS,EAAE,IAAI,sCAAiB,EAAE,CAAC,CAAC;IACrE,CAAC;IAED;;OAEG;IACI,WAAW,CAAC,QAAqB;QACtC,MAAM,gBAAgB,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACtD,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,YAAY,QAAQ,2CAA2C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjI,CAAC;QACD,OAAO,gBAAgB,CAAC;IAC1B,CAAC;IAED;;;;;;;;;;;;;;;;;;OAkBG;IACI,gBAAgB,CAAC,QAAqB,EAAE,QAAyB;QACtE,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IACzC,CAAC;IAED;;OAEG;IACI,kBAAkB,CAAC,QAAqB;QAC7C,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,YAAY,QAAQ,mBAAmB,CAAC,CAAC;QAC3D,CAAC;QACD,IAAI,CAAC,eAAe,GAAG,QAAQ,CAAC;IAClC,CAAC;IAED;;OAEG;IACI,kBAAkB;QACvB,OAAO,IAAI,CAAC,eAAe,CAAC;IAC9B,CAAC;IAED;;;OAGG;IACI,KAAK,CAAC,qBAAqB,CAChC,UAA6B,EAC7B,aAAqB,EACrB,UAAyD,EAAE;QAE3D,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC,eAAe,CAAC;QAC1D,M
AAM,gBAAgB,GAAG,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QACpD,OAAO,gBAAgB,CAAC,qBAAqB,CAAC,UAAU,EAAE,aAAa,EAAE,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;IACtG,CAAC;IAED;;;OAGG;IACI,KAAK,CAAC,IAAI,CACf,MAAyB,EACzB,UAAyD,EAAE;QAE3D,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC,eAAe,CAAC;QAC1D,MAAM,gBAAgB,GAAG,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QACpD,OAAO,gBAAgB,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;IAClE,CAAC;IAED;;;;;;OAMG;IACK,WAAW,CAA0D,OAAU;QACrF,MAAM,IAAI,GAAG,IAAA,+CAAuB,EAAC;YACnC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,WAAW,EAAE,OAAO,CAAC,WAAW;YAChC,eAAe,EAAE,OAAO,CAAC,eAAe;SACzC,CAAC,CAAC;QACH,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,eAAe,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;YAC/D,OAAO,OAAO,CAAC;QACjB,CAAC;QACD,sBAAM,CAAC,IAAI,CAAC,gEAAgE,EAAE;YAC5E,OAAO,EAAE,YAAY;YACrB,QAAQ,EAAE;gBACR,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,KAAK;gBAC5B,eAAe,EAAE,IAAI,CAAC,OAAO,CAAC,eAAe;gBAC7C,WAAW,EAAE,IAAI,CAAC,OAAO,CAAC,WAAW;gBACrC,IAAI,EAAE,IAAI,CAAC,OAAO,EAAE,IAAI;aACzB;SACF,CAAC,CAAC;QACH,OAAO,EAAE,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,CAAC,WAAW,EAAE,eAAe,EAAE,IAAI,CAAC,eAAe,EAAE,CAAC;IAC9F,CAAC;IAED;;OAEG;IACI,qBAAqB;QAC1B,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IAC3C,CAAC;CACF;AAlID,gCAkIC;AAED,4BAA4B;AACf,QAAA,UAAU,GAAG,IAAI,UAAU,EAAE,CAAC"}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Declarative per-model safety profiles.
|
|
3
|
+
*
|
|
4
|
+
* Some models have an **intrinsic safe operating envelope**: outside it they
|
|
5
|
+
* degenerate (garbled multi-script output, runaway repetition, empty content,
|
|
6
|
+
* leaked reasoning markers). That is a property of the *model / serving stack*,
|
|
7
|
+
* independent of any consumer — so the knowledge lives here in the middleware and
|
|
8
|
+
* is enforced centrally in `LLMService` for **every** provider.
|
|
9
|
+
*
|
|
10
|
+
* This is **not** for API-format differences between models/providers (those live
|
|
11
|
+
* in the providers, e.g. Gemini 2.5 `thinkingBudget` vs 3.x `thinkingLevel`). It is
|
|
12
|
+
* *only* for "this model misbehaves outside these bounds" safety clamps:
|
|
13
|
+
* - `minReasoningEffort` — the effort is raised UP to this floor.
|
|
14
|
+
* - `maxTemperature` — the temperature is lowered DOWN to this ceiling.
|
|
15
|
+
*
|
|
16
|
+
* Consumers may still pass stricter values; the clamp only ever moves a value
|
|
17
|
+
* toward the safe envelope, never the other way.
|
|
18
|
+
*/
|
|
19
|
+
import { ReasoningEffort } from './types';
|
|
20
|
+
export interface ModelSafetyProfile {
|
|
21
|
+
/** Case-insensitive substring matched against the model id. */
|
|
22
|
+
match: string;
|
|
23
|
+
/** Effort floor — a lower (or unset) requested effort is raised to this. */
|
|
24
|
+
minReasoningEffort?: ReasoningEffort;
|
|
25
|
+
/** Temperature ceiling — a higher requested temperature is lowered to this. */
|
|
26
|
+
maxTemperature?: number;
|
|
27
|
+
/** Short human-readable rationale (logged when a clamp fires). */
|
|
28
|
+
note?: string;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Known-fragile models. Keep entries small, evidence-based, and documented
|
|
32
|
+
* (link the verifying notes in the provider doc).
|
|
33
|
+
*/
|
|
34
|
+
export declare const MODEL_SAFETY_PROFILES: ModelSafetyProfile[];
|
|
35
|
+
/** Find the first safety profile whose `match` is a substring of the model id. */
|
|
36
|
+
export declare function findModelSafetyProfile(model?: string): ModelSafetyProfile | undefined;
|
|
37
|
+
export interface SafetyClampResult {
|
|
38
|
+
temperature?: number;
|
|
39
|
+
reasoningEffort?: ReasoningEffort;
|
|
40
|
+
/** What was changed (for logging). Empty object = no clamp fired. */
|
|
41
|
+
clamped: {
|
|
42
|
+
reasoningEffort?: {
|
|
43
|
+
from?: ReasoningEffort;
|
|
44
|
+
to: ReasoningEffort;
|
|
45
|
+
};
|
|
46
|
+
temperature?: {
|
|
47
|
+
from: number;
|
|
48
|
+
to: number;
|
|
49
|
+
};
|
|
50
|
+
};
|
|
51
|
+
profile?: ModelSafetyProfile;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Apply the matching model's safety profile to a (model, temperature,
|
|
55
|
+
* reasoningEffort) set. Pure — performs no I/O and never throws. Returns the
|
|
56
|
+
* (possibly adjusted) values plus a record of what changed; the caller logs.
|
|
57
|
+
*
|
|
58
|
+
* - Reasoning floor applies even when `reasoningEffort` is undefined (a fragile
|
|
59
|
+
* model must not run below its floor just because the caller omitted it).
|
|
60
|
+
* - Temperature ceiling only applies when a temperature is set (undefined =
|
|
61
|
+
* provider default, left untouched).
|
|
62
|
+
*/
|
|
63
|
+
export declare function applyModelSafetyProfile(opts: {
|
|
64
|
+
model?: string;
|
|
65
|
+
temperature?: number;
|
|
66
|
+
reasoningEffort?: ReasoningEffort;
|
|
67
|
+
}): SafetyClampResult;
|
|
68
|
+
//# sourceMappingURL=model-safety-profiles.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"model-safety-profiles.d.ts","sourceRoot":"","sources":["../../../../src/middleware/services/llm/model-safety-profiles.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AACH,OAAO,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE1C,MAAM,WAAW,kBAAkB;IACjC,+DAA+D;IAC/D,KAAK,EAAE,MAAM,CAAC;IACd,4EAA4E;IAC5E,kBAAkB,CAAC,EAAE,eAAe,CAAC;IACrC,+EAA+E;IAC/E,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,kEAAkE;IAClE,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAKD;;;GAGG;AACH,eAAO,MAAM,qBAAqB,EAAE,kBAAkB,EAYrD,CAAC;AAEF,kFAAkF;AAClF,wBAAgB,sBAAsB,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,kBAAkB,GAAG,SAAS,CAIrF;AAED,MAAM,WAAW,iBAAiB;IAChC,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,qEAAqE;IACrE,OAAO,EAAE;QACP,eAAe,CAAC,EAAE;YAAE,IAAI,CAAC,EAAE,eAAe,CAAC;YAAC,EAAE,EAAE,eAAe,CAAA;SAAE,CAAC;QAClE,WAAW,CAAC,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,EAAE,EAAE,MAAM,CAAA;SAAE,CAAC;KAC5C,CAAC;IACF,OAAO,CAAC,EAAE,kBAAkB,CAAC;CAC9B;AAED;;;;;;;;;GASG;AACH,wBAAgB,uBAAuB,CAAC,IAAI,EAAE;IAC5C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,eAAe,CAAC,EAAE,eAAe,CAAC;CACnC,GAAG,iBAAiB,CA6BpB"}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.MODEL_SAFETY_PROFILES = void 0;
|
|
4
|
+
exports.findModelSafetyProfile = findModelSafetyProfile;
|
|
5
|
+
exports.applyModelSafetyProfile = applyModelSafetyProfile;
|
|
6
|
+
/** Ascending order used to compare reasoning-effort levels. */
|
|
7
|
+
const REASONING_ORDER = ['none', 'low', 'medium', 'high'];
|
|
8
|
+
/**
|
|
9
|
+
* Known-fragile models. Keep entries small, evidence-based, and documented
|
|
10
|
+
* (link the verifying notes in the provider doc).
|
|
11
|
+
*/
|
|
12
|
+
exports.MODEL_SAFETY_PROFILES = [
|
|
13
|
+
{
|
|
14
|
+
// GLM-5.1-FP8 (Inceptron). Verified 2026-05-30 (see docs/INCEPTRON.md):
|
|
15
|
+
// - reasons even at reasoning_effort='none' and discards the answer → content
|
|
16
|
+
// null; effective floor is 'low'.
|
|
17
|
+
// - degenerates into garbage/repetition/empty/leaked <think> at temperature
|
|
18
|
+
// >= 1.0; stable at <= 0.7.
|
|
19
|
+
match: 'glm-5.1',
|
|
20
|
+
minReasoningEffort: 'low',
|
|
21
|
+
maxTemperature: 0.7,
|
|
22
|
+
note: 'GLM-5.1 returns empty content at reasoning=none and degenerates at temperature>=1.0',
|
|
23
|
+
},
|
|
24
|
+
];
|
|
25
|
+
/**
 * Look up the safety profile that applies to a model id.
 *
 * Profiles are scanned in declaration order and the first one whose `match`
 * occurs (case-insensitively) anywhere inside the model id wins.
 *
 * @param model Model id to look up; a falsy value yields `undefined`.
 * @returns The first matching profile, or `undefined` when none matches.
 */
function findModelSafetyProfile(model) {
    if (!model) {
        return undefined;
    }
    const loweredId = model.toLowerCase();
    for (const candidate of exports.MODEL_SAFETY_PROFILES) {
        if (loweredId.includes(candidate.match.toLowerCase())) {
            return candidate;
        }
    }
    return undefined;
}
|
|
32
|
+
/**
 * Clamp a (model, temperature, reasoningEffort) triple to the safety profile
 * of the matching model, if there is one. Does no logging itself: the record
 * of what changed is returned in `clamped` so the caller can log it.
 *
 * - The reasoning floor is applied even when `reasoningEffort` is undefined;
 *   an omitted effort is ranked as the weakest level.
 * - The temperature ceiling is applied only when a temperature is set; an
 *   undefined temperature is passed through untouched.
 *
 * @param opts Requested `model`, `temperature` and `reasoningEffort`.
 * @returns `{ temperature, reasoningEffort, clamped, profile }`, where
 *          `clamped` holds a `{ from, to }` entry per adjusted field and is
 *          empty when no clamp fired; `profile` is the matched profile or
 *          `undefined`.
 */
function applyModelSafetyProfile(opts) {
    const profile = findModelSafetyProfile(opts.model);
    let temperature = opts.temperature;
    let reasoningEffort = opts.reasoningEffort;
    const clamped = {};
    if (profile) {
        const floor = profile.minReasoningEffort;
        if (floor) {
            const floorRank = REASONING_ORDER.indexOf(floor);
            // An omitted effort ranks as index 0, i.e. the weakest level ('none').
            let requestedRank = 0;
            if (reasoningEffort !== undefined) {
                requestedRank = REASONING_ORDER.indexOf(reasoningEffort);
            }
            if (floorRank >= 0 && requestedRank < floorRank) {
                clamped.reasoningEffort = { from: reasoningEffort, to: floor };
                reasoningEffort = floor;
            }
        }
        const ceiling = profile.maxTemperature;
        const overCeiling = ceiling !== undefined
            && temperature !== undefined
            && temperature > ceiling;
        if (overCeiling) {
            clamped.temperature = { from: temperature, to: ceiling };
            temperature = ceiling;
        }
    }
    return { temperature, reasoningEffort, clamped, profile };
}
|
|
70
|
+
//# sourceMappingURL=model-safety-profiles.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"model-safety-profiles.js","sourceRoot":"","sources":["../../../../src/middleware/services/llm/model-safety-profiles.ts"],"names":[],"mappings":";;;AAqDA,wDAIC;AAuBD,0DAiCC;AAlFD,+DAA+D;AAC/D,MAAM,eAAe,GAAsB,CAAC,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;AAE7E;;;GAGG;AACU,QAAA,qBAAqB,GAAyB;IACzD;QACE,wEAAwE;QACxE,+EAA+E;QAC/E,qCAAqC;QACrC,6EAA6E;QAC7E,+BAA+B;QAC/B,KAAK,EAAE,SAAS;QAChB,kBAAkB,EAAE,KAAK;QACzB,cAAc,EAAE,GAAG;QACnB,IAAI,EAAE,qFAAqF;KAC5F;CACF,CAAC;AAEF,kFAAkF;AAClF,SAAgB,sBAAsB,CAAC,KAAc;IACnD,IAAI,CAAC,KAAK;QAAE,OAAO,SAAS,CAAC;IAC7B,MAAM,CAAC,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IAC9B,OAAO,6BAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;AAC9E,CAAC;AAaD;;;;;;;;;GASG;AACH,SAAgB,uBAAuB,CAAC,IAIvC;IACC,MAAM,OAAO,GAAG,sBAAsB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACnD,MAAM,MAAM,GAAsB;QAChC,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,eAAe,EAAE,IAAI,CAAC,eAAe;QACrC,OAAO,EAAE,EAAE;QACX,OAAO;KACR,CAAC;IACF,IAAI,CAAC,OAAO;QAAE,OAAO,MAAM,CAAC;IAE5B,IAAI,OAAO,CAAC,kBAAkB,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC;QACrE,MAAM,MAAM,GAAG,MAAM,CAAC,eAAe,KAAK,SAAS;YACjD,CAAC,CAAC,CAAC,CAAC,mDAAmD;YACvD,CAAC,CAAC,eAAe,CAAC,OAAO,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;QACpD,IAAI,QAAQ,IAAI,CAAC,IAAI,MAAM,GAAG,QAAQ,EAAE,CAAC;YACvC,MAAM,CAAC,OAAO,CAAC,eAAe,GAAG,EAAE,IAAI,EAAE,MAAM,CAAC,eAAe,EAAE,EAAE,EAAE,OAAO,CAAC,kBAAkB,EAAE,CAAC;YAClG,MAAM,CAAC,eAAe,GAAG,OAAO,CAAC,kBAAkB,CAAC;QACtD,CAAC;IACH,CAAC;IAED,IAAI,OAAO,CAAC,cAAc,KAAK,SAAS;WACnC,MAAM,CAAC,WAAW,KAAK,SAAS;WAChC,MAAM,CAAC,WAAW,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;QACjD,MAAM,CAAC,OAAO,CAAC,WAAW,GAAG,EAAE,IAAI,EAAE,MAAM,CAAC,WAAW,EAAE,EAAE,EAAE,OAAO,CAAC,cAAc,EAAE,CAAC;QACtF,MAAM,CAAC,WAAW,GAAG,OAAO,CAAC,cAAc,CAAC;IAC9C,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@loonylabs/llm-middleware",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.31.0",
|
|
4
4
|
"description": "Complete middleware infrastructure for LLM-based backends with multi-provider support (Ollama, Anthropic, Google Gemini & Vertex AI, Requesty, AWS Bedrock, Azure OpenAI / Foundry, Inceptron)",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|