@rheonic/sdk 0.1.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.md +150 -0
- package/dist/client.d.ts +59 -0
- package/dist/client.js +305 -0
- package/dist/config.d.ts +15 -0
- package/dist/config.js +15 -0
- package/dist/costCalculator.d.ts +3 -0
- package/dist/costCalculator.js +6 -0
- package/dist/eventBuilder.d.ts +35 -0
- package/dist/eventBuilder.js +15 -0
- package/dist/httpTransport.d.ts +12 -0
- package/dist/httpTransport.js +100 -0
- package/dist/index.d.ts +19 -0
- package/dist/index.js +60 -0
- package/dist/logger.d.ts +16 -0
- package/dist/logger.js +56 -0
- package/dist/protectEngine.d.ts +48 -0
- package/dist/protectEngine.js +255 -0
- package/dist/providerModelValidation.d.ts +7 -0
- package/dist/providerModelValidation.js +26 -0
- package/dist/providers/anthropicAdapter.d.ts +9 -0
- package/dist/providers/anthropicAdapter.js +189 -0
- package/dist/providers/googleAdapter.d.ts +9 -0
- package/dist/providers/googleAdapter.js +212 -0
- package/dist/providers/openaiAdapter.d.ts +9 -0
- package/dist/providers/openaiAdapter.js +203 -0
- package/dist/rateLimiter.d.ts +3 -0
- package/dist/rateLimiter.js +6 -0
- package/dist/tokenEstimator.d.ts +2 -0
- package/dist/tokenEstimator.js +64 -0
- package/package.json +74 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import { buildEvent } from "../eventBuilder.js";
|
|
2
|
+
import { RHEONICBlockedError } from "../protectEngine.js";
|
|
3
|
+
import { validateProviderModel } from "../providerModelValidation.js";
|
|
4
|
+
import { estimateInputTokensFromRequest } from "../tokenEstimator.js";
|
|
5
|
+
// Test seam: when non-null, replaces the real input-token estimator so unit
// tests can inject deterministic estimates without touching tokenizer code.
let estimatorOverrideForTests = null;
// Installs (or clears, with null) the test-only estimator override above.
export function __setInputTokenEstimatorForTests(estimator) {
    estimatorOverrideForTests = estimator;
}
|
|
9
|
+
/**
 * Monkey-patches `anthropicClient.messages.create` so every call is first run
 * through the RHEONIC protect engine and then reported as a telemetry event.
 *
 * The wrapped method keeps the original signature and return value; the
 * client object itself is mutated in place and returned.
 *
 * @param anthropicClient Anthropic SDK client; returned untouched when it
 *   does not expose a callable `messages.create`.
 * @param options `{ client, environment?, endpoint?, feature? }` where
 *   `client` is the RHEONIC SDK client used for decisions and event capture.
 * @returns The same `anthropicClient` instance.
 * @throws {RHEONICBlockedError} when the protect decision is "block".
 */
export function instrumentAnthropic(anthropicClient, options) {
    const targetCreate = anthropicClient?.messages?.create;
    if (typeof targetCreate !== "function") {
        // Unrecognized client shape: leave it uninstrumented rather than throw.
        return anthropicClient;
    }
    // Bind now so the wrapper preserves the SDK's expected `this`.
    const originalCreate = targetCreate.bind(anthropicClient.messages);
    anthropicClient.messages.create = async (...args) => {
        const startedAt = Date.now();
        const requestPayload = extractRequestPayload(args);
        const requestedModel = extractRequestedModel(args);
        validateProviderModel("anthropic", requestedModel);
        // Estimate input tokens before the protect call; the test override wins.
        let estimatedInputTokens = null;
        const tokenEstimateStartedAt = Date.now();
        estimatedInputTokens = requestPayload
            ? (estimatorOverrideForTests
                ? estimatorOverrideForTests(requestPayload)
                : estimateInputTokensFromRequest(requestPayload))
            : null;
        options.client.debugLog("Protect token estimation completed", {
            provider: "anthropic",
            model: requestedModel,
            latency_ms: Date.now() - tokenEstimateStartedAt,
            estimated_input_tokens: estimatedInputTokens ?? undefined,
        });
        const protectPayload = {
            provider: "anthropic",
            model: requestedModel,
            environment: options.environment ?? options.client.environment,
            feature: options.feature,
            max_output_tokens: extractMaxOutputTokens(args),
        };
        if (typeof estimatedInputTokens === "number") {
            protectPayload.input_tokens_estimate = estimatedInputTokens;
        }
        // Protect decision happens BEFORE the provider call; "block" aborts it.
        const protectDecision = await options.client.evaluateProtectDecision(protectPayload);
        if (protectDecision.decision === "block") {
            throw new RHEONICBlockedError(protectDecision.reason);
        }
        // A "warn"/near_cap decision may clamp max_tokens; record whether it did.
        const callArgs = maybeApplyAnthropicClamp(args, protectDecision);
        markClampAppliedIfChanged(protectDecision, extractMaxOutputTokens(args), extractMaxOutputTokens(callArgs));
        try {
            const response = await originalCreate(...callArgs);
            // Fire-and-forget telemetry (`void`: never delays or fails the caller).
            void options.client.captureEvent(buildEvent({
                provider: "anthropic",
                model: extractResponseModel(response) ?? requestedModel,
                environment: options.environment ?? options.client.environment,
                request: {
                    endpoint: options.endpoint,
                    feature: options.feature,
                    input_tokens_estimate: typeof estimatedInputTokens === "number" ? estimatedInputTokens : undefined,
                    protect_decision: protectDecision.decision === "warn" ? "warn" : undefined,
                    protect_reason: protectDecision.decision === "warn" ? protectDecision.reason : undefined,
                },
                response: {
                    latency_ms: Date.now() - startedAt,
                    total_tokens: extractTotalTokens(response),
                    http_status: 200,
                },
            }));
            return response;
        }
        catch (error) {
            // The error path still emits an event, then rethrows the original error.
            void options.client.captureEvent(buildEvent({
                provider: "anthropic",
                model: requestedModel,
                environment: options.environment ?? options.client.environment,
                request: {
                    endpoint: options.endpoint,
                    feature: options.feature,
                    input_tokens_estimate: typeof estimatedInputTokens === "number" ? estimatedInputTokens : undefined,
                    protect_decision: protectDecision.decision === "warn" ? "warn" : undefined,
                    protect_reason: protectDecision.decision === "warn" ? protectDecision.reason : undefined,
                },
                response: {
                    latency_ms: Date.now() - startedAt,
                    error_type: extractErrorType(error),
                    http_status: extractHttpStatus(error),
                },
            }));
            throw error;
        }
    };
    return anthropicClient;
}
|
|
93
|
+
// Returns the request options object (first call argument), or null when the
// argument is absent or not an object.
function extractRequestPayload(args) {
    const [payload] = args;
    return payload && typeof payload === "object" ? payload : null;
}
// Reads the `model` string from the request payload; null when missing or non-string.
function extractRequestedModel(args) {
    const model = extractRequestPayload(args)?.model;
    return typeof model === "string" ? model : null;
}
// Reads the numeric `max_tokens` cap from the request payload, if present.
function extractMaxOutputTokens(args) {
    const maxTokens = extractRequestPayload(args)?.max_tokens;
    return typeof maxTokens === "number" ? maxTokens : undefined;
}
|
|
110
|
+
// Pulls the `model` string off a provider response; null when unavailable.
function extractResponseModel(response) {
    if (!response || typeof response !== "object" || !("model" in response)) {
        return null;
    }
    const { model } = response;
    return typeof model === "string" ? model : null;
}
|
|
117
|
+
// Derives a total token count from an Anthropic response's `usage` block.
// Prefers usage.total_tokens; otherwise sums input/output counts. A zero sum
// is reported as undefined ("unknown") rather than 0.
function extractTotalTokens(response) {
    if (!response || typeof response !== "object") {
        return undefined;
    }
    const usage = response.usage;
    const reported = usage?.total_tokens;
    if (typeof reported === "number") {
        return reported;
    }
    let sum = 0;
    if (typeof usage?.input_tokens === "number") {
        sum += usage.input_tokens;
    }
    if (typeof usage?.output_tokens === "number") {
        sum += usage.output_tokens;
    }
    return sum > 0 ? sum : undefined;
}
|
|
130
|
+
// Classifies an error by its non-empty `name` string; "unknown" otherwise.
function extractErrorType(error) {
    const name = error && typeof error === "object" ? error.name : undefined;
    return typeof name === "string" && name.length > 0 ? name : "unknown";
}
|
|
139
|
+
// Finds an HTTP status on an error object, checking the common locations in
// priority order: `status`, `statusCode`, then `response.status`.
function extractHttpStatus(error) {
    if (!error || typeof error !== "object") {
        return undefined;
    }
    const candidates = [error.status, error.statusCode, error.response?.status];
    return candidates.find((value) => typeof value === "number");
}
|
|
155
|
+
function maybeApplyAnthropicClamp(args, decision) {
|
|
156
|
+
if (decision.decision !== "warn" || decision.reason !== "near_cap") {
|
|
157
|
+
return args;
|
|
158
|
+
}
|
|
159
|
+
if (!decision.applyClampEnabled) {
|
|
160
|
+
return args;
|
|
161
|
+
}
|
|
162
|
+
const recommended = decision.clamp?.recommended_max_output_tokens;
|
|
163
|
+
if (typeof recommended !== "number" || recommended < 1) {
|
|
164
|
+
return args;
|
|
165
|
+
}
|
|
166
|
+
const firstArg = args[0];
|
|
167
|
+
if (!firstArg || typeof firstArg !== "object") {
|
|
168
|
+
return args;
|
|
169
|
+
}
|
|
170
|
+
const payload = { ...firstArg };
|
|
171
|
+
const maxTokens = payload.max_tokens;
|
|
172
|
+
if (typeof maxTokens === "number") {
|
|
173
|
+
payload.max_tokens = Math.min(maxTokens, recommended);
|
|
174
|
+
}
|
|
175
|
+
else {
|
|
176
|
+
payload.max_tokens = recommended;
|
|
177
|
+
}
|
|
178
|
+
const nextArgs = [...args];
|
|
179
|
+
nextArgs[0] = payload;
|
|
180
|
+
return nextArgs;
|
|
181
|
+
}
|
|
182
|
+
// Flags decision.clamp.applied when the clamp changed the effective cap:
// either the request had no numeric cap (clamp supplied one) or the applied
// cap is strictly lower than the original. No-op when no clamp metadata or
// no numeric applied value exists.
function markClampAppliedIfChanged(decision, originalMaxTokens, appliedMaxTokens) {
    if (!decision.clamp || typeof appliedMaxTokens !== "number") {
        return;
    }
    const hadOriginal = typeof originalMaxTokens === "number";
    if (!hadOriginal || appliedMaxTokens < originalMaxTokens) {
        decision.clamp.applied = true;
    }
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { Client } from "../client.js";
|
|
2
|
+
/** Options accepted by `instrumentGoogle`. */
export interface GoogleInstrumentationOptions {
    /** RHEONIC SDK client used for protect decisions and event capture. */
    client: Client;
    /** Overrides the client-level environment tag on captured events. */
    environment?: string;
    /** Logical endpoint name recorded on captured events. */
    endpoint?: string;
    /** Feature label forwarded to protect decisions and events. */
    feature?: string;
}
/** Test-only hook: installs (or clears, with null) a deterministic input-token estimator. */
export declare function __setInputTokenEstimatorForTests(estimator: ((payload: unknown) => number | null) | null): void;
/** Wraps `googleModel.generateContent` with protect + telemetry instrumentation; returns the same instance. */
export declare function instrumentGoogle<T extends Record<string, any>>(googleModel: T, options: GoogleInstrumentationOptions): T;
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import { buildEvent } from "../eventBuilder.js";
|
|
2
|
+
import { RHEONICBlockedError } from "../protectEngine.js";
|
|
3
|
+
import { validateProviderModel } from "../providerModelValidation.js";
|
|
4
|
+
import { estimateInputTokensFromRequest } from "../tokenEstimator.js";
|
|
5
|
+
// Test seam: when non-null, replaces the real input-token estimator so unit
// tests can inject deterministic estimates without touching tokenizer code.
let estimatorOverrideForTests = null;
// Installs (or clears, with null) the test-only estimator override above.
export function __setInputTokenEstimatorForTests(estimator) {
    estimatorOverrideForTests = estimator;
}
|
|
9
|
+
/**
 * Monkey-patches `googleModel.generateContent` so every call is first run
 * through the RHEONIC protect engine and then reported as a telemetry event.
 *
 * The wrapped method keeps the original signature and return value; the
 * model object itself is mutated in place and returned.
 *
 * @param googleModel Google Generative AI model instance; returned untouched
 *   when it does not expose a callable `generateContent`.
 * @param options `{ client, environment?, endpoint?, feature? }` where
 *   `client` is the RHEONIC SDK client used for decisions and event capture.
 * @returns The same `googleModel` instance.
 * @throws {RHEONICBlockedError} when the protect decision is "block".
 */
export function instrumentGoogle(googleModel, options) {
    const targetGenerate = googleModel?.generateContent;
    if (typeof targetGenerate !== "function") {
        // Unrecognized model shape: leave it uninstrumented rather than throw.
        return googleModel;
    }
    // Bind now so the wrapper preserves the SDK's expected `this`.
    const originalGenerate = targetGenerate.bind(googleModel);
    googleModel.generateContent = async (...args) => {
        const startedAt = Date.now();
        // Google's model name lives on the model object, not the call args.
        const requestedModel = extractRequestedModel(googleModel);
        validateProviderModel("google", requestedModel);
        const requestPayload = extractRequestPayload(args, requestedModel);
        // Estimate input tokens before the protect call; the test override wins.
        let estimatedInputTokens = null;
        const tokenEstimateStartedAt = Date.now();
        estimatedInputTokens = requestPayload
            ? (estimatorOverrideForTests
                ? estimatorOverrideForTests(requestPayload)
                : estimateInputTokensFromRequest(requestPayload))
            : null;
        options.client.debugLog("Protect token estimation completed", {
            provider: "google",
            model: requestedModel,
            latency_ms: Date.now() - tokenEstimateStartedAt,
            estimated_input_tokens: estimatedInputTokens ?? undefined,
        });
        const protectPayload = {
            provider: "google",
            model: requestedModel,
            environment: options.environment ?? options.client.environment,
            feature: options.feature,
            max_output_tokens: extractMaxOutputTokens(args),
        };
        if (typeof estimatedInputTokens === "number") {
            protectPayload.input_tokens_estimate = estimatedInputTokens;
        }
        // Protect decision happens BEFORE the provider call; "block" aborts it.
        const protectDecision = await options.client.evaluateProtectDecision(protectPayload);
        if (protectDecision.decision === "block") {
            throw new RHEONICBlockedError(protectDecision.reason);
        }
        // A "warn"/near_cap decision may clamp maxOutputTokens; record whether it did.
        const callArgs = maybeApplyGoogleClamp(args, protectDecision);
        markClampAppliedIfChanged(protectDecision, extractMaxOutputTokens(args), extractMaxOutputTokens(callArgs));
        try {
            const response = await originalGenerate(...callArgs);
            // Fire-and-forget telemetry (`void`: never delays or fails the caller).
            void options.client.captureEvent(buildEvent({
                provider: "google",
                model: requestedModel,
                environment: options.environment ?? options.client.environment,
                request: {
                    endpoint: options.endpoint,
                    feature: options.feature,
                    input_tokens_estimate: typeof estimatedInputTokens === "number" ? estimatedInputTokens : undefined,
                    protect_decision: protectDecision.decision === "warn" ? "warn" : undefined,
                    protect_reason: protectDecision.decision === "warn" ? protectDecision.reason : undefined,
                },
                response: {
                    latency_ms: Date.now() - startedAt,
                    total_tokens: extractTotalTokens(response),
                    http_status: 200,
                },
            }));
            return response;
        }
        catch (error) {
            // The error path still emits an event, then rethrows the original error.
            void options.client.captureEvent(buildEvent({
                provider: "google",
                model: requestedModel,
                environment: options.environment ?? options.client.environment,
                request: {
                    endpoint: options.endpoint,
                    feature: options.feature,
                    input_tokens_estimate: typeof estimatedInputTokens === "number" ? estimatedInputTokens : undefined,
                    protect_decision: protectDecision.decision === "warn" ? "warn" : undefined,
                    protect_reason: protectDecision.decision === "warn" ? protectDecision.reason : undefined,
                },
                response: {
                    latency_ms: Date.now() - startedAt,
                    error_type: extractErrorType(error),
                    http_status: extractHttpStatus(error),
                },
            }));
            throw error;
        }
    };
    return googleModel;
}
|
|
93
|
+
// Normalizes the first call argument into an estimable payload: a bare string
// prompt becomes `{ model, prompt }`, an object is passed through, anything
// else yields null.
function extractRequestPayload(args, model) {
    const [request] = args;
    if (typeof request === "string") {
        return { model, prompt: request };
    }
    return request && typeof request === "object" ? request : null;
}
|
|
103
|
+
// Reads the model identifier off a Google model instance, accepting either
// the `model` or `modelName` property (in that order); null when neither is
// a string.
function extractRequestedModel(googleModel) {
    if (!googleModel || typeof googleModel !== "object") {
        return null;
    }
    for (const key of ["model", "modelName"]) {
        const value = googleModel[key];
        if (typeof value === "string") {
            return value;
        }
    }
    return null;
}
|
|
116
|
+
// Reads the numeric `generationConfig.maxOutputTokens` cap from the request
// object (first call argument), if present.
function extractMaxOutputTokens(args) {
    const [request] = args;
    if (!request || typeof request !== "object") {
        return undefined;
    }
    const maxOutput = request.generationConfig?.maxOutputTokens;
    return typeof maxOutput === "number" ? maxOutput : undefined;
}
|
|
125
|
+
// Derives a total token count from a Google response's usage metadata, which
// may live at `response.response.usageMetadata` (SDK wrapper) or directly at
// `response.usageMetadata`. Prefers totalTokenCount; otherwise sums the
// prompt and candidate counts. A zero sum is reported as undefined.
function extractTotalTokens(response) {
    if (!response || typeof response !== "object") {
        return undefined;
    }
    const usage = response.response?.usageMetadata ?? response.usageMetadata;
    if (!usage || typeof usage !== "object") {
        return undefined;
    }
    if (typeof usage.totalTokenCount === "number") {
        return usage.totalTokenCount;
    }
    let sum = 0;
    for (const key of ["promptTokenCount", "candidatesTokenCount"]) {
        if (typeof usage[key] === "number") {
            sum += usage[key];
        }
    }
    return sum > 0 ? sum : undefined;
}
|
|
143
|
+
// Classifies an error by its non-empty `name` string; "unknown" otherwise.
function extractErrorType(error) {
    const name = error && typeof error === "object" ? error.name : undefined;
    return typeof name === "string" && name.length > 0 ? name : "unknown";
}
|
|
152
|
+
// Finds an HTTP status on an error object, checking the common locations in
// priority order: `status`, `statusCode`, then `response.status`.
function extractHttpStatus(error) {
    if (!error || typeof error !== "object") {
        return undefined;
    }
    const candidates = [error.status, error.statusCode, error.response?.status];
    return candidates.find((value) => typeof value === "number");
}
|
|
168
|
+
// Returns a new args array with `generationConfig.maxOutputTokens` clamped to
// the protect engine's recommendation, for an enabled warn/near_cap decision
// with a valid recommendation. When the first argument is an object request,
// its generationConfig is clamped; otherwise (e.g. a string prompt) the clamp
// is placed on the second (options) argument. Returns args unchanged in all
// other cases; caller-owned objects are never mutated.
function maybeApplyGoogleClamp(args, decision) {
    const eligible =
        decision.decision === "warn" &&
        decision.reason === "near_cap" &&
        Boolean(decision.applyClampEnabled);
    if (!eligible) {
        return args;
    }
    const recommended = decision.clamp?.recommended_max_output_tokens;
    if (typeof recommended !== "number" || recommended < 1) {
        return args;
    }
    // Produces a copy of `config` with maxOutputTokens reduced to at most the
    // recommended cap (or set to it when absent/non-numeric).
    const clampConfig = (config) => {
        const current = config.maxOutputTokens;
        return {
            ...config,
            maxOutputTokens: typeof current === "number" ? Math.min(current, recommended) : recommended,
        };
    };
    const nextArgs = [...args];
    const [request] = nextArgs;
    if (request && typeof request === "object") {
        const existingConfig =
            request.generationConfig && typeof request.generationConfig === "object"
                ? request.generationConfig
                : {};
        nextArgs[0] = { ...request, generationConfig: clampConfig(existingConfig) };
        return nextArgs;
    }
    const secondArg = nextArgs[1];
    const callOptions = secondArg && typeof secondArg === "object" ? { ...secondArg } : {};
    const existingConfig =
        callOptions.generationConfig && typeof callOptions.generationConfig === "object"
            ? { ...callOptions.generationConfig }
            : {};
    callOptions.generationConfig = clampConfig(existingConfig);
    nextArgs[1] = callOptions;
    return nextArgs;
}
|
|
205
|
+
// Flags decision.clamp.applied when the clamp changed the effective cap:
// either the request had no numeric cap (clamp supplied one) or the applied
// cap is strictly lower than the original. No-op when no clamp metadata or
// no numeric applied value exists.
function markClampAppliedIfChanged(decision, originalMaxTokens, appliedMaxTokens) {
    if (!decision.clamp || typeof appliedMaxTokens !== "number") {
        return;
    }
    const hadOriginal = typeof originalMaxTokens === "number";
    if (!hadOriginal || appliedMaxTokens < originalMaxTokens) {
        decision.clamp.applied = true;
    }
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { Client } from "../client.js";
|
|
2
|
+
/** Options accepted by `instrumentOpenAI`. */
export interface OpenAIInstrumentationOptions {
    /** RHEONIC SDK client used for protect decisions and event capture. */
    client: Client;
    /** Overrides the client-level environment tag on captured events. */
    environment?: string;
    /** Logical endpoint name recorded on captured events. */
    endpoint?: string;
    /** Feature label forwarded to protect decisions and events. */
    feature?: string;
}
/** Test-only hook: installs (or clears, with null) a deterministic input-token estimator. */
export declare function __setInputTokenEstimatorForTests(estimator: ((payload: unknown) => number | null) | null): void;
/** Wraps `openaiClient.chat.completions.create` with protect + telemetry instrumentation; returns the same instance. */
export declare function instrumentOpenAI<T extends Record<string, any>>(openaiClient: T, options: OpenAIInstrumentationOptions): T;
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import { buildEvent } from "../eventBuilder.js";
|
|
2
|
+
import { RHEONICBlockedError } from "../protectEngine.js";
|
|
3
|
+
import { validateProviderModel } from "../providerModelValidation.js";
|
|
4
|
+
import { estimateInputTokensFromRequest } from "../tokenEstimator.js";
|
|
5
|
+
// Test seam: when non-null, replaces the real input-token estimator so unit
// tests can inject deterministic estimates without touching tokenizer code.
let estimatorOverrideForTests = null;
// Installs (or clears, with null) the test-only estimator override above.
export function __setInputTokenEstimatorForTests(estimator) {
    estimatorOverrideForTests = estimator;
}
|
|
9
|
+
/**
 * Monkey-patches `openaiClient.chat.completions.create` so every call is
 * first run through the RHEONIC protect engine and then reported as a
 * telemetry event.
 *
 * The wrapped method keeps the original signature and return value; the
 * client object itself is mutated in place and returned.
 *
 * Fixes vs. previous version (aligns with the anthropic/google adapters):
 * - captured events now include `input_tokens_estimate` in the request
 *   payload (it was silently dropped before);
 * - the protect payload is passed directly instead of via a redundant
 *   `{ ...protectPayload }` copy.
 *
 * @param openaiClient OpenAI SDK client; returned untouched when it does not
 *   expose a callable `chat.completions.create`.
 * @param options `{ client, environment?, endpoint?, feature? }` where
 *   `client` is the RHEONIC SDK client used for decisions and event capture.
 * @returns The same `openaiClient` instance.
 * @throws {RHEONICBlockedError} when the protect decision is "block".
 */
export function instrumentOpenAI(openaiClient, options) {
    const targetCreate = openaiClient?.chat?.completions?.create;
    if (typeof targetCreate !== "function") {
        // Unrecognized client shape: leave it uninstrumented rather than throw.
        return openaiClient;
    }
    // Bind now so the wrapper preserves the SDK's expected `this`.
    const originalCreate = targetCreate.bind(openaiClient.chat.completions);
    openaiClient.chat.completions.create = async (...args) => {
        const startedAt = Date.now();
        const model = extractRequestedModel(args);
        validateProviderModel("openai", model);
        const requestPayload = extractRequestPayload(args);
        // Estimate input tokens before the protect call; the test override wins.
        const tokenEstimateStartedAt = Date.now();
        const estimatedInputTokens = requestPayload
            ? (estimatorOverrideForTests
                ? estimatorOverrideForTests(requestPayload)
                : estimateInputTokensFromRequest(requestPayload))
            : null;
        options.client.debugLog("Protect token estimation completed", {
            provider: "openai",
            model,
            latency_ms: Date.now() - tokenEstimateStartedAt,
            estimated_input_tokens: estimatedInputTokens ?? undefined,
        });
        const protectPayload = {
            provider: "openai",
            model,
            environment: options.environment ?? options.client.environment,
            feature: options.feature,
            max_output_tokens: extractMaxOutputTokens(args),
        };
        if (typeof estimatedInputTokens === "number") {
            protectPayload.input_tokens_estimate = estimatedInputTokens;
        }
        // Protect decision happens BEFORE the provider call; "block" aborts it.
        const protectDecision = await options.client.evaluateProtectDecision(protectPayload);
        if (protectDecision.decision === "block") {
            throw new RHEONICBlockedError(protectDecision.reason);
        }
        // A "warn"/near_cap decision may clamp the output cap; record whether it did.
        const callArgs = maybeApplyOpenAIClamp(args, protectDecision);
        markClampAppliedIfChanged(protectDecision, extractMaxOutputTokens(args), extractMaxOutputTokens(callArgs));
        try {
            const response = await originalCreate(...callArgs);
            // Fire-and-forget telemetry (`void`: never delays or fails the caller).
            void options.client.captureEvent(buildEvent({
                provider: "openai",
                model: extractResponseModel(response) ?? model,
                environment: options.environment ?? options.client.environment,
                request: {
                    endpoint: options.endpoint,
                    feature: options.feature,
                    input_tokens_estimate: typeof estimatedInputTokens === "number" ? estimatedInputTokens : undefined,
                    protect_decision: protectDecision.decision === "warn" ? "warn" : undefined,
                    protect_reason: protectDecision.decision === "warn" ? protectDecision.reason : undefined,
                },
                response: {
                    latency_ms: Date.now() - startedAt,
                    total_tokens: extractTotalTokens(response),
                    http_status: 200,
                },
            }));
            return response;
        }
        catch (error) {
            // The error path still emits an event, then rethrows the original error.
            void options.client.captureEvent(buildEvent({
                provider: "openai",
                model,
                environment: options.environment ?? options.client.environment,
                request: {
                    endpoint: options.endpoint,
                    feature: options.feature,
                    input_tokens_estimate: typeof estimatedInputTokens === "number" ? estimatedInputTokens : undefined,
                    protect_decision: protectDecision.decision === "warn" ? "warn" : undefined,
                    protect_reason: protectDecision.decision === "warn" ? protectDecision.reason : undefined,
                },
                response: {
                    latency_ms: Date.now() - startedAt,
                    error_type: extractErrorType(error),
                    http_status: extractHttpStatus(error),
                },
            }));
            throw error;
        }
    };
    return openaiClient;
}
|
|
92
|
+
// Reads the `model` string from the request object (first call argument);
// null when missing or non-string.
function extractRequestedModel(args) {
    const [request] = args;
    if (!request || typeof request !== "object") {
        return null;
    }
    const maybeModel = request.model;
    return typeof maybeModel === "string" ? maybeModel : null;
}
|
|
100
|
+
// Returns the request options object (first call argument), or null when the
// argument is absent or not an object.
function extractRequestPayload(args) {
    const [payload] = args;
    return payload && typeof payload === "object" ? payload : null;
}
|
|
107
|
+
// Reads the numeric output cap from the request object, accepting the legacy
// `max_tokens` field first, then the newer `max_output_tokens`.
function extractMaxOutputTokens(args) {
    const [request] = args;
    if (!request || typeof request !== "object") {
        return undefined;
    }
    if (typeof request.max_tokens === "number") {
        return request.max_tokens;
    }
    if (typeof request.max_output_tokens === "number") {
        return request.max_output_tokens;
    }
    return undefined;
}
|
|
120
|
+
// Returns a new args array with the output cap(s) clamped to the protect
// engine's recommendation, for an enabled warn/near_cap decision with a valid
// recommendation and an object-shaped request. Clamps whichever of
// `max_tokens` / `max_output_tokens` are present; when neither is set,
// installs the recommendation as `max_tokens`. Returns args unchanged in all
// other cases; the caller's request object is never mutated.
function maybeApplyOpenAIClamp(args, decision) {
    const eligible =
        decision.decision === "warn" &&
        decision.reason === "near_cap" &&
        Boolean(decision.applyClampEnabled);
    if (!eligible) {
        return args;
    }
    const recommended = decision.clamp?.recommended_max_output_tokens;
    if (typeof recommended !== "number" || recommended < 1) {
        return args;
    }
    const [request, ...rest] = args;
    if (!request || typeof request !== "object") {
        return args;
    }
    const payload = { ...request };
    let touched = false;
    for (const field of ["max_tokens", "max_output_tokens"]) {
        if (typeof payload[field] === "number") {
            payload[field] = Math.min(payload[field], recommended);
            touched = true;
        }
    }
    if (!touched) {
        payload.max_tokens = recommended;
    }
    return [payload, ...rest];
}
|
|
154
|
+
// Flags decision.clamp.applied when the clamp changed the effective cap:
// either the request had no numeric cap (clamp supplied one) or the applied
// cap is strictly lower than the original. No-op when no clamp metadata or
// no numeric applied value exists.
function markClampAppliedIfChanged(decision, originalMaxTokens, appliedMaxTokens) {
    if (!decision.clamp || typeof appliedMaxTokens !== "number") {
        return;
    }
    const hadOriginal = typeof originalMaxTokens === "number";
    if (!hadOriginal || appliedMaxTokens < originalMaxTokens) {
        decision.clamp.applied = true;
    }
}
|
|
162
|
+
// Pulls the `model` string off a provider response; null when unavailable.
function extractResponseModel(response) {
    if (!response || typeof response !== "object" || !("model" in response)) {
        return null;
    }
    const { model } = response;
    return typeof model === "string" ? model : null;
}
|
|
169
|
+
// Reads `usage.total_tokens` from an OpenAI response; undefined when missing
// or non-numeric.
function extractTotalTokens(response) {
    if (!response || typeof response !== "object") {
        return undefined;
    }
    const total = response.usage?.total_tokens;
    return typeof total === "number" ? total : undefined;
}
|
|
179
|
+
// Classifies an error by its non-empty `name` string; "unknown" otherwise.
function extractErrorType(error) {
    const name = error && typeof error === "object" ? error.name : undefined;
    return typeof name === "string" && name.length > 0 ? name : "unknown";
}
|
|
188
|
+
// Finds an HTTP status on an error object, checking the common locations in
// priority order: `status`, `statusCode`, then `response.status`.
function extractHttpStatus(error) {
    if (!error || typeof error !== "object") {
        return undefined;
    }
    const candidates = [error.status, error.statusCode, error.response?.status];
    return candidates.find((value) => typeof value === "number");
}
|