chat-nest-server 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -11
- package/dist/index.js +214 -110
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
This package exposes an Express-compatible request handler that:
|
|
6
6
|
- Streams AI responses using Server-Sent Events (SSE)
|
|
7
7
|
- Sends real-time tokens via the SSE protocol
|
|
8
|
-
- Enforces rate limits and
|
|
8
|
+
- Enforces rate limits and profile-based limits
|
|
9
9
|
- Supports abort propagation
|
|
10
10
|
- Protects against runaway usage
|
|
11
11
|
|
|
@@ -18,7 +18,7 @@ This package exposes an Express-compatible request handler that:
|
|
|
18
18
|
- SSE event types: `start`, `token`, `done`, `error`, `ping`
|
|
19
19
|
- Heartbeat pings to keep connection alive
|
|
20
20
|
- End-to-end cancellation support
|
|
21
|
-
- Daily token
|
|
21
|
+
- Daily token limit enforcement (profile-based)
|
|
22
22
|
- Rate limiting
|
|
23
23
|
- Message trimming
|
|
24
24
|
- Safe retry semantics
|
|
@@ -76,12 +76,12 @@ The handler sends SSE-formatted events:
|
|
|
76
76
|
|
|
77
77
|
## 💰 Cost Controls
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
Profiles (`constrained`, `balanced`, `expanded`) control limits. HARD_CAPS clamp all profiles. Server enforces:
|
|
80
80
|
|
|
81
|
-
- Maximum tokens per request
|
|
82
|
-
- Daily token
|
|
83
|
-
- Request rate limiting
|
|
84
|
-
- Prompt size trimming
|
|
81
|
+
- Maximum tokens per request (profile + HARD_CAPS)
|
|
82
|
+
- Daily token limit (profile)
|
|
83
|
+
- Request rate limiting (profile)
|
|
84
|
+
- Prompt size trimming (profile)
|
|
85
85
|
- Retry classification
|
|
86
86
|
|
|
87
87
|
This prevents accidental overspending and abuse.
|
|
@@ -101,11 +101,9 @@ SSE provides better efficiency and real-time streaming compared to traditional p
|
|
|
101
101
|
---
|
|
102
102
|
|
|
103
103
|
## ⚙ Configuration
|
|
104
|
-
Limits can be customized in:
|
|
105
104
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
budget.ts
|
|
105
|
+
- **`config/profiles.ts`** – AI usage profiles (`constrained`, `balanced`, `expanded`), HARD_CAPS, and `resolveProfile()`. Frontend may send `aiUsageProfile` string; backend resolves safely.
|
|
106
|
+
- **`config/aiLimits.ts`** – `AI_MODEL` only. All limits come from profiles.
|
|
109
107
|
|
|
110
108
|
---
|
|
111
109
|
|
package/dist/index.js
CHANGED
|
@@ -1,22 +1,96 @@
|
|
|
1
1
|
// src/createChatHandler.ts
|
|
2
2
|
import OpenAI from "openai";
|
|
3
3
|
|
|
4
|
-
// src/config/
|
|
5
|
-
var
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
rateLimitWindowMs: 6e4,
|
|
10
|
-
maxRequestsPerWindow: 30
|
|
4
|
+
// src/config/profiles.ts
|
|
5
|
+
var AIUsageProfile = {
|
|
6
|
+
CONSTRAINED: "constrained",
|
|
7
|
+
BALANCED: "balanced",
|
|
8
|
+
EXPANDED: "expanded"
|
|
11
9
|
};
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
maxTokensPerRequest:
|
|
18
|
-
|
|
10
|
+
var HARD_CAPS = {
|
|
11
|
+
maxOutputTokens: 4096,
|
|
12
|
+
maxMessages: 20,
|
|
13
|
+
temperature: 2,
|
|
14
|
+
dailyTokenLimit: 5e5,
|
|
15
|
+
maxTokensPerRequest: 16e3,
|
|
16
|
+
rateLimit: { windowMs: 6e4, maxRequests: 60 }
|
|
17
|
+
};
|
|
18
|
+
function clampToCaps(limits) {
|
|
19
|
+
return {
|
|
20
|
+
maxOutputTokens: Math.min(limits.maxOutputTokens, HARD_CAPS.maxOutputTokens),
|
|
21
|
+
maxMessages: Math.min(limits.maxMessages, HARD_CAPS.maxMessages),
|
|
22
|
+
temperature: Math.min(limits.temperature, HARD_CAPS.temperature),
|
|
23
|
+
dailyTokenLimit: Math.min(limits.dailyTokenLimit, HARD_CAPS.dailyTokenLimit),
|
|
24
|
+
maxTokensPerRequest: Math.min(
|
|
25
|
+
limits.maxTokensPerRequest,
|
|
26
|
+
HARD_CAPS.maxTokensPerRequest
|
|
27
|
+
),
|
|
28
|
+
rateLimit: {
|
|
29
|
+
windowMs: limits.rateLimit.windowMs,
|
|
30
|
+
maxRequests: Math.min(
|
|
31
|
+
limits.rateLimit.maxRequests,
|
|
32
|
+
HARD_CAPS.rateLimit.maxRequests
|
|
33
|
+
)
|
|
34
|
+
}
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
var DEFAULT_MAX_TOKENS_PER_REQUEST = 2048;
|
|
38
|
+
var PROFILES = {
|
|
39
|
+
[AIUsageProfile.CONSTRAINED]: clampToCaps({
|
|
40
|
+
maxOutputTokens: 150,
|
|
41
|
+
maxMessages: 4,
|
|
42
|
+
temperature: 0.5,
|
|
43
|
+
dailyTokenLimit: 3e4,
|
|
44
|
+
maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
|
|
45
|
+
rateLimit: { windowMs: 6e4, maxRequests: 15 }
|
|
46
|
+
}),
|
|
47
|
+
[AIUsageProfile.BALANCED]: clampToCaps({
|
|
48
|
+
maxOutputTokens: 400,
|
|
49
|
+
maxMessages: 6,
|
|
50
|
+
temperature: 0.7,
|
|
51
|
+
dailyTokenLimit: 7e4,
|
|
52
|
+
maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
|
|
53
|
+
rateLimit: { windowMs: 6e4, maxRequests: 30 }
|
|
54
|
+
}),
|
|
55
|
+
[AIUsageProfile.EXPANDED]: clampToCaps({
|
|
56
|
+
maxOutputTokens: 3e3,
|
|
57
|
+
maxMessages: 12,
|
|
58
|
+
temperature: 0.8,
|
|
59
|
+
dailyTokenLimit: 2e5,
|
|
60
|
+
maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
|
|
61
|
+
rateLimit: { windowMs: 6e4, maxRequests: 60 }
|
|
62
|
+
})
|
|
63
|
+
};
|
|
64
|
+
var LEGACY_MAP = {
|
|
65
|
+
budget: AIUsageProfile.CONSTRAINED,
|
|
66
|
+
moderate: AIUsageProfile.BALANCED,
|
|
67
|
+
free: AIUsageProfile.EXPANDED
|
|
19
68
|
};
|
|
69
|
+
var VALID_PROFILE_STRINGS = /* @__PURE__ */ new Set([
|
|
70
|
+
AIUsageProfile.CONSTRAINED,
|
|
71
|
+
AIUsageProfile.BALANCED,
|
|
72
|
+
AIUsageProfile.EXPANDED,
|
|
73
|
+
...Object.keys(LEGACY_MAP)
|
|
74
|
+
]);
|
|
75
|
+
function resolveProfile(input) {
|
|
76
|
+
const s = typeof input === "string" ? input.toLowerCase().trim() : "";
|
|
77
|
+
const profile = LEGACY_MAP[s] ?? (VALID_PROFILE_STRINGS.has(s) ? s : null) ?? AIUsageProfile.BALANCED;
|
|
78
|
+
return { limits: PROFILES[profile], profile };
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// src/guards/rateLimit.ts
|
|
82
|
+
var requestCount = 0;
|
|
83
|
+
var windowStart = Date.now();
|
|
84
|
+
function isRateLimited(limits) {
|
|
85
|
+
const { windowMs, maxRequests } = limits.rateLimit;
|
|
86
|
+
const now = Date.now();
|
|
87
|
+
if (now - windowStart > windowMs) {
|
|
88
|
+
windowStart = now;
|
|
89
|
+
requestCount = 0;
|
|
90
|
+
}
|
|
91
|
+
requestCount++;
|
|
92
|
+
return requestCount > maxRequests;
|
|
93
|
+
}
|
|
20
94
|
|
|
21
95
|
// src/utils/tokenEstimator.ts
|
|
22
96
|
function estimateTokens(messages) {
|
|
@@ -39,139 +113,169 @@ function recordTokenUsage(used) {
|
|
|
39
113
|
tokensUsedToday += used;
|
|
40
114
|
}
|
|
41
115
|
|
|
42
|
-
// src/
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
116
|
+
// src/guards/budgetGuards.ts
|
|
117
|
+
function trimMessages(messages, maxMessages) {
|
|
118
|
+
return messages.slice(-maxMessages);
|
|
119
|
+
}
|
|
120
|
+
function checkBudget(messages, limits) {
|
|
121
|
+
const estimatedInput = estimateTokens(messages);
|
|
122
|
+
const estimatedTotal = estimatedInput + limits.maxOutputTokens;
|
|
123
|
+
if (estimatedTotal > limits.maxTokensPerRequest) {
|
|
124
|
+
return { success: false, estimatedTotal, reason: "request_limit" };
|
|
50
125
|
}
|
|
51
|
-
|
|
52
|
-
|
|
126
|
+
if (!canSpendTokens(estimatedTotal, limits.dailyTokenLimit)) {
|
|
127
|
+
return { success: false, estimatedTotal, reason: "daily_limit" };
|
|
128
|
+
}
|
|
129
|
+
return { success: true, estimatedTotal };
|
|
53
130
|
}
|
|
54
|
-
|
|
55
|
-
|
|
131
|
+
|
|
132
|
+
// src/sse/sseWriter.ts
|
|
133
|
+
function createSSEWriter(res, abortSignal) {
|
|
134
|
+
const write = (event, data) => {
|
|
135
|
+
if (abortSignal.aborted) return;
|
|
136
|
+
res.write(`event: ${event}
|
|
137
|
+
data: ${data}
|
|
138
|
+
|
|
139
|
+
`);
|
|
140
|
+
};
|
|
141
|
+
const startHeartbeat = (intervalMs = 15e3) => {
|
|
142
|
+
return setInterval(() => {
|
|
143
|
+
if (!abortSignal.aborted) write("ping", "");
|
|
144
|
+
}, intervalMs);
|
|
145
|
+
};
|
|
146
|
+
const stopHeartbeat = (id) => {
|
|
147
|
+
if (id) clearInterval(id);
|
|
148
|
+
};
|
|
149
|
+
return { write, startHeartbeat, stopHeartbeat };
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
// src/config/aiLimits.ts
|
|
153
|
+
var AI_MODEL = "gpt-4o-mini";
|
|
154
|
+
|
|
155
|
+
// src/streaming/openaiStream.ts
|
|
156
|
+
async function streamChatCompletion(client, messages, limits, writeSSE, signal) {
|
|
157
|
+
const stream = await client.chat.completions.create(
|
|
158
|
+
{
|
|
159
|
+
model: AI_MODEL,
|
|
160
|
+
stream: true,
|
|
161
|
+
temperature: limits.temperature,
|
|
162
|
+
max_tokens: limits.maxOutputTokens,
|
|
163
|
+
messages: messages.map((m) => ({ role: m.role, content: m.content }))
|
|
164
|
+
},
|
|
165
|
+
{ signal }
|
|
166
|
+
);
|
|
167
|
+
for await (const chunk of stream) {
|
|
168
|
+
if (signal.aborted) break;
|
|
169
|
+
const token = chunk.choices[0]?.delta?.content;
|
|
170
|
+
if (token) {
|
|
171
|
+
writeSSE("token", JSON.stringify(token));
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
writeSSE("done", "");
|
|
56
175
|
}
|
|
176
|
+
|
|
177
|
+
// src/createChatHandler.ts
|
|
57
178
|
function createChatHandler(config) {
|
|
58
179
|
if (!config.apiKey) {
|
|
59
180
|
throw new Error("OPENAI_API_KEY is missing");
|
|
60
181
|
}
|
|
61
|
-
const client = new OpenAI({
|
|
62
|
-
apiKey: config.apiKey
|
|
63
|
-
});
|
|
182
|
+
const client = new OpenAI({ apiKey: config.apiKey });
|
|
64
183
|
return async function handler(req, res) {
|
|
65
184
|
const abortController = new AbortController();
|
|
66
185
|
let streamStarted = false;
|
|
67
|
-
let
|
|
68
|
-
const
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
186
|
+
let heartbeatId = null;
|
|
187
|
+
const body = req.body;
|
|
188
|
+
const { limits: profileLimits } = resolveProfile(
|
|
189
|
+
body.profile ?? body.aiUsageProfile
|
|
190
|
+
);
|
|
191
|
+
let effectiveLimits = { ...profileLimits };
|
|
192
|
+
if (body.dailyTokenLimit != null) {
|
|
193
|
+
effectiveLimits = {
|
|
194
|
+
...effectiveLimits,
|
|
195
|
+
dailyTokenLimit: Math.min(
|
|
196
|
+
profileLimits.dailyTokenLimit,
|
|
197
|
+
body.dailyTokenLimit
|
|
198
|
+
)
|
|
199
|
+
};
|
|
200
|
+
}
|
|
201
|
+
if (body.maxTokensPerRequest != null) {
|
|
202
|
+
effectiveLimits = {
|
|
203
|
+
...effectiveLimits,
|
|
204
|
+
maxTokensPerRequest: Math.min(
|
|
205
|
+
profileLimits.maxTokensPerRequest,
|
|
206
|
+
body.maxTokensPerRequest
|
|
207
|
+
)
|
|
208
|
+
};
|
|
209
|
+
}
|
|
210
|
+
if (!Array.isArray(body.messages)) {
|
|
211
|
+
res.status(400).json({ error: "Invalid messages payload" });
|
|
212
|
+
return;
|
|
213
|
+
}
|
|
214
|
+
if (isRateLimited(effectiveLimits)) {
|
|
215
|
+
res.status(429).json({
|
|
216
|
+
error: "Rate limit exceeded. Please slow down."
|
|
217
|
+
});
|
|
218
|
+
return;
|
|
219
|
+
}
|
|
220
|
+
const trimmedMessages = trimMessages(
|
|
221
|
+
body.messages,
|
|
222
|
+
effectiveLimits.maxMessages
|
|
223
|
+
);
|
|
224
|
+
const budgetResult = checkBudget(trimmedMessages, effectiveLimits);
|
|
225
|
+
if (!budgetResult.success) {
|
|
226
|
+
const errorMessage = budgetResult.reason === "request_limit" ? "Request too large. Please shorten your message." : "Daily AI budget exceeded. Try again tomorrow.";
|
|
227
|
+
res.status(429).json({ error: errorMessage });
|
|
228
|
+
return;
|
|
229
|
+
}
|
|
230
|
+
const { write, startHeartbeat, stopHeartbeat } = createSSEWriter(
|
|
231
|
+
res,
|
|
232
|
+
abortController.signal
|
|
233
|
+
);
|
|
75
234
|
res.on("close", () => {
|
|
76
|
-
|
|
77
|
-
clearInterval(heartbeatInterval);
|
|
78
|
-
heartbeatInterval = null;
|
|
79
|
-
}
|
|
235
|
+
stopHeartbeat(heartbeatId);
|
|
80
236
|
if (streamStarted && !abortController.signal.aborted) {
|
|
81
237
|
abortController.abort();
|
|
82
238
|
}
|
|
83
239
|
});
|
|
84
240
|
try {
|
|
85
|
-
const body = req.body;
|
|
86
|
-
if (!Array.isArray(body.messages)) {
|
|
87
|
-
res.status(400).json({ error: "Invalid messages payload" });
|
|
88
|
-
return;
|
|
89
|
-
}
|
|
90
|
-
if (isRateLimited()) {
|
|
91
|
-
res.status(429).json({
|
|
92
|
-
error: "Rate limit exceeded. Please slow down."
|
|
93
|
-
});
|
|
94
|
-
return;
|
|
95
|
-
}
|
|
96
|
-
const trimmedMessages = trimMessages(body.messages);
|
|
97
|
-
const estimatedInputTokens = estimateTokens(trimmedMessages);
|
|
98
|
-
const estimatedTotalTokens = estimatedInputTokens + AI_LIMITS.maxOutputTokens;
|
|
99
|
-
if (estimatedTotalTokens > BUDGET.maxTokensPerRequest || !canSpendTokens(estimatedTotalTokens, BUDGET.dailyTokenLimit)) {
|
|
100
|
-
res.status(429).json({
|
|
101
|
-
error: "Daily AI budget exceeded. Try again tomorrow."
|
|
102
|
-
});
|
|
103
|
-
return;
|
|
104
|
-
}
|
|
105
241
|
res.setHeader("Content-Type", "text/event-stream");
|
|
106
242
|
res.setHeader("Cache-Control", "no-cache");
|
|
107
243
|
res.setHeader("Connection", "keep-alive");
|
|
108
244
|
res.flushHeaders();
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
if (!abortController.signal.aborted) {
|
|
112
|
-
writeSSE("ping", "");
|
|
113
|
-
}
|
|
114
|
-
}, 15e3);
|
|
115
|
-
const stream = await client.chat.completions.create(
|
|
116
|
-
{
|
|
117
|
-
model: AI_MODEL,
|
|
118
|
-
stream: true,
|
|
119
|
-
temperature: 0.7,
|
|
120
|
-
max_tokens: AI_LIMITS.maxOutputTokens,
|
|
121
|
-
messages: trimmedMessages.map((m) => ({
|
|
122
|
-
role: m.role,
|
|
123
|
-
content: m.content
|
|
124
|
-
}))
|
|
125
|
-
},
|
|
126
|
-
{
|
|
127
|
-
signal: abortController.signal
|
|
128
|
-
}
|
|
129
|
-
);
|
|
245
|
+
write("start", "");
|
|
246
|
+
heartbeatId = startHeartbeat(15e3);
|
|
130
247
|
streamStarted = true;
|
|
131
248
|
try {
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
}
|
|
141
|
-
writeSSE("done", "");
|
|
142
|
-
} catch (error) {
|
|
249
|
+
await streamChatCompletion(
|
|
250
|
+
client,
|
|
251
|
+
trimmedMessages,
|
|
252
|
+
effectiveLimits,
|
|
253
|
+
write,
|
|
254
|
+
abortController.signal
|
|
255
|
+
);
|
|
256
|
+
} catch (streamError) {
|
|
143
257
|
if (abortController.signal.aborted) {
|
|
144
258
|
console.log("Stream aborted by client");
|
|
145
259
|
} else {
|
|
146
|
-
const
|
|
147
|
-
|
|
148
|
-
});
|
|
149
|
-
writeSSE("error", errorData);
|
|
260
|
+
const msg = streamError instanceof Error ? streamError.message : "Unknown error";
|
|
261
|
+
write("error", JSON.stringify({ message: msg }));
|
|
150
262
|
}
|
|
151
263
|
} finally {
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
}
|
|
156
|
-
recordTokenUsage(estimatedTotalTokens);
|
|
264
|
+
stopHeartbeat(heartbeatId);
|
|
265
|
+
heartbeatId = null;
|
|
266
|
+
recordTokenUsage(budgetResult.estimatedTotal);
|
|
157
267
|
res.end();
|
|
158
268
|
}
|
|
159
269
|
} catch (error) {
|
|
160
270
|
if (error?.name === "AbortError") {
|
|
161
|
-
|
|
162
|
-
clearInterval(heartbeatInterval);
|
|
163
|
-
heartbeatInterval = null;
|
|
164
|
-
}
|
|
271
|
+
stopHeartbeat(heartbeatId);
|
|
165
272
|
return;
|
|
166
273
|
}
|
|
167
274
|
console.error("AI error:", error);
|
|
168
275
|
if (!res.headersSent) {
|
|
169
276
|
res.status(500).json({ error: "AI request failed" });
|
|
170
277
|
} else {
|
|
171
|
-
|
|
172
|
-
message: "AI request failed"
|
|
173
|
-
});
|
|
174
|
-
writeSSE("error", errorData);
|
|
278
|
+
write("error", JSON.stringify({ message: "AI request failed" }));
|
|
175
279
|
res.end();
|
|
176
280
|
}
|
|
177
281
|
}
|
package/package.json
CHANGED