chat-nest-server 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -11
- package/dist/index.js +215 -110
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
This package exposes an Express-compatible request handler that:
|
|
6
6
|
- Streams AI responses using Server-Sent Events (SSE)
|
|
7
7
|
- Sends real-time tokens via SSE protocol
|
|
8
|
-
- Enforces rate limits and
|
|
8
|
+
- Enforces rate limits and profile-based limits
|
|
9
9
|
- Supports abort propagation
|
|
10
10
|
- Protects against runaway usage
|
|
11
11
|
|
|
@@ -18,7 +18,7 @@ This package exposes an Express-compatible request handler that:
|
|
|
18
18
|
- SSE event types: `start`, `token`, `done`, `error`, `ping`
|
|
19
19
|
- Heartbeat pings to keep connection alive
|
|
20
20
|
- End-to-end cancellation support
|
|
21
|
-
- Daily token
|
|
21
|
+
- Daily token limit enforcement (profile-based)
|
|
22
22
|
- Rate limiting
|
|
23
23
|
- Message trimming
|
|
24
24
|
- Safe retry semantics
|
|
@@ -76,14 +76,23 @@ The handler sends SSE-formatted events:
|
|
|
76
76
|
|
|
77
77
|
## 💰 Cost Controls
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
Profiles (`constrained`, `balanced`, `expanded`) control limits. HARD_CAPS clamp all profiles. Server enforces:
|
|
80
80
|
|
|
81
|
-
- Maximum tokens per request
|
|
82
|
-
- Daily token
|
|
83
|
-
- Request rate limiting
|
|
84
|
-
- Prompt size trimming
|
|
81
|
+
- **Maximum tokens per request** – Profile-based; the client can lower (never raise) it via `maxTokensPerRequest` (server applies `min(profile limit, client value)`)
|
|
82
|
+
- **Daily token limit** – Profile-based; the client can lower (never raise) it via `dailyTokenLimit` (same `min` rule)
|
|
83
|
+
- Request rate limiting (profile)
|
|
84
|
+
- Prompt size trimming (profile)
|
|
85
85
|
- Retry classification
|
|
86
86
|
|
|
87
|
+
### Request body options
|
|
88
|
+
|
|
89
|
+
| Field | Type | Description |
|
|
90
|
+
| ------------------- | ------ | --------------------------------------------------------------------------- |
|
|
91
|
+
| `messages` | array | Chat messages (required) |
|
|
92
|
+
| `profile` / `aiUsageProfile` | string | Profile: `constrained`, `balanced`, `expanded` (legacy: `budget`, `moderate`, `free`) |
|
|
93
|
+
| `dailyTokenLimit` | number | Optional. Cap daily tokens; server uses `min(profile limit, this)` |
|
|
94
|
+
| `maxTokensPerRequest` | number | Optional. Cap tokens per request; server uses `min(profile limit, this)` |
|
|
95
|
+
|
|
87
96
|
This prevents accidental overspending and abuse.
|
|
88
97
|
|
|
89
98
|
## 🔄 Server-Sent Events (SSE)
|
|
@@ -101,11 +110,9 @@ SSE provides better efficiency and real-time streaming compared to traditional p
|
|
|
101
110
|
---
|
|
102
111
|
|
|
103
112
|
## ⚙ Configuration
|
|
104
|
-
Limits can be customized in:
|
|
105
113
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
budget.ts
|
|
114
|
+
- **`config/profiles.ts`** – AI usage profiles (`constrained`, `balanced`, `expanded`), HARD_CAPS, and `resolveProfile()`. Frontend may send `aiUsageProfile` string; backend resolves safely.
|
|
115
|
+
- **`config/aiLimits.ts`** – `AI_MODEL` only. All limits come from profiles.
|
|
109
116
|
|
|
110
117
|
---
|
|
111
118
|
|
package/dist/index.js
CHANGED
|
@@ -1,22 +1,97 @@
|
|
|
1
1
|
// src/createChatHandler.ts
|
|
2
2
|
import OpenAI from "openai";
|
|
3
3
|
|
|
4
|
-
// src/config/
|
|
5
|
-
var
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
rateLimitWindowMs: 6e4,
|
|
10
|
-
maxRequestsPerWindow: 30
|
|
4
|
+
// src/config/profiles.ts
|
|
5
|
+
/**
 * Canonical AI usage profile identifiers.
 *
 * The values are the lowercase strings used as keys of the profile table
 * and accepted by `resolveProfile`. The object is frozen so that no caller
 * can add, remove or rewrite an identifier at runtime.
 */
var AIUsageProfile = Object.freeze({
  CONSTRAINED: "constrained",
  BALANCED: "balanced",
  EXPANDED: "expanded"
});
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
maxTokensPerRequest:
|
|
18
|
-
|
|
10
|
+
/**
 * Absolute upper bounds that `clampToCaps` applies to every profile.
 *
 * Both the outer object and the nested `rateLimit` object are frozen so the
 * caps cannot be loosened by mutation after the module has loaded.
 */
var HARD_CAPS = Object.freeze({
  maxOutputTokens: 4096,
  maxMessages: 20,
  temperature: 2,
  dailyTokenLimit: 500000,
  maxTokensPerRequest: 16000,
  // At most 60 requests per 60 000 ms window.
  rateLimit: Object.freeze({ windowMs: 60000, maxRequests: 60 })
});
|
|
18
|
+
/**
 * Returns a copy of `limits` in which no field is more permissive than `caps`.
 *
 * Numeric ceilings are clamped with `Math.min`. The rate-limit window is
 * clamped with `Math.max`: allowing a window shorter than the capped one
 * while keeping the same `maxRequests` would permit more requests per unit
 * of time than the cap intends, so the window may only be as long as, or
 * longer than, the capped window.
 *
 * @param {object} limits - Profile limits (`maxOutputTokens`, `maxMessages`,
 *   `temperature`, `dailyTokenLimit`, `maxTokensPerRequest`,
 *   `rateLimit: { windowMs, maxRequests }`).
 * @param {object} [caps=HARD_CAPS] - Upper bounds with the same shape.
 * @returns {object} A new limits object; `limits` and `caps` are not mutated.
 */
function clampToCaps(limits, caps = HARD_CAPS) {
  const ceiling = (field) => Math.min(limits[field], caps[field]);
  return {
    maxOutputTokens: ceiling("maxOutputTokens"),
    maxMessages: ceiling("maxMessages"),
    temperature: ceiling("temperature"),
    dailyTokenLimit: ceiling("dailyTokenLimit"),
    maxTokensPerRequest: ceiling("maxTokensPerRequest"),
    rateLimit: {
      windowMs: Math.max(limits.rateLimit.windowMs, caps.rateLimit.windowMs),
      maxRequests: Math.min(
        limits.rateLimit.maxRequests,
        caps.rateLimit.maxRequests
      )
    }
  };
}
|
|
37
|
+
// Per-request token ceiling shared by the constrained and balanced profiles.
var DEFAULT_MAX_TOKENS_PER_REQUEST = 2048;

/**
 * Limits for each usage profile, keyed by profile identifier.
 * Every entry is passed through `clampToCaps` before being stored.
 */
var PROFILES = Object.fromEntries(
  [
    [
      AIUsageProfile.CONSTRAINED,
      {
        maxOutputTokens: 150,
        maxMessages: 4,
        temperature: 0.5,
        dailyTokenLimit: 30000,
        maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
        rateLimit: { windowMs: 60000, maxRequests: 15 }
      }
    ],
    [
      AIUsageProfile.BALANCED,
      {
        maxOutputTokens: 400,
        maxMessages: 6,
        temperature: 0.7,
        dailyTokenLimit: 70000,
        maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
        rateLimit: { windowMs: 60000, maxRequests: 30 }
      }
    ],
    [
      AIUsageProfile.EXPANDED,
      {
        maxOutputTokens: 3000,
        maxMessages: 12,
        temperature: 0.8,
        dailyTokenLimit: 200000,
        // input (~9k) + output (3k) for 12 msgs
        maxTokensPerRequest: 12000,
        rateLimit: { windowMs: 60000, maxRequests: 60 }
      }
    ]
  ].map(([profileName, rawLimits]) => [profileName, clampToCaps(rawLimits)])
);
|
|
65
|
+
// Maps the legacy profile names onto the current profile identifiers.
var LEGACY_MAP = {
  budget: AIUsageProfile.CONSTRAINED,
  moderate: AIUsageProfile.BALANCED,
  free: AIUsageProfile.EXPANDED
};

// Every string accepted as a profile name: the three current identifiers
// followed by the legacy names.
var VALID_PROFILE_STRINGS = /* @__PURE__ */ new Set(
  [
    AIUsageProfile.CONSTRAINED,
    AIUsageProfile.BALANCED,
    AIUsageProfile.EXPANDED
  ].concat(Object.keys(LEGACY_MAP))
);
|
|
76
|
+
/**
 * Resolves a caller-supplied profile name to a known profile and its limits.
 *
 * The input is lowercased and trimmed. Legacy names are translated through
 * `LEGACY_MAP`; current names are accepted as-is; anything else (including
 * non-string input) falls back to the balanced profile.
 *
 * The legacy lookup only considers own properties of `LEGACY_MAP`. A plain
 * `LEGACY_MAP[name]` would also return inherited members for inputs such as
 * "constructor" or "__proto__", producing a non-profile value and therefore
 * `limits: undefined`.
 *
 * @param {unknown} input - Profile name from the request.
 * @returns {{ limits: object, profile: string }} Resolved limits and the
 *   canonical profile identifier.
 */
function resolveProfile(input) {
  const name = typeof input === "string" ? input.toLowerCase().trim() : "";
  let profile = AIUsageProfile.BALANCED;
  if (Object.prototype.hasOwnProperty.call(LEGACY_MAP, name)) {
    profile = LEGACY_MAP[name];
  } else if (VALID_PROFILE_STRINGS.has(name)) {
    profile = name;
  }
  return { limits: PROFILES[profile], profile };
}
|
|
81
|
+
|
|
82
|
+
// src/guards/rateLimit.ts
|
|
83
|
+
// Fixed-window counter state. It lives at module level and isRateLimited
// takes no caller identity, so one counter is shared by every request.
var requestCount = 0;
var windowStart = Date.now();

/**
 * Counts the current request and reports whether it exceeds the limit.
 *
 * When more than `windowMs` milliseconds have passed since the window
 * started, the window restarts at the current time and the counter resets.
 * The request is always counted, including when it ends up rejected.
 *
 * @param {{ rateLimit: { windowMs: number, maxRequests: number } }} limits
 * @returns {boolean} `true` when the request count for the current window is
 *   above `maxRequests`.
 */
function isRateLimited(limits) {
  const policy = limits.rateLimit;
  const windowLength = policy.windowMs;
  const allowedRequests = policy.maxRequests;
  const currentTime = Date.now();
  const elapsed = currentTime - windowStart;
  if (elapsed > windowLength) {
    windowStart = currentTime;
    requestCount = 0;
  }
  requestCount += 1;
  return requestCount > allowedRequests;
}
|
|
20
95
|
|
|
21
96
|
// src/utils/tokenEstimator.ts
|
|
22
97
|
function estimateTokens(messages) {
|
|
@@ -39,139 +114,169 @@ function recordTokenUsage(used) {
|
|
|
39
114
|
tokensUsedToday += used;
|
|
40
115
|
}
|
|
41
116
|
|
|
42
|
-
// src/
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
117
|
+
// src/guards/budgetGuards.ts
|
|
118
|
+
/**
 * Returns the most recent `maxMessages` entries of `messages` as a new array.
 *
 * `Array.prototype.slice(-0)` is `slice(0)`, which returns the whole array,
 * and a negative limit would turn into a positive start index. A limit that
 * is zero, negative or NaN therefore yields an empty array instead of
 * falling through to `slice`. Fractional limits are truncated toward zero.
 * When `maxMessages` is omitted, a full copy is returned.
 *
 * @param {Array} messages - Conversation messages, oldest first.
 * @param {number} [maxMessages] - Maximum number of trailing messages to keep.
 * @returns {Array} A new array; `messages` is not mutated.
 */
function trimMessages(messages, maxMessages) {
  if (maxMessages === undefined) {
    return messages.slice();
  }
  const keep = Math.trunc(Number(maxMessages));
  if (!(keep > 0)) {
    return [];
  }
  return messages.slice(-keep);
}
|
|
121
|
+
/**
 * Checks a request against the per-request and daily token limits.
 *
 * The estimate is the token estimate of `messages` plus
 * `limits.maxOutputTokens`. The per-request limit is checked first; the
 * daily limit is only consulted when the per-request check passes.
 *
 * @param {Array} messages - Messages that will be sent to the model.
 * @param {object} limits - Uses `maxOutputTokens`, `maxTokensPerRequest`
 *   and `dailyTokenLimit`.
 * @returns {{ success: boolean, estimatedTotal: number, reason?: string }}
 *   On failure `reason` is "request_limit" or "daily_limit".
 */
function checkBudget(messages, limits) {
  const estimatedTotal = estimateTokens(messages) + limits.maxOutputTokens;
  const deny = (reason) => ({ success: false, estimatedTotal, reason });

  const overRequestLimit = estimatedTotal > limits.maxTokensPerRequest;
  if (overRequestLimit) {
    return deny("request_limit");
  }

  const withinDailyLimit = canSpendTokens(estimatedTotal, limits.dailyTokenLimit);
  if (!withinDailyLimit) {
    return deny("daily_limit");
  }

  return { success: true, estimatedTotal };
}
|
|
54
|
-
|
|
55
|
-
|
|
132
|
+
|
|
133
|
+
// src/sse/sseWriter.ts
|
|
134
|
+
/**
 * Builds helpers for writing Server-Sent Events to a response.
 *
 * @param {{ write: Function, writableEnded?: boolean }} res - Response to
 *   write to.
 * @param {{ aborted: boolean }} abortSignal - Writes become no-ops once this
 *   signal is aborted.
 * @returns {{ write: Function, startHeartbeat: Function, stopHeartbeat: Function }}
 */
function createSSEWriter(res, abortSignal) {
  /**
   * Writes one SSE frame. Nothing is written after the signal has aborted or
   * after the response has ended.
   *
   * In the event-stream format a line break ends the current field, so data
   * containing line breaks is emitted as one `data:` line per line of input;
   * otherwise the remainder would be parsed as separate, attacker-shapeable
   * fields. Single-line data produces exactly `event: X\ndata: Y\n\n`.
   */
  const write = (event, data) => {
    if (abortSignal.aborted || res.writableEnded) return;
    const dataLines = String(data)
      .split(/\r\n|\r|\n/)
      .map((line) => `data: ${line}`)
      .join("\n");
    res.write(`event: ${event}\n${dataLines}\n\n`);
  };

  /**
   * Starts sending an empty `ping` event every `intervalMs` milliseconds.
   * @returns The interval handle to pass to `stopHeartbeat`.
   */
  const startHeartbeat = (intervalMs = 15e3) => {
    return setInterval(() => {
      if (!abortSignal.aborted) write("ping", "");
    }, intervalMs);
  };

  /** Stops a heartbeat; a null/undefined handle is ignored. */
  const stopHeartbeat = (id) => {
    if (id) clearInterval(id);
  };

  return { write, startHeartbeat, stopHeartbeat };
}
|
|
152
|
+
|
|
153
|
+
// src/config/aiLimits.ts
|
|
154
|
+
// Model identifier sent with every chat completion request.
var AI_MODEL = "gpt-4o-mini";

// src/streaming/openaiStream.ts
/**
 * Requests a streamed chat completion and forwards each content token as an
 * SSE `token` event, followed by a single `done` event.
 *
 * Only `role` and `content` of each message are sent. Token text is
 * JSON-encoded before being handed to `writeSSE`.
 *
 * Chunks are read defensively (`chunk?.choices?.[0]`), so a chunk without a
 * `choices` array is skipped instead of raising a TypeError mid-stream.
 * If the signal aborts while streaming, the function returns without
 * emitting `done`, since the completion did not finish.
 *
 * @param {object} client - Object exposing `chat.completions.create(params, options)`
 *   that resolves to an async iterable of chunks.
 * @param {Array<{role: string, content: string}>} messages
 * @param {{ temperature: number, maxOutputTokens: number }} limits
 * @param {(event: string, data: string) => void} writeSSE
 * @param {{ aborted: boolean }} signal - Passed to the client as `{ signal }`.
 * @returns {Promise<void>} Rejects with whatever the client or stream throws.
 */
async function streamChatCompletion(client, messages, limits, writeSSE, signal) {
  const stream = await client.chat.completions.create(
    {
      model: AI_MODEL,
      stream: true,
      temperature: limits.temperature,
      max_tokens: limits.maxOutputTokens,
      messages: messages.map((m) => ({ role: m.role, content: m.content }))
    },
    { signal }
  );
  for await (const chunk of stream) {
    if (signal.aborted) return;
    const token = chunk?.choices?.[0]?.delta?.content;
    if (token) {
      writeSSE("token", JSON.stringify(token));
    }
  }
  if (signal.aborted) return;
  writeSSE("done", "");
}
|
|
177
|
+
|
|
178
|
+
// src/createChatHandler.ts
|
|
57
179
|
function createChatHandler(config) {
|
|
58
180
|
if (!config.apiKey) {
|
|
59
181
|
throw new Error("OPENAI_API_KEY is missing");
|
|
60
182
|
}
|
|
61
|
-
const client = new OpenAI({
|
|
62
|
-
apiKey: config.apiKey
|
|
63
|
-
});
|
|
183
|
+
const client = new OpenAI({ apiKey: config.apiKey });
|
|
64
184
|
return async function handler(req, res) {
|
|
65
185
|
const abortController = new AbortController();
|
|
66
186
|
let streamStarted = false;
|
|
67
|
-
let
|
|
68
|
-
const
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
187
|
+
let heartbeatId = null;
|
|
188
|
+
const body = req.body;
|
|
189
|
+
const { limits: profileLimits } = resolveProfile(
|
|
190
|
+
body.profile ?? body.aiUsageProfile
|
|
191
|
+
);
|
|
192
|
+
let effectiveLimits = { ...profileLimits };
|
|
193
|
+
if (body.dailyTokenLimit != null) {
|
|
194
|
+
effectiveLimits = {
|
|
195
|
+
...effectiveLimits,
|
|
196
|
+
dailyTokenLimit: Math.min(
|
|
197
|
+
profileLimits.dailyTokenLimit,
|
|
198
|
+
body.dailyTokenLimit
|
|
199
|
+
)
|
|
200
|
+
};
|
|
201
|
+
}
|
|
202
|
+
if (body.maxTokensPerRequest != null) {
|
|
203
|
+
effectiveLimits = {
|
|
204
|
+
...effectiveLimits,
|
|
205
|
+
maxTokensPerRequest: Math.min(
|
|
206
|
+
profileLimits.maxTokensPerRequest,
|
|
207
|
+
body.maxTokensPerRequest
|
|
208
|
+
)
|
|
209
|
+
};
|
|
210
|
+
}
|
|
211
|
+
if (!Array.isArray(body.messages)) {
|
|
212
|
+
res.status(400).json({ error: "Invalid messages payload" });
|
|
213
|
+
return;
|
|
214
|
+
}
|
|
215
|
+
if (isRateLimited(effectiveLimits)) {
|
|
216
|
+
res.status(429).json({
|
|
217
|
+
error: "Rate limit exceeded. Please slow down."
|
|
218
|
+
});
|
|
219
|
+
return;
|
|
220
|
+
}
|
|
221
|
+
const trimmedMessages = trimMessages(
|
|
222
|
+
body.messages,
|
|
223
|
+
effectiveLimits.maxMessages
|
|
224
|
+
);
|
|
225
|
+
const budgetResult = checkBudget(trimmedMessages, effectiveLimits);
|
|
226
|
+
if (!budgetResult.success) {
|
|
227
|
+
const errorMessage = budgetResult.reason === "request_limit" ? "Request too large. Please shorten your message." : "Daily AI budget exceeded. Try again tomorrow.";
|
|
228
|
+
res.status(429).json({ error: errorMessage });
|
|
229
|
+
return;
|
|
230
|
+
}
|
|
231
|
+
const { write, startHeartbeat, stopHeartbeat } = createSSEWriter(
|
|
232
|
+
res,
|
|
233
|
+
abortController.signal
|
|
234
|
+
);
|
|
75
235
|
res.on("close", () => {
|
|
76
|
-
|
|
77
|
-
clearInterval(heartbeatInterval);
|
|
78
|
-
heartbeatInterval = null;
|
|
79
|
-
}
|
|
236
|
+
stopHeartbeat(heartbeatId);
|
|
80
237
|
if (streamStarted && !abortController.signal.aborted) {
|
|
81
238
|
abortController.abort();
|
|
82
239
|
}
|
|
83
240
|
});
|
|
84
241
|
try {
|
|
85
|
-
const body = req.body;
|
|
86
|
-
if (!Array.isArray(body.messages)) {
|
|
87
|
-
res.status(400).json({ error: "Invalid messages payload" });
|
|
88
|
-
return;
|
|
89
|
-
}
|
|
90
|
-
if (isRateLimited()) {
|
|
91
|
-
res.status(429).json({
|
|
92
|
-
error: "Rate limit exceeded. Please slow down."
|
|
93
|
-
});
|
|
94
|
-
return;
|
|
95
|
-
}
|
|
96
|
-
const trimmedMessages = trimMessages(body.messages);
|
|
97
|
-
const estimatedInputTokens = estimateTokens(trimmedMessages);
|
|
98
|
-
const estimatedTotalTokens = estimatedInputTokens + AI_LIMITS.maxOutputTokens;
|
|
99
|
-
if (estimatedTotalTokens > BUDGET.maxTokensPerRequest || !canSpendTokens(estimatedTotalTokens, BUDGET.dailyTokenLimit)) {
|
|
100
|
-
res.status(429).json({
|
|
101
|
-
error: "Daily AI budget exceeded. Try again tomorrow."
|
|
102
|
-
});
|
|
103
|
-
return;
|
|
104
|
-
}
|
|
105
242
|
res.setHeader("Content-Type", "text/event-stream");
|
|
106
243
|
res.setHeader("Cache-Control", "no-cache");
|
|
107
244
|
res.setHeader("Connection", "keep-alive");
|
|
108
245
|
res.flushHeaders();
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
if (!abortController.signal.aborted) {
|
|
112
|
-
writeSSE("ping", "");
|
|
113
|
-
}
|
|
114
|
-
}, 15e3);
|
|
115
|
-
const stream = await client.chat.completions.create(
|
|
116
|
-
{
|
|
117
|
-
model: AI_MODEL,
|
|
118
|
-
stream: true,
|
|
119
|
-
temperature: 0.7,
|
|
120
|
-
max_tokens: AI_LIMITS.maxOutputTokens,
|
|
121
|
-
messages: trimmedMessages.map((m) => ({
|
|
122
|
-
role: m.role,
|
|
123
|
-
content: m.content
|
|
124
|
-
}))
|
|
125
|
-
},
|
|
126
|
-
{
|
|
127
|
-
signal: abortController.signal
|
|
128
|
-
}
|
|
129
|
-
);
|
|
246
|
+
write("start", "");
|
|
247
|
+
heartbeatId = startHeartbeat(15e3);
|
|
130
248
|
streamStarted = true;
|
|
131
249
|
try {
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
}
|
|
141
|
-
writeSSE("done", "");
|
|
142
|
-
} catch (error) {
|
|
250
|
+
await streamChatCompletion(
|
|
251
|
+
client,
|
|
252
|
+
trimmedMessages,
|
|
253
|
+
effectiveLimits,
|
|
254
|
+
write,
|
|
255
|
+
abortController.signal
|
|
256
|
+
);
|
|
257
|
+
} catch (streamError) {
|
|
143
258
|
if (abortController.signal.aborted) {
|
|
144
259
|
console.log("Stream aborted by client");
|
|
145
260
|
} else {
|
|
146
|
-
const
|
|
147
|
-
|
|
148
|
-
});
|
|
149
|
-
writeSSE("error", errorData);
|
|
261
|
+
const msg = streamError instanceof Error ? streamError.message : "Unknown error";
|
|
262
|
+
write("error", JSON.stringify({ message: msg }));
|
|
150
263
|
}
|
|
151
264
|
} finally {
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
}
|
|
156
|
-
recordTokenUsage(estimatedTotalTokens);
|
|
265
|
+
stopHeartbeat(heartbeatId);
|
|
266
|
+
heartbeatId = null;
|
|
267
|
+
recordTokenUsage(budgetResult.estimatedTotal);
|
|
157
268
|
res.end();
|
|
158
269
|
}
|
|
159
270
|
} catch (error) {
|
|
160
271
|
if (error?.name === "AbortError") {
|
|
161
|
-
|
|
162
|
-
clearInterval(heartbeatInterval);
|
|
163
|
-
heartbeatInterval = null;
|
|
164
|
-
}
|
|
272
|
+
stopHeartbeat(heartbeatId);
|
|
165
273
|
return;
|
|
166
274
|
}
|
|
167
275
|
console.error("AI error:", error);
|
|
168
276
|
if (!res.headersSent) {
|
|
169
277
|
res.status(500).json({ error: "AI request failed" });
|
|
170
278
|
} else {
|
|
171
|
-
|
|
172
|
-
message: "AI request failed"
|
|
173
|
-
});
|
|
174
|
-
writeSSE("error", errorData);
|
|
279
|
+
write("error", JSON.stringify({ message: "AI request failed" }));
|
|
175
280
|
res.end();
|
|
176
281
|
}
|
|
177
282
|
}
|
package/package.json
CHANGED