chat-nest-server 1.0.1 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -18
- package/dist/index.js +223 -79
- package/package.json +1 -1
package/README.md
CHANGED
@@ -1,10 +1,11 @@
 # chat-nest-server
 
-> Streaming AI backend server for Chat Nest with built-in cost protection and cancellation propagation.
+> Streaming AI backend server for Chat Nest with built-in cost protection and cancellation propagation using Server-Sent Events (SSE).
 
 This package exposes an Express-compatible request handler that:
-- Streams AI responses
--
+- Streams AI responses using Server-Sent Events (SSE)
+- Sends real-time tokens via the SSE protocol
+- Enforces rate limits and profile-based limits
 - Supports abort propagation
 - Protects against runaway usage
 
@@ -12,9 +13,12 @@ This package exposes an Express-compatible request handler that:
 
 ## ✨ Features
 
--
+- Server-Sent Events (SSE) streaming over HTTP
+- Real-time token streaming via the SSE protocol
+- SSE event types: `start`, `token`, `done`, `error`, `ping`
+- Heartbeat pings to keep the connection alive
 - End-to-end cancellation support
-- Daily token
+- Daily token limit enforcement (profile-based)
 - Rate limiting
 - Message trimming
 - Safe retry semantics
@@ -31,6 +35,8 @@ npm install chat-nest-server
 ## 🚀 Usage
 Express Integration
 
+The handler automatically uses Server-Sent Events (SSE) for streaming responses:
+
 ```
 import express from "express";
 import cors from "cors";
@@ -53,6 +59,13 @@ app.listen(3001, () => {
 });
 ```
 
+The handler sends SSE-formatted events:
+- `event: start\ndata: \n\n` - Stream started
+- `event: token\ndata: <token>\n\n` - Each token chunk
+- `event: done\ndata: \n\n` - Stream completed
+- `event: error\ndata: <error_json>\n\n` - Error occurred
+- `event: ping\ndata: \n\n` - Heartbeat (every 15s)
+
 ---
 
 ## 🔐 Environment Variables
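Since the README's client-side documentation stops at the event list above, here is a minimal client sketch (not part of the package) that consumes this stream. `EventSource` cannot be used because it only issues GET requests and this handler expects a POST body, so the sketch parses SSE frames from a `fetch` response by hand; the `/chat` route is an assumption.

```js
// Minimal SSE consumer sketch; the /chat route is hypothetical.
async function streamChat(messages, onToken) {
  const res = await fetch("/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages })
  });
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    const frames = buffer.split("\n\n"); // each SSE frame ends with a blank line
    buffer = frames.pop();               // keep any trailing partial frame
    for (const frame of frames) {
      let event = "message";
      let data = "";
      for (const line of frame.split("\n")) {
        if (line.startsWith("event: ")) event = line.slice(7);
        else if (line.startsWith("data: ")) data = line.slice(6);
      }
      if (event === "token") onToken(JSON.parse(data)); // tokens arrive JSON-encoded
      else if (event === "done") return;
      else if (event === "error") throw new Error(JSON.parse(data).message);
      // "start" and "ping" carry no payload and can be ignored
    }
  }
}
```

The server-side `JSON.stringify` on each token (visible in `dist/index.js` below) is what makes `JSON.parse` safe here: it keeps newlines inside a token from breaking the `data:` framing.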
@@ -63,30 +76,34 @@ app.listen(3001, () => {
 
 ## 💰 Cost Controls
 
-
-The server enforces:
+Profiles (`constrained`, `balanced`, `expanded`) control the limits; HARD_CAPS clamp all profiles. The server enforces:
 
-Maximum tokens per request
+- Maximum tokens per request (profile + HARD_CAPS)
+- Daily token limit (profile)
+- Request rate limiting (profile)
+- Prompt size trimming (profile)
+- Retry classification
 
-
+This prevents accidental overspending and abuse.
 
-
+## 🔄 Server-Sent Events (SSE)
 
-
+This package uses the SSE protocol for efficient streaming:
 
-
+- **Content-Type**: `text/event-stream`
+- **Connection**: `keep-alive`
+- **Cache-Control**: `no-cache`
+- **Heartbeat**: Ping every 15 seconds to keep the connection alive
+- **Event Format**: `event: <type>\ndata: <data>\n\n`
 
-
-```
+SSE provides better efficiency and real-time streaming compared to traditional polling or chunked responses.
 
 ---
 
 ## ⚙ Configuration
-Limits can be customized in:
 
-
-
-budget.ts
+- **`config/profiles.ts`** – AI usage profiles (`constrained`, `balanced`, `expanded`), HARD_CAPS, and `resolveProfile()`. The frontend may send an `aiUsageProfile` string; the backend resolves it safely.
+- **`config/aiLimits.ts`** – `AI_MODEL` only. All limits come from profiles.
 
 ---
 
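The new Configuration section pairs with the handler code in `dist/index.js` below: the handler reads `body.profile ?? body.aiUsageProfile`, and per-request overrides are combined with `Math.min`, so a request can tighten a profile's limits but never raise them. A hypothetical request body:

```js
// Hypothetical request body; field names are taken from the handler
// code in dist/index.js below, but the shape is otherwise illustrative.
const body = {
  aiUsageProfile: "balanced",   // "constrained" | "balanced" | "expanded",
                                // or a legacy alias: "budget" | "moderate" | "free"
  maxTokensPerRequest: 1024,    // optional: min(profile value, this) applies
  dailyTokenLimit: 50000,       // optional: same clamp-down rule
  messages: [{ role: "user", content: "Hello!" }]
};
```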
package/dist/index.js
CHANGED
@@ -1,22 +1,96 @@
 // src/createChatHandler.ts
 import OpenAI from "openai";
 
-// src/config/
-var
-
-
-
-  rateLimitWindowMs: 6e4,
-  maxRequestsPerWindow: 30
+// src/config/profiles.ts
+var AIUsageProfile = {
+  CONSTRAINED: "constrained",
+  BALANCED: "balanced",
+  EXPANDED: "expanded"
 };
-
-
-
-
-
-  maxTokensPerRequest:
-
+var HARD_CAPS = {
+  maxOutputTokens: 4096,
+  maxMessages: 20,
+  temperature: 2,
+  dailyTokenLimit: 5e5,
+  maxTokensPerRequest: 16e3,
+  rateLimit: { windowMs: 6e4, maxRequests: 60 }
+};
+function clampToCaps(limits) {
+  return {
+    maxOutputTokens: Math.min(limits.maxOutputTokens, HARD_CAPS.maxOutputTokens),
+    maxMessages: Math.min(limits.maxMessages, HARD_CAPS.maxMessages),
+    temperature: Math.min(limits.temperature, HARD_CAPS.temperature),
+    dailyTokenLimit: Math.min(limits.dailyTokenLimit, HARD_CAPS.dailyTokenLimit),
+    maxTokensPerRequest: Math.min(
+      limits.maxTokensPerRequest,
+      HARD_CAPS.maxTokensPerRequest
+    ),
+    rateLimit: {
+      windowMs: limits.rateLimit.windowMs,
+      maxRequests: Math.min(
+        limits.rateLimit.maxRequests,
+        HARD_CAPS.rateLimit.maxRequests
+      )
+    }
+  };
+}
+var DEFAULT_MAX_TOKENS_PER_REQUEST = 2048;
+var PROFILES = {
+  [AIUsageProfile.CONSTRAINED]: clampToCaps({
+    maxOutputTokens: 150,
+    maxMessages: 4,
+    temperature: 0.5,
+    dailyTokenLimit: 3e4,
+    maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+    rateLimit: { windowMs: 6e4, maxRequests: 15 }
+  }),
+  [AIUsageProfile.BALANCED]: clampToCaps({
+    maxOutputTokens: 400,
+    maxMessages: 6,
+    temperature: 0.7,
+    dailyTokenLimit: 7e4,
+    maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+    rateLimit: { windowMs: 6e4, maxRequests: 30 }
+  }),
+  [AIUsageProfile.EXPANDED]: clampToCaps({
+    maxOutputTokens: 3e3,
+    maxMessages: 12,
+    temperature: 0.8,
+    dailyTokenLimit: 2e5,
+    maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+    rateLimit: { windowMs: 6e4, maxRequests: 60 }
+  })
 };
+var LEGACY_MAP = {
+  budget: AIUsageProfile.CONSTRAINED,
+  moderate: AIUsageProfile.BALANCED,
+  free: AIUsageProfile.EXPANDED
+};
+var VALID_PROFILE_STRINGS = /* @__PURE__ */ new Set([
+  AIUsageProfile.CONSTRAINED,
+  AIUsageProfile.BALANCED,
+  AIUsageProfile.EXPANDED,
+  ...Object.keys(LEGACY_MAP)
+]);
+function resolveProfile(input) {
+  const s = typeof input === "string" ? input.toLowerCase().trim() : "";
+  const profile = LEGACY_MAP[s] ?? (VALID_PROFILE_STRINGS.has(s) ? s : null) ?? AIUsageProfile.BALANCED;
+  return { limits: PROFILES[profile], profile };
+}
+
+// src/guards/rateLimit.ts
+var requestCount = 0;
+var windowStart = Date.now();
+function isRateLimited(limits) {
+  const { windowMs, maxRequests } = limits.rateLimit;
+  const now = Date.now();
+  if (now - windowStart > windowMs) {
+    windowStart = now;
+    requestCount = 0;
+  }
+  requestCount++;
+  return requestCount > maxRequests;
+}
 
 // src/utils/tokenEstimator.ts
 function estimateTokens(messages) {
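Read off the code above, `resolveProfile` normalizes its input and never throws, and `clampToCaps` guarantees no profile exceeds HARD_CAPS; note also that the rate limiter keeps one counter per server process within the window, not per user. A small sketch of the resulting behavior:

```js
// Behavior inferred from the profiles code above (illustrative only).
resolveProfile("expanded");  // -> { profile: "expanded", limits: { maxOutputTokens: 3000, ... } }
resolveProfile(" BUDGET ");  // lowercased + trimmed legacy alias -> "constrained"
resolveProfile("nonsense");  // unknown string -> safe default "balanced"
resolveProfile(undefined);   // non-string input -> "balanced"

// clampToCaps example: a profile requesting maxOutputTokens: 10000
// would be stored as 4096, the HARD_CAPS ceiling.
```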
@@ -39,101 +113,171 @@ function recordTokenUsage(used) {
   tokensUsedToday += used;
 }
 
-// src/
-
-
-
-
-
-
-
+// src/guards/budgetGuards.ts
+function trimMessages(messages, maxMessages) {
+  return messages.slice(-maxMessages);
+}
+function checkBudget(messages, limits) {
+  const estimatedInput = estimateTokens(messages);
+  const estimatedTotal = estimatedInput + limits.maxOutputTokens;
+  if (estimatedTotal > limits.maxTokensPerRequest) {
+    return { success: false, estimatedTotal, reason: "request_limit" };
   }
-
-
+  if (!canSpendTokens(estimatedTotal, limits.dailyTokenLimit)) {
+    return { success: false, estimatedTotal, reason: "daily_limit" };
+  }
+  return { success: true, estimatedTotal };
+}
+
+// src/sse/sseWriter.ts
+function createSSEWriter(res, abortSignal) {
+  const write = (event, data) => {
+    if (abortSignal.aborted) return;
+    res.write(`event: ${event}
+data: ${data}
+
+`);
+  };
+  const startHeartbeat = (intervalMs = 15e3) => {
+    return setInterval(() => {
+      if (!abortSignal.aborted) write("ping", "");
+    }, intervalMs);
+  };
+  const stopHeartbeat = (id) => {
+    if (id) clearInterval(id);
+  };
+  return { write, startHeartbeat, stopHeartbeat };
 }
-
-
+
+// src/config/aiLimits.ts
+var AI_MODEL = "gpt-4o-mini";
+
+// src/streaming/openaiStream.ts
+async function streamChatCompletion(client, messages, limits, writeSSE, signal) {
+  const stream = await client.chat.completions.create(
+    {
+      model: AI_MODEL,
+      stream: true,
+      temperature: limits.temperature,
+      max_tokens: limits.maxOutputTokens,
+      messages: messages.map((m) => ({ role: m.role, content: m.content }))
+    },
+    { signal }
+  );
+  for await (const chunk of stream) {
+    if (signal.aborted) break;
+    const token = chunk.choices[0]?.delta?.content;
+    if (token) {
+      writeSSE("token", JSON.stringify(token));
+    }
+  }
+  writeSSE("done", "");
 }
+
+// src/createChatHandler.ts
 function createChatHandler(config) {
   if (!config.apiKey) {
     throw new Error("OPENAI_API_KEY is missing");
   }
-  const client = new OpenAI({
-    apiKey: config.apiKey
-  });
+  const client = new OpenAI({ apiKey: config.apiKey });
   return async function handler(req, res) {
     const abortController = new AbortController();
     let streamStarted = false;
+    let heartbeatId = null;
+    const body = req.body;
+    const { limits: profileLimits } = resolveProfile(
+      body.profile ?? body.aiUsageProfile
+    );
+    let effectiveLimits = { ...profileLimits };
+    if (body.dailyTokenLimit != null) {
+      effectiveLimits = {
+        ...effectiveLimits,
+        dailyTokenLimit: Math.min(
+          profileLimits.dailyTokenLimit,
+          body.dailyTokenLimit
+        )
+      };
+    }
+    if (body.maxTokensPerRequest != null) {
+      effectiveLimits = {
+        ...effectiveLimits,
+        maxTokensPerRequest: Math.min(
+          profileLimits.maxTokensPerRequest,
+          body.maxTokensPerRequest
+        )
+      };
+    }
+    if (!Array.isArray(body.messages)) {
+      res.status(400).json({ error: "Invalid messages payload" });
+      return;
+    }
+    if (isRateLimited(effectiveLimits)) {
+      res.status(429).json({
+        error: "Rate limit exceeded. Please slow down."
+      });
+      return;
+    }
+    const trimmedMessages = trimMessages(
+      body.messages,
+      effectiveLimits.maxMessages
+    );
+    const budgetResult = checkBudget(trimmedMessages, effectiveLimits);
+    if (!budgetResult.success) {
+      const errorMessage = budgetResult.reason === "request_limit" ? "Request too large. Please shorten your message." : "Daily AI budget exceeded. Try again tomorrow.";
+      res.status(429).json({ error: errorMessage });
+      return;
+    }
+    const { write, startHeartbeat, stopHeartbeat } = createSSEWriter(
+      res,
+      abortController.signal
+    );
     res.on("close", () => {
+      stopHeartbeat(heartbeatId);
       if (streamStarted && !abortController.signal.aborted) {
         abortController.abort();
       }
     });
     try {
-
-
-
-
-
-
-      res.status(429).json({
-        error: "Rate limit exceeded. Please slow down."
-      });
-      return;
-      }
-      const trimmedMessages = trimMessages(body.messages);
-      const estimatedInputTokens = estimateTokens(trimmedMessages);
-      const estimatedTotalTokens = estimatedInputTokens + AI_LIMITS.maxOutputTokens;
-      if (estimatedTotalTokens > BUDGET.maxTokensPerRequest || !canSpendTokens(estimatedTotalTokens, BUDGET.dailyTokenLimit)) {
-        res.status(429).json({
-          error: "Daily AI budget exceeded. Try again tomorrow."
-        });
-        return;
-      }
-      res.setHeader("Content-Type", "text/plain");
-      res.setHeader("Transfer-Encoding", "chunked");
-      const stream = await client.chat.completions.create(
-        {
-          model: AI_MODEL,
-          stream: true,
-          temperature: 0.7,
-          max_tokens: AI_LIMITS.maxOutputTokens,
-          messages: trimmedMessages.map((m) => ({
-            role: m.role,
-            content: m.content
-          }))
-        },
-        {
-          signal: abortController.signal
-        }
-      );
+      res.setHeader("Content-Type", "text/event-stream");
+      res.setHeader("Cache-Control", "no-cache");
+      res.setHeader("Connection", "keep-alive");
+      res.flushHeaders();
+      write("start", "");
+      heartbeatId = startHeartbeat(15e3);
       streamStarted = true;
       try {
-
-
-
-
-
-
-
-
-        }
-      } catch (error) {
+        await streamChatCompletion(
+          client,
+          trimmedMessages,
+          effectiveLimits,
+          write,
+          abortController.signal
+        );
+      } catch (streamError) {
        if (abortController.signal.aborted) {
          console.log("Stream aborted by client");
        } else {
-
+          const msg = streamError instanceof Error ? streamError.message : "Unknown error";
+          write("error", JSON.stringify({ message: msg }));
        }
       } finally {
-
+        stopHeartbeat(heartbeatId);
+        heartbeatId = null;
+        recordTokenUsage(budgetResult.estimatedTotal);
        res.end();
       }
     } catch (error) {
       if (error?.name === "AbortError") {
+        stopHeartbeat(heartbeatId);
        return;
       }
       console.error("AI error:", error);
-      res.
+      if (!res.headersSent) {
+        res.status(500).json({ error: "AI request failed" });
+      } else {
+        write("error", JSON.stringify({ message: "AI request failed" }));
+        res.end();
+      }
     }
   };
 }
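Taken together, the new bundle wires up as follows; a minimal sketch, assuming the package exposes `createChatHandler` as a named export (the README's own Express example is truncated in this diff) and using a hypothetical `/chat` route:

```js
import express from "express";
import cors from "cors";
// Assumed named export, matching the function name in the bundle above.
import { createChatHandler } from "chat-nest-server";

const app = express();
app.use(cors());
app.use(express.json());
// Hypothetical route; mount wherever your API expects chat traffic.
app.post("/chat", createChatHandler({ apiKey: process.env.OPENAI_API_KEY }));
app.listen(3001);
```

On the client side, aborting the fetch closes the HTTP response; the handler's `res.on("close")` listener then aborts the in-flight OpenAI stream, which is the cancellation propagation the README advertises:

```js
// Client-side cancellation sketch against the hypothetical route above.
const controller = new AbortController();
fetch("http://localhost:3001/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ messages: [{ role: "user", content: "Hi" }] }),
  signal: controller.signal
});
// ...later, e.g. when the user clicks "stop":
controller.abort();
```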
package/package.json
CHANGED