chat-nest-server 1.0.1 → 1.1.1

Files changed (3)
  1. package/README.md +35 -18
  2. package/dist/index.js +223 -79
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -1,10 +1,11 @@
  # chat-nest-server
 
- > Streaming AI backend server for Chat Nest with built-in cost protection and cancellation propagation.
+ > Streaming AI backend server for Chat Nest with built-in cost protection and cancellation propagation using Server-Sent Events (SSE).
 
  This package exposes an Express-compatible request handler that:
- - Streams AI responses
- - Enforces rate limits and budgets
+ - Streams AI responses using Server-Sent Events (SSE)
+ - Sends real-time tokens via the SSE protocol
+ - Enforces rate limits and profile-based limits
  - Supports abort propagation
  - Protects against runaway usage
 
@@ -12,9 +13,12 @@ This package exposes an Express-compatible request handler that:
 
  ## ✨ Features
 
- - Streaming responses over HTTP
+ - Server-Sent Events (SSE) streaming over HTTP
+ - Real-time token streaming via the SSE protocol
+ - SSE event types: `start`, `token`, `done`, `error`, `ping`
+ - Heartbeat pings to keep the connection alive
  - End-to-end cancellation support
- - Daily token budget enforcement
+ - Daily token limit enforcement (profile-based)
  - Rate limiting
  - Message trimming
  - Safe retry semantics
@@ -31,6 +35,8 @@ npm install chat-nest-server
  ## 🚀 Usage
  Express Integration
 
+ The handler automatically uses Server-Sent Events (SSE) for streaming responses:
+
  ```
  import express from "express";
  import cors from "cors";
@@ -53,6 +59,13 @@ app.listen(3001, () => {
  });
  ```
 
+ The handler sends SSE-formatted events:
+ - `event: start\ndata: \n\n` - Stream started
+ - `event: token\ndata: <token>\n\n` - Each token chunk
+ - `event: done\ndata: \n\n` - Stream completed
+ - `event: error\ndata: <error_json>\n\n` - Error occurred
+ - `event: ping\ndata: \n\n` - Heartbeat (every 15s)
+
  ---
 
  ## 🔐 Environment Variables
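
For orientation, here is a minimal client-side sketch of consuming the events listed above. This is not part of the package: the `/api/chat` path and the `onToken` callback are assumptions. `EventSource` supports only GET, so the frames from this POST endpoint are parsed by hand; `token` payloads are JSON-encoded strings, matching the `JSON.stringify` call in `dist/index.js` below.

```
// Sketch only; "/api/chat" and onToken are assumptions, not package APIs.
async function streamChat(messages, onToken) {
  const res = await fetch("/api/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages })
  });
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  while (true) {
    const { value, done } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    let end;
    // Each SSE frame ends with a blank line ("\n\n").
    while ((end = buffer.indexOf("\n\n")) !== -1) {
      const frame = buffer.slice(0, end);
      buffer = buffer.slice(end + 2);
      const event = /^event: (.*)$/m.exec(frame)?.[1];
      const data = /^data: (.*)$/m.exec(frame)?.[1] ?? "";
      if (event === "token") onToken(JSON.parse(data)); // tokens arrive JSON-encoded
      else if (event === "error") throw new Error(JSON.parse(data).message);
      else if (event === "done") return;
      // "start" and "ping" frames need no handling here.
    }
  }
}
```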
@@ -63,30 +76,34 @@ app.listen(3001, () => {
 
  ## 💰 Cost Controls
 
- ```
- The server enforces:
+ Profiles (`constrained`, `balanced`, `expanded`) control the limits, and HARD_CAPS clamp all profiles. The server enforces:
 
- Maximum tokens per request
+ - Maximum tokens per request (profile + HARD_CAPS)
+ - Daily token limit (profile)
+ - Request rate limiting (profile)
+ - Prompt size trimming (profile)
+ - Retry classification
 
- Daily token budget
+ This prevents accidental overspending and abuse.
 
- Request rate limiting
+ ## 🔄 Server-Sent Events (SSE)
 
- Prompt size trimming
+ This package uses the SSE protocol for efficient streaming:
 
- Retry classification
+ - **Content-Type**: `text/event-stream`
+ - **Connection**: `keep-alive`
+ - **Cache-Control**: `no-cache`
+ - **Heartbeat**: Ping every 15 seconds to keep the connection alive
+ - **Event Format**: `event: <type>\ndata: <data>\n\n`
 
- This prevents accidental overspending and abuse.
- ```
+ SSE delivers tokens as they are generated, avoiding the latency of polling and the framing ambiguity of raw chunked responses.
 
  ---
 
  ## ⚙ Configuration
- Limits can be customized in:
 
- src/config/
- aiLimits.ts
- budget.ts
+ - **`config/profiles.ts`** – AI usage profiles (`constrained`, `balanced`, `expanded`), HARD_CAPS, and `resolveProfile()`. The frontend may send an `aiUsageProfile` string; the backend resolves it safely.
+ - **`config/aiLimits.ts`** – `AI_MODEL` only. All limits come from profiles.
 
  ---
 
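As a usage note (a sketch, not from the package docs): the profile is selected per request in the body. Per the handler in `dist/index.js` below, the server reads `body.profile ?? body.aiUsageProfile`, and any client-supplied `dailyTokenLimit` or `maxTokensPerRequest` can only lower the resolved profile's limits, never raise them. The `/api/chat` path is an assumption.

```
// Hypothetical client request; only the field names are taken from dist/index.js.
await fetch("/api/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    aiUsageProfile: "constrained", // or "balanced" / "expanded"; legacy "budget" / "moderate" / "free" also resolve
    dailyTokenLimit: 20000,        // optional; clamped to Math.min(profile limit, this value)
    maxTokensPerRequest: 1000,     // optional; clamped the same way
    messages: [{ role: "user", content: "Hello!" }]
  })
});
```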
package/dist/index.js CHANGED
@@ -1,22 +1,96 @@
  // src/createChatHandler.ts
  import OpenAI from "openai";
 
- // src/config/aiLimits.ts
- var AI_MODEL = "gpt-4o-mini";
- var AI_LIMITS = {
-   maxOutputTokens: 300,
-   maxMessages: 6,
-   rateLimitWindowMs: 6e4,
-   maxRequestsPerWindow: 30
+ // src/config/profiles.ts
+ var AIUsageProfile = {
+   CONSTRAINED: "constrained",
+   BALANCED: "balanced",
+   EXPANDED: "expanded"
  };
-
- // src/config/budget.ts
- var BUDGET = {
-   dailyTokenLimit: 7e4,
-   // ~safe for $5 / 3 months
-   maxTokensPerRequest: 600
-   // input + output guard
+ var HARD_CAPS = {
+   maxOutputTokens: 4096,
+   maxMessages: 20,
+   temperature: 2,
+   dailyTokenLimit: 5e5,
+   maxTokensPerRequest: 16e3,
+   rateLimit: { windowMs: 6e4, maxRequests: 60 }
+ };
+ function clampToCaps(limits) {
+   return {
+     maxOutputTokens: Math.min(limits.maxOutputTokens, HARD_CAPS.maxOutputTokens),
+     maxMessages: Math.min(limits.maxMessages, HARD_CAPS.maxMessages),
+     temperature: Math.min(limits.temperature, HARD_CAPS.temperature),
+     dailyTokenLimit: Math.min(limits.dailyTokenLimit, HARD_CAPS.dailyTokenLimit),
+     maxTokensPerRequest: Math.min(
+       limits.maxTokensPerRequest,
+       HARD_CAPS.maxTokensPerRequest
+     ),
+     rateLimit: {
+       windowMs: limits.rateLimit.windowMs,
+       maxRequests: Math.min(
+         limits.rateLimit.maxRequests,
+         HARD_CAPS.rateLimit.maxRequests
+       )
+     }
+   };
+ }
+ var DEFAULT_MAX_TOKENS_PER_REQUEST = 2048;
+ var PROFILES = {
+   [AIUsageProfile.CONSTRAINED]: clampToCaps({
+     maxOutputTokens: 150,
+     maxMessages: 4,
+     temperature: 0.5,
+     dailyTokenLimit: 3e4,
+     maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+     rateLimit: { windowMs: 6e4, maxRequests: 15 }
+   }),
+   [AIUsageProfile.BALANCED]: clampToCaps({
+     maxOutputTokens: 400,
+     maxMessages: 6,
+     temperature: 0.7,
+     dailyTokenLimit: 7e4,
+     maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+     rateLimit: { windowMs: 6e4, maxRequests: 30 }
+   }),
+   [AIUsageProfile.EXPANDED]: clampToCaps({
+     maxOutputTokens: 3e3,
+     maxMessages: 12,
+     temperature: 0.8,
+     dailyTokenLimit: 2e5,
+     maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+     rateLimit: { windowMs: 6e4, maxRequests: 60 }
+   })
  };
+ var LEGACY_MAP = {
+   budget: AIUsageProfile.CONSTRAINED,
+   moderate: AIUsageProfile.BALANCED,
+   free: AIUsageProfile.EXPANDED
+ };
+ var VALID_PROFILE_STRINGS = /* @__PURE__ */ new Set([
+   AIUsageProfile.CONSTRAINED,
+   AIUsageProfile.BALANCED,
+   AIUsageProfile.EXPANDED,
+   ...Object.keys(LEGACY_MAP)
+ ]);
+ function resolveProfile(input) {
+   const s = typeof input === "string" ? input.toLowerCase().trim() : "";
+   const profile = LEGACY_MAP[s] ?? (VALID_PROFILE_STRINGS.has(s) ? s : null) ?? AIUsageProfile.BALANCED;
+   return { limits: PROFILES[profile], profile };
+ }
+
+ // src/guards/rateLimit.ts
+ var requestCount = 0;
+ var windowStart = Date.now();
+ function isRateLimited(limits) {
+   const { windowMs, maxRequests } = limits.rateLimit;
+   const now = Date.now();
+   if (now - windowStart > windowMs) {
+     windowStart = now;
+     requestCount = 0;
+   }
+   requestCount++;
+   return requestCount > maxRequests;
+ }
 
  // src/utils/tokenEstimator.ts
  function estimateTokens(messages) {
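
A few illustrative calls against `resolveProfile()` as bundled in the hunk above (return values abbreviated; these lines are not in the package):

```
// Illustrative only; mirrors the resolution order: legacy map, valid set, default.
resolveProfile("expanded");  // → { profile: "expanded", limits: PROFILES["expanded"] }
resolveProfile(" FREE ");    // lowercased + trimmed; legacy "free" maps to "expanded"
resolveProfile("turbo");     // unknown string → safe default "balanced"
resolveProfile(undefined);   // non-string → safe default "balanced"
```

Two observable design points: the shipped profile values all sit below HARD_CAPS, so `clampToCaps` is currently a no-op that guards future edits; and the rate-limit window (`requestCount`, `windowStart`) is module-level state, so it is shared across all clients of the process rather than tracked per caller.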
@@ -39,101 +113,171 @@ function recordTokenUsage(used) {
    tokensUsedToday += used;
  }
 
- // src/createChatHandler.ts
- var requestCount = 0;
- var windowStart = Date.now();
- function isRateLimited() {
-   const now = Date.now();
-   if (now - windowStart > AI_LIMITS.rateLimitWindowMs) {
-     windowStart = now;
-     requestCount = 0;
+ // src/guards/budgetGuards.ts
+ function trimMessages(messages, maxMessages) {
+   return messages.slice(-maxMessages);
+ }
+ function checkBudget(messages, limits) {
+   const estimatedInput = estimateTokens(messages);
+   const estimatedTotal = estimatedInput + limits.maxOutputTokens;
+   if (estimatedTotal > limits.maxTokensPerRequest) {
+     return { success: false, estimatedTotal, reason: "request_limit" };
    }
-   requestCount++;
-   return requestCount > AI_LIMITS.maxRequestsPerWindow;
+   if (!canSpendTokens(estimatedTotal, limits.dailyTokenLimit)) {
+     return { success: false, estimatedTotal, reason: "daily_limit" };
+   }
+   return { success: true, estimatedTotal };
+ }
+
+ // src/sse/sseWriter.ts
+ function createSSEWriter(res, abortSignal) {
+   const write = (event, data) => {
+     if (abortSignal.aborted) return;
+     res.write(`event: ${event}
+ data: ${data}
+
+ `);
+   };
+   const startHeartbeat = (intervalMs = 15e3) => {
+     return setInterval(() => {
+       if (!abortSignal.aborted) write("ping", "");
+     }, intervalMs);
+   };
+   const stopHeartbeat = (id) => {
+     if (id) clearInterval(id);
+   };
+   return { write, startHeartbeat, stopHeartbeat };
  }
- function trimMessages(messages) {
-   return messages.slice(-AI_LIMITS.maxMessages);
+
+ // src/config/aiLimits.ts
+ var AI_MODEL = "gpt-4o-mini";
+
+ // src/streaming/openaiStream.ts
+ async function streamChatCompletion(client, messages, limits, writeSSE, signal) {
+   const stream = await client.chat.completions.create(
+     {
+       model: AI_MODEL,
+       stream: true,
+       temperature: limits.temperature,
+       max_tokens: limits.maxOutputTokens,
+       messages: messages.map((m) => ({ role: m.role, content: m.content }))
+     },
+     { signal }
+   );
+   for await (const chunk of stream) {
+     if (signal.aborted) break;
+     const token = chunk.choices[0]?.delta?.content;
+     if (token) {
+       writeSSE("token", JSON.stringify(token));
+     }
+   }
+   writeSSE("done", "");
  }
+
+ // src/createChatHandler.ts
  function createChatHandler(config) {
    if (!config.apiKey) {
      throw new Error("OPENAI_API_KEY is missing");
    }
-   const client = new OpenAI({
-     apiKey: config.apiKey
-   });
+   const client = new OpenAI({ apiKey: config.apiKey });
    return async function handler(req, res) {
      const abortController = new AbortController();
      let streamStarted = false;
+     let heartbeatId = null;
+     const body = req.body;
+     const { limits: profileLimits } = resolveProfile(
+       body.profile ?? body.aiUsageProfile
+     );
+     let effectiveLimits = { ...profileLimits };
+     if (body.dailyTokenLimit != null) {
+       effectiveLimits = {
+         ...effectiveLimits,
+         dailyTokenLimit: Math.min(
+           profileLimits.dailyTokenLimit,
+           body.dailyTokenLimit
+         )
+       };
+     }
+     if (body.maxTokensPerRequest != null) {
+       effectiveLimits = {
+         ...effectiveLimits,
+         maxTokensPerRequest: Math.min(
+           profileLimits.maxTokensPerRequest,
+           body.maxTokensPerRequest
+         )
+       };
+     }
+     if (!Array.isArray(body.messages)) {
+       res.status(400).json({ error: "Invalid messages payload" });
+       return;
+     }
+     if (isRateLimited(effectiveLimits)) {
+       res.status(429).json({
+         error: "Rate limit exceeded. Please slow down."
+       });
+       return;
+     }
+     const trimmedMessages = trimMessages(
+       body.messages,
+       effectiveLimits.maxMessages
+     );
+     const budgetResult = checkBudget(trimmedMessages, effectiveLimits);
+     if (!budgetResult.success) {
+       const errorMessage = budgetResult.reason === "request_limit" ? "Request too large. Please shorten your message." : "Daily AI budget exceeded. Try again tomorrow.";
+       res.status(429).json({ error: errorMessage });
+       return;
+     }
+     const { write, startHeartbeat, stopHeartbeat } = createSSEWriter(
+       res,
+       abortController.signal
+     );
      res.on("close", () => {
+       stopHeartbeat(heartbeatId);
        if (streamStarted && !abortController.signal.aborted) {
          abortController.abort();
        }
      });
      try {
-       const body = req.body;
-       if (!Array.isArray(body.messages)) {
-         res.status(400).json({ error: "Invalid messages payload" });
-         return;
-       }
-       if (isRateLimited()) {
-         res.status(429).json({
-           error: "Rate limit exceeded. Please slow down."
-         });
-         return;
-       }
-       const trimmedMessages = trimMessages(body.messages);
-       const estimatedInputTokens = estimateTokens(trimmedMessages);
-       const estimatedTotalTokens = estimatedInputTokens + AI_LIMITS.maxOutputTokens;
-       if (estimatedTotalTokens > BUDGET.maxTokensPerRequest || !canSpendTokens(estimatedTotalTokens, BUDGET.dailyTokenLimit)) {
-         res.status(429).json({
-           error: "Daily AI budget exceeded. Try again tomorrow."
-         });
-         return;
-       }
-       res.setHeader("Content-Type", "text/plain");
-       res.setHeader("Transfer-Encoding", "chunked");
-       const stream = await client.chat.completions.create(
-         {
-           model: AI_MODEL,
-           stream: true,
-           temperature: 0.7,
-           max_tokens: AI_LIMITS.maxOutputTokens,
-           messages: trimmedMessages.map((m) => ({
-             role: m.role,
-             content: m.content
-           }))
-         },
-         {
-           signal: abortController.signal
-         }
-       );
+       res.setHeader("Content-Type", "text/event-stream");
+       res.setHeader("Cache-Control", "no-cache");
+       res.setHeader("Connection", "keep-alive");
+       res.flushHeaders();
+       write("start", "");
+       heartbeatId = startHeartbeat(15e3);
        streamStarted = true;
        try {
-         for await (const chunk of stream) {
-           if (abortController.signal.aborted) {
-             break;
-           }
-           const token = chunk.choices[0]?.delta?.content;
-           if (token) {
-             res.write(token);
-           }
-         }
-       } catch (error) {
+         await streamChatCompletion(
+           client,
+           trimmedMessages,
+           effectiveLimits,
+           write,
+           abortController.signal
+         );
+       } catch (streamError) {
          if (abortController.signal.aborted) {
            console.log("Stream aborted by client");
          } else {
-           throw error;
+           const msg = streamError instanceof Error ? streamError.message : "Unknown error";
+           write("error", JSON.stringify({ message: msg }));
          }
        } finally {
-         recordTokenUsage(estimatedTotalTokens);
+         stopHeartbeat(heartbeatId);
+         heartbeatId = null;
+         recordTokenUsage(budgetResult.estimatedTotal);
          res.end();
        }
      } catch (error) {
        if (error?.name === "AbortError") {
+         stopHeartbeat(heartbeatId);
          return;
        }
        console.error("AI error:", error);
-       res.status(500).json({ error: "AI request failed" });
+       if (!res.headersSent) {
+         res.status(500).json({ error: "AI request failed" });
+       } else {
+         write("error", JSON.stringify({ message: "AI request failed" }));
+         res.end();
+       }
      }
    };
  }
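
Cancellation propagates end to end in the handler above: when the client connection closes, `res.on("close")` fires, the heartbeat stops, and the `AbortController` aborts the upstream OpenAI stream. A hypothetical client-side cancellation, again assuming an `/api/chat` route (not part of the package):

```
// Sketch only; the endpoint path is an assumption.
const controller = new AbortController();

fetch("/api/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ messages: [{ role: "user", content: "Tell me a long story" }] }),
  signal: controller.signal
}).catch((err) => {
  if (err.name !== "AbortError") throw err; // aborting is the expected path here
});

// Later, e.g. when the user clicks "Stop": aborting closes the request,
// which closes `res` on the server and halts token generation mid-stream.
controller.abort();
```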
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "chat-nest-server",
    "description": "Streaming AI backend server with cost controls, rate limiting, and cancellation support.",
-   "version": "1.0.1",
+   "version": "1.1.1",
    "type": "module",
    "main": "dist/index.js",
    "license": "ISC",