chat-nest-server 1.1.0 → 1.1.1

Files changed (3)
  1. package/README.md +9 -11
  2. package/dist/index.js +214 -110
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -5,7 +5,7 @@
  This package exposes an Express-compatible request handler that:
  - Streams AI responses using Server-Sent Events (SSE)
  - Sends real-time tokens via SSE protocol
- - Enforces rate limits and budgets
+ - Enforces rate limits and profile-based limits
  - Supports abort propagation
  - Protects against runaway usage

@@ -18,7 +18,7 @@ This package exposes an Express-compatible request handler that:
  - SSE event types: `start`, `token`, `done`, `error`, `ping`
  - Heartbeat pings to keep connection alive
  - End-to-end cancellation support
- - Daily token budget enforcement
+ - Daily token limit enforcement (profile-based)
  - Rate limiting
  - Message trimming
  - Safe retry semantics
@@ -76,12 +76,12 @@ The handler sends SSE-formatted events:

  ## 💰 Cost Controls

- The server enforces:
+ Profiles (`constrained`, `balanced`, `expanded`) control the limits, and HARD_CAPS clamp every profile. The server enforces:

- - Maximum tokens per request
- - Daily token budget
- - Request rate limiting
- - Prompt size trimming
+ - Maximum tokens per request (profile + HARD_CAPS)
+ - Daily token limit (profile)
+ - Request rate limiting (profile)
+ - Prompt size trimming (profile)
  - Retry classification

  This prevents accidental overspending and abuse.
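
To make the new semantics concrete, here is a minimal TypeScript sketch of how profile resolution and clamping behave, inferred from the bundled `resolveProfile()`/`clampToCaps()` code in the `dist/index.js` diff further down. The import from `chat-nest-server` is an assumption — the diff does not show whether these symbols are exported.

```ts
// Sketch only: resolveProfile() lives inside dist/index.js; the diff does not
// confirm it is exported, so this import is hypothetical.
import { resolveProfile } from "chat-nest-server";

// Missing or unknown input falls back to the "balanced" profile.
console.log(resolveProfile(undefined).profile);   // "balanced"
console.log(resolveProfile("EXPANDED ").profile); // "expanded" (lowercased, trimmed)

// Legacy 1.1.0 profile names are remapped instead of rejected.
console.log(resolveProfile("budget").profile);    // "constrained"
console.log(resolveProfile("free").profile);      // "expanded"

// Profiles are pre-clamped through clampToCaps(): "expanded" asks for
// maxOutputTokens 3000, which is under the HARD_CAPS ceiling of 4096.
console.log(resolveProfile("expanded").limits.maxOutputTokens); // 3000
```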
@@ -101,11 +101,9 @@ SSE provides better efficiency and real-time streaming compared to traditional p

  ---

  ## ⚙ Configuration
- Limits can be customized in:

- src/config/
- aiLimits.ts
- budget.ts
+ - **`config/profiles.ts`** – AI usage profiles (`constrained`, `balanced`, `expanded`), HARD_CAPS, and `resolveProfile()`. The frontend may send an `aiUsageProfile` string; the backend resolves it safely.
+ - **`config/aiLimits.ts`** – exports `AI_MODEL` only; all limits come from profiles.

  ---

package/dist/index.js CHANGED
@@ -1,22 +1,96 @@
  // src/createChatHandler.ts
  import OpenAI from "openai";

- // src/config/aiLimits.ts
- var AI_MODEL = "gpt-4o-mini";
- var AI_LIMITS = {
-   maxOutputTokens: 300,
-   maxMessages: 6,
-   rateLimitWindowMs: 6e4,
-   maxRequestsPerWindow: 30
+ // src/config/profiles.ts
+ var AIUsageProfile = {
+   CONSTRAINED: "constrained",
+   BALANCED: "balanced",
+   EXPANDED: "expanded"
  };
-
- // src/config/budget.ts
- var BUDGET = {
-   dailyTokenLimit: 7e4,
-   // ~safe for $5 / 3 months
-   maxTokensPerRequest: 600
-   // input + output guard
+ var HARD_CAPS = {
+   maxOutputTokens: 4096,
+   maxMessages: 20,
+   temperature: 2,
+   dailyTokenLimit: 5e5,
+   maxTokensPerRequest: 16e3,
+   rateLimit: { windowMs: 6e4, maxRequests: 60 }
+ };
+ function clampToCaps(limits) {
+   return {
+     maxOutputTokens: Math.min(limits.maxOutputTokens, HARD_CAPS.maxOutputTokens),
+     maxMessages: Math.min(limits.maxMessages, HARD_CAPS.maxMessages),
+     temperature: Math.min(limits.temperature, HARD_CAPS.temperature),
+     dailyTokenLimit: Math.min(limits.dailyTokenLimit, HARD_CAPS.dailyTokenLimit),
+     maxTokensPerRequest: Math.min(
+       limits.maxTokensPerRequest,
+       HARD_CAPS.maxTokensPerRequest
+     ),
+     rateLimit: {
+       windowMs: limits.rateLimit.windowMs,
+       maxRequests: Math.min(
+         limits.rateLimit.maxRequests,
+         HARD_CAPS.rateLimit.maxRequests
+       )
+     }
+   };
+ }
+ var DEFAULT_MAX_TOKENS_PER_REQUEST = 2048;
+ var PROFILES = {
+   [AIUsageProfile.CONSTRAINED]: clampToCaps({
+     maxOutputTokens: 150,
+     maxMessages: 4,
+     temperature: 0.5,
+     dailyTokenLimit: 3e4,
+     maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+     rateLimit: { windowMs: 6e4, maxRequests: 15 }
+   }),
+   [AIUsageProfile.BALANCED]: clampToCaps({
+     maxOutputTokens: 400,
+     maxMessages: 6,
+     temperature: 0.7,
+     dailyTokenLimit: 7e4,
+     maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+     rateLimit: { windowMs: 6e4, maxRequests: 30 }
+   }),
+   [AIUsageProfile.EXPANDED]: clampToCaps({
+     maxOutputTokens: 3e3,
+     maxMessages: 12,
+     temperature: 0.8,
+     dailyTokenLimit: 2e5,
+     maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+     rateLimit: { windowMs: 6e4, maxRequests: 60 }
+   })
+ };
+ var LEGACY_MAP = {
+   budget: AIUsageProfile.CONSTRAINED,
+   moderate: AIUsageProfile.BALANCED,
+   free: AIUsageProfile.EXPANDED
  };
+ var VALID_PROFILE_STRINGS = /* @__PURE__ */ new Set([
+   AIUsageProfile.CONSTRAINED,
+   AIUsageProfile.BALANCED,
+   AIUsageProfile.EXPANDED,
+   ...Object.keys(LEGACY_MAP)
+ ]);
+ function resolveProfile(input) {
+   const s = typeof input === "string" ? input.toLowerCase().trim() : "";
+   const profile = LEGACY_MAP[s] ?? (VALID_PROFILE_STRINGS.has(s) ? s : null) ?? AIUsageProfile.BALANCED;
+   return { limits: PROFILES[profile], profile };
+ }
+
+ // src/guards/rateLimit.ts
+ var requestCount = 0;
+ var windowStart = Date.now();
+ function isRateLimited(limits) {
+   const { windowMs, maxRequests } = limits.rateLimit;
+   const now = Date.now();
+   if (now - windowStart > windowMs) {
+     windowStart = now;
+     requestCount = 0;
+   }
+   requestCount++;
+   return requestCount > maxRequests;
+ }

  // src/utils/tokenEstimator.ts
  function estimateTokens(messages) {
@@ -39,139 +113,169 @@ function recordTokenUsage(used) {
    tokensUsedToday += used;
  }

- // src/createChatHandler.ts
- var requestCount = 0;
- var windowStart = Date.now();
- function isRateLimited() {
-   const now = Date.now();
-   if (now - windowStart > AI_LIMITS.rateLimitWindowMs) {
-     windowStart = now;
-     requestCount = 0;
+ // src/guards/budgetGuards.ts
+ function trimMessages(messages, maxMessages) {
+   return messages.slice(-maxMessages);
+ }
+ function checkBudget(messages, limits) {
+   const estimatedInput = estimateTokens(messages);
+   const estimatedTotal = estimatedInput + limits.maxOutputTokens;
+   if (estimatedTotal > limits.maxTokensPerRequest) {
+     return { success: false, estimatedTotal, reason: "request_limit" };
    }
-   requestCount++;
-   return requestCount > AI_LIMITS.maxRequestsPerWindow;
+   if (!canSpendTokens(estimatedTotal, limits.dailyTokenLimit)) {
+     return { success: false, estimatedTotal, reason: "daily_limit" };
+   }
+   return { success: true, estimatedTotal };
  }
- function trimMessages(messages) {
-   return messages.slice(-AI_LIMITS.maxMessages);
+
+ // src/sse/sseWriter.ts
+ function createSSEWriter(res, abortSignal) {
+   const write = (event, data) => {
+     if (abortSignal.aborted) return;
+     res.write(`event: ${event}
+ data: ${data}
+
+ `);
+   };
+   const startHeartbeat = (intervalMs = 15e3) => {
+     return setInterval(() => {
+       if (!abortSignal.aborted) write("ping", "");
+     }, intervalMs);
+   };
+   const stopHeartbeat = (id) => {
+     if (id) clearInterval(id);
+   };
+   return { write, startHeartbeat, stopHeartbeat };
+ }
+
+ // src/config/aiLimits.ts
+ var AI_MODEL = "gpt-4o-mini";
+
+ // src/streaming/openaiStream.ts
+ async function streamChatCompletion(client, messages, limits, writeSSE, signal) {
+   const stream = await client.chat.completions.create(
+     {
+       model: AI_MODEL,
+       stream: true,
+       temperature: limits.temperature,
+       max_tokens: limits.maxOutputTokens,
+       messages: messages.map((m) => ({ role: m.role, content: m.content }))
+     },
+     { signal }
+   );
+   for await (const chunk of stream) {
+     if (signal.aborted) break;
+     const token = chunk.choices[0]?.delta?.content;
+     if (token) {
+       writeSSE("token", JSON.stringify(token));
+     }
+   }
+   writeSSE("done", "");
  }
+
+ // src/createChatHandler.ts
  function createChatHandler(config) {
    if (!config.apiKey) {
      throw new Error("OPENAI_API_KEY is missing");
    }
-   const client = new OpenAI({
-     apiKey: config.apiKey
-   });
+   const client = new OpenAI({ apiKey: config.apiKey });
    return async function handler(req, res) {
      const abortController = new AbortController();
      let streamStarted = false;
-     let heartbeatInterval = null;
-     const writeSSE = (event, data) => {
-       if (abortController.signal.aborted) return;
-       res.write(`event: ${event}
- data: ${data}
-
- `);
-     };
+     let heartbeatId = null;
+     const body = req.body;
+     const { limits: profileLimits } = resolveProfile(
+       body.profile ?? body.aiUsageProfile
+     );
+     let effectiveLimits = { ...profileLimits };
+     if (body.dailyTokenLimit != null) {
+       effectiveLimits = {
+         ...effectiveLimits,
+         dailyTokenLimit: Math.min(
+           profileLimits.dailyTokenLimit,
+           body.dailyTokenLimit
+         )
+       };
+     }
+     if (body.maxTokensPerRequest != null) {
+       effectiveLimits = {
+         ...effectiveLimits,
+         maxTokensPerRequest: Math.min(
+           profileLimits.maxTokensPerRequest,
+           body.maxTokensPerRequest
+         )
+       };
+     }
+     if (!Array.isArray(body.messages)) {
+       res.status(400).json({ error: "Invalid messages payload" });
+       return;
+     }
+     if (isRateLimited(effectiveLimits)) {
+       res.status(429).json({
+         error: "Rate limit exceeded. Please slow down."
+       });
+       return;
+     }
+     const trimmedMessages = trimMessages(
+       body.messages,
+       effectiveLimits.maxMessages
+     );
+     const budgetResult = checkBudget(trimmedMessages, effectiveLimits);
+     if (!budgetResult.success) {
+       const errorMessage = budgetResult.reason === "request_limit" ? "Request too large. Please shorten your message." : "Daily AI budget exceeded. Try again tomorrow.";
+       res.status(429).json({ error: errorMessage });
+       return;
+     }
+     const { write, startHeartbeat, stopHeartbeat } = createSSEWriter(
+       res,
+       abortController.signal
+     );
      res.on("close", () => {
-       if (heartbeatInterval) {
-         clearInterval(heartbeatInterval);
-         heartbeatInterval = null;
-       }
+       stopHeartbeat(heartbeatId);
        if (streamStarted && !abortController.signal.aborted) {
          abortController.abort();
        }
      });
      try {
-       const body = req.body;
-       if (!Array.isArray(body.messages)) {
-         res.status(400).json({ error: "Invalid messages payload" });
-         return;
-       }
-       if (isRateLimited()) {
-         res.status(429).json({
-           error: "Rate limit exceeded. Please slow down."
-         });
-         return;
-       }
-       const trimmedMessages = trimMessages(body.messages);
-       const estimatedInputTokens = estimateTokens(trimmedMessages);
-       const estimatedTotalTokens = estimatedInputTokens + AI_LIMITS.maxOutputTokens;
-       if (estimatedTotalTokens > BUDGET.maxTokensPerRequest || !canSpendTokens(estimatedTotalTokens, BUDGET.dailyTokenLimit)) {
-         res.status(429).json({
-           error: "Daily AI budget exceeded. Try again tomorrow."
-         });
-         return;
-       }
        res.setHeader("Content-Type", "text/event-stream");
        res.setHeader("Cache-Control", "no-cache");
        res.setHeader("Connection", "keep-alive");
        res.flushHeaders();
-       writeSSE("start", "");
-       heartbeatInterval = setInterval(() => {
-         if (!abortController.signal.aborted) {
-           writeSSE("ping", "");
-         }
-       }, 15e3);
-       const stream = await client.chat.completions.create(
-         {
-           model: AI_MODEL,
-           stream: true,
-           temperature: 0.7,
-           max_tokens: AI_LIMITS.maxOutputTokens,
-           messages: trimmedMessages.map((m) => ({
-             role: m.role,
-             content: m.content
-           }))
-         },
-         {
-           signal: abortController.signal
-         }
-       );
+       write("start", "");
+       heartbeatId = startHeartbeat(15e3);
        streamStarted = true;
        try {
-         for await (const chunk of stream) {
-           if (abortController.signal.aborted) {
-             break;
-           }
-           const token = chunk.choices[0]?.delta?.content;
-           if (token) {
-             writeSSE("token", JSON.stringify(token));
-           }
-         }
-         writeSSE("done", "");
-       } catch (error) {
+         await streamChatCompletion(
+           client,
+           trimmedMessages,
+           effectiveLimits,
+           write,
+           abortController.signal
+         );
+       } catch (streamError) {
          if (abortController.signal.aborted) {
            console.log("Stream aborted by client");
          } else {
-           const errorData = JSON.stringify({
-             message: error instanceof Error ? error.message : "Unknown error"
-           });
-           writeSSE("error", errorData);
+           const msg = streamError instanceof Error ? streamError.message : "Unknown error";
+           write("error", JSON.stringify({ message: msg }));
          }
        } finally {
-         if (heartbeatInterval) {
-           clearInterval(heartbeatInterval);
-           heartbeatInterval = null;
-         }
-         recordTokenUsage(estimatedTotalTokens);
+         stopHeartbeat(heartbeatId);
+         heartbeatId = null;
+         recordTokenUsage(budgetResult.estimatedTotal);
          res.end();
        }
      } catch (error) {
        if (error?.name === "AbortError") {
-         if (heartbeatInterval) {
-           clearInterval(heartbeatInterval);
-           heartbeatInterval = null;
-         }
+         stopHeartbeat(heartbeatId);
          return;
        }
        console.error("AI error:", error);
        if (!res.headersSent) {
          res.status(500).json({ error: "AI request failed" });
        } else {
-         const errorData = JSON.stringify({
-           message: "AI request failed"
-         });
-         writeSSE("error", errorData);
+         write("error", JSON.stringify({ message: "AI request failed" }));
          res.end();
        }
      }
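
To show the handler's new request contract end to end, here is a minimal Express wiring sketch. The named `createChatHandler` export matches the bundle above, but the `/api/chat` route, port, and JSON body parser are assumptions — the handler reads `req.body` before streaming, so a body parser must run first.

```ts
import express from "express";
import { createChatHandler } from "chat-nest-server"; // export name assumed

const app = express();
app.use(express.json()); // required: the handler reads req.body up front

// createChatHandler throws at startup if apiKey is missing.
app.post(
  "/api/chat",
  createChatHandler({ apiKey: process.env.OPENAI_API_KEY ?? "" })
);

// Example client payload accepted by the 1.1.1 handler:
// {
//   "aiUsageProfile": "constrained",   // or legacy "budget" / "moderate" / "free"
//   "dailyTokenLimit": 20000,          // optional; can only lower the profile value
//   "maxTokensPerRequest": 1000,       // optional; can only lower the profile value
//   "messages": [{ "role": "user", "content": "Hello" }]
// }

app.listen(3000);
```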
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "chat-nest-server",
    "description": "Streaming AI backend server with cost controls, rate limiting, and cancellation support.",
-   "version": "1.1.0",
+   "version": "1.1.1",
    "type": "module",
    "main": "dist/index.js",
    "license": "ISC",