chat-nest-server 1.1.0 → 1.1.2

Files changed (3)
  1. package/README.md +18 -11
  2. package/dist/index.js +215 -110
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -5,7 +5,7 @@
  This package exposes an Express-compatible request handler that:
  - Streams AI responses using Server-Side Events (SSE)
  - Sends real-time tokens via SSE protocol
- - Enforces rate limits and budgets
+ - Enforces rate limits and profile-based limits
  - Supports abort propagation
  - Protects against runaway usage
 
@@ -18,7 +18,7 @@ This package exposes an Express-compatible request handler that:
  - SSE event types: `start`, `token`, `done`, `error`, `ping`
  - Heartbeat pings to keep connection alive
  - End-to-end cancellation support
- - Daily token budget enforcement
+ - Daily token limit enforcement (profile-based)
  - Rate limiting
  - Message trimming
  - Safe retry semantics
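
For reference, each of these event types travels as a plain SSE frame: an `event:` line, a `data:` line, and a blank separator, matching the writer in `dist/index.js` further down. `token` payloads are JSON-encoded strings, while `start`, `done`, and `ping` carry empty data. An illustrative exchange:

```
event: start
data: 

event: token
data: "Hello"

event: done
data: 
```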
@@ -76,14 +76,23 @@ The handler sends SSE-formatted events:
 
  ## 💰 Cost Controls
 
- The server enforces:
+ Profiles (`constrained`, `balanced`, `expanded`) control limits. HARD_CAPS clamp all profiles. Server enforces:
 
- - Maximum tokens per request
- - Daily token budget
- - Request rate limiting
- - Prompt size trimming
+ - **Maximum tokens per request** – Profile-based; client can override via `maxTokensPerRequest` (server applies `min(profile limit, client value)`)
+ - **Daily token limit** – Profile-based; client can override via `dailyTokenLimit` (same `min` rule)
+ - Request rate limiting (profile)
+ - Prompt size trimming (profile)
  - Retry classification
 
+ ### Request body options
+
+ | Field | Type | Description |
+ | --------------------- | ------ | ------------------------------------------------------------------------------------ |
+ | `messages` | array | Chat messages (required) |
+ | `profile` / `aiUsageProfile` | string | Profile: `constrained`, `balanced`, `expanded` (legacy: `budget`, `moderate`, `free`) |
+ | `dailyTokenLimit` | number | Optional. Cap daily tokens; server uses `min(profile limit, this)` |
+ | `maxTokensPerRequest` | number | Optional. Cap tokens per request; server uses `min(profile limit, this)` |
+
  This prevents accidental overspending and abuse.
 
  ## 🔄 Server-Side Events (SSE)
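
To make the override rules above concrete, a request might look like the sketch below. The `/api/chat` route and the `fetch` wiring are assumptions for illustration; the body fields and the `min()` clamping come from this diff (the balanced profile's defaults of 2048 tokens per request and 70000 per day appear in `dist/index.js` below).

```ts
// Hypothetical client call; only the body fields are defined by the package.
const res = await fetch("/api/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    messages: [{ role: "user", content: "Summarize SSE in one sentence." }],
    profile: "balanced",        // legacy "moderate" resolves to the same profile
    maxTokensPerRequest: 1000,  // server applies min(2048, 1000) = 1000
    dailyTokenLimit: 50000      // server applies min(70000, 50000) = 50000
  })
});
```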
@@ -101,11 +110,9 @@ SSE provides better efficiency and real-time streaming compared to traditional p
  ---
 
  ## ⚙ Configuration
- Limits can be customized in:
 
- src/config/
- aiLimits.ts
- budget.ts
+ - **`config/profiles.ts`** – AI usage profiles (`constrained`, `balanced`, `expanded`), HARD_CAPS, and `resolveProfile()`. Frontend may send `aiUsageProfile` string; backend resolves safely.
+ - **`config/aiLimits.ts`** – `AI_MODEL` only. All limits come from profiles.
 
  ---
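
Since the handler reads a JSON `req.body`, it is driven by POST requests, so the browser's GET-only `EventSource` API does not apply directly. Below is a minimal client-side sketch, assuming the `event:`/`data:` frame format that `createSSEWriter` emits in the `dist/index.js` diff that follows; the function name and error handling are illustrative, not part of the package.

```ts
// Sketch only: parse the handler's SSE frames out of a streamed POST response.
async function readChatStream(res: Response, onToken: (t: string) => void) {
  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  for (;;) {
    const { value, done } = await reader.read();
    if (done) return;
    buffer += decoder.decode(value, { stream: true });
    let end: number;
    while ((end = buffer.indexOf("\n\n")) !== -1) {
      const frame = buffer.slice(0, end);
      buffer = buffer.slice(end + 2);
      const event = /^event: (.*)$/m.exec(frame)?.[1];
      const data = /^data: (.*)$/m.exec(frame)?.[1] ?? "";
      if (event === "token") onToken(JSON.parse(data)); // tokens are JSON-encoded strings
      else if (event === "error") throw new Error(JSON.parse(data).message);
      else if (event === "done") return;
      // "start" and "ping" frames carry no payload and are skipped
    }
  }
}
```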
package/dist/index.js CHANGED
@@ -1,22 +1,97 @@
  // src/createChatHandler.ts
  import OpenAI from "openai";
 
- // src/config/aiLimits.ts
- var AI_MODEL = "gpt-4o-mini";
- var AI_LIMITS = {
-   maxOutputTokens: 300,
-   maxMessages: 6,
-   rateLimitWindowMs: 6e4,
-   maxRequestsPerWindow: 30
+ // src/config/profiles.ts
+ var AIUsageProfile = {
+   CONSTRAINED: "constrained",
+   BALANCED: "balanced",
+   EXPANDED: "expanded"
  };
-
- // src/config/budget.ts
- var BUDGET = {
-   dailyTokenLimit: 7e4,
-   // ~safe for $5 / 3 months
-   maxTokensPerRequest: 600
-   // input + output guard
+ var HARD_CAPS = {
+   maxOutputTokens: 4096,
+   maxMessages: 20,
+   temperature: 2,
+   dailyTokenLimit: 5e5,
+   maxTokensPerRequest: 16e3,
+   rateLimit: { windowMs: 6e4, maxRequests: 60 }
+ };
+ function clampToCaps(limits) {
+   return {
+     maxOutputTokens: Math.min(limits.maxOutputTokens, HARD_CAPS.maxOutputTokens),
+     maxMessages: Math.min(limits.maxMessages, HARD_CAPS.maxMessages),
+     temperature: Math.min(limits.temperature, HARD_CAPS.temperature),
+     dailyTokenLimit: Math.min(limits.dailyTokenLimit, HARD_CAPS.dailyTokenLimit),
+     maxTokensPerRequest: Math.min(
+       limits.maxTokensPerRequest,
+       HARD_CAPS.maxTokensPerRequest
+     ),
+     rateLimit: {
+       windowMs: limits.rateLimit.windowMs,
+       maxRequests: Math.min(
+         limits.rateLimit.maxRequests,
+         HARD_CAPS.rateLimit.maxRequests
+       )
+     }
+   };
+ }
+ var DEFAULT_MAX_TOKENS_PER_REQUEST = 2048;
+ var PROFILES = {
+   [AIUsageProfile.CONSTRAINED]: clampToCaps({
+     maxOutputTokens: 150,
+     maxMessages: 4,
+     temperature: 0.5,
+     dailyTokenLimit: 3e4,
+     maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+     rateLimit: { windowMs: 6e4, maxRequests: 15 }
+   }),
+   [AIUsageProfile.BALANCED]: clampToCaps({
+     maxOutputTokens: 400,
+     maxMessages: 6,
+     temperature: 0.7,
+     dailyTokenLimit: 7e4,
+     maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+     rateLimit: { windowMs: 6e4, maxRequests: 30 }
+   }),
+   [AIUsageProfile.EXPANDED]: clampToCaps({
+     maxOutputTokens: 3e3,
+     maxMessages: 12,
+     temperature: 0.8,
+     dailyTokenLimit: 2e5,
+     maxTokensPerRequest: 12e3,
+     // input (~9k) + output (3k) for 12 msgs
+     rateLimit: { windowMs: 6e4, maxRequests: 60 }
+   })
+ };
+ var LEGACY_MAP = {
+   budget: AIUsageProfile.CONSTRAINED,
+   moderate: AIUsageProfile.BALANCED,
+   free: AIUsageProfile.EXPANDED
  };
+ var VALID_PROFILE_STRINGS = /* @__PURE__ */ new Set([
+   AIUsageProfile.CONSTRAINED,
+   AIUsageProfile.BALANCED,
+   AIUsageProfile.EXPANDED,
+   ...Object.keys(LEGACY_MAP)
+ ]);
+ function resolveProfile(input) {
+   const s = typeof input === "string" ? input.toLowerCase().trim() : "";
+   const profile = LEGACY_MAP[s] ?? (VALID_PROFILE_STRINGS.has(s) ? s : null) ?? AIUsageProfile.BALANCED;
+   return { limits: PROFILES[profile], profile };
+ }
+
+ // src/guards/rateLimit.ts
+ var requestCount = 0;
+ var windowStart = Date.now();
+ function isRateLimited(limits) {
+   const { windowMs, maxRequests } = limits.rateLimit;
+   const now = Date.now();
+   if (now - windowStart > windowMs) {
+     windowStart = now;
+     requestCount = 0;
+   }
+   requestCount++;
+   return requestCount > maxRequests;
+ }
 
  // src/utils/tokenEstimator.ts
  function estimateTokens(messages) {
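
A few illustrative calls against the `resolveProfile()` just defined; the expected results follow directly from `PROFILES` and `LEGACY_MAP` above, and this snippet is not part of the shipped code.

```ts
resolveProfile("expanded");   // profile "expanded": maxOutputTokens 3000, dailyTokenLimit 200000
resolveProfile("budget");     // legacy alias, maps to profile "constrained"
resolveProfile(" BALANCED "); // lowercased and trimmed, resolves to "balanced"
resolveProfile(undefined);    // missing or unknown input falls back to "balanced"
```

Note that every profile passes through `clampToCaps()`, so no profile value can exceed `HARD_CAPS` regardless of what the profile table declares.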
@@ -39,139 +114,169 @@ function recordTokenUsage(used) {
    tokensUsedToday += used;
  }
 
- // src/createChatHandler.ts
- var requestCount = 0;
- var windowStart = Date.now();
- function isRateLimited() {
-   const now = Date.now();
-   if (now - windowStart > AI_LIMITS.rateLimitWindowMs) {
-     windowStart = now;
-     requestCount = 0;
+ // src/guards/budgetGuards.ts
+ function trimMessages(messages, maxMessages) {
+   return messages.slice(-maxMessages);
+ }
+ function checkBudget(messages, limits) {
+   const estimatedInput = estimateTokens(messages);
+   const estimatedTotal = estimatedInput + limits.maxOutputTokens;
+   if (estimatedTotal > limits.maxTokensPerRequest) {
+     return { success: false, estimatedTotal, reason: "request_limit" };
    }
-   requestCount++;
-   return requestCount > AI_LIMITS.maxRequestsPerWindow;
+   if (!canSpendTokens(estimatedTotal, limits.dailyTokenLimit)) {
+     return { success: false, estimatedTotal, reason: "daily_limit" };
+   }
+   return { success: true, estimatedTotal };
  }
- function trimMessages(messages) {
-   return messages.slice(-AI_LIMITS.maxMessages);
+
+ // src/sse/sseWriter.ts
+ function createSSEWriter(res, abortSignal) {
+   const write = (event, data) => {
+     if (abortSignal.aborted) return;
+     res.write(`event: ${event}
+ data: ${data}
+
+ `);
+   };
+   const startHeartbeat = (intervalMs = 15e3) => {
+     return setInterval(() => {
+       if (!abortSignal.aborted) write("ping", "");
+     }, intervalMs);
+   };
+   const stopHeartbeat = (id) => {
+     if (id) clearInterval(id);
+   };
+   return { write, startHeartbeat, stopHeartbeat };
+ }
+
+ // src/config/aiLimits.ts
+ var AI_MODEL = "gpt-4o-mini";
+
+ // src/streaming/openaiStream.ts
+ async function streamChatCompletion(client, messages, limits, writeSSE, signal) {
+   const stream = await client.chat.completions.create(
+     {
+       model: AI_MODEL,
+       stream: true,
+       temperature: limits.temperature,
+       max_tokens: limits.maxOutputTokens,
+       messages: messages.map((m) => ({ role: m.role, content: m.content }))
+     },
+     { signal }
+   );
+   for await (const chunk of stream) {
+     if (signal.aborted) break;
+     const token = chunk.choices[0]?.delta?.content;
+     if (token) {
+       writeSSE("token", JSON.stringify(token));
+     }
+   }
+   writeSSE("done", "");
  }
+
+ // src/createChatHandler.ts
  function createChatHandler(config) {
    if (!config.apiKey) {
      throw new Error("OPENAI_API_KEY is missing");
    }
-   const client = new OpenAI({
-     apiKey: config.apiKey
-   });
+   const client = new OpenAI({ apiKey: config.apiKey });
    return async function handler(req, res) {
      const abortController = new AbortController();
      let streamStarted = false;
-     let heartbeatInterval = null;
-     const writeSSE = (event, data) => {
-       if (abortController.signal.aborted) return;
-       res.write(`event: ${event}
- data: ${data}
-
- `);
-     };
+     let heartbeatId = null;
+     const body = req.body;
+     const { limits: profileLimits } = resolveProfile(
+       body.profile ?? body.aiUsageProfile
+     );
+     let effectiveLimits = { ...profileLimits };
+     if (body.dailyTokenLimit != null) {
+       effectiveLimits = {
+         ...effectiveLimits,
+         dailyTokenLimit: Math.min(
+           profileLimits.dailyTokenLimit,
+           body.dailyTokenLimit
+         )
+       };
+     }
+     if (body.maxTokensPerRequest != null) {
+       effectiveLimits = {
+         ...effectiveLimits,
+         maxTokensPerRequest: Math.min(
+           profileLimits.maxTokensPerRequest,
+           body.maxTokensPerRequest
+         )
+       };
+     }
+     if (!Array.isArray(body.messages)) {
+       res.status(400).json({ error: "Invalid messages payload" });
+       return;
+     }
+     if (isRateLimited(effectiveLimits)) {
+       res.status(429).json({
+         error: "Rate limit exceeded. Please slow down."
+       });
+       return;
+     }
+     const trimmedMessages = trimMessages(
+       body.messages,
+       effectiveLimits.maxMessages
+     );
+     const budgetResult = checkBudget(trimmedMessages, effectiveLimits);
+     if (!budgetResult.success) {
+       const errorMessage = budgetResult.reason === "request_limit" ? "Request too large. Please shorten your message." : "Daily AI budget exceeded. Try again tomorrow.";
+       res.status(429).json({ error: errorMessage });
+       return;
+     }
+     const { write, startHeartbeat, stopHeartbeat } = createSSEWriter(
+       res,
+       abortController.signal
+     );
      res.on("close", () => {
-       if (heartbeatInterval) {
-         clearInterval(heartbeatInterval);
-         heartbeatInterval = null;
-       }
+       stopHeartbeat(heartbeatId);
        if (streamStarted && !abortController.signal.aborted) {
          abortController.abort();
        }
      });
      try {
-       const body = req.body;
-       if (!Array.isArray(body.messages)) {
-         res.status(400).json({ error: "Invalid messages payload" });
-         return;
-       }
-       if (isRateLimited()) {
-         res.status(429).json({
-           error: "Rate limit exceeded. Please slow down."
-         });
-         return;
-       }
-       const trimmedMessages = trimMessages(body.messages);
-       const estimatedInputTokens = estimateTokens(trimmedMessages);
-       const estimatedTotalTokens = estimatedInputTokens + AI_LIMITS.maxOutputTokens;
-       if (estimatedTotalTokens > BUDGET.maxTokensPerRequest || !canSpendTokens(estimatedTotalTokens, BUDGET.dailyTokenLimit)) {
-         res.status(429).json({
-           error: "Daily AI budget exceeded. Try again tomorrow."
-         });
-         return;
-       }
        res.setHeader("Content-Type", "text/event-stream");
        res.setHeader("Cache-Control", "no-cache");
        res.setHeader("Connection", "keep-alive");
        res.flushHeaders();
-       writeSSE("start", "");
-       heartbeatInterval = setInterval(() => {
-         if (!abortController.signal.aborted) {
-           writeSSE("ping", "");
-         }
-       }, 15e3);
-       const stream = await client.chat.completions.create(
-         {
-           model: AI_MODEL,
-           stream: true,
-           temperature: 0.7,
-           max_tokens: AI_LIMITS.maxOutputTokens,
-           messages: trimmedMessages.map((m) => ({
-             role: m.role,
-             content: m.content
-           }))
-         },
-         {
-           signal: abortController.signal
-         }
-       );
+       write("start", "");
+       heartbeatId = startHeartbeat(15e3);
        streamStarted = true;
        try {
-         for await (const chunk of stream) {
-           if (abortController.signal.aborted) {
-             break;
-           }
-           const token = chunk.choices[0]?.delta?.content;
-           if (token) {
-             writeSSE("token", JSON.stringify(token));
-           }
-         }
-         writeSSE("done", "");
-       } catch (error) {
+         await streamChatCompletion(
+           client,
+           trimmedMessages,
+           effectiveLimits,
+           write,
+           abortController.signal
+         );
+       } catch (streamError) {
          if (abortController.signal.aborted) {
            console.log("Stream aborted by client");
          } else {
-           const errorData = JSON.stringify({
-             message: error instanceof Error ? error.message : "Unknown error"
-           });
-           writeSSE("error", errorData);
+           const msg = streamError instanceof Error ? streamError.message : "Unknown error";
+           write("error", JSON.stringify({ message: msg }));
          }
        } finally {
-         if (heartbeatInterval) {
-           clearInterval(heartbeatInterval);
-           heartbeatInterval = null;
-         }
-         recordTokenUsage(estimatedTotalTokens);
+         stopHeartbeat(heartbeatId);
+         heartbeatId = null;
+         recordTokenUsage(budgetResult.estimatedTotal);
          res.end();
        }
      } catch (error) {
        if (error?.name === "AbortError") {
-         if (heartbeatInterval) {
-           clearInterval(heartbeatInterval);
-           heartbeatInterval = null;
-         }
+         stopHeartbeat(heartbeatId);
          return;
        }
        console.error("AI error:", error);
        if (!res.headersSent) {
          res.status(500).json({ error: "AI request failed" });
        } else {
-         const errorData = JSON.stringify({
-           message: "AI request failed"
-         });
-         writeSSE("error", errorData);
+         write("error", JSON.stringify({ message: "AI request failed" }));
          res.end();
        }
      }
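
For context, a minimal sketch of mounting the rebuilt handler in Express. It assumes `createChatHandler` is the package's exported factory (consistent with `dist/index.js` above) and that the host app supplies a JSON body parser, since the handler reads `req.body` directly; the route path is a placeholder. Per the guard above, the factory throws at startup when `apiKey` is falsy.

```ts
import express from "express";
// Assumption: createChatHandler is the package's named export.
import { createChatHandler } from "chat-nest-server";

const app = express();
app.use(express.json()); // required: the handler validates req.body.messages

app.post(
  "/api/chat", // placeholder route
  createChatHandler({ apiKey: process.env.OPENAI_API_KEY ?? "" })
);

app.listen(3000, () => console.log("chat server listening on :3000"));
```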
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "chat-nest-server",
    "description": "Streaming AI backend server with cost controls, rate limiting, and cancellation support.",
-   "version": "1.1.0",
+   "version": "1.1.2",
    "type": "module",
    "main": "dist/index.js",
    "license": "ISC",