chat-nest-server 1.0.1 → 1.1.1

Files changed (3)
  1. package/README.md +35 -18
  2. package/dist/index.js +223 -79
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -1,10 +1,11 @@
  # chat-nest-server
 
- > Streaming AI backend server for Chat Nest with built-in cost protection and cancellation propagation.
+ > Streaming AI backend server for Chat Nest with built-in cost protection and cancellation propagation using Server-Sent Events (SSE).
 
  This package exposes an Express-compatible request handler that:
- - Streams AI responses
- - Enforces rate limits and budgets
+ - Streams AI responses using Server-Sent Events (SSE)
+ - Sends real-time tokens via the SSE protocol
+ - Enforces rate limits and profile-based limits
  - Supports abort propagation
  - Protects against runaway usage
 
@@ -12,9 +13,12 @@ This package exposes an Express-compatible request handler that:
 
  ## ✨ Features
 
- - Streaming responses over HTTP
+ - Server-Sent Events (SSE) streaming over HTTP
+ - Real-time token streaming via the SSE protocol
+ - SSE event types: `start`, `token`, `done`, `error`, `ping`
+ - Heartbeat pings to keep the connection alive
  - End-to-end cancellation support
- - Daily token budget enforcement
+ - Daily token limit enforcement (profile-based)
  - Rate limiting
  - Message trimming
  - Safe retry semantics
@@ -31,6 +35,8 @@ npm install chat-nest-server
  ## 🚀 Usage
  Express Integration
 
+ The handler automatically uses Server-Sent Events (SSE) for streaming responses:
+
  ```
  import express from "express";
  import cors from "cors";
@@ -53,6 +59,13 @@ app.listen(3001, () => {
  });
  ```
 
+ The handler sends SSE-formatted events:
+ - `event: start\ndata: \n\n` - Stream started
+ - `event: token\ndata: <token>\n\n` - Each token chunk
+ - `event: done\ndata: \n\n` - Stream completed
+ - `event: error\ndata: <error_json>\n\n` - Error occurred
+ - `event: ping\ndata: \n\n` - Heartbeat (every 15s)
+
  ---
 
  ## 🔐 Environment Variables
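
For orientation, here is a minimal client-side sketch of consuming the events listed above. This is not part of the package: the `/api/chat` path and the `onToken` callback are assumptions. `EventSource` supports only GET, so the frames from this POST endpoint are parsed by hand; `token` payloads are JSON-encoded strings, matching the `JSON.stringify` call in `dist/index.js` below.

```
// Sketch only; "/api/chat" and onToken are assumptions, not package APIs.
async function streamChat(messages, onToken) {
  const res = await fetch("/api/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages })
  });
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  while (true) {
    const { value, done } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    let end;
    // Each SSE frame ends with a blank line ("\n\n").
    while ((end = buffer.indexOf("\n\n")) !== -1) {
      const frame = buffer.slice(0, end);
      buffer = buffer.slice(end + 2);
      const event = /^event: (.*)$/m.exec(frame)?.[1];
      const data = /^data: (.*)$/m.exec(frame)?.[1] ?? "";
      if (event === "token") onToken(JSON.parse(data)); // tokens arrive JSON-encoded
      else if (event === "error") throw new Error(JSON.parse(data).message);
      else if (event === "done") return;
      // "start" and "ping" frames need no handling here.
    }
  }
}
```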
@@ -63,30 +76,34 @@ app.listen(3001, () => {
 
  ## 💰 Cost Controls
 
- ```
- The server enforces:
+ Profiles (`constrained`, `balanced`, `expanded`) control the limits, and HARD_CAPS clamp all profiles. The server enforces:
 
- Maximum tokens per request
+ - Maximum tokens per request (profile + HARD_CAPS)
+ - Daily token limit (profile)
+ - Request rate limiting (profile)
+ - Prompt size trimming (profile)
+ - Retry classification
 
- Daily token budget
+ This prevents accidental overspending and abuse.
 
- Request rate limiting
+ ## 🔄 Server-Sent Events (SSE)
 
- Prompt size trimming
+ This package uses the SSE protocol for efficient streaming:
 
- Retry classification
+ - **Content-Type**: `text/event-stream`
+ - **Connection**: `keep-alive`
+ - **Cache-Control**: `no-cache`
+ - **Heartbeat**: Ping every 15 seconds to keep the connection alive
+ - **Event Format**: `event: <type>\ndata: <data>\n\n`
 
- This prevents accidental overspending and abuse.
- ```
+ SSE delivers tokens as they are generated, avoiding the latency of polling and the framing ambiguity of raw chunked responses.
 
  ---
 
  ## ⚙ Configuration
- Limits can be customized in:
 
- src/config/
- aiLimits.ts
- budget.ts
+ - **`config/profiles.ts`** – AI usage profiles (`constrained`, `balanced`, `expanded`), HARD_CAPS, and `resolveProfile()`. The frontend may send an `aiUsageProfile` string; the backend resolves it safely.
+ - **`config/aiLimits.ts`** – `AI_MODEL` only. All limits come from profiles.
 
  ---
 
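As a usage note (a sketch, not from the package docs): the profile is selected per request in the body. Per the handler in `dist/index.js` below, the server reads `body.profile ?? body.aiUsageProfile`, and any client-supplied `dailyTokenLimit` or `maxTokensPerRequest` can only lower the resolved profile's limits, never raise them. The `/api/chat` path is an assumption.

```
// Hypothetical client request; only the field names are taken from dist/index.js.
await fetch("/api/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    aiUsageProfile: "constrained", // or "balanced" / "expanded"; legacy "budget" / "moderate" / "free" also resolve
    dailyTokenLimit: 20000,        // optional; clamped to Math.min(profile limit, this value)
    maxTokensPerRequest: 1000,     // optional; clamped the same way
    messages: [{ role: "user", content: "Hello!" }]
  })
});
```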
package/dist/index.js CHANGED
@@ -1,22 +1,96 @@
  // src/createChatHandler.ts
  import OpenAI from "openai";
 
- // src/config/aiLimits.ts
- var AI_MODEL = "gpt-4o-mini";
- var AI_LIMITS = {
-   maxOutputTokens: 300,
-   maxMessages: 6,
-   rateLimitWindowMs: 6e4,
-   maxRequestsPerWindow: 30
+ // src/config/profiles.ts
+ var AIUsageProfile = {
+   CONSTRAINED: "constrained",
+   BALANCED: "balanced",
+   EXPANDED: "expanded"
  };
-
- // src/config/budget.ts
- var BUDGET = {
-   dailyTokenLimit: 7e4,
-   // ~safe for $5 / 3 months
-   maxTokensPerRequest: 600
-   // input + output guard
+ var HARD_CAPS = {
+   maxOutputTokens: 4096,
+   maxMessages: 20,
+   temperature: 2,
+   dailyTokenLimit: 5e5,
+   maxTokensPerRequest: 16e3,
+   rateLimit: { windowMs: 6e4, maxRequests: 60 }
+ };
+ function clampToCaps(limits) {
+   return {
+     maxOutputTokens: Math.min(limits.maxOutputTokens, HARD_CAPS.maxOutputTokens),
+     maxMessages: Math.min(limits.maxMessages, HARD_CAPS.maxMessages),
+     temperature: Math.min(limits.temperature, HARD_CAPS.temperature),
+     dailyTokenLimit: Math.min(limits.dailyTokenLimit, HARD_CAPS.dailyTokenLimit),
+     maxTokensPerRequest: Math.min(
+       limits.maxTokensPerRequest,
+       HARD_CAPS.maxTokensPerRequest
+     ),
+     rateLimit: {
+       windowMs: limits.rateLimit.windowMs,
+       maxRequests: Math.min(
+         limits.rateLimit.maxRequests,
+         HARD_CAPS.rateLimit.maxRequests
+       )
+     }
+   };
+ }
+ var DEFAULT_MAX_TOKENS_PER_REQUEST = 2048;
+ var PROFILES = {
+   [AIUsageProfile.CONSTRAINED]: clampToCaps({
+     maxOutputTokens: 150,
+     maxMessages: 4,
+     temperature: 0.5,
+     dailyTokenLimit: 3e4,
+     maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+     rateLimit: { windowMs: 6e4, maxRequests: 15 }
+   }),
+   [AIUsageProfile.BALANCED]: clampToCaps({
+     maxOutputTokens: 400,
+     maxMessages: 6,
+     temperature: 0.7,
+     dailyTokenLimit: 7e4,
+     maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+     rateLimit: { windowMs: 6e4, maxRequests: 30 }
+   }),
+   [AIUsageProfile.EXPANDED]: clampToCaps({
+     maxOutputTokens: 3e3,
+     maxMessages: 12,
+     temperature: 0.8,
+     dailyTokenLimit: 2e5,
+     maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
+     rateLimit: { windowMs: 6e4, maxRequests: 60 }
+   })
  };
+ var LEGACY_MAP = {
+   budget: AIUsageProfile.CONSTRAINED,
+   moderate: AIUsageProfile.BALANCED,
+   free: AIUsageProfile.EXPANDED
+ };
+ var VALID_PROFILE_STRINGS = /* @__PURE__ */ new Set([
+   AIUsageProfile.CONSTRAINED,
+   AIUsageProfile.BALANCED,
+   AIUsageProfile.EXPANDED,
+   ...Object.keys(LEGACY_MAP)
+ ]);
+ function resolveProfile(input) {
+   const s = typeof input === "string" ? input.toLowerCase().trim() : "";
+   const profile = LEGACY_MAP[s] ?? (VALID_PROFILE_STRINGS.has(s) ? s : null) ?? AIUsageProfile.BALANCED;
+   return { limits: PROFILES[profile], profile };
+ }
+
+ // src/guards/rateLimit.ts
+ var requestCount = 0;
+ var windowStart = Date.now();
+ function isRateLimited(limits) {
+   const { windowMs, maxRequests } = limits.rateLimit;
+   const now = Date.now();
+   if (now - windowStart > windowMs) {
+     windowStart = now;
+     requestCount = 0;
+   }
+   requestCount++;
+   return requestCount > maxRequests;
+ }
 
  // src/utils/tokenEstimator.ts
  function estimateTokens(messages) {
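
A few illustrative calls against `resolveProfile()` as bundled in the hunk above (return values abbreviated; these lines are not in the package):

```
// Illustrative only; mirrors the resolution order: legacy map, valid set, default.
resolveProfile("expanded");  // → { profile: "expanded", limits: PROFILES["expanded"] }
resolveProfile(" FREE ");    // lowercased + trimmed; legacy "free" maps to "expanded"
resolveProfile("turbo");     // unknown string → safe default "balanced"
resolveProfile(undefined);   // non-string → safe default "balanced"
```

Two observable design points: the shipped profile values all sit below HARD_CAPS, so `clampToCaps` is currently a no-op that guards future edits; and the rate-limit window (`requestCount`, `windowStart`) is module-level state, so it is shared across all clients of the process rather than tracked per caller.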
@@ -39,101 +113,171 @@ function recordTokenUsage(used) {
    tokensUsedToday += used;
  }
 
- // src/createChatHandler.ts
- var requestCount = 0;
- var windowStart = Date.now();
- function isRateLimited() {
-   const now = Date.now();
-   if (now - windowStart > AI_LIMITS.rateLimitWindowMs) {
-     windowStart = now;
-     requestCount = 0;
+ // src/guards/budgetGuards.ts
+ function trimMessages(messages, maxMessages) {
+   return messages.slice(-maxMessages);
+ }
+ function checkBudget(messages, limits) {
+   const estimatedInput = estimateTokens(messages);
+   const estimatedTotal = estimatedInput + limits.maxOutputTokens;
+   if (estimatedTotal > limits.maxTokensPerRequest) {
+     return { success: false, estimatedTotal, reason: "request_limit" };
    }
-   requestCount++;
-   return requestCount > AI_LIMITS.maxRequestsPerWindow;
+   if (!canSpendTokens(estimatedTotal, limits.dailyTokenLimit)) {
+     return { success: false, estimatedTotal, reason: "daily_limit" };
+   }
+   return { success: true, estimatedTotal };
+ }
+
+ // src/sse/sseWriter.ts
+ function createSSEWriter(res, abortSignal) {
+   const write = (event, data) => {
+     if (abortSignal.aborted) return;
+     res.write(`event: ${event}
+ data: ${data}
+
+ `);
+   };
+   const startHeartbeat = (intervalMs = 15e3) => {
+     return setInterval(() => {
+       if (!abortSignal.aborted) write("ping", "");
+     }, intervalMs);
+   };
+   const stopHeartbeat = (id) => {
+     if (id) clearInterval(id);
+   };
+   return { write, startHeartbeat, stopHeartbeat };
  }
- function trimMessages(messages) {
-   return messages.slice(-AI_LIMITS.maxMessages);
+
+ // src/config/aiLimits.ts
+ var AI_MODEL = "gpt-4o-mini";
+
+ // src/streaming/openaiStream.ts
+ async function streamChatCompletion(client, messages, limits, writeSSE, signal) {
+   const stream = await client.chat.completions.create(
+     {
+       model: AI_MODEL,
+       stream: true,
+       temperature: limits.temperature,
+       max_tokens: limits.maxOutputTokens,
+       messages: messages.map((m) => ({ role: m.role, content: m.content }))
+     },
+     { signal }
+   );
+   for await (const chunk of stream) {
+     if (signal.aborted) break;
+     const token = chunk.choices[0]?.delta?.content;
+     if (token) {
+       writeSSE("token", JSON.stringify(token));
+     }
+   }
+   writeSSE("done", "");
  }
+
+ // src/createChatHandler.ts
  function createChatHandler(config) {
    if (!config.apiKey) {
      throw new Error("OPENAI_API_KEY is missing");
    }
-   const client = new OpenAI({
-     apiKey: config.apiKey
-   });
+   const client = new OpenAI({ apiKey: config.apiKey });
    return async function handler(req, res) {
      const abortController = new AbortController();
      let streamStarted = false;
+     let heartbeatId = null;
+     const body = req.body;
+     const { limits: profileLimits } = resolveProfile(
+       body.profile ?? body.aiUsageProfile
+     );
+     let effectiveLimits = { ...profileLimits };
+     if (body.dailyTokenLimit != null) {
+       effectiveLimits = {
+         ...effectiveLimits,
+         dailyTokenLimit: Math.min(
+           profileLimits.dailyTokenLimit,
+           body.dailyTokenLimit
+         )
+       };
+     }
+     if (body.maxTokensPerRequest != null) {
+       effectiveLimits = {
+         ...effectiveLimits,
+         maxTokensPerRequest: Math.min(
+           profileLimits.maxTokensPerRequest,
+           body.maxTokensPerRequest
+         )
+       };
+     }
+     if (!Array.isArray(body.messages)) {
+       res.status(400).json({ error: "Invalid messages payload" });
+       return;
+     }
+     if (isRateLimited(effectiveLimits)) {
+       res.status(429).json({
+         error: "Rate limit exceeded. Please slow down."
+       });
+       return;
+     }
+     const trimmedMessages = trimMessages(
+       body.messages,
+       effectiveLimits.maxMessages
+     );
+     const budgetResult = checkBudget(trimmedMessages, effectiveLimits);
+     if (!budgetResult.success) {
+       const errorMessage = budgetResult.reason === "request_limit" ? "Request too large. Please shorten your message." : "Daily AI budget exceeded. Try again tomorrow.";
+       res.status(429).json({ error: errorMessage });
+       return;
+     }
+     const { write, startHeartbeat, stopHeartbeat } = createSSEWriter(
+       res,
+       abortController.signal
+     );
      res.on("close", () => {
+       stopHeartbeat(heartbeatId);
        if (streamStarted && !abortController.signal.aborted) {
          abortController.abort();
        }
      });
      try {
-       const body = req.body;
-       if (!Array.isArray(body.messages)) {
-         res.status(400).json({ error: "Invalid messages payload" });
-         return;
-       }
-       if (isRateLimited()) {
-         res.status(429).json({
-           error: "Rate limit exceeded. Please slow down."
-         });
-         return;
-       }
-       const trimmedMessages = trimMessages(body.messages);
-       const estimatedInputTokens = estimateTokens(trimmedMessages);
-       const estimatedTotalTokens = estimatedInputTokens + AI_LIMITS.maxOutputTokens;
-       if (estimatedTotalTokens > BUDGET.maxTokensPerRequest || !canSpendTokens(estimatedTotalTokens, BUDGET.dailyTokenLimit)) {
-         res.status(429).json({
-           error: "Daily AI budget exceeded. Try again tomorrow."
-         });
-         return;
-       }
-       res.setHeader("Content-Type", "text/plain");
-       res.setHeader("Transfer-Encoding", "chunked");
-       const stream = await client.chat.completions.create(
-         {
-           model: AI_MODEL,
-           stream: true,
-           temperature: 0.7,
-           max_tokens: AI_LIMITS.maxOutputTokens,
-           messages: trimmedMessages.map((m) => ({
-             role: m.role,
-             content: m.content
-           }))
-         },
-         {
-           signal: abortController.signal
-         }
-       );
+       res.setHeader("Content-Type", "text/event-stream");
+       res.setHeader("Cache-Control", "no-cache");
+       res.setHeader("Connection", "keep-alive");
+       res.flushHeaders();
+       write("start", "");
+       heartbeatId = startHeartbeat(15e3);
        streamStarted = true;
        try {
-         for await (const chunk of stream) {
-           if (abortController.signal.aborted) {
-             break;
-           }
-           const token = chunk.choices[0]?.delta?.content;
-           if (token) {
-             res.write(token);
-           }
-         }
-       } catch (error) {
+         await streamChatCompletion(
+           client,
+           trimmedMessages,
+           effectiveLimits,
+           write,
+           abortController.signal
+         );
+       } catch (streamError) {
          if (abortController.signal.aborted) {
            console.log("Stream aborted by client");
          } else {
-           throw error;
+           const msg = streamError instanceof Error ? streamError.message : "Unknown error";
+           write("error", JSON.stringify({ message: msg }));
          }
        } finally {
-         recordTokenUsage(estimatedTotalTokens);
+         stopHeartbeat(heartbeatId);
+         heartbeatId = null;
+         recordTokenUsage(budgetResult.estimatedTotal);
          res.end();
        }
      } catch (error) {
        if (error?.name === "AbortError") {
+         stopHeartbeat(heartbeatId);
          return;
        }
        console.error("AI error:", error);
-       res.status(500).json({ error: "AI request failed" });
+       if (!res.headersSent) {
+         res.status(500).json({ error: "AI request failed" });
+       } else {
+         write("error", JSON.stringify({ message: "AI request failed" }));
+         res.end();
+       }
      }
    };
  }
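
Cancellation propagates end to end in the handler above: when the client connection closes, `res.on("close")` fires, the heartbeat stops, and the `AbortController` aborts the upstream OpenAI stream. A hypothetical client-side cancellation, again assuming an `/api/chat` route (not part of the package):

```
// Sketch only; the endpoint path is an assumption.
const controller = new AbortController();

fetch("/api/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ messages: [{ role: "user", content: "Tell me a long story" }] }),
  signal: controller.signal
}).catch((err) => {
  if (err.name !== "AbortError") throw err; // aborting is the expected path here
});

// Later, e.g. when the user clicks "Stop": aborting closes the request,
// which closes `res` on the server and halts token generation mid-stream.
controller.abort();
```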
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "chat-nest-server",
    "description": "Streaming AI backend server with cost controls, rate limiting, and cancellation support.",
-   "version": "1.0.1",
+   "version": "1.1.1",
    "type": "module",
    "main": "dist/index.js",
    "license": "ISC",