chat-nest-server 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -11
- package/dist/index.js +214 -110
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
This package exposes an Express-compatible request handler that:
|
|
6
6
|
- Streams AI responses using Server-Sent Events (SSE)
|
|
7
7
|
- Sends real-time tokens via the SSE protocol
|
|
8
|
-
- Enforces rate limits and
|
|
8
|
+
- Enforces rate limits and profile-based limits
|
|
9
9
|
- Supports abort propagation
|
|
10
10
|
- Protects against runaway usage
|
|
11
11
|
|
|
@@ -18,7 +18,7 @@ This package exposes an Express-compatible request handler that:
|
|
|
18
18
|
- SSE event types: `start`, `token`, `done`, `error`, `ping`
|
|
19
19
|
- Heartbeat pings to keep connection alive
|
|
20
20
|
- End-to-end cancellation support
|
|
21
|
-
- Daily token
|
|
21
|
+
- Daily token limit enforcement (profile-based)
|
|
22
22
|
- Rate limiting
|
|
23
23
|
- Message trimming
|
|
24
24
|
- Safe retry semantics
|
|
@@ -76,12 +76,12 @@ The handler sends SSE-formatted events:
|
|
|
76
76
|
|
|
77
77
|
## 💰 Cost Controls
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
Profiles (`constrained`, `balanced`, `expanded`) control limits. HARD_CAPS clamp all profiles. Server enforces:
|
|
80
80
|
|
|
81
|
-
- Maximum tokens per request
|
|
82
|
-
- Daily token
|
|
83
|
-
- Request rate limiting
|
|
84
|
-
- Prompt size trimming
|
|
81
|
+
- Maximum tokens per request (profile + HARD_CAPS)
|
|
82
|
+
- Daily token limit (profile)
|
|
83
|
+
- Request rate limiting (profile)
|
|
84
|
+
- Prompt size trimming (profile)
|
|
85
85
|
- Retry classification
|
|
86
86
|
|
|
87
87
|
This prevents accidental overspending and abuse.
|
|
@@ -101,11 +101,9 @@ SSE provides better efficiency and real-time streaming compared to traditional p
|
|
|
101
101
|
---
|
|
102
102
|
|
|
103
103
|
## ⚙ Configuration
|
|
104
|
-
Limits can be customized in:
|
|
105
104
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
budget.ts
|
|
105
|
+
- **`config/profiles.ts`** – AI usage profiles (`constrained`, `balanced`, `expanded`), HARD_CAPS, and `resolveProfile()`. Frontend may send `aiUsageProfile` string; backend resolves safely.
|
|
106
|
+
- **`config/aiLimits.ts`** – `AI_MODEL` only. All limits come from profiles.
|
|
109
107
|
|
|
110
108
|
---
|
|
111
109
|
|
package/dist/index.js
CHANGED
|
@@ -1,22 +1,96 @@
|
|
|
1
1
|
// src/createChatHandler.ts
|
|
2
2
|
import OpenAI from "openai";
|
|
3
3
|
|
|
4
|
-
// src/config/
|
|
5
|
-
var
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
rateLimitWindowMs: 6e4,
|
|
10
|
-
maxRequestsPerWindow: 30
|
|
4
|
+
// src/config/profiles.ts
|
|
5
|
+
var AIUsageProfile = {
|
|
6
|
+
CONSTRAINED: "constrained",
|
|
7
|
+
BALANCED: "balanced",
|
|
8
|
+
EXPANDED: "expanded"
|
|
11
9
|
};
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
maxTokensPerRequest:
|
|
18
|
-
|
|
10
|
+
var HARD_CAPS = {
|
|
11
|
+
maxOutputTokens: 4096,
|
|
12
|
+
maxMessages: 20,
|
|
13
|
+
temperature: 2,
|
|
14
|
+
dailyTokenLimit: 5e5,
|
|
15
|
+
maxTokensPerRequest: 16e3,
|
|
16
|
+
rateLimit: { windowMs: 6e4, maxRequests: 60 }
|
|
17
|
+
};
|
|
18
|
+
function clampToCaps(limits) {
|
|
19
|
+
return {
|
|
20
|
+
maxOutputTokens: Math.min(limits.maxOutputTokens, HARD_CAPS.maxOutputTokens),
|
|
21
|
+
maxMessages: Math.min(limits.maxMessages, HARD_CAPS.maxMessages),
|
|
22
|
+
temperature: Math.min(limits.temperature, HARD_CAPS.temperature),
|
|
23
|
+
dailyTokenLimit: Math.min(limits.dailyTokenLimit, HARD_CAPS.dailyTokenLimit),
|
|
24
|
+
maxTokensPerRequest: Math.min(
|
|
25
|
+
limits.maxTokensPerRequest,
|
|
26
|
+
HARD_CAPS.maxTokensPerRequest
|
|
27
|
+
),
|
|
28
|
+
rateLimit: {
|
|
29
|
+
windowMs: limits.rateLimit.windowMs,
|
|
30
|
+
maxRequests: Math.min(
|
|
31
|
+
limits.rateLimit.maxRequests,
|
|
32
|
+
HARD_CAPS.rateLimit.maxRequests
|
|
33
|
+
)
|
|
34
|
+
}
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
var DEFAULT_MAX_TOKENS_PER_REQUEST = 2048;
|
|
38
|
+
var PROFILES = {
|
|
39
|
+
[AIUsageProfile.CONSTRAINED]: clampToCaps({
|
|
40
|
+
maxOutputTokens: 150,
|
|
41
|
+
maxMessages: 4,
|
|
42
|
+
temperature: 0.5,
|
|
43
|
+
dailyTokenLimit: 3e4,
|
|
44
|
+
maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
|
|
45
|
+
rateLimit: { windowMs: 6e4, maxRequests: 15 }
|
|
46
|
+
}),
|
|
47
|
+
[AIUsageProfile.BALANCED]: clampToCaps({
|
|
48
|
+
maxOutputTokens: 400,
|
|
49
|
+
maxMessages: 6,
|
|
50
|
+
temperature: 0.7,
|
|
51
|
+
dailyTokenLimit: 7e4,
|
|
52
|
+
maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
|
|
53
|
+
rateLimit: { windowMs: 6e4, maxRequests: 30 }
|
|
54
|
+
}),
|
|
55
|
+
[AIUsageProfile.EXPANDED]: clampToCaps({
|
|
56
|
+
maxOutputTokens: 3e3,
|
|
57
|
+
maxMessages: 12,
|
|
58
|
+
temperature: 0.8,
|
|
59
|
+
dailyTokenLimit: 2e5,
|
|
60
|
+
maxTokensPerRequest: DEFAULT_MAX_TOKENS_PER_REQUEST,
|
|
61
|
+
rateLimit: { windowMs: 6e4, maxRequests: 60 }
|
|
62
|
+
})
|
|
63
|
+
};
|
|
64
|
+
var LEGACY_MAP = {
|
|
65
|
+
budget: AIUsageProfile.CONSTRAINED,
|
|
66
|
+
moderate: AIUsageProfile.BALANCED,
|
|
67
|
+
free: AIUsageProfile.EXPANDED
|
|
19
68
|
};
|
|
69
|
+
var VALID_PROFILE_STRINGS = /* @__PURE__ */ new Set([
|
|
70
|
+
AIUsageProfile.CONSTRAINED,
|
|
71
|
+
AIUsageProfile.BALANCED,
|
|
72
|
+
AIUsageProfile.EXPANDED,
|
|
73
|
+
...Object.keys(LEGACY_MAP)
|
|
74
|
+
]);
|
|
75
|
+
function resolveProfile(input) {
|
|
76
|
+
const s = typeof input === "string" ? input.toLowerCase().trim() : "";
|
|
77
|
+
const profile = LEGACY_MAP[s] ?? (VALID_PROFILE_STRINGS.has(s) ? s : null) ?? AIUsageProfile.BALANCED;
|
|
78
|
+
return { limits: PROFILES[profile], profile };
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// src/guards/rateLimit.ts
|
|
82
|
+
var requestCount = 0;
|
|
83
|
+
var windowStart = Date.now();
|
|
84
|
+
function isRateLimited(limits) {
|
|
85
|
+
const { windowMs, maxRequests } = limits.rateLimit;
|
|
86
|
+
const now = Date.now();
|
|
87
|
+
if (now - windowStart > windowMs) {
|
|
88
|
+
windowStart = now;
|
|
89
|
+
requestCount = 0;
|
|
90
|
+
}
|
|
91
|
+
requestCount++;
|
|
92
|
+
return requestCount > maxRequests;
|
|
93
|
+
}
|
|
20
94
|
|
|
21
95
|
// src/utils/tokenEstimator.ts
|
|
22
96
|
function estimateTokens(messages) {
|
|
@@ -39,139 +113,169 @@ function recordTokenUsage(used) {
|
|
|
39
113
|
tokensUsedToday += used;
|
|
40
114
|
}
|
|
41
115
|
|
|
42
|
-
// src/
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
116
|
+
// src/guards/budgetGuards.ts
|
|
117
|
+
function trimMessages(messages, maxMessages) {
|
|
118
|
+
return messages.slice(-maxMessages);
|
|
119
|
+
}
|
|
120
|
+
function checkBudget(messages, limits) {
|
|
121
|
+
const estimatedInput = estimateTokens(messages);
|
|
122
|
+
const estimatedTotal = estimatedInput + limits.maxOutputTokens;
|
|
123
|
+
if (estimatedTotal > limits.maxTokensPerRequest) {
|
|
124
|
+
return { success: false, estimatedTotal, reason: "request_limit" };
|
|
50
125
|
}
|
|
51
|
-
|
|
52
|
-
|
|
126
|
+
if (!canSpendTokens(estimatedTotal, limits.dailyTokenLimit)) {
|
|
127
|
+
return { success: false, estimatedTotal, reason: "daily_limit" };
|
|
128
|
+
}
|
|
129
|
+
return { success: true, estimatedTotal };
|
|
53
130
|
}
|
|
54
|
-
|
|
55
|
-
|
|
131
|
+
|
|
132
|
+
// src/sse/sseWriter.ts
|
|
133
|
+
function createSSEWriter(res, abortSignal) {
|
|
134
|
+
const write = (event, data) => {
|
|
135
|
+
if (abortSignal.aborted) return;
|
|
136
|
+
res.write(`event: ${event}
|
|
137
|
+
data: ${data}
|
|
138
|
+
|
|
139
|
+
`);
|
|
140
|
+
};
|
|
141
|
+
const startHeartbeat = (intervalMs = 15e3) => {
|
|
142
|
+
return setInterval(() => {
|
|
143
|
+
if (!abortSignal.aborted) write("ping", "");
|
|
144
|
+
}, intervalMs);
|
|
145
|
+
};
|
|
146
|
+
const stopHeartbeat = (id) => {
|
|
147
|
+
if (id) clearInterval(id);
|
|
148
|
+
};
|
|
149
|
+
return { write, startHeartbeat, stopHeartbeat };
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
// src/config/aiLimits.ts
|
|
153
|
+
var AI_MODEL = "gpt-4o-mini";
|
|
154
|
+
|
|
155
|
+
// src/streaming/openaiStream.ts
|
|
156
|
+
async function streamChatCompletion(client, messages, limits, writeSSE, signal) {
|
|
157
|
+
const stream = await client.chat.completions.create(
|
|
158
|
+
{
|
|
159
|
+
model: AI_MODEL,
|
|
160
|
+
stream: true,
|
|
161
|
+
temperature: limits.temperature,
|
|
162
|
+
max_tokens: limits.maxOutputTokens,
|
|
163
|
+
messages: messages.map((m) => ({ role: m.role, content: m.content }))
|
|
164
|
+
},
|
|
165
|
+
{ signal }
|
|
166
|
+
);
|
|
167
|
+
for await (const chunk of stream) {
|
|
168
|
+
if (signal.aborted) break;
|
|
169
|
+
const token = chunk.choices[0]?.delta?.content;
|
|
170
|
+
if (token) {
|
|
171
|
+
writeSSE("token", JSON.stringify(token));
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
writeSSE("done", "");
|
|
56
175
|
}
|
|
176
|
+
|
|
177
|
+
// src/createChatHandler.ts
|
|
57
178
|
function createChatHandler(config) {
|
|
58
179
|
if (!config.apiKey) {
|
|
59
180
|
throw new Error("OPENAI_API_KEY is missing");
|
|
60
181
|
}
|
|
61
|
-
const client = new OpenAI({
|
|
62
|
-
apiKey: config.apiKey
|
|
63
|
-
});
|
|
182
|
+
const client = new OpenAI({ apiKey: config.apiKey });
|
|
64
183
|
return async function handler(req, res) {
|
|
65
184
|
const abortController = new AbortController();
|
|
66
185
|
let streamStarted = false;
|
|
67
|
-
let
|
|
68
|
-
const
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
186
|
+
let heartbeatId = null;
|
|
187
|
+
const body = req.body;
|
|
188
|
+
const { limits: profileLimits } = resolveProfile(
|
|
189
|
+
body.profile ?? body.aiUsageProfile
|
|
190
|
+
);
|
|
191
|
+
let effectiveLimits = { ...profileLimits };
|
|
192
|
+
if (body.dailyTokenLimit != null) {
|
|
193
|
+
effectiveLimits = {
|
|
194
|
+
...effectiveLimits,
|
|
195
|
+
dailyTokenLimit: Math.min(
|
|
196
|
+
profileLimits.dailyTokenLimit,
|
|
197
|
+
body.dailyTokenLimit
|
|
198
|
+
)
|
|
199
|
+
};
|
|
200
|
+
}
|
|
201
|
+
if (body.maxTokensPerRequest != null) {
|
|
202
|
+
effectiveLimits = {
|
|
203
|
+
...effectiveLimits,
|
|
204
|
+
maxTokensPerRequest: Math.min(
|
|
205
|
+
profileLimits.maxTokensPerRequest,
|
|
206
|
+
body.maxTokensPerRequest
|
|
207
|
+
)
|
|
208
|
+
};
|
|
209
|
+
}
|
|
210
|
+
if (!Array.isArray(body.messages)) {
|
|
211
|
+
res.status(400).json({ error: "Invalid messages payload" });
|
|
212
|
+
return;
|
|
213
|
+
}
|
|
214
|
+
if (isRateLimited(effectiveLimits)) {
|
|
215
|
+
res.status(429).json({
|
|
216
|
+
error: "Rate limit exceeded. Please slow down."
|
|
217
|
+
});
|
|
218
|
+
return;
|
|
219
|
+
}
|
|
220
|
+
const trimmedMessages = trimMessages(
|
|
221
|
+
body.messages,
|
|
222
|
+
effectiveLimits.maxMessages
|
|
223
|
+
);
|
|
224
|
+
const budgetResult = checkBudget(trimmedMessages, effectiveLimits);
|
|
225
|
+
if (!budgetResult.success) {
|
|
226
|
+
const errorMessage = budgetResult.reason === "request_limit" ? "Request too large. Please shorten your message." : "Daily AI budget exceeded. Try again tomorrow.";
|
|
227
|
+
res.status(429).json({ error: errorMessage });
|
|
228
|
+
return;
|
|
229
|
+
}
|
|
230
|
+
const { write, startHeartbeat, stopHeartbeat } = createSSEWriter(
|
|
231
|
+
res,
|
|
232
|
+
abortController.signal
|
|
233
|
+
);
|
|
75
234
|
res.on("close", () => {
|
|
76
|
-
|
|
77
|
-
clearInterval(heartbeatInterval);
|
|
78
|
-
heartbeatInterval = null;
|
|
79
|
-
}
|
|
235
|
+
stopHeartbeat(heartbeatId);
|
|
80
236
|
if (streamStarted && !abortController.signal.aborted) {
|
|
81
237
|
abortController.abort();
|
|
82
238
|
}
|
|
83
239
|
});
|
|
84
240
|
try {
|
|
85
|
-
const body = req.body;
|
|
86
|
-
if (!Array.isArray(body.messages)) {
|
|
87
|
-
res.status(400).json({ error: "Invalid messages payload" });
|
|
88
|
-
return;
|
|
89
|
-
}
|
|
90
|
-
if (isRateLimited()) {
|
|
91
|
-
res.status(429).json({
|
|
92
|
-
error: "Rate limit exceeded. Please slow down."
|
|
93
|
-
});
|
|
94
|
-
return;
|
|
95
|
-
}
|
|
96
|
-
const trimmedMessages = trimMessages(body.messages);
|
|
97
|
-
const estimatedInputTokens = estimateTokens(trimmedMessages);
|
|
98
|
-
const estimatedTotalTokens = estimatedInputTokens + AI_LIMITS.maxOutputTokens;
|
|
99
|
-
if (estimatedTotalTokens > BUDGET.maxTokensPerRequest || !canSpendTokens(estimatedTotalTokens, BUDGET.dailyTokenLimit)) {
|
|
100
|
-
res.status(429).json({
|
|
101
|
-
error: "Daily AI budget exceeded. Try again tomorrow."
|
|
102
|
-
});
|
|
103
|
-
return;
|
|
104
|
-
}
|
|
105
241
|
res.setHeader("Content-Type", "text/event-stream");
|
|
106
242
|
res.setHeader("Cache-Control", "no-cache");
|
|
107
243
|
res.setHeader("Connection", "keep-alive");
|
|
108
244
|
res.flushHeaders();
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
if (!abortController.signal.aborted) {
|
|
112
|
-
writeSSE("ping", "");
|
|
113
|
-
}
|
|
114
|
-
}, 15e3);
|
|
115
|
-
const stream = await client.chat.completions.create(
|
|
116
|
-
{
|
|
117
|
-
model: AI_MODEL,
|
|
118
|
-
stream: true,
|
|
119
|
-
temperature: 0.7,
|
|
120
|
-
max_tokens: AI_LIMITS.maxOutputTokens,
|
|
121
|
-
messages: trimmedMessages.map((m) => ({
|
|
122
|
-
role: m.role,
|
|
123
|
-
content: m.content
|
|
124
|
-
}))
|
|
125
|
-
},
|
|
126
|
-
{
|
|
127
|
-
signal: abortController.signal
|
|
128
|
-
}
|
|
129
|
-
);
|
|
245
|
+
write("start", "");
|
|
246
|
+
heartbeatId = startHeartbeat(15e3);
|
|
130
247
|
streamStarted = true;
|
|
131
248
|
try {
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
}
|
|
141
|
-
writeSSE("done", "");
|
|
142
|
-
} catch (error) {
|
|
249
|
+
await streamChatCompletion(
|
|
250
|
+
client,
|
|
251
|
+
trimmedMessages,
|
|
252
|
+
effectiveLimits,
|
|
253
|
+
write,
|
|
254
|
+
abortController.signal
|
|
255
|
+
);
|
|
256
|
+
} catch (streamError) {
|
|
143
257
|
if (abortController.signal.aborted) {
|
|
144
258
|
console.log("Stream aborted by client");
|
|
145
259
|
} else {
|
|
146
|
-
const
|
|
147
|
-
|
|
148
|
-
});
|
|
149
|
-
writeSSE("error", errorData);
|
|
260
|
+
const msg = streamError instanceof Error ? streamError.message : "Unknown error";
|
|
261
|
+
write("error", JSON.stringify({ message: msg }));
|
|
150
262
|
}
|
|
151
263
|
} finally {
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
}
|
|
156
|
-
recordTokenUsage(estimatedTotalTokens);
|
|
264
|
+
stopHeartbeat(heartbeatId);
|
|
265
|
+
heartbeatId = null;
|
|
266
|
+
recordTokenUsage(budgetResult.estimatedTotal);
|
|
157
267
|
res.end();
|
|
158
268
|
}
|
|
159
269
|
} catch (error) {
|
|
160
270
|
if (error?.name === "AbortError") {
|
|
161
|
-
|
|
162
|
-
clearInterval(heartbeatInterval);
|
|
163
|
-
heartbeatInterval = null;
|
|
164
|
-
}
|
|
271
|
+
stopHeartbeat(heartbeatId);
|
|
165
272
|
return;
|
|
166
273
|
}
|
|
167
274
|
console.error("AI error:", error);
|
|
168
275
|
if (!res.headersSent) {
|
|
169
276
|
res.status(500).json({ error: "AI request failed" });
|
|
170
277
|
} else {
|
|
171
|
-
|
|
172
|
-
message: "AI request failed"
|
|
173
|
-
});
|
|
174
|
-
writeSSE("error", errorData);
|
|
278
|
+
write("error", JSON.stringify({ message: "AI request failed" }));
|
|
175
279
|
res.end();
|
|
176
280
|
}
|
|
177
281
|
}
|
package/package.json
CHANGED