npm - product-spec-mcp - Versions diffs - 0.3.32 → 0.3.34 - Mend

product-spec-mcp 0.3.32 → 0.3.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.cjs +1 -1
package/docs/online-pm-gate.md +29 -1
package/package.json +1 -1
package/workers/pm-intent-gate.mjs +20 -13
package/workers/wrangler.toml.example +1 -0

package/dist/index.cjs CHANGED

@@ -29976,7 +29976,7 @@ function registerProductSpecAssist(server) {
 function createServer() {
   const server = new McpServer({
     name: "product-spec-mcp",
-    version: "0.3.32"
+    version: "0.3.34"
   });
   registerSpecInterrogate(server);
   registerSpecCompile(server);

package/docs/online-pm-gate.md CHANGED

@@ -53,6 +53,7 @@ Default LLM provider:
 LLM_PROVIDER = "mimo"
 LLM_BASE_URL = "https://token-plan-cn.xiaomimimo.com/v1"
 LLM_MODEL = "mimo-v2.5"
+DAILY_LLM_LIMIT = "20"
 ```
 To switch later to DeepSeek, change the Worker vars to:
@@ -74,9 +75,36 @@ Runtime behavior:
 - Prompt cache key: `cache:{model}:{promptHash}:pm-gate-v1`
 - Cache TTL: 7 days
-- LLM quota: 3 non-cached LLM decisions per IP per Shanghai calendar day
+- LLM quota: `DAILY_LLM_LIMIT` non-cached LLM decisions per IP per Shanghai calendar day. Default: 20.
 - User message sent to LLM: max 500 characters
 - LLM max output tokens: 600
 - LLM temperature: 0.1
+## Change LLM Daily Quota
+`DAILY_LLM_LIMIT` controls the number of non-cached LLM gate calls allowed per IP per Shanghai calendar day. It is a Worker runtime variable, not an npm package setting.
+Default:
+```toml
+DAILY_LLM_LIMIT = "20"
+```
+To change it from local config:
+```bash
+cd /Users/george/Documents/product-spec-mcp/workers
+# edit DAILY_LLM_LIMIT in wrangler.toml
+npx wrangler deploy
+```
+To change it from Cloudflare Dashboard:
+1. Open Worker `product-spec-pm-intent-gate`.
+2. Go to Variables and Secrets.
+3. Edit plaintext variable `DAILY_LLM_LIMIT`.
+4. Save/deploy the Worker configuration.
+Changing only this quota does not require an npm release. npm only needs to be published when the package code, bundled Worker file, or documentation should be distributed to npm users.
 If the Worker is unreachable, rate-limited, returns invalid JSON, or returns invalid enum fields, the local MCP falls back to the local PM Gate decision.

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "product-spec-mcp",
-  "version": "0.3.32",
+  "version": "0.3.34",
   "description": "MCP Server for product specification - requirement interrogation, architecture decision, UI translation, debug guidance, and acceptance generation",
   "type": "commonjs",
   "main": "dist/index.cjs",

package/workers/pm-intent-gate.mjs CHANGED

@@ -4,7 +4,7 @@ const DEFAULT_MIMO_BASE_URL = "https://token-plan-cn.xiaomimimo.com/v1";
 const DEFAULT_MIMO_MODEL = "mimo-v2.5";
 const DEFAULT_DEEPSEEK_BASE_URL = "https://api.deepseek.com";
 const DEFAULT_DEEPSEEK_MODEL = "deepseek-chat";
-const DAILY_LIMIT = 3;
+const DEFAULT_DAILY_LIMIT = 20;
 export default {
   async fetch(request, env) {
@@ -34,6 +34,7 @@ export default {
     const cached = await env.PROMPT_CACHE?.get(cacheKey, "json");
     const ipKey = await rateLimitKey(request, env);
     const resetAt = nextShanghaiMidnightIso();
+    const dailyLimit = resolveDailyLimit(env);
     if (cached?.decision) {
       await maybeStoreSample(env, telemetryMode, body, cached.decision, cached.decision, {
@@ -52,15 +53,15 @@ export default {
           cacheHit: true,
         },
         rateLimit: {
-          limit: DAILY_LIMIT,
-          remaining: await remainingForKey(env, ipKey),
+          limit: dailyLimit,
+          remaining: await remainingForKey(env, ipKey, dailyLimit),
           resetAt,
         },
         privacy: privacyResult(telemetryMode),
       });
     }
-    const limit = await consumeLimit(env, ipKey, resetAt);
+    const limit = await consumeLimit(env, ipKey, resetAt, dailyLimit);
     if (!limit.allowed) {
       await maybeStoreSample(env, telemetryMode, body, null, body.ruleDecision || {}, {
         llmUsed: 0,
@@ -71,7 +72,7 @@ export default {
       return json({
         decision: fallbackDecision(body.ruleDecision),
         llmGate: { used: false, provider: llm.provider, model: llm.model, cacheHit: false },
-        rateLimit: { limit: DAILY_LIMIT, remaining: 0, resetAt },
+        rateLimit: { limit: dailyLimit, remaining: 0, resetAt },
         privacy: privacyResult(telemetryMode),
       }, 429);
     }
@@ -121,7 +122,7 @@ export default {
         ...(fallbackReason ? { fallbackReason } : {}),
       },
       rateLimit: {
-        limit: DAILY_LIMIT,
+        limit: dailyLimit,
         remaining: limit.remaining,
         resetAt,
       },
@@ -195,6 +196,12 @@ function resolveLlmConfig(env) {
   };
 }
+function resolveDailyLimit(env) {
+  const parsed = Number(env.DAILY_LLM_LIMIT || DEFAULT_DAILY_LIMIT);
+  if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_DAILY_LIMIT;
+  return Math.floor(parsed);
+}
 async function callOpenAiCompatible(llm, prompt) {
   if (!llm.apiKey) throw new Error(`missing_${llm.provider}_api_key`);
   const response = await fetch(`${normalizeBaseUrl(llm.baseUrl)}/chat/completions`, {
@@ -343,20 +350,20 @@ function fallbackDecision(ruleDecision) {
   };
 }
-async function consumeLimit(env, key, resetAt) {
-  if (!env.PROMPT_CACHE) return { allowed: true, remaining: DAILY_LIMIT - 1 };
+async function consumeLimit(env, key, resetAt, dailyLimit) {
+  if (!env.PROMPT_CACHE) return { allowed: true, remaining: dailyLimit - 1 };
   const current = Number(await env.PROMPT_CACHE.get(key) || "0");
-  if (current >= DAILY_LIMIT) return { allowed: false, remaining: 0 };
+  if (current >= dailyLimit) return { allowed: false, remaining: 0 };
   const next = current + 1;
   const resetSeconds = Math.max(60, Math.floor((new Date(resetAt).getTime() - Date.now()) / 1000));
   await env.PROMPT_CACHE.put(key, String(next), { expirationTtl: resetSeconds });
-  return { allowed: true, remaining: Math.max(0, DAILY_LIMIT - next) };
+  return { allowed: true, remaining: Math.max(0, dailyLimit - next) };
 }
-async function remainingForKey(env, key) {
-  if (!env.PROMPT_CACHE) return DAILY_LIMIT;
+async function remainingForKey(env, key, dailyLimit) {
+  if (!env.PROMPT_CACHE) return dailyLimit;
   const current = Number(await env.PROMPT_CACHE.get(key) || "0");
-  return Math.max(0, DAILY_LIMIT - current);
+  return Math.max(0, dailyLimit - current);
 }
 async function rateLimitKey(request, env) {

package/workers/wrangler.toml.example CHANGED

@@ -15,6 +15,7 @@ database_id = "replace-with-d1-database-id"
 LLM_PROVIDER = "mimo"
 LLM_BASE_URL = "https://token-plan-cn.xiaomimimo.com/v1"
 LLM_MODEL = "mimo-v2.5"
+DAILY_LLM_LIMIT = "20"
 # Secrets to set with wrangler:
 # wrangler secret put GATE_TOKEN