npm - @intentsolutionsio/vercel-pack - Versions diffs - 1.0.0 → 1.0.3 - Mend

@intentsolutionsio/vercel-pack 1.0.0 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124) hide show

package/skills/vercel-load-scale/SKILL.md CHANGED Viewed

@@ -1,274 +1,267 @@
 ---
 name: vercel-load-scale
-description: |
-  Implement Vercel load testing, auto-scaling, and capacity planning strategies.
-  Use when running performance tests, configuring horizontal scaling,
-  or planning capacity for Vercel integrations.
+description: 'Load test and scale Vercel deployments with concurrency tuning and capacity
+  planning.
+  Use when running performance tests, planning for traffic spikes,
+  or optimizing serverless function scaling on Vercel.
   Trigger with phrases like "vercel load test", "vercel scale",
-  "vercel performance test", "vercel capacity", "vercel k6", "vercel benchmark".
-allowed-tools: Read, Write, Edit, Bash(k6:*), Bash(kubectl:*)
+  "vercel performance test", "vercel capacity", "vercel benchmark".
+  '
+allowed-tools: Read, Write, Edit, Bash(npx:*), Bash(vercel:*), Bash(curl:*)
 version: 1.0.0
 license: MIT
 author: Jeremy Longshore <jeremy@intentsolutions.io>
+tags:
+- saas
+- vercel
+- testing
+- performance
+- scaling
+compatibility: Designed for Claude Code, also compatible with Codex and OpenClaw
 ---
 # Vercel Load & Scale
 ## Overview
-Load testing, scaling strategies, and capacity planning for Vercel integrations.
+Load test Vercel deployments to identify scaling limits, cold start impact, and concurrency thresholds. Covers k6/autocannon test scripts, Vercel's auto-scaling model, Fluid Compute concurrency, and capacity planning.
 ## Prerequisites
-- k6 load testing tool installed
-- Kubernetes cluster with HPA configured
-- Prometheus for metrics collection
-- Test environment API keys
-## Load Testing with k6
+- Load testing tool: k6, autocannon, or artillery
+- Test environment deployment (never load test production without approval)
+- Access to Vercel Analytics for monitoring during tests
+## Instructions
+### Step 1: Understand Vercel's Scaling Model
+Vercel serverless functions scale automatically:
+| Behavior | Details |
+|----------|---------|
+| Scale-up | New function instances spawn on demand |
+| Scale-down | Idle instances shut down after ~15 minutes |
+| Cold starts | First request to a new instance pays initialization cost |
+| Concurrency | Each instance handles one request at a time (by default) |
+| Fluid Compute | Pro/Enterprise: multiple requests per instance |
+**Concurrency limits by plan:**
+| Plan | Max Concurrent Functions |
+|------|------------------------|
+| Hobby | 10 |
+| Pro | 1,000 |
+| Enterprise | 100,000 |
+### Step 2: Basic Load Test with autocannon
+```bash
+# Install autocannon
+npm install -g autocannon
+# Test with 50 concurrent connections for 30 seconds
+autocannon -c 50 -d 30 https://my-app-preview.vercel.app/api/endpoint
+# Output includes:
+# Latency: avg, p50, p99, max
+# Requests/sec: avg, min, max
+# Errors: timeouts, non-2xx responses
+```
+### Step 3: k6 Load Test Script
-### Basic Load Test
 ```javascript
-// vercel-load-test.js
+// load-test.js
 import http from 'k6/http';
 import { check, sleep } from 'k6';
+import { Rate, Trend } from 'k6/metrics';
+const errorRate = new Rate('errors');
+const coldStartRate = new Rate('cold_starts');
+const latency = new Trend('api_latency');
 export const options = {
   stages: [
-    { duration: '2m', target: 10 },   // Ramp up
-    { duration: '5m', target: 10 },   // Steady state
-    { duration: '2m', target: 50 },   // Ramp to peak
-    { duration: '5m', target: 50 },   // Stress test
-    { duration: '2m', target: 0 },    // Ramp down
+    { duration: '1m', target: 10 },   // Warm up
+    { duration: '3m', target: 50 },   // Ramp to 50 users
+    { duration: '2m', target: 100 },  // Peak load
+    { duration: '1m', target: 0 },    // Cool down
   ],
   thresholds: {
-    http_req_duration: ['p(95)<100'],
-    http_req_failed: ['rate<0.01'],
+    http_req_duration: ['p(95)<2000'],  // P95 < 2s
+    errors: ['rate<0.01'],              // Error rate < 1%
   },
 };
 export default function () {
-  const response = http.post(
-    'https://api.vercel.com/v1/resource',
-    JSON.stringify({ test: true }),
-    {
-      headers: {
-        'Content-Type': 'application/json',
-        'Authorization': `Bearer ${__ENV.VERCEL_API_KEY}`,
-      },
-    }
-  );
+  const res = http.get('https://my-app-preview.vercel.app/api/endpoint');
-  check(response, {
+  check(res, {
     'status is 200': (r) => r.status === 200,
-    'latency < 100ms': (r) => r.timings.duration < 100,
+    'latency < 2s': (r) => r.timings.duration < 2000,
   });
+  errorRate.add(res.status !== 200);
+  latency.add(res.timings.duration);
+  // Track cold starts if your API returns this header
+  if (res.headers['X-Cold-Start'] === 'true') {
+    coldStartRate.add(1);
+  }
   sleep(1);
 }
 ```
-### Run Load Test
 ```bash
-# Install k6
-brew install k6  # macOS
-# or: sudo apt install k6  # Linux
+# Run the load test
+k6 run load-test.js
-# Run test
-k6 run --env VERCEL_API_KEY=${VERCEL_API_KEY} vercel-load-test.js
-# Run with output to InfluxDB
-k6 run --out influxdb=http://localhost:8086/k6 vercel-load-test.js
+# Run with output to JSON for analysis
+k6 run --out json=results.json load-test.js
 ```
-## Scaling Patterns
-### Horizontal Scaling
-```yaml
-# kubernetes HPA
-apiVersion: autoscaling/v2
-kind: HorizontalPodAutoscaler
-metadata:
-  name: vercel-integration-hpa
-spec:
-  scaleTargetRef:
-    apiVersion: apps/v1
-    kind: Deployment
-    name: vercel-integration
-  minReplicas: 2
-  maxReplicas: 20
-  metrics:
-    - type: Resource
-      resource:
-        name: cpu
-        target:
-          type: Utilization
-          averageUtilization: 70
-    - type: Pods
-      pods:
-        metric:
-          name: vercel_queue_depth
-        target:
-          type: AverageValue
-          averageValue: 100
-```
+### Step 4: Cold Start Stress Test
-### Connection Pooling
-```typescript
-import { Pool } from 'generic-pool';
+```javascript
+// cold-start-test.js — specifically test cold start behavior
+import http from 'k6/http';
+import { sleep } from 'k6';
-const vercelPool = Pool.create({
-  create: async () => {
-    return new VercelClient({
-      apiKey: process.env.VERCEL_API_KEY!,
-    });
-  },
-  destroy: async (client) => {
-    await client.close();
+export const options = {
+  scenarios: {
+    // Scenario 1: Sustained load (warm instances)
+    sustained: {
+      executor: 'constant-arrival-rate',
+      rate: 10,
+      timeUnit: '1s',
+      duration: '2m',
+      preAllocatedVUs: 20,
+    },
+    // Scenario 2: Spike (forces new cold starts)
+    spike: {
+      executor: 'ramping-arrival-rate',
+      startRate: 10,
+      timeUnit: '1s',
+      stages: [
+        { target: 200, duration: '10s' },  // Sudden spike
+        { target: 10, duration: '1m' },     // Return to normal
+      ],
+      preAllocatedVUs: 300,
+      startTime: '2m',  // Start after sustained phase
+    },
   },
-  max: None,
-  min: None,
-  idleTimeoutMillis: 30000,
-});
-async function withVercelClient<T>(
-  fn: (client: VercelClient) => Promise<T>
-): Promise<T> {
-  const client = await vercelPool.acquire();
-  try {
-    return await fn(client);
-  } finally {
-    vercelPool.release(client);
-  }
+};
+export default function () {
+  const res = http.get('https://my-app-preview.vercel.app/api/endpoint');
+  // Log cold start timing for analysis
 }
 ```
-## Capacity Planning
-### Metrics to Monitor
-| Metric | Warning | Critical |
-|--------|---------|----------|
-| CPU Utilization | > 70% | > 85% |
-| Memory Usage | > 75% | > 90% |
-| Request Queue Depth | > 100 | > 500 |
-| Error Rate | > 1% | > 5% |
-| P95 Latency | > 500ms | > 2000ms |
-### Capacity Calculation
-```typescript
-interface CapacityEstimate {
-  currentRPS: number;
-  maxRPS: number;
-  headroom: number;
-  scaleRecommendation: string;
-}
+### Step 5: Fluid Compute Concurrency Tuning
-function estimateVercelCapacity(
-  metrics: SystemMetrics
-): CapacityEstimate {
-  const currentRPS = metrics.requestsPerSecond;
-  const avgLatency = metrics.p50Latency;
-  const cpuUtilization = metrics.cpuPercent;
-  // Estimate max RPS based on current performance
-  const maxRPS = currentRPS / (cpuUtilization / 100) * 0.7; // 70% target
-  const headroom = ((maxRPS - currentRPS) / currentRPS) * 100;
-  return {
-    currentRPS,
-    maxRPS: Math.floor(maxRPS),
-    headroom: Math.round(headroom),
-    scaleRecommendation: headroom < 30
-      ? 'Scale up soon'
-      : headroom < 50
-      ? 'Monitor closely'
-      : 'Adequate capacity',
-  };
+`vercel.json` — configure concurrency for Fluid Compute (Pro/Enterprise):
+```json
+{
+  "functions": {
+    "api/high-throughput.ts": {
+      "memory": 1024,
+      "maxDuration": 30,
+      "concurrency": 10
+    }
+  }
 }
 ```
-## Benchmark Results Template
+With Fluid Compute concurrency, a single function instance handles multiple requests:
-```markdown
-## Vercel Performance Benchmark
-**Date:** YYYY-MM-DD
-**Environment:** [staging/production]
-**SDK Version:** X.Y.Z
+- Reduces cold starts (fewer instances needed)
+- Reduces cost (shared memory across requests)
+- Best for I/O-bound functions (waiting on DB/API calls)
+- Not ideal for CPU-bound functions (computation blocks other requests)
-### Test Configuration
-- Duration: 10 minutes
-- Ramp: 10 → 100 → 10 VUs
-- Target endpoint: /v1/resource
+### Step 6: Capacity Planning
-### Results
-| Metric | Value |
-|--------|-------|
-| Total Requests | 50,000 |
-| Success Rate | 99.9% |
-| P50 Latency | 120ms |
-| P95 Latency | 350ms |
-| P99 Latency | 800ms |
-| Max RPS Achieved | 150 |
+```
+Capacity Planning Formula:
-### Observations
-- [Key finding 1]
-- [Key finding 2]
+  Required instances = Peak RPS * Avg Response Time (seconds)
-### Recommendations
-- [Scaling recommendation]
+  Example:
+  - Peak: 500 requests/second
+  - Avg response: 200ms (0.2s)
+  - Required: 500 * 0.2 = 100 concurrent instances
+  With Fluid Compute (concurrency=10):
+  - Required: 500 * 0.2 / 10 = 10 concurrent instances
+  Plan check (against the 100 concurrent instances needed without Fluid Compute):
+  - Hobby (10 concurrent): NOT sufficient
+  - Pro (1000 concurrent): Sufficient with headroom
 ```
-## Instructions
+## Load Test Results Template
-### Step 1: Create Load Test Script
-Write k6 test script with appropriate thresholds.
+```markdown
+## Load Test Report — [Date]
-### Step 2: Configure Auto-Scaling
-Set up HPA with CPU and custom metrics.
+### Configuration
+- Target: https://my-app-preview.vercel.app/api/endpoint
+- Tool: k6 v0.50
+- Duration: 7 minutes (ramp up → peak → cool down)
+- Peak concurrent users: 100
-### Step 3: Run Load Test
-Execute test and collect metrics.
+### Results
+| Metric | Value |
+|--------|-------|
+| Total requests | 12,450 |
+| Success rate | 99.8% |
+| P50 latency | 45ms |
+| P95 latency | 320ms |
+| P99 latency | 1,200ms |
+| Max latency | 3,400ms |
+| Cold start % | 8% |
+| Avg cold start duration | 650ms |
+| Throttled (429) | 0 |
-### Step 4: Analyze and Document
-Record results in benchmark template.
+### Recommendations
+1. Cold start: 650ms avg — consider Edge Functions for latency-critical paths
+2. P99 spike: caused by cold starts — Fluid Compute concurrency would help
+3. No throttling at 100 concurrent — Pro plan (1000 limit) is sufficient
+```
 ## Output
-- Load test script created
-- HPA configured
+- Load test scripts for sustained and spike traffic scenarios
+- Cold start frequency and duration measured
+- Concurrency limits tested and validated
+- Capacity plan with scaling recommendations
 - Benchmark results documented
-- Capacity recommendations defined
 ## Error Handling
-| Issue | Cause | Solution |
-|-------|-------|----------|
-| k6 timeout | Rate limited | Reduce RPS |
-| HPA not scaling | Wrong metrics | Verify metric name |
-| Connection refused | Pool exhausted | Increase pool size |
-| Inconsistent results | Warm-up needed | Add ramp-up phase |
-## Examples
-### Quick k6 Test
-```bash
-k6 run --vus 10 --duration 30s vercel-load-test.js
-```
-### Check Current Capacity
-```typescript
-const metrics = await getSystemMetrics();
-const capacity = estimateVercelCapacity(metrics);
-console.log('Headroom:', capacity.headroom + '%');
-console.log('Recommendation:', capacity.scaleRecommendation);
-```
-### Scale HPA Manually
-```bash
-kubectl scale deployment vercel-integration --replicas=5
-kubectl get hpa vercel-integration-hpa
-```
+| Error | Cause | Solution |
+|-------|-------|----------|
+| `FUNCTION_THROTTLED` (429) | Exceeded concurrent limit | Reduce test concurrency or upgrade plan |
+| Vercel blocks load test | Not from approved IP | Contact Vercel support before load testing |
+| High P99 but low P50 | Cold starts on spikes | Use Fluid Compute concurrency or Edge Functions |
+| All requests time out | Function region far from test origin | Set `regions` in vercel.json closer to test source |
+| Inconsistent results | Shared infrastructure variability | Run multiple test rounds, use median results |
 ## Resources
+- [Vercel Function Limits](https://vercel.com/docs/functions/limitations)
+- [Concurrency Scaling](https://vercel.com/docs/functions/concurrency-scaling)
+- [Fluid Compute](https://vercel.com/docs/functions/usage-and-pricing)
 - [k6 Documentation](https://k6.io/docs/)
-- [Kubernetes HPA](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/)
-- [Vercel Rate Limits](https://vercel.com/docs/rate-limits)
+- [Vercel Load Testing Policy](https://vercel.com/kb/guide/what-s-vercel-s-policy-regarding-load-testing-deployments)
 ## Next Steps
-For reliability patterns, see `vercel-reliability-patterns`.
+For reliability patterns, see `vercel-reliability-patterns`.

package/skills/vercel-load-scale/references/capacity-planning.md ADDED Viewed

@@ -0,0 +1,47 @@
+# Capacity Planning
+## Capacity Planning
+### Metrics to Monitor
+| Metric | Warning | Critical |
+|--------|---------|----------|
+| CPU Utilization | > 70% | > 85% |
+| Memory Usage | > 75% | > 90% |
+| Request Queue Depth | > 100 | > 500 |
+| Error Rate | > 1% | > 5% |
+| P95 Latency | > 500ms | > 2000ms |
+### Capacity Calculation
+```typescript
+interface CapacityEstimate {
+  currentRPS: number;
+  maxRPS: number;
+  headroom: number;
+  scaleRecommendation: string;
+}
+function estimateVercelCapacity(
+  metrics: SystemMetrics
+): CapacityEstimate {
+  const currentRPS = metrics.requestsPerSecond;
+  const avgLatency = metrics.p50Latency;
+  const cpuUtilization = metrics.cpuPercent;
+  // Estimate max RPS based on current performance
+  const maxRPS = currentRPS / (cpuUtilization / 100) * 0.7; // 70% target
+  const headroom = ((maxRPS - currentRPS) / currentRPS) * 100;
+  return {
+    currentRPS,
+    maxRPS: Math.floor(maxRPS),
+    headroom: Math.round(headroom),
+    scaleRecommendation: headroom < 30
+      ? 'Scale up soon'
+      : headroom < 50
+      ? 'Monitor closely'
+      : 'Adequate capacity',
+  };
+}
+```

package/skills/vercel-load-scale/references/errors.md ADDED Viewed

@@ -0,0 +1,11 @@
+# Error Handling Reference
+| Issue | Cause | Solution |
+|-------|-------|----------|
+| k6 timeout | Rate limited | Reduce RPS |
+| HPA not scaling | Wrong metrics | Verify metric name |
+| Connection refused | Pool exhausted | Increase pool size |
+| Inconsistent results | Warm-up needed | Add ramp-up phase |
+---
+*[Tons of Skills](https://tonsofskills.com) by [Intent Solutions](https://intentsolutions.io) | [jeremylongshore.com](https://jeremylongshore.com)*

package/skills/vercel-load-scale/references/examples.md ADDED Viewed

@@ -0,0 +1,26 @@
+## Examples
+### Quick k6 Test
+```bash
+k6 run --vus 10 --duration 30s vercel-load-test.js
+```
+### Check Current Capacity
+```typescript
+const metrics = await getSystemMetrics();
+const capacity = estimateVercelCapacity(metrics);
+console.log('Headroom:', capacity.headroom + '%');
+console.log('Recommendation:', capacity.scaleRecommendation);
+```
+### Scale HPA Manually
+```bash
+kubectl scale deployment vercel-integration --replicas=5
+kubectl get hpa vercel-integration-hpa
+```
+---
+*[Tons of Skills](https://tonsofskills.com) by [Intent Solutions](https://intentsolutions.io) | [jeremylongshore.com](https://jeremylongshore.com)*

package/skills/vercel-load-scale/references/load-testing-with-k6.md ADDED Viewed

@@ -0,0 +1,59 @@
+# Load Testing with k6
+## Load Testing with k6
+### Basic Load Test
+```javascript
+// vercel-load-test.js
+import http from 'k6/http';
+import { check, sleep } from 'k6';
+export const options = {
+  stages: [
+    { duration: '2m', target: 10 },   // Ramp up
+    { duration: '5m', target: 10 },   // Steady state
+    { duration: '2m', target: 50 },   // Ramp to peak
+    { duration: '5m', target: 50 },   // Stress test
+    { duration: '2m', target: 0 },    // Ramp down
+  ],
+  thresholds: {
+    http_req_duration: ['p(95)<100'],
+    http_req_failed: ['rate<0.01'],
+  },
+};
+export default function () {
+  const response = http.post(
+    'https://api.vercel.com/v1/resource',
+    JSON.stringify({ test: true }),
+    {
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': `Bearer ${__ENV.VERCEL_API_KEY}`,
+      },
+    }
+  );
+  check(response, {
+    'status is 200': (r) => r.status === 200,
+    'latency < 100ms': (r) => r.timings.duration < 100,
+  });
+  sleep(1);
+}
+```
+### Run Load Test
+```bash
+# Install k6
+brew install k6  # macOS
+# or: sudo apt install k6  # Linux
+# Run test
+k6 run --env VERCEL_API_KEY=${VERCEL_API_KEY} vercel-load-test.js
+# Run with output to InfluxDB
+k6 run --out influxdb=http://localhost:8086/k6 vercel-load-test.js
+```

package/skills/vercel-load-scale/references/scaling-patterns.md ADDED Viewed

@@ -0,0 +1,65 @@
+# Scaling Patterns
+## Scaling Patterns
+### Horizontal Scaling
+```yaml
+# kubernetes HPA
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: vercel-integration-hpa
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: vercel-integration
+  minReplicas: 2
+  maxReplicas: 20
+  metrics:
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: 70
+    - type: Pods
+      pods:
+        metric:
+          name: vercel_queue_depth
+        target:
+          type: AverageValue
+          averageValue: 100
+```
+### Connection Pooling
+```typescript
+import { Pool } from 'generic-pool';
+const vercelPool = Pool.create({
+  create: async () => {
+    return new VercelClient({
+      apiKey: process.env.VERCEL_API_KEY!,
+    });
+  },
+  destroy: async (client) => {
+    await client.close();
+  },
+  max: None,
+  min: None,
+  idleTimeoutMillis: 30000,
+});
+async function withVercelClient<T>(
+  fn: (client: VercelClient) => Promise<T>
+): Promise<T> {
+  const client = await vercelPool.acquire();
+  try {
+    return await fn(client);
+  } finally {
+    vercelPool.release(client);
+  }
+}
+```