npm - qai-cli - Versions diffs - 3.1.0 → 3.2.0 - Mend

qai-cli 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/.github/workflows/qai-review-reusable.yml +82 -0
package/.github/workflows/qai-review.yml +63 -0
package/README.md +15 -2
package/benchmarks/README.md +68 -0
package/benchmarks/dataset/breaking-api-change.json +17 -0
package/benchmarks/dataset/hardcoded-secrets.json +17 -0
package/benchmarks/dataset/memory-leak.json +17 -0
package/benchmarks/dataset/missing-error-handling.json +17 -0
package/benchmarks/dataset/null-pointer.json +17 -0
package/benchmarks/dataset/off-by-one.json +17 -0
package/benchmarks/dataset/race-condition.json +17 -0
package/benchmarks/dataset/sql-injection.json +17 -0
package/benchmarks/dataset/unvalidated-input.json +17 -0
package/benchmarks/dataset/xss-vulnerability.json +17 -0
package/benchmarks/run.js +184 -0
package/package.json +1 -1

package/.github/workflows/qai-review-reusable.yml ADDED Viewed

@@ -0,0 +1,82 @@
+name: QAI Code Review (Reusable)
+on:
+  workflow_call:
+    inputs:
+      provider:
+        description: 'AI provider to use (anthropic or openai)'
+        required: false
+        type: string
+        default: 'anthropic'
+      node-version:
+        description: 'Node.js version'
+        required: false
+        type: string
+        default: '20'
+      qai-version:
+        description: 'qai-cli version (npm version specifier)'
+        required: false
+        type: string
+        default: 'latest'
+    secrets:
+      api-key:
+        description: 'API key for the chosen provider'
+        required: true
+permissions:
+  contents: read
+  pull-requests: write
+jobs:
+  review:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ inputs.node-version }}
+      - name: Install qai-cli
+        run: npm install -g qai-cli@${{ inputs.qai-version }}
+      - name: Run QAI Review
+        id: review
+        env:
+          ANTHROPIC_API_KEY: ${{ inputs.provider == 'anthropic' && secrets.api-key || '' }}
+          OPENAI_API_KEY: ${{ inputs.provider == 'openai' && secrets.api-key || '' }}
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          set +e
+          REVIEW=$(qai review ${{ github.event.pull_request.number }} --json 2>&1)
+          EXIT_CODE=$?
+          echo "$REVIEW" > review-output.json
+          echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"
+          set -e
+      - name: Post Review Comment
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          BODY=$(cat review-output.json)
+          gh pr comment ${{ github.event.pull_request.number }} \
+            --body "## 🤖 QAI Code Review
+          <details>
+          <summary>Review Details</summary>
+          \`\`\`json
+          $BODY
+          \`\`\`
+          </details>"
+      - name: Fail on Critical Issues
+        if: steps.review.outputs.exit_code == '1'
+        run: |
+          echo "::error::QAI review found critical issues"
+          exit 1

package/.github/workflows/qai-review.yml ADDED Viewed

@@ -0,0 +1,63 @@
+name: QAI Code Review
+on:
+  pull_request:
+    types: [opened, synchronize]
+permissions:
+  contents: read
+  pull-requests: write
+jobs:
+  review:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: 20
+      - name: Install qai-cli
+        run: npm install -g qai-cli
+      - name: Run QAI Review
+        id: review
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          set +e
+          REVIEW=$(qai review ${{ github.event.pull_request.number }} --json 2>&1)
+          EXIT_CODE=$?
+          echo "$REVIEW" > review-output.json
+          echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"
+          set -e
+      - name: Post Review Comment
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          BODY=$(cat review-output.json)
+          gh pr comment ${{ github.event.pull_request.number }} \
+            --body "## 🤖 QAI Code Review
+          <details>
+          <summary>Review Details</summary>
+          \`\`\`json
+          $BODY
+          \`\`\`
+          </details>"
+      - name: Fail on Critical Issues
+        if: steps.review.outputs.exit_code == '1'
+        run: |
+          echo "::error::QAI review found critical issues"
+          exit 1

package/README.md CHANGED Viewed

@@ -29,7 +29,7 @@ VIEWPORTS=desktop,mobile,tablet qai scan https://mysite.com
 FOCUS=accessibility qai scan https://mysite.com
 ```
-### `qai review` — PR Code Review _(Coming Soon)_
+### `qai review` — PR Code Review
 Deep code review with full codebase context. Not just the diff — traces through dependencies, callers, and related tests.
@@ -41,7 +41,7 @@ qai review 42
 qai review --base main
 ```
-### `qai generate` — Test Generation _(Coming Soon)_
+### `qai generate` — Test Generation
 Auto-generate Playwright E2E tests from URLs or unit tests from source files.
@@ -106,6 +106,19 @@ Works with any major LLM. Set one env var:
 - **Structured reports** — JSON + Markdown output
 - **CI/CD ready** — GitHub Action + exit codes for pipelines
+## How It Compares
+| Feature                                        | **qai**                 | Paragon   | CodeRabbit  | Cursor BugBot |
+| ---------------------------------------------- | ----------------------- | --------- | ----------- | ------------- |
+| Open source                                    | ✅                      | ❌        | ❌          | ❌            |
+| Visual QA scanning                             | ✅                      | ✅        | ❌          | ❌            |
+| PR code review                                 | ✅                      | ❌        | ✅          | ✅            |
+| Test generation                                | ✅                      | ❌        | ❌          | ❌            |
+| Multi-provider (Claude, GPT-4, Gemini, Ollama) | ✅                      | ❌        | ❌          | ❌            |
+| Local/offline mode (Ollama)                    | ✅                      | ❌        | ❌          | ❌            |
+| CLI + library + GitHub Action                  | ✅                      | SaaS only | GitHub only | GitHub only   |
+| Free                                           | ✅ (bring your own key) | Paid      | Freemium    | Freemium      |
 ## License
 MIT

package/benchmarks/README.md ADDED Viewed

@@ -0,0 +1,68 @@
+# qai Benchmark Suite
+Measures code review accuracy across LLM providers.
+## Methodology
+The benchmark uses a curated dataset of **10 realistic code diffs**, each containing a known bug. Bug types span:
+| Category         | Cases                                                    |
+| ---------------- | -------------------------------------------------------- |
+| Security         | SQL injection, XSS, hardcoded secrets, unvalidated input |
+| Bugs             | Null pointer / undefined access                          |
+| Concurrency      | Race condition (TOCTOU)                                  |
+| Error handling   | Missing try/catch on file operations                     |
+| Logic            | Off-by-one in pagination                                 |
+| Performance      | Memory leak / unclosed resources                         |
+| Breaking changes | Public API signature change                              |
+Each case includes:
+- A unified diff (10-50 lines)
+- Surrounding file context
+- Expected issues with severity and category
+## Scoring
+For each test case the runner checks:
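+1. **True positive** — did the LLM identify the known bug? A case counts as detected only when every expected issue is matched by some reported issue, either through fuzzy keyword overlap with the expected description or because both its category and its severity match.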
+2. **False positives** — how many extra issues were reported beyond the expected ones.
+3. **Latency** — wall-clock time per review call.
+## Running
+```bash
+# Default provider (uses first available API key)
+node benchmarks/run.js
+# Specific provider
+node benchmarks/run.js --provider anthropic
+# JSON output to stdout
+node benchmarks/run.js --json
+```
+Results are always saved to `benchmarks/results/`.
+## Adding Test Cases
+Create a new JSON file in `benchmarks/dataset/`:
+```json
+{
+  "name": "descriptive-slug",
+  "description": "What the bug is",
+  "diff": "... unified diff ...",
+  "context": { "files": { "path/to/file.js": "full file content" } },
+  "expectedIssues": [
+    {
+      "severity": "critical",
+      "category": "security",
+      "description": "Short description of expected finding"
+    }
+  ]
+}
+```
+Then re-run the benchmark. The runner auto-discovers all `.json` files in the dataset directory.

package/benchmarks/dataset/breaking-api-change.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "name": "breaking-api-change",
+  "description": "Public API method signature changed without deprecation or major version bump",
+  "diff": "diff --git a/src/api/client.js b/src/api/client.js\nindex ab12cd3..ef45gh6 100644\n--- a/src/api/client.js\n+++ b/src/api/client.js\n@@ -15,14 +15,16 @@ class ApiClient {\n   /**\n    * Fetch a user by ID\n-   * @param {string} userId\n-   * @param {Object} [options]\n-   * @returns {Promise<User>}\n+   * @param {Object} params\n+   * @param {string} params.userId\n+   * @param {string[]} [params.fields]\n+   * @returns {Promise<UserResponse>}\n    */\n-  async getUser(userId, options = {}) {\n-    const res = await this.http.get(`/users/${userId}`, { params: options });\n-    return res.data;\n+  async getUser({ userId, fields = ['id', 'name', 'email'] } = {}) {\n+    const query = fields.length ? `?fields=${fields.join(',')}` : '';\n+    const res = await this.http.get(`/users/${userId}${query}`);\n+    return { user: res.data, meta: { fields } };\n   }\n \n   /**\n    * List all users",
+  "context": {
+    "files": {
+      "src/api/client.js": "const axios = require('axios');\n\nclass ApiClient {\n  constructor(baseURL) {\n    this.http = axios.create({ baseURL });\n  }\n\n  async getUser(userId, options = {}) {\n    const res = await this.http.get(`/users/${userId}`, { params: options });\n    return res.data;\n  }\n}\n\nmodule.exports = { ApiClient };"
+    }
+  },
+  "expectedIssues": [
+    {
+      "severity": "high",
+      "category": "breaking-change",
+      "description": "Breaking API change: getUser() signature changed from (userId, options) to ({ userId, fields }), return type also changed"
+    }
+  ]
+}

package/benchmarks/dataset/hardcoded-secrets.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "name": "hardcoded-secrets",
+  "description": "API keys and database credentials hardcoded in source",
+  "diff": "diff --git a/src/config/database.js b/src/config/database.js\nindex aabb112..ccdd334 100644\n--- a/src/config/database.js\n+++ b/src/config/database.js\n@@ -1,10 +1,14 @@\n-const dbUrl = process.env.DATABASE_URL;\n-const apiKey = process.env.STRIPE_API_KEY;\n+const dbUrl = 'postgresql://admin:s3cretPassw0rd!@prod-db.internal.company.com:5432/maindb';\n+const apiKey = 'sk_live_FAKE_EXAMPLE_KEY_NOT_REAL_1234567890';\n+const jwtSecret = 'my-super-secret-jwt-key-do-not-share';\n \n module.exports = {\n   database: {\n     connectionString: dbUrl,\n     ssl: true,\n+    pool: { min: 2, max: 10 },\n   },\n-  stripe: { apiKey },\n+  stripe: { apiKey },\n+  jwt: { secret: jwtSecret, expiresIn: '7d' },\n };",
+  "context": {
+    "files": {
+      "src/config/database.js": "const dbUrl = process.env.DATABASE_URL;\nconst apiKey = process.env.STRIPE_API_KEY;\n\nmodule.exports = {\n  database: {\n    connectionString: dbUrl,\n    ssl: true,\n  },\n  stripe: { apiKey },\n};"
+    }
+  },
+  "expectedIssues": [
+    {
+      "severity": "critical",
+      "category": "security",
+      "description": "Hardcoded production database credentials, Stripe live API key, and JWT secret in source code"
+    }
+  ]
+}

package/benchmarks/dataset/memory-leak.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "name": "memory-leak",
+  "description": "Database connection pool never closed, event listeners accumulate",
+  "diff": "diff --git a/src/services/analytics.js b/src/services/analytics.js\nindex 9a8b7c6..5d4e3f2 100644\n--- a/src/services/analytics.js\n+++ b/src/services/analytics.js\n@@ -3,18 +3,22 @@ const { Pool } = require('pg');\n class AnalyticsService {\n   constructor(config) {\n     this.config = config;\n+    this.pools = [];\n   }\n \n   async trackEvent(event) {\n-    const pool = new Pool(this.config.database);\n-    try {\n-      const client = await pool.connect();\n-      await client.query('INSERT INTO events (type, data, ts) VALUES ($1, $2, NOW())', [\n-        event.type,\n-        JSON.stringify(event.data),\n-      ]);\n-      client.release();\n-    } finally {\n-      await pool.end();\n-    }\n+    const pool = new Pool(this.config.database);\n+    this.pools.push(pool);\n+    const client = await pool.connect();\n+    await client.query(\n+      'INSERT INTO events (type, data, ts) VALUES ($1, $2, NOW())',\n+      [event.type, JSON.stringify(event.data)]\n+    );\n+    // client.release() removed for \"performance\"\n+    pool.on('error', (err) => {\n+      console.error('Pool error:', err);\n+    });\n+    return { success: true };\n   }\n }",
+  "context": {
+    "files": {
+      "src/services/analytics.js": "const { Pool } = require('pg');\n\nclass AnalyticsService {\n  constructor(config) {\n    this.config = config;\n  }\n\n  async trackEvent(event) {\n    const pool = new Pool(this.config.database);\n    try {\n      const client = await pool.connect();\n      await client.query('INSERT INTO events (type, data, ts) VALUES ($1, $2, NOW())', [\n        event.type, JSON.stringify(event.data)\n      ]);\n      client.release();\n    } finally {\n      await pool.end();\n    }\n  }\n}"
+    }
+  },
+  "expectedIssues": [
+    {
+      "severity": "high",
+      "category": "performance",
+      "description": "Memory leak: new Pool created per call and never closed, client never released, error listeners accumulate"
+    }
+  ]
+}

package/benchmarks/dataset/missing-error-handling.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "name": "missing-error-handling",
+  "description": "File operations without error handling or cleanup",
+  "diff": "diff --git a/src/utils/config.js b/src/utils/config.js\nindex 2a3b4c5..6d7e8f9 100644\n--- a/src/utils/config.js\n+++ b/src/utils/config.js\n@@ -5,15 +5,12 @@ const yaml = require('js-yaml');\n \n /**\n  * Load and merge configuration from multiple sources\n- * @param {string[]} configPaths\n+ * @param {string} configPath\n  * @returns {Object}\n  */\n-function loadConfig(configPaths) {\n-  const configs = configPaths.map((p) => {\n-    try {\n-      const raw = fs.readFileSync(p, 'utf8');\n-      return yaml.load(raw);\n-    } catch (err) {\n-      console.warn(`Config not found: ${p}, skipping`);\n-      return {};\n-    }\n-  });\n-  return Object.assign({}, ...configs);\n+function loadConfig(configPath) {\n+  const raw = fs.readFileSync(configPath, 'utf8');\n+  const config = yaml.load(raw);\n+  const overridePath = configPath.replace('.yml', '.local.yml');\n+  const overrideRaw = fs.readFileSync(overridePath, 'utf8');\n+  const override = yaml.load(overrideRaw);\n+  return { ...config, ...override };\n }",
+  "context": {
+    "files": {
+      "src/utils/config.js": "const fs = require('fs');\nconst yaml = require('js-yaml');\n\nfunction loadConfig(configPaths) {\n  const configs = configPaths.map((p) => {\n    try {\n      const raw = fs.readFileSync(p, 'utf8');\n      return yaml.load(raw);\n    } catch (err) {\n      console.warn(`Config not found: ${p}, skipping`);\n      return {};\n    }\n  });\n  return Object.assign({}, ...configs);\n}\n\nmodule.exports = { loadConfig };"
+    }
+  },
+  "expectedIssues": [
+    {
+      "severity": "high",
+      "category": "error-handling",
+      "description": "No error handling for file read operations; will crash if config or override file is missing"
+    }
+  ]
+}

package/benchmarks/dataset/null-pointer.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "name": "null-pointer",
+  "description": "Null pointer access when optional nested object is missing",
+  "diff": "diff --git a/src/services/order.js b/src/services/order.js\nindex a1b2c3d..e4f5678 100644\n--- a/src/services/order.js\n+++ b/src/services/order.js\n@@ -24,9 +24,15 @@ class OrderService {\n    * @returns {Object} formatted order summary\n    */\n   formatOrderSummary(order) {\n-    return {\n-      id: order.id,\n-      total: order.total,\n-    };\n+    const address = order.customer.shippingAddress;\n+    return {\n+      id: order.id,\n+      total: order.total,\n+      customerName: order.customer.name,\n+      shippingCity: address.city,\n+      shippingZip: address.zipCode,\n+      formattedAddress: `${address.street}, ${address.city}, ${address.state} ${address.zipCode}`,\n+    };\n   }\n }",
+  "context": {
+    "files": {
+      "src/services/order.js": "class OrderService {\n  constructor(db) {\n    this.db = db;\n  }\n\n  async getOrder(id) {\n    const order = await this.db.orders.findById(id);\n    return order; // order.customer.shippingAddress may be null\n  }\n\n  formatOrderSummary(order) {\n    return { id: order.id, total: order.total };\n  }\n}"
+    }
+  },
+  "expectedIssues": [
+    {
+      "severity": "high",
+      "category": "bug",
+      "description": "Null/undefined access on order.customer.shippingAddress without null check"
+    }
+  ]
+}

package/benchmarks/dataset/off-by-one.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "name": "off-by-one",
+  "description": "Off-by-one error in pagination logic",
+  "diff": "diff --git a/src/utils/paginate.js b/src/utils/paginate.js\nindex 1122334..5566778 100644\n--- a/src/utils/paginate.js\n+++ b/src/utils/paginate.js\n@@ -8,10 +8,11 @@\n  */\n function paginate(items, page, pageSize = 20) {\n   const total = items.length;\n-  const totalPages = Math.ceil(total / pageSize);\n-  const start = (page - 1) * pageSize;\n+  const totalPages = Math.floor(total / pageSize);\n+  const start = page * pageSize;\n   const end = start + pageSize;\n   return {\n     data: items.slice(start, end),\n     page,\n+    pageSize,\n     totalPages,\n     total,\n   };\n }",
+  "context": {
+    "files": {
+      "src/utils/paginate.js": "function paginate(items, page, pageSize = 20) {\n  const total = items.length;\n  const totalPages = Math.ceil(total / pageSize);\n  const start = (page - 1) * pageSize;\n  const end = start + pageSize;\n  return {\n    data: items.slice(start, end),\n    page,\n    totalPages,\n    total,\n  };\n}\n\nmodule.exports = { paginate };"
+    }
+  },
+  "expectedIssues": [
+    {
+      "severity": "medium",
+      "category": "logic",
+      "description": "Off-by-one: page 1 skips first pageSize items (0-indexed page with 1-indexed expectation), and Math.floor loses last partial page"
+    }
+  ]
+}

package/benchmarks/dataset/race-condition.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "name": "race-condition",
+  "description": "TOCTOU race condition in balance check and withdrawal",
+  "diff": "diff --git a/src/services/wallet.js b/src/services/wallet.js\nindex 1a2b3c4..5d6e7f8 100644\n--- a/src/services/wallet.js\n+++ b/src/services/wallet.js\n@@ -10,12 +10,18 @@ class WalletService {\n   }\n \n   async withdraw(userId, amount) {\n-    return this.db.transaction(async (trx) => {\n-      const wallet = await trx('wallets').where({ user_id: userId }).forUpdate().first();\n-      if (wallet.balance < amount) throw new Error('Insufficient funds');\n-      await trx('wallets').where({ user_id: userId }).update({ balance: wallet.balance - amount });\n-      return { newBalance: wallet.balance - amount };\n-    });\n+    const wallet = await this.db('wallets').where({ user_id: userId }).first();\n+    if (!wallet) {\n+      throw new Error('Wallet not found');\n+    }\n+    if (wallet.balance < amount) {\n+      throw new Error('Insufficient funds');\n+    }\n+    const newBalance = wallet.balance - amount;\n+    await this.db('wallets')\n+      .where({ user_id: userId })\n+      .update({ balance: newBalance });\n+    return { newBalance };\n   }\n }",
+  "context": {
+    "files": {
+      "src/services/wallet.js": "class WalletService {\n  constructor(db) {\n    this.db = db;\n  }\n\n  async withdraw(userId, amount) {\n    return this.db.transaction(async (trx) => {\n      const wallet = await trx('wallets').where({ user_id: userId }).forUpdate().first();\n      if (wallet.balance < amount) throw new Error('Insufficient funds');\n      await trx('wallets').where({ user_id: userId }).update({ balance: wallet.balance - amount });\n      return { newBalance: wallet.balance - amount };\n    });\n  }\n}"
+    }
+  },
+  "expectedIssues": [
+    {
+      "severity": "critical",
+      "category": "concurrency",
+      "description": "Race condition: balance check and update are not atomic, removed transaction and FOR UPDATE lock"
+    }
+  ]
+}

package/benchmarks/dataset/sql-injection.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "name": "sql-injection",
+  "description": "SQL injection via string concatenation in user lookup query",
+  "diff": "diff --git a/src/routes/users.js b/src/routes/users.js\nindex 3a1b2c3..4d5e6f7 100644\n--- a/src/routes/users.js\n+++ b/src/routes/users.js\n@@ -12,8 +12,12 @@ const db = require('../db');\n \n router.get('/users/search', async (req, res) => {\n   const { username } = req.query;\n-  const users = await db.query('SELECT id, username, email FROM users WHERE username = $1', [username]);\n-  res.json(users.rows);\n+  if (!username) {\n+    return res.status(400).json({ error: 'username is required' });\n+  }\n+  const query = `SELECT id, username, email FROM users WHERE username = '${username}'`;\n+  const users = await db.query(query);\n+  return res.json(users.rows);\n });\n \n router.get('/users/:id', async (req, res) => {",
+  "context": {
+    "files": {
+      "src/routes/users.js": "const express = require('express');\nconst router = express.Router();\nconst db = require('../db');\n\nrouter.get('/users/search', async (req, res) => {\n  const { username } = req.query;\n  const users = await db.query('SELECT id, username, email FROM users WHERE username = $1', [username]);\n  res.json(users.rows);\n});"
+    }
+  },
+  "expectedIssues": [
+    {
+      "severity": "critical",
+      "category": "security",
+      "description": "SQL injection via string interpolation instead of parameterized query"
+    }
+  ]
+}

package/benchmarks/dataset/unvalidated-input.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "name": "unvalidated-input",
+  "description": "User input used directly in file path and shell command without validation",
+  "diff": "diff --git a/src/routes/export.js b/src/routes/export.js\nindex aabb123..ccdd456 100644\n--- a/src/routes/export.js\n+++ b/src/routes/export.js\n@@ -4,12 +4,18 @@ const { execSync } = require('child_process');\n \n router.post('/export', async (req, res) => {\n-  const { format } = req.body;\n-  const allowed = ['csv', 'json', 'xml'];\n-  if (!allowed.includes(format)) {\n-    return res.status(400).json({ error: 'Invalid format' });\n-  }\n-  const data = await db.reports.getAll();\n-  const file = exportService.generate(data, format);\n-  res.download(file);\n+  const { format, filename, startDate, endDate } = req.body;\n+  const data = await db.reports.getAll();\n+  const outputPath = `/tmp/exports/${filename}.${format}`;\n+  fs.writeFileSync(outputPath, exportService.serialize(data, format));\n+  // Compress for large exports\n+  if (req.body.compress) {\n+    execSync(`gzip ${outputPath}`);\n+    return res.download(`${outputPath}.gz`);\n+  }\n+  res.download(outputPath);\n });",
+  "context": {
+    "files": {
+      "src/routes/export.js": "const express = require('express');\nconst router = express.Router();\nconst db = require('../db');\nconst fs = require('fs');\nconst exportService = require('../services/export');\nconst { execSync } = require('child_process');\n\nrouter.post('/export', async (req, res) => {\n  const { format } = req.body;\n  const allowed = ['csv', 'json', 'xml'];\n  if (!allowed.includes(format)) {\n    return res.status(400).json({ error: 'Invalid format' });\n  }\n  const data = await db.reports.getAll();\n  const file = exportService.generate(data, format);\n  res.download(file);\n});\n\nmodule.exports = router;"
+    }
+  },
+  "expectedIssues": [
+    {
+      "severity": "critical",
+      "category": "security",
+      "description": "Unvalidated input: filename and format used in file path (path traversal) and shell command (command injection), format whitelist removed"
+    }
+  ]
+}

package/benchmarks/dataset/xss-vulnerability.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "name": "xss-vulnerability",
+  "description": "User input rendered as raw HTML without sanitization",
+  "diff": "diff --git a/src/views/profile.js b/src/views/profile.js\nindex 1234abc..5678def 100644\n--- a/src/views/profile.js\n+++ b/src/views/profile.js\n@@ -6,11 +6,17 @@ const express = require('express');\n router.get('/profile/:id', async (req, res) => {\n   const user = await db.users.findById(req.params.id);\n-  res.render('profile', {\n-    name: user.displayName,\n-    bio: user.bio,\n-  });\n+  const html = `\n+    <html>\n+    <body>\n+      <h1>${user.displayName}</h1>\n+      <div class=\"bio\">${user.bio}</div>\n+      <div class=\"website\"><a href=\"${user.website}\">${user.website}</a></div>\n+      <div class=\"location\">${user.location}</div>\n+    </body>\n+    </html>`;\n+  res.setHeader('Content-Type', 'text/html');\n+  res.send(html);\n });",
+  "context": {
+    "files": {
+      "src/views/profile.js": "const express = require('express');\nconst router = express.Router();\nconst db = require('../db');\n\nrouter.get('/profile/:id', async (req, res) => {\n  const user = await db.users.findById(req.params.id);\n  res.render('profile', {\n    name: user.displayName,\n    bio: user.bio,\n  });\n});\n\nmodule.exports = router;"
+    }
+  },
+  "expectedIssues": [
+    {
+      "severity": "critical",
+      "category": "security",
+      "description": "XSS vulnerability: user-controlled fields (displayName, bio, website) interpolated directly into HTML without escaping"
+    }
+  ]
+}

package/benchmarks/run.js ADDED Viewed

@@ -0,0 +1,184 @@
+#!/usr/bin/env node
+/**
+ * Benchmark runner for qai code review accuracy.
+ *
+ * Loads curated diffs with known bugs, runs them through one provider per
+ * invocation, and scores true-positive rate, false-positive count, and latency.
+ *
+ * Usage:
+ *   node benchmarks/run.js [--provider <name>] [--json]
+ */
+const fs = require('fs');
+const path = require('path');
+const { getProvider } = require('../src/providers');
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function loadDataset() {
+  const dir = path.join(__dirname, 'dataset');
+  return fs
+    .readdirSync(dir)
+    .filter((f) => f.endsWith('.json'))
+    .map((f) => JSON.parse(fs.readFileSync(path.join(dir, f), 'utf8')));
+}
+/**
+ * Determine whether the review found the expected issue.
+ * Every expected issue must be matched by some reported issue, either by a
+ * fuzzy keyword match on the description or by category and severity together.
+ */
+function scoreResult(review, expected) {
+  if (!review || !review.issues || !Array.isArray(review.issues)) {
+    return { detected: false, falsePositives: 0 };
+  }
+  const found = expected.every((exp) => {
+    return review.issues.some((issue) => {
+      const descMatch = matchDescription(issue.description || issue.message || '', exp.description);
+      const catMatch =
+        !exp.category || (issue.category || '').toLowerCase().includes(exp.category.toLowerCase());
+      const sevMatch =
+        !exp.severity || (issue.severity || '').toLowerCase().includes(exp.severity.toLowerCase());
+      // A description match alone is sufficient; so is a category match together with a severity match
+      return descMatch || (catMatch && sevMatch);
+    });
+  });
+  // False positives = reported issue count minus expected issue count, floored at 0 (individual matches are not consulted)
+  const falsePositives = Math.max(0, review.issues.length - expected.length);
+  return { detected: found, falsePositives };
+}
+function matchDescription(actual, expected) {
+  // Extract key terms (words longer than 3 characters) from the expected description and check that at least 40% appear
+  const keywords = expected
+    .toLowerCase()
+    .split(/[\s,/]+/)
+    .filter((w) => w.length > 3);
+  const normalised = actual.toLowerCase();
+  const hits = keywords.filter((kw) => normalised.includes(kw));
+  return hits.length >= Math.ceil(keywords.length * 0.4);
+}
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+async function main() {
+  const args = process.argv.slice(2);
+  const jsonOutput = args.includes('--json');
+  const providerIdx = args.indexOf('--provider');
+  const providerName = providerIdx !== -1 ? args[providerIdx + 1] : undefined;
+  const dataset = loadDataset();
+  console.log(`\nLoaded ${dataset.length} benchmark cases\n`);
+  let provider;
+  try {
+    provider = getProvider(providerName);
+  } catch (err) {
+    console.error(
+      `Failed to initialise provider${providerName ? ` "${providerName}"` : ''}: ${err.message}`,
+    );
+    console.error('Set an API key (e.g. ANTHROPIC_API_KEY) or specify --provider <name>');
+    process.exit(1);
+  }
+  const providerLabel = provider.constructor.name || 'unknown';
+  console.log(`Provider: ${providerLabel}\n`);
+  const results = [];
+  let detected = 0;
+  let totalFP = 0;
+  let totalMs = 0;
+  for (const testCase of dataset) {
+    const label = testCase.name.padEnd(25);
+    process.stdout.write(`  ${label} … `);
+    const start = Date.now();
+    let review;
+    let error = null;
+    try {
+      review = await provider.reviewCode(testCase.diff, testCase.context || {}, {
+        focus: 'all',
+      });
+    } catch (err) {
+      error = err.message;
+    }
+    const elapsed = Date.now() - start;
+    totalMs += elapsed;
+    if (error) {
+      console.log(`ERROR (${elapsed}ms) — ${error}`);
+      results.push({
+        name: testCase.name,
+        detected: false,
+        falsePositives: 0,
+        elapsed,
+        error,
+      });
+      continue;
+    }
+    const score = scoreResult(review, testCase.expectedIssues);
+    if (score.detected) detected++;
+    totalFP += score.falsePositives;
+    const icon = score.detected ? '✅' : '❌';
+    console.log(
+      `${icon}  ${elapsed}ms  (FP: ${score.falsePositives}, issues: ${(review.issues || []).length})`,
+    );
+    results.push({
+      name: testCase.name,
+      detected: score.detected,
+      falsePositives: score.falsePositives,
+      issuesFound: (review.issues || []).length,
+      elapsed,
+    });
+  }
+  // Summary
+  const tpr = ((detected / dataset.length) * 100).toFixed(1);
+  const avgMs = (totalMs / dataset.length).toFixed(0);
+  console.log('\n' + '═'.repeat(60));
+  console.log(`  True-positive rate : ${detected}/${dataset.length} (${tpr}%)`);
+  console.log(`  Total false positives: ${totalFP}`);
+  console.log(`  Avg time per review : ${avgMs}ms (total ${(totalMs / 1000).toFixed(1)}s)`);
+  console.log('═'.repeat(60) + '\n');
+  const report = {
+    provider: providerLabel,
+    timestamp: new Date().toISOString(),
+    cases: results,
+    summary: {
+      total: dataset.length,
+      detected,
+      truePositiveRate: parseFloat(tpr),
+      totalFalsePositives: totalFP,
+      avgTimeMs: parseInt(avgMs, 10),
+    },
+  };
+  if (jsonOutput) {
+    console.log(JSON.stringify(report, null, 2));
+  }
+  // Always write report to disk
+  const outDir = path.join(__dirname, 'results');
+  fs.mkdirSync(outDir, { recursive: true });
+  const outFile = path.join(outDir, `report-${providerLabel.toLowerCase()}-${Date.now()}.json`);
+  fs.writeFileSync(outFile, JSON.stringify(report, null, 2));
+  console.log(`Report saved to ${outFile}\n`);
+}
+main().catch((err) => {
+  console.error('Benchmark failed:', err);
+  process.exit(1);
+});

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "qai-cli",
-  "version": "3.1.0",
+  "version": "3.2.0",
   "description": "AI-powered QA engineer. Code review, testing, and bug detection from your terminal.",
   "main": "src/analyze.js",
   "types": "src/types.d.ts",