qai-cli 3.0.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
# Reusable workflow: installs qai-cli, runs `qai review` against the pull
# request that triggered the caller, posts the raw output as a PR comment, and
# fails the job when qai exits with status 1.
name: QAI Code Review (Reusable)

on:
  workflow_call:
    inputs:
      provider:
        description: 'AI provider to use (anthropic or openai)'
        required: false
        type: string
        default: 'anthropic'
      node-version:
        description: 'Node.js version'
        required: false
        type: string
        default: '20'
      qai-version:
        description: 'qai-cli version (npm version specifier)'
        required: false
        type: string
        default: 'latest'
    secrets:
      api-key:
        description: 'API key for the chosen provider'
        required: true

permissions:
  contents: read
  pull-requests: write

jobs:
  review:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: ${{ inputs.node-version }}

      - name: Install qai-cli
        env:
          # Caller-controlled input is passed through the environment instead of
          # being interpolated into the script, so it cannot inject shell syntax.
          QAI_VERSION: ${{ inputs.qai-version }}
        run: npm install -g "qai-cli@${QAI_VERSION}"

      - name: Run QAI Review
        id: review
        env:
          # Only the variable matching the selected provider receives the key.
          ANTHROPIC_API_KEY: ${{ inputs.provider == 'anthropic' && secrets.api-key || '' }}
          OPENAI_API_KEY: ${{ inputs.provider == 'openai' && secrets.api-key || '' }}
          GH_TOKEN: ${{ github.token }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          # Do not abort on a non-zero exit: the status is exported as a step
          # output and acted on by the final step, after the comment is posted.
          set +e
          # stderr is captured together with stdout, so tool errors also end up
          # in the posted comment.
          REVIEW=$(qai review "$PR_NUMBER" --json 2>&1)
          EXIT_CODE=$?
          # printf writes the captured text verbatim (echo may treat a leading
          # "-n"/"-e" as an option).
          printf '%s\n' "$REVIEW" > review-output.json
          echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"
          set -e

      - name: Post Review Comment
        env:
          GH_TOKEN: ${{ github.token }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          BODY=$(cat review-output.json)
          gh pr comment "$PR_NUMBER" \
            --body "## 🤖 QAI Code Review

          <details>
          <summary>Review Details</summary>

          \`\`\`json
          $BODY
          \`\`\`

          </details>"

      - name: Fail on Critical Issues
        if: steps.review.outputs.exit_code == '1'
        run: |
          echo "::error::QAI review found critical issues"
          exit 1
@@ -0,0 +1,63 @@
1
# Runs `qai review` on every opened or updated pull request, posts the raw
# output as a PR comment, and fails the job when qai exits with status 1.
name: QAI Code Review

on:
  pull_request:
    types: [opened, synchronize]

permissions:
  contents: read
  pull-requests: write

jobs:
  review:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 20

      - name: Install qai-cli
        run: npm install -g qai-cli

      - name: Run QAI Review
        id: review
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          GH_TOKEN: ${{ github.token }}
          # Context values reach the script through the environment rather than
          # by interpolation into the shell source.
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          # Do not abort on a non-zero exit: the status is exported as a step
          # output and acted on by the final step, after the comment is posted.
          set +e
          # stderr is captured together with stdout, so tool errors also end up
          # in the posted comment.
          REVIEW=$(qai review "$PR_NUMBER" --json 2>&1)
          EXIT_CODE=$?
          # printf writes the captured text verbatim (echo may treat a leading
          # "-n"/"-e" as an option).
          printf '%s\n' "$REVIEW" > review-output.json
          echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"
          set -e

      - name: Post Review Comment
        env:
          GH_TOKEN: ${{ github.token }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          BODY=$(cat review-output.json)
          gh pr comment "$PR_NUMBER" \
            --body "## 🤖 QAI Code Review

          <details>
          <summary>Review Details</summary>

          \`\`\`json
          $BODY
          \`\`\`

          </details>"

      - name: Fail on Critical Issues
        if: steps.review.outputs.exit_code == '1'
        run: |
          echo "::error::QAI review found critical issues"
          exit 1
package/README.md CHANGED
@@ -29,7 +29,7 @@ VIEWPORTS=desktop,mobile,tablet qai scan https://mysite.com
29
29
  FOCUS=accessibility qai scan https://mysite.com
30
30
  ```
31
31
 
32
- ### `qai review` — PR Code Review _(Coming Soon)_
32
+ ### `qai review` — PR Code Review
33
33
 
34
34
  Deep code review with full codebase context. Not just the diff — traces through dependencies, callers, and related tests.
35
35
 
@@ -41,7 +41,7 @@ qai review 42
41
41
  qai review --base main
42
42
  ```
43
43
 
44
- ### `qai generate` — Test Generation _(Coming Soon)_
44
+ ### `qai generate` — Test Generation
45
45
 
46
46
  Auto-generate Playwright E2E tests from URLs or unit tests from source files.
47
47
 
@@ -106,6 +106,19 @@ Works with any major LLM. Set one env var:
106
106
  - **Structured reports** — JSON + Markdown output
107
107
  - **CI/CD ready** — GitHub Action + exit codes for pipelines
108
108
 
109
+ ## How It Compares
110
+
111
+ | Feature | **qai** | Paragon | CodeRabbit | Cursor BugBot |
112
+ | ---------------------------------------------- | ----------------------- | --------- | ----------- | ------------- |
113
+ | Open source | ✅ | ❌ | ❌ | ❌ |
114
+ | Visual QA scanning | ✅ | ✅ | ❌ | ❌ |
115
+ | PR code review | ✅ | ❌ | ✅ | ✅ |
116
+ | Test generation | ✅ | ❌ | ❌ | ❌ |
117
+ | Multi-provider (Claude, GPT-4, Gemini, Ollama) | ✅ | ❌ | ❌ | ❌ |
118
+ | Local/offline mode (Ollama) | ✅ | ❌ | ❌ | ❌ |
119
+ | CLI + library + GitHub Action | ✅ | SaaS only | GitHub only | GitHub only |
120
+ | Free | ✅ (bring your own key) | Paid | Freemium | Freemium |
121
+
109
122
  ## License
110
123
 
111
124
  MIT
@@ -0,0 +1,68 @@
1
+ # qai Benchmark Suite
2
+
3
+ Measures code review accuracy across LLM providers.
4
+
5
+ ## Methodology
6
+
7
+ The benchmark uses a curated dataset of **10 realistic code diffs**, each containing a known bug. Bug types span:
8
+
9
+ | Category | Cases |
10
+ | ---------------- | -------------------------------------------------------- |
11
+ | Security | SQL injection, XSS, hardcoded secrets, unvalidated input |
12
+ | Bugs | Null pointer / undefined access |
13
+ | Concurrency | Race condition (TOCTOU) |
14
+ | Error handling | Missing try/catch on file operations |
15
+ | Logic | Off-by-one in pagination |
16
+ | Performance | Memory leak / unclosed resources |
17
+ | Breaking changes | Public API signature change |
18
+
19
+ Each case includes:
20
+
21
+ - A unified diff (10-50 lines)
22
+ - Surrounding file context
23
+ - Expected issues with severity and category
24
+
25
+ ## Scoring
26
+
27
+ For each test case the runner checks:
28
+
29
+ 1. **True positive** — did the LLM identify the known bug? A reported issue counts as a match when its description shares enough keywords with the expected description, or when both its category and severity match the expected ones.
30
+ 2. **False positives** — how many extra issues were reported beyond the expected ones.
31
+ 3. **Latency** — wall-clock time per review call.
32
+
33
+ ## Running
34
+
35
+ ```bash
36
+ # Default provider (uses first available API key)
37
+ node benchmarks/run.js
38
+
39
+ # Specific provider
40
+ node benchmarks/run.js --provider anthropic
41
+
42
+ # JSON output to stdout
43
+ node benchmarks/run.js --json
44
+ ```
45
+
46
+ Results are always saved to `benchmarks/results/`.
47
+
48
+ ## Adding Test Cases
49
+
50
+ Create a new JSON file in `benchmarks/dataset/`:
51
+
52
+ ```json
53
+ {
54
+ "name": "descriptive-slug",
55
+ "description": "What the bug is",
56
+ "diff": "... unified diff ...",
57
+ "context": { "files": { "path/to/file.js": "full file content" } },
58
+ "expectedIssues": [
59
+ {
60
+ "severity": "critical",
61
+ "category": "security",
62
+ "description": "Short description of expected finding"
63
+ }
64
+ ]
65
+ }
66
+ ```
67
+
68
+ Then re-run the benchmark. The runner auto-discovers all `.json` files in the dataset directory.
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "breaking-api-change",
3
+ "description": "Public API method signature changed without deprecation or major version bump",
4
+ "diff": "diff --git a/src/api/client.js b/src/api/client.js\nindex ab12cd3..ef45gh6 100644\n--- a/src/api/client.js\n+++ b/src/api/client.js\n@@ -15,14 +15,16 @@ class ApiClient {\n /**\n * Fetch a user by ID\n- * @param {string} userId\n- * @param {Object} [options]\n- * @returns {Promise<User>}\n+ * @param {Object} params\n+ * @param {string} params.userId\n+ * @param {string[]} [params.fields]\n+ * @returns {Promise<UserResponse>}\n */\n- async getUser(userId, options = {}) {\n- const res = await this.http.get(`/users/${userId}`, { params: options });\n- return res.data;\n+ async getUser({ userId, fields = ['id', 'name', 'email'] } = {}) {\n+ const query = fields.length ? `?fields=${fields.join(',')}` : '';\n+ const res = await this.http.get(`/users/${userId}${query}`);\n+ return { user: res.data, meta: { fields } };\n }\n \n /**\n * List all users",
5
+ "context": {
6
+ "files": {
7
+ "src/api/client.js": "const axios = require('axios');\n\nclass ApiClient {\n constructor(baseURL) {\n this.http = axios.create({ baseURL });\n }\n\n async getUser(userId, options = {}) {\n const res = await this.http.get(`/users/${userId}`, { params: options });\n return res.data;\n }\n}\n\nmodule.exports = { ApiClient };"
8
+ }
9
+ },
10
+ "expectedIssues": [
11
+ {
12
+ "severity": "high",
13
+ "category": "breaking-change",
14
+ "description": "Breaking API change: getUser() signature changed from (userId, options) to ({ userId, fields }), return type also changed"
15
+ }
16
+ ]
17
+ }
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "hardcoded-secrets",
3
+ "description": "API keys and database credentials hardcoded in source",
4
+ "diff": "diff --git a/src/config/database.js b/src/config/database.js\nindex aabb112..ccdd334 100644\n--- a/src/config/database.js\n+++ b/src/config/database.js\n@@ -1,10 +1,14 @@\n-const dbUrl = process.env.DATABASE_URL;\n-const apiKey = process.env.STRIPE_API_KEY;\n+const dbUrl = 'postgresql://admin:s3cretPassw0rd!@prod-db.internal.company.com:5432/maindb';\n+const apiKey = 'sk_live_FAKE_EXAMPLE_KEY_NOT_REAL_1234567890';\n+const jwtSecret = 'my-super-secret-jwt-key-do-not-share';\n \n module.exports = {\n database: {\n connectionString: dbUrl,\n ssl: true,\n+ pool: { min: 2, max: 10 },\n },\n- stripe: { apiKey },\n+ stripe: { apiKey },\n+ jwt: { secret: jwtSecret, expiresIn: '7d' },\n };",
5
+ "context": {
6
+ "files": {
7
+ "src/config/database.js": "const dbUrl = process.env.DATABASE_URL;\nconst apiKey = process.env.STRIPE_API_KEY;\n\nmodule.exports = {\n database: {\n connectionString: dbUrl,\n ssl: true,\n },\n stripe: { apiKey },\n};"
8
+ }
9
+ },
10
+ "expectedIssues": [
11
+ {
12
+ "severity": "critical",
13
+ "category": "security",
14
+ "description": "Hardcoded production database credentials, Stripe live API key, and JWT secret in source code"
15
+ }
16
+ ]
17
+ }
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "memory-leak",
3
+ "description": "Database connection pool never closed, event listeners accumulate",
4
+ "diff": "diff --git a/src/services/analytics.js b/src/services/analytics.js\nindex 9a8b7c6..5d4e3f2 100644\n--- a/src/services/analytics.js\n+++ b/src/services/analytics.js\n@@ -3,18 +3,22 @@ const { Pool } = require('pg');\n class AnalyticsService {\n constructor(config) {\n this.config = config;\n+ this.pools = [];\n }\n \n async trackEvent(event) {\n- const pool = new Pool(this.config.database);\n- try {\n- const client = await pool.connect();\n- await client.query('INSERT INTO events (type, data, ts) VALUES ($1, $2, NOW())', [\n- event.type,\n- JSON.stringify(event.data),\n- ]);\n- client.release();\n- } finally {\n- await pool.end();\n- }\n+ const pool = new Pool(this.config.database);\n+ this.pools.push(pool);\n+ const client = await pool.connect();\n+ await client.query(\n+ 'INSERT INTO events (type, data, ts) VALUES ($1, $2, NOW())',\n+ [event.type, JSON.stringify(event.data)]\n+ );\n+ // client.release() removed for \"performance\"\n+ pool.on('error', (err) => {\n+ console.error('Pool error:', err);\n+ });\n+ return { success: true };\n }\n }",
5
+ "context": {
6
+ "files": {
7
+ "src/services/analytics.js": "const { Pool } = require('pg');\n\nclass AnalyticsService {\n constructor(config) {\n this.config = config;\n }\n\n async trackEvent(event) {\n const pool = new Pool(this.config.database);\n try {\n const client = await pool.connect();\n await client.query('INSERT INTO events (type, data, ts) VALUES ($1, $2, NOW())', [\n event.type, JSON.stringify(event.data)\n ]);\n client.release();\n } finally {\n await pool.end();\n }\n }\n}"
8
+ }
9
+ },
10
+ "expectedIssues": [
11
+ {
12
+ "severity": "high",
13
+ "category": "performance",
14
+ "description": "Memory leak: new Pool created per call and never closed, client never released, error listeners accumulate"
15
+ }
16
+ ]
17
+ }
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "missing-error-handling",
3
+ "description": "File operations without error handling or cleanup",
4
+ "diff": "diff --git a/src/utils/config.js b/src/utils/config.js\nindex 2a3b4c5..6d7e8f9 100644\n--- a/src/utils/config.js\n+++ b/src/utils/config.js\n@@ -5,15 +5,12 @@ const yaml = require('js-yaml');\n \n /**\n * Load and merge configuration from multiple sources\n- * @param {string[]} configPaths\n+ * @param {string} configPath\n * @returns {Object}\n */\n-function loadConfig(configPaths) {\n- const configs = configPaths.map((p) => {\n- try {\n- const raw = fs.readFileSync(p, 'utf8');\n- return yaml.load(raw);\n- } catch (err) {\n- console.warn(`Config not found: ${p}, skipping`);\n- return {};\n- }\n- });\n- return Object.assign({}, ...configs);\n+function loadConfig(configPath) {\n+ const raw = fs.readFileSync(configPath, 'utf8');\n+ const config = yaml.load(raw);\n+ const overridePath = configPath.replace('.yml', '.local.yml');\n+ const overrideRaw = fs.readFileSync(overridePath, 'utf8');\n+ const override = yaml.load(overrideRaw);\n+ return { ...config, ...override };\n }",
5
+ "context": {
6
+ "files": {
7
+ "src/utils/config.js": "const fs = require('fs');\nconst yaml = require('js-yaml');\n\nfunction loadConfig(configPaths) {\n const configs = configPaths.map((p) => {\n try {\n const raw = fs.readFileSync(p, 'utf8');\n return yaml.load(raw);\n } catch (err) {\n console.warn(`Config not found: ${p}, skipping`);\n return {};\n }\n });\n return Object.assign({}, ...configs);\n}\n\nmodule.exports = { loadConfig };"
8
+ }
9
+ },
10
+ "expectedIssues": [
11
+ {
12
+ "severity": "high",
13
+ "category": "error-handling",
14
+ "description": "No error handling for file read operations; will crash if config or override file is missing"
15
+ }
16
+ ]
17
+ }
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "null-pointer",
3
+ "description": "Null pointer access when optional nested object is missing",
4
+ "diff": "diff --git a/src/services/order.js b/src/services/order.js\nindex a1b2c3d..e4f5678 100644\n--- a/src/services/order.js\n+++ b/src/services/order.js\n@@ -24,9 +24,15 @@ class OrderService {\n * @returns {Object} formatted order summary\n */\n formatOrderSummary(order) {\n- return {\n- id: order.id,\n- total: order.total,\n- };\n+ const address = order.customer.shippingAddress;\n+ return {\n+ id: order.id,\n+ total: order.total,\n+ customerName: order.customer.name,\n+ shippingCity: address.city,\n+ shippingZip: address.zipCode,\n+ formattedAddress: `${address.street}, ${address.city}, ${address.state} ${address.zipCode}`,\n+ };\n }\n }",
5
+ "context": {
6
+ "files": {
7
+ "src/services/order.js": "class OrderService {\n constructor(db) {\n this.db = db;\n }\n\n async getOrder(id) {\n const order = await this.db.orders.findById(id);\n return order; // order.customer.shippingAddress may be null\n }\n\n formatOrderSummary(order) {\n return { id: order.id, total: order.total };\n }\n}"
8
+ }
9
+ },
10
+ "expectedIssues": [
11
+ {
12
+ "severity": "high",
13
+ "category": "bug",
14
+ "description": "Null/undefined access on order.customer.shippingAddress without null check"
15
+ }
16
+ ]
17
+ }
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "off-by-one",
3
+ "description": "Off-by-one error in pagination logic",
4
+ "diff": "diff --git a/src/utils/paginate.js b/src/utils/paginate.js\nindex 1122334..5566778 100644\n--- a/src/utils/paginate.js\n+++ b/src/utils/paginate.js\n@@ -8,10 +8,11 @@\n */\n function paginate(items, page, pageSize = 20) {\n const total = items.length;\n- const totalPages = Math.ceil(total / pageSize);\n- const start = (page - 1) * pageSize;\n+ const totalPages = Math.floor(total / pageSize);\n+ const start = page * pageSize;\n const end = start + pageSize;\n return {\n data: items.slice(start, end),\n page,\n+ pageSize,\n totalPages,\n total,\n };\n }",
5
+ "context": {
6
+ "files": {
7
+ "src/utils/paginate.js": "function paginate(items, page, pageSize = 20) {\n const total = items.length;\n const totalPages = Math.ceil(total / pageSize);\n const start = (page - 1) * pageSize;\n const end = start + pageSize;\n return {\n data: items.slice(start, end),\n page,\n totalPages,\n total,\n };\n}\n\nmodule.exports = { paginate };"
8
+ }
9
+ },
10
+ "expectedIssues": [
11
+ {
12
+ "severity": "medium",
13
+ "category": "logic",
14
+ "description": "Off-by-one: page 1 skips first pageSize items (0-indexed page with 1-indexed expectation), and Math.floor loses last partial page"
15
+ }
16
+ ]
17
+ }
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "race-condition",
3
+ "description": "TOCTOU race condition in balance check and withdrawal",
4
+ "diff": "diff --git a/src/services/wallet.js b/src/services/wallet.js\nindex 1a2b3c4..5d6e7f8 100644\n--- a/src/services/wallet.js\n+++ b/src/services/wallet.js\n@@ -10,12 +10,18 @@ class WalletService {\n }\n \n async withdraw(userId, amount) {\n- return this.db.transaction(async (trx) => {\n- const wallet = await trx('wallets').where({ user_id: userId }).forUpdate().first();\n- if (wallet.balance < amount) throw new Error('Insufficient funds');\n- await trx('wallets').where({ user_id: userId }).update({ balance: wallet.balance - amount });\n- return { newBalance: wallet.balance - amount };\n- });\n+ const wallet = await this.db('wallets').where({ user_id: userId }).first();\n+ if (!wallet) {\n+ throw new Error('Wallet not found');\n+ }\n+ if (wallet.balance < amount) {\n+ throw new Error('Insufficient funds');\n+ }\n+ const newBalance = wallet.balance - amount;\n+ await this.db('wallets')\n+ .where({ user_id: userId })\n+ .update({ balance: newBalance });\n+ return { newBalance };\n }\n }",
5
+ "context": {
6
+ "files": {
7
+ "src/services/wallet.js": "class WalletService {\n constructor(db) {\n this.db = db;\n }\n\n async withdraw(userId, amount) {\n return this.db.transaction(async (trx) => {\n const wallet = await trx('wallets').where({ user_id: userId }).forUpdate().first();\n if (wallet.balance < amount) throw new Error('Insufficient funds');\n await trx('wallets').where({ user_id: userId }).update({ balance: wallet.balance - amount });\n return { newBalance: wallet.balance - amount };\n });\n }\n}"
8
+ }
9
+ },
10
+ "expectedIssues": [
11
+ {
12
+ "severity": "critical",
13
+ "category": "concurrency",
14
+ "description": "Race condition: balance check and update are not atomic, removed transaction and FOR UPDATE lock"
15
+ }
16
+ ]
17
+ }
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "sql-injection",
3
+ "description": "SQL injection via string concatenation in user lookup query",
4
+ "diff": "diff --git a/src/routes/users.js b/src/routes/users.js\nindex 3a1b2c3..4d5e6f7 100644\n--- a/src/routes/users.js\n+++ b/src/routes/users.js\n@@ -12,8 +12,12 @@ const db = require('../db');\n \n router.get('/users/search', async (req, res) => {\n const { username } = req.query;\n- const users = await db.query('SELECT id, username, email FROM users WHERE username = $1', [username]);\n- res.json(users.rows);\n+ if (!username) {\n+ return res.status(400).json({ error: 'username is required' });\n+ }\n+ const query = `SELECT id, username, email FROM users WHERE username = '${username}'`;\n+ const users = await db.query(query);\n+ return res.json(users.rows);\n });\n \n router.get('/users/:id', async (req, res) => {",
5
+ "context": {
6
+ "files": {
7
+ "src/routes/users.js": "const express = require('express');\nconst router = express.Router();\nconst db = require('../db');\n\nrouter.get('/users/search', async (req, res) => {\n const { username } = req.query;\n const users = await db.query('SELECT id, username, email FROM users WHERE username = $1', [username]);\n res.json(users.rows);\n});"
8
+ }
9
+ },
10
+ "expectedIssues": [
11
+ {
12
+ "severity": "critical",
13
+ "category": "security",
14
+ "description": "SQL injection via string interpolation instead of parameterized query"
15
+ }
16
+ ]
17
+ }
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "unvalidated-input",
3
+ "description": "User input used directly in file path and shell command without validation",
4
+ "diff": "diff --git a/src/routes/export.js b/src/routes/export.js\nindex aabb123..ccdd456 100644\n--- a/src/routes/export.js\n+++ b/src/routes/export.js\n@@ -4,12 +4,18 @@ const { execSync } = require('child_process');\n \n router.post('/export', async (req, res) => {\n- const { format } = req.body;\n- const allowed = ['csv', 'json', 'xml'];\n- if (!allowed.includes(format)) {\n- return res.status(400).json({ error: 'Invalid format' });\n- }\n- const data = await db.reports.getAll();\n- const file = exportService.generate(data, format);\n- res.download(file);\n+ const { format, filename, startDate, endDate } = req.body;\n+ const data = await db.reports.getAll();\n+ const outputPath = `/tmp/exports/${filename}.${format}`;\n+ fs.writeFileSync(outputPath, exportService.serialize(data, format));\n+ // Compress for large exports\n+ if (req.body.compress) {\n+ execSync(`gzip ${outputPath}`);\n+ return res.download(`${outputPath}.gz`);\n+ }\n+ res.download(outputPath);\n });",
5
+ "context": {
6
+ "files": {
7
+ "src/routes/export.js": "const express = require('express');\nconst router = express.Router();\nconst db = require('../db');\nconst fs = require('fs');\nconst exportService = require('../services/export');\nconst { execSync } = require('child_process');\n\nrouter.post('/export', async (req, res) => {\n const { format } = req.body;\n const allowed = ['csv', 'json', 'xml'];\n if (!allowed.includes(format)) {\n return res.status(400).json({ error: 'Invalid format' });\n }\n const data = await db.reports.getAll();\n const file = exportService.generate(data, format);\n res.download(file);\n});\n\nmodule.exports = router;"
8
+ }
9
+ },
10
+ "expectedIssues": [
11
+ {
12
+ "severity": "critical",
13
+ "category": "security",
14
+ "description": "Unvalidated input: filename and format used in file path (path traversal) and shell command (command injection), format whitelist removed"
15
+ }
16
+ ]
17
+ }
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "xss-vulnerability",
3
+ "description": "User input rendered as raw HTML without sanitization",
4
+ "diff": "diff --git a/src/views/profile.js b/src/views/profile.js\nindex 1234abc..5678def 100644\n--- a/src/views/profile.js\n+++ b/src/views/profile.js\n@@ -6,11 +6,17 @@ const express = require('express');\n router.get('/profile/:id', async (req, res) => {\n const user = await db.users.findById(req.params.id);\n- res.render('profile', {\n- name: user.displayName,\n- bio: user.bio,\n- });\n+ const html = `\n+ <html>\n+ <body>\n+ <h1>${user.displayName}</h1>\n+ <div class=\"bio\">${user.bio}</div>\n+ <div class=\"website\"><a href=\"${user.website}\">${user.website}</a></div>\n+ <div class=\"location\">${user.location}</div>\n+ </body>\n+ </html>`;\n+ res.setHeader('Content-Type', 'text/html');\n+ res.send(html);\n });",
5
+ "context": {
6
+ "files": {
7
+ "src/views/profile.js": "const express = require('express');\nconst router = express.Router();\nconst db = require('../db');\n\nrouter.get('/profile/:id', async (req, res) => {\n const user = await db.users.findById(req.params.id);\n res.render('profile', {\n name: user.displayName,\n bio: user.bio,\n });\n});\n\nmodule.exports = router;"
8
+ }
9
+ },
10
+ "expectedIssues": [
11
+ {
12
+ "severity": "critical",
13
+ "category": "security",
14
+ "description": "XSS vulnerability: user-controlled fields (displayName, bio, website) interpolated directly into HTML without escaping"
15
+ }
16
+ ]
17
+ }
@@ -0,0 +1,184 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * Benchmark runner for qai code review accuracy.
5
+ *
6
+ * Loads curated diffs with known bugs, runs them through the selected
7
+ * provider, and scores true-positive rate, false-positive count, and latency.
8
+ *
9
+ * Usage:
10
+ * node benchmarks/run.js [--provider <name>] [--json]
11
+ */
12
+
13
+ const fs = require('fs');
14
+ const path = require('path');
15
+ const { getProvider } = require('../src/providers');
16
+
17
+ // ---------------------------------------------------------------------------
18
+ // Helpers
19
+ // ---------------------------------------------------------------------------
20
+
21
/**
 * Load every benchmark case (`*.json`) from a dataset directory.
 *
 * Files are processed in sorted filename order so that runs and reports are
 * reproducible regardless of the order the filesystem lists entries in.
 *
 * @param {string} [dir] - Directory to scan. Defaults to `dataset/` next to this script.
 * @returns {Object[]} Parsed test cases, one per `.json` file.
 * @throws {Error} If a case file cannot be read or parsed (the message names the
 *   file), or if the directory itself cannot be listed.
 */
function loadDataset(dir = path.join(__dirname, 'dataset')) {
  return fs
    .readdirSync(dir)
    .filter((file) => file.endsWith('.json'))
    .sort()
    .map((file) => {
      const fullPath = path.join(dir, file);
      try {
        return JSON.parse(fs.readFileSync(fullPath, 'utf8'));
      } catch (err) {
        // Name the offending file; a bare JSON.parse error does not say which one failed.
        throw new Error(`Failed to load benchmark case ${fullPath}: ${err.message}`, {
          cause: err,
        });
      }
    });
}
28
+
29
/**
 * Score one review against the issues a benchmark case expects.
 *
 * @param {Object} review - Provider result; only `review.issues` (an array) is read.
 * @param {Object[]} expected - Expected issues (`severity`, `category`, `description`).
 *   A missing or non-array value is treated as "nothing expected".
 * @returns {{detected: boolean, falsePositives: number}}
 *   `detected` is true when every expected issue is matched by at least one
 *   reported issue. `falsePositives` is the number of reported issues that
 *   match none of the expected issues. A review without an `issues` array
 *   scores `{ detected: false, falsePositives: 0 }`.
 */
function scoreResult(review, expected) {
  if (!review || !Array.isArray(review.issues)) {
    return { detected: false, falsePositives: 0 };
  }

  const expectedIssues = Array.isArray(expected) ? expected : [];
  const reported = review.issues;

  const detected = expectedIssues.every((exp) =>
    reported.some((issue) => issueMatches(issue, exp)),
  );

  // Count reported issues that correspond to no expected issue. Subtracting
  // expected.length from the total would report 0 for a review that raised one
  // unrelated issue and missed the real bug.
  const falsePositives = reported.filter(
    (issue) => !expectedIssues.some((exp) => issueMatches(issue, exp)),
  ).length;

  return { detected, falsePositives };
}

/**
 * Decide whether a reported issue corresponds to an expected issue.
 * A description match alone is sufficient; otherwise both category and
 * severity must match.
 */
function issueMatches(issue, exp) {
  // Tolerate malformed entries (null, strings, ...) coming back from a provider.
  const reported = issue && typeof issue === 'object' ? issue : {};
  const wanted = exp && typeof exp === 'object' ? exp : {};

  const descMatch = matchDescription(
    reported.description || reported.message || '',
    wanted.description,
  );
  // The metadata fallback only applies when the expectation specifies at least
  // one of category/severity; otherwise it would match every reported issue.
  const hasMetadata = Boolean(wanted.category || wanted.severity);
  const catMatch = !wanted.category || containsIgnoreCase(reported.category, wanted.category);
  const sevMatch = !wanted.severity || containsIgnoreCase(reported.severity, wanted.severity);

  return descMatch || (hasMetadata && catMatch && sevMatch);
}

/** Case-insensitive substring test that tolerates missing or non-string values. */
function containsIgnoreCase(value, needle) {
  return String(value || '')
    .toLowerCase()
    .includes(String(needle).toLowerCase());
}

/**
 * Fuzzy description match: true when at least 40% of the key terms of the
 * expected description occur in the actual description.
 *
 * Key terms are the distinct words longer than three characters, with
 * surrounding punctuation removed ("off-by-one:" -> "off-by-one"). When the
 * expected description yields no key terms the result is false, so that an
 * empty or very short expectation does not match everything.
 *
 * @param {string} actual - Description reported by the reviewer.
 * @param {string} expected - Description from the benchmark case.
 * @returns {boolean}
 */
function matchDescription(actual, expected) {
  const keywords = [
    ...new Set(
      String(expected || '')
        .toLowerCase()
        .split(/[\s,/]+/)
        .map((word) => word.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ''))
        .filter((word) => word.length > 3),
    ),
  ];
  if (keywords.length === 0) {
    return false;
  }

  const normalised = String(actual || '').toLowerCase();
  const hits = keywords.filter((kw) => normalised.includes(kw));
  return hits.length >= Math.ceil(keywords.length * 0.4);
}
66
+
67
+ // ---------------------------------------------------------------------------
68
+ // Main
69
+ // ---------------------------------------------------------------------------
70
+
71
/**
 * CLI entry point: run every benchmark case through one provider, print a
 * summary, and write a JSON report to `results/` next to this script.
 *
 * Flags (read from `process.argv`):
 *   --provider <name>  Provider passed to `getProvider`; omitted means its default.
 *   --json             Print the report as JSON on stdout. Progress and summary
 *                      text then go to stderr so stdout stays parseable.
 *
 * Exits with status 1 when `--provider` has no value or the provider cannot be
 * initialised.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  const jsonOutput = args.includes('--json');

  // Human-readable output must not be interleaved with the JSON document.
  const progress = jsonOutput ? process.stderr : process.stdout;
  const log = (line = '') => progress.write(`${line}\n`);

  const providerIdx = args.indexOf('--provider');
  let providerName;
  if (providerIdx !== -1) {
    providerName = args[providerIdx + 1];
    // Reject "--provider" at the end of the line or followed by another flag,
    // instead of silently falling back to the default provider.
    if (!providerName || providerName.startsWith('--')) {
      console.error('--provider requires a value (e.g. --provider anthropic)');
      process.exit(1);
    }
  }

  const dataset = loadDataset();
  log(`\nLoaded ${dataset.length} benchmark cases\n`);

  let provider;
  try {
    provider = getProvider(providerName);
  } catch (err) {
    console.error(
      `Failed to initialise provider${providerName ? ` "${providerName}"` : ''}: ${err.message}`,
    );
    console.error('Set an API key (e.g. ANTHROPIC_API_KEY) or specify --provider <name>');
    process.exit(1);
  }

  const providerLabel = provider.constructor.name || 'unknown';
  log(`Provider: ${providerLabel}\n`);

  const results = [];
  let detected = 0;
  let totalFP = 0;
  let totalMs = 0;

  // Cases run sequentially so that per-case latency is not skewed by concurrency.
  for (const testCase of dataset) {
    const label = testCase.name.padEnd(25);
    progress.write(`  ${label} … `);

    const start = Date.now();
    let review;
    let failed = false;
    let error = null;
    try {
      review = await provider.reviewCode(testCase.diff, testCase.context || {}, {
        focus: 'all',
      });
    } catch (err) {
      // Track failure separately from the message: a thrown non-Error value or
      // an Error with an empty message must still count as a failure.
      failed = true;
      error = err instanceof Error ? err.message : String(err);
    }
    const elapsed = Date.now() - start;
    totalMs += elapsed;

    if (failed) {
      log(`ERROR (${elapsed}ms) — ${error}`);
      results.push({
        name: testCase.name,
        detected: false,
        falsePositives: 0,
        elapsed,
        error,
      });
      continue;
    }

    const score = scoreResult(review, testCase.expectedIssues);
    if (score.detected) detected++;
    totalFP += score.falsePositives;

    // The provider may return nothing or a result without an issues array.
    const issuesFound = review && Array.isArray(review.issues) ? review.issues.length : 0;

    const icon = score.detected ? '✅' : '❌';
    log(`${icon} ${elapsed}ms (FP: ${score.falsePositives}, issues: ${issuesFound})`);

    results.push({
      name: testCase.name,
      detected: score.detected,
      falsePositives: score.falsePositives,
      issuesFound,
      elapsed,
    });
  }

  // Summary (guard against an empty dataset, which would otherwise yield NaN)
  const total = dataset.length;
  const truePositiveRate = total > 0 ? Number(((detected / total) * 100).toFixed(1)) : 0;
  const avgTimeMs = total > 0 ? Math.round(totalMs / total) : 0;

  log(`\n${'═'.repeat(60)}`);
  log(`  True-positive rate   : ${detected}/${total} (${truePositiveRate.toFixed(1)}%)`);
  log(`  Total false positives: ${totalFP}`);
  log(`  Avg time per review  : ${avgTimeMs}ms (total ${(totalMs / 1000).toFixed(1)}s)`);
  log(`${'═'.repeat(60)}\n`);

  const report = {
    provider: providerLabel,
    timestamp: new Date().toISOString(),
    cases: results,
    summary: {
      total,
      detected,
      truePositiveRate,
      totalFalsePositives: totalFP,
      avgTimeMs,
    },
  };

  if (jsonOutput) {
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
  }

  // Always write report to disk
  const outDir = path.join(__dirname, 'results');
  fs.mkdirSync(outDir, { recursive: true });
  const outFile = path.join(outDir, `report-${providerLabel.toLowerCase()}-${Date.now()}.json`);
  fs.writeFileSync(outFile, JSON.stringify(report, null, 2));
  log(`Report saved to ${outFile}\n`);
}

// Any failure not handled inside main() is reported and turned into exit status 1.
main().catch((err) => {
  console.error('Benchmark failed:', err);
  process.exit(1);
});
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "qai-cli",
3
- "version": "3.0.1",
3
+ "version": "3.2.0",
4
4
  "description": "AI-powered QA engineer. Code review, testing, and bug detection from your terminal.",
5
5
  "main": "src/analyze.js",
6
6
  "types": "src/types.d.ts",
@@ -0,0 +1,495 @@
1
+ /**
2
+ * Test Generation Engine
3
+ *
4
+ * Two modes:
5
+ * 1. URL crawl: Crawl up to 10 same-origin pages, collect their interactive elements, generate Playwright E2E specs
6
+ * 2. Code analysis: Read source files, generate unit/integration tests
7
+ *
8
+ * Usage:
9
+ * qai generate https://mysite.com # E2E tests from URL
10
+ * qai generate src/billing.ts # Unit tests from source
11
+ * qai generate src/ --pattern "*.service*" # Batch generate
12
+ */
13
+
14
+ const fs = require('fs');
15
+ const path = require('path');
16
+ const { getProvider } = require('./providers');
17
+
18
/**
 * Generate tests from a URL (E2E) or source file (unit).
 *
 * The target decides the mode: anything starting with an http(s) scheme is
 * crawled and turned into E2E specs, everything else is treated as a file or
 * directory path and turned into unit tests.
 *
 * @param {Object} options
 * @param {string} options.target - URL or file/directory path
 * @param {string} [options.outDir] - Output directory for generated tests (default: ./tests/generated)
 * @param {string} [options.framework] - Test framework (playwright, jest, vitest; default: playwright)
 * @param {string} [options.pattern] - Glob pattern for batch file mode
 * @param {string} [options.baseUrl] - Base URL for generated E2E tests
 * @param {boolean} [options.dryRun] - Print tests to stdout instead of writing files
 * @returns {Promise<GenerateResult>}
 * @throws {Error} When no usable target string is supplied
 */
async function generateTests(options = {}) {
  const {
    target,
    outDir = './tests/generated',
    framework = 'playwright',
    dryRun = false,
  } = options;

  // Reject non-string targets up front so callers get the documented error
  // rather than a TypeError from a string method further down.
  if (typeof target !== 'string' || target.trim() === '') {
    throw new Error('Target is required (URL or file path)');
  }

  // URL schemes are case-insensitive, so "HTTPS://site" is still a URL.
  const isUrl = /^https?:\/\//i.test(target);

  // NOTE(review): the 'playwright' default also applies to unit mode, where
  // the prompt builder would otherwise fall back to Jest - confirm intended.
  const resolved = { ...options, outDir, framework, dryRun };

  if (isUrl) {
    return generateE2ETests({ ...resolved, url: target });
  }
  return generateUnitTests({ ...resolved, filePath: target });
}
51
+
52
/**
 * E2E mode: crawl the given URL, ask the configured AI provider for test
 * files, then either print them (dry run) or write them to `outDir`.
 *
 * @param {Object} options - Resolved options (`url`, `outDir`, `framework`, `dryRun`)
 * @returns {Promise<Object>} Summary with mode, url, page count, file count and file names
 */
async function generateE2ETests(options) {
  const { url, outDir, framework, dryRun } = options;

  // Step 1: discover pages and interactive elements.
  console.log('[1/3] Crawling site...');
  const siteData = await crawlSite(url);
  const pageCount = siteData.pages.length;
  const interactionCount = siteData.interactions.length;
  console.log(` Found ${pageCount} pages, ${interactionCount} interactive elements`);

  // Step 2: hand the crawl data to the model.
  console.log('[2/3] Generating tests with AI...');
  const provider = getProvider();
  const response = await provider.generateTests(buildE2EPrompt(siteData, framework));

  // Step 3: emit the generated files.
  console.log('[3/3] Writing test files...');
  const generated = parseGeneratedFiles(response);

  if (!dryRun) {
    writeTestFiles(generated, outDir);
  } else {
    generated.forEach((entry) => {
      console.log(`\n--- ${entry.name} ---`);
      console.log(entry.content);
    });
  }

  const fileNames = generated.map((entry) => entry.name);
  return {
    mode: 'e2e',
    url,
    pagesFound: siteData.pages.length,
    testsGenerated: generated.length,
    files: fileNames,
    outDir,
  };
}
91
+
92
/**
 * Unit mode: read the source file(s) at `filePath`, request tests for each
 * one from the AI provider (sequentially), then print or write the results.
 *
 * @param {Object} options - Resolved options (`filePath`, `outDir`, `framework`, `dryRun`, `pattern`)
 * @returns {Promise<Object>} Summary with mode, source count, file count and file names
 * @throws {Error} When no source files are found at `filePath`
 */
async function generateUnitTests(options) {
  const { filePath, outDir, framework, dryRun, pattern } = options;

  console.log('[1/3] Reading source files...');
  const sources = readSourceFiles(filePath, pattern);
  console.log(` Found ${sources.length} source files`);

  if (sources.length === 0) {
    throw new Error(`No source files found at: ${filePath}`);
  }

  // One provider round-trip per source file; results are accumulated in order.
  const generated = [];
  for (const source of sources) {
    console.log(`[2/3] Generating tests for ${source.relativePath}...`);
    const provider = getProvider();
    const response = await provider.generateTests(buildUnitTestPrompt(source, framework));
    for (const entry of parseGeneratedFiles(response)) {
      generated.push(entry);
    }
  }

  console.log('[3/3] Writing test files...');
  if (!dryRun) {
    writeTestFiles(generated, outDir);
  } else {
    generated.forEach((entry) => {
      console.log(`\n--- ${entry.name} ---`);
      console.log(entry.content);
    });
  }

  const fileNames = generated.map((entry) => entry.name);
  return {
    mode: 'unit',
    sourcesAnalyzed: sources.length,
    testsGenerated: generated.length,
    files: fileNames,
    outDir,
  };
}
136
+
137
/**
 * Crawl a site and gather page data using Playwright.
 *
 * Starting from `url`, visits up to 10 URLs breadth-first, following only
 * links whose origin matches the start URL. For every page that loads, the
 * title and a list of buttons, links, forms and navigation links are recorded.
 * Pages that fail to load are skipped with a warning.
 *
 * @param {string} url - Absolute start URL
 * @returns {Promise<{url: string, pages: Object[], interactions: Object[]}>}
 * @throws {Error} When Playwright is not installed
 * @throws {TypeError} When `url` is not a valid absolute URL
 */
async function crawlSite(url) {
  let playwright;
  try {
    playwright = require('playwright');
  } catch {
    throw new Error(
      'Playwright is required for E2E test generation. Install it: npm install playwright',
    );
  }

  // Validate the URL before launching anything so a bad URL cannot leave a
  // browser process behind.
  const baseUrl = new URL(url);

  const pages = [];
  const interactions = [];
  const visited = new Set();
  const toVisit = [url];

  const browser = await playwright.chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    // Crawl up to 10 pages
    while (toVisit.length > 0 && visited.size < 10) {
      const currentUrl = toVisit.shift();
      if (visited.has(currentUrl)) continue;
      visited.add(currentUrl);

      try {
        await page.goto(currentUrl, { waitUntil: 'networkidle', timeout: 15000 });
        await page.waitForTimeout(1000);

        const title = await page.title();
        const pageUrl = page.url();

        // Gather interactive elements. The callback runs inside the browser
        // page, so it must be self-contained (no closure over Node variables).
        // NOTE(review): selector values are interpolated without escaping, so
        // ids/labels/text containing quotes may yield invalid selectors.
        /* eslint-disable no-undef */
        const elements = await page.evaluate(() => {
          const result = [];

          // Buttons
          document.querySelectorAll('button, [role="button"]').forEach((el) => {
            result.push({
              type: 'button',
              text: el.textContent.trim().slice(0, 100),
              selector: getSelector(el),
            });
          });

          // Links
          document.querySelectorAll('a[href]').forEach((el) => {
            result.push({
              type: 'link',
              text: el.textContent.trim().slice(0, 100),
              href: el.href,
              selector: getSelector(el),
            });
          });

          // Forms
          document.querySelectorAll('form').forEach((form) => {
            const inputs = [];
            form.querySelectorAll('input, textarea, select').forEach((input) => {
              inputs.push({
                type: input.type || input.tagName.toLowerCase(),
                name: input.name,
                placeholder: input.placeholder,
                required: input.required,
                selector: getSelector(input),
              });
            });
            result.push({
              type: 'form',
              action: form.action,
              method: form.method,
              inputs,
              selector: getSelector(form),
            });
          });

          // Navigation elements
          document.querySelectorAll('nav a, [role="navigation"] a').forEach((el) => {
            result.push({
              type: 'nav-link',
              text: el.textContent.trim().slice(0, 100),
              href: el.href,
              selector: getSelector(el),
            });
          });

          // Preference order: id, data-testid, aria-label, then tag + text.
          function getSelector(el) {
            if (el.id) return `#${el.id}`;
            if (el.getAttribute('data-testid')) {
              return `[data-testid="${el.getAttribute('data-testid')}"]`;
            }
            if (el.getAttribute('aria-label')) {
              return `[aria-label="${el.getAttribute('aria-label')}"]`;
            }
            const text = el.textContent.trim().slice(0, 30);
            if (text && el.tagName) {
              return `${el.tagName.toLowerCase()}:has-text("${text}")`;
            }
            return null;
          }

          return result;
        });
        /* eslint-enable no-undef */

        pages.push({ url: pageUrl, title, elementCount: elements.length });
        interactions.push(...elements.map((e) => ({ ...e, page: pageUrl })));

        // Find same-origin links to crawl
        const links = elements
          .filter((e) => e.type === 'link' || e.type === 'nav-link')
          .filter((e) => {
            try {
              const linkUrl = new URL(e.href);
              return linkUrl.origin === baseUrl.origin;
            } catch {
              return false;
            }
          })
          .map((e) => e.href);

        for (const link of links) {
          if (!visited.has(link)) {
            toVisit.push(link);
          }
        }
      } catch (err) {
        // Best effort: skip pages that fail to load, but say so instead of
        // silently reporting fewer pages.
        console.warn(` Skipped ${currentUrl}: ${err && err.message ? err.message : err}`);
      }
    }
  } finally {
    // Always release the browser, even if context/page creation throws.
    await browser.close();
  }

  return { url, pages, interactions };
}
277
+
278
/**
 * Read source file(s) for unit test generation.
 *
 * - A file path yields exactly that file (no filtering is applied).
 * - A directory is walked recursively, keeping .js/.ts/.jsx/.tsx/.mjs files
 *   that are not test/spec files, are between 50 and 30000 characters long
 *   and (if given) match `pattern`. At most 10 files are returned.
 * - A path that does not exist yields an empty array.
 *
 * @param {string} filePath - File or directory path
 * @param {string} [pattern] - File-name filter; `*` is a wildcard, every other
 *   character is matched literally. The match is unanchored (substring match).
 * @returns {Array<{relativePath: string, content: string, ext: string}>}
 */
function readSourceFiles(filePath, pattern) {
  const MAX_FILES = 10;
  const sources = [];
  const absPath = path.resolve(filePath);

  if (!fs.existsSync(absPath)) return sources;
  const stats = fs.statSync(absPath);

  if (stats.isFile()) {
    // Single file
    sources.push({
      relativePath: filePath,
      content: fs.readFileSync(absPath, 'utf-8'),
      ext: path.extname(filePath),
    });
    return sources;
  }

  if (!stats.isDirectory()) return sources;

  // Directory - find source files
  const exts = ['.js', '.ts', '.jsx', '.tsx', '.mjs'];
  const skipDirs = ['node_modules', '.next', 'dist', '.git', '__tests__', 'test', 'tests'];

  // Build the name filter once. Regex metacharacters are escaped so that
  // "*.service*" only matches names containing a literal ".service".
  const escapeRegExp = (text) => text.replace(/[.+?^${}()|[\]\\]/g, '\\$&');
  const nameFilter = pattern
    ? new RegExp(String(pattern).split('*').map(escapeRegExp).join('.*'))
    : null;

  const walk = (dir) => {
    const entries = fs.readdirSync(dir, { withFileTypes: true });
    for (const entry of entries) {
      // Stop as soon as the cap is reached instead of reading files that
      // would be discarded anyway.
      if (sources.length >= MAX_FILES) return;

      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (!skipDirs.includes(entry.name)) walk(fullPath);
      } else if (entry.isFile()) {
        const ext = path.extname(entry.name);
        if (!exts.includes(ext)) continue;
        // Skip test files
        if (entry.name.includes('.test.') || entry.name.includes('.spec.')) continue;
        // Apply pattern filter if specified
        if (nameFilter && !nameFilter.test(entry.name)) continue;

        const content = fs.readFileSync(fullPath, 'utf-8');
        // Skip very large or very small files
        if (content.length > 30000 || content.length < 50) continue;

        sources.push({
          relativePath: path.relative(process.cwd(), fullPath),
          content,
          ext,
        });
      }
    }
  };

  walk(absPath);
  return sources;
}
331
+
332
/**
 * Assemble the LLM prompt for E2E test generation from crawl results.
 *
 * Only the first 100 interactive elements are embedded. An unknown framework
 * falls back to the Playwright guide.
 *
 * @param {{url: string, pages: Object[], interactions: Object[]}} siteData - Crawl output
 * @param {string} framework - Key into E2E_FRAMEWORKS
 * @returns {string} Prompt text
 */
function buildE2EPrompt(siteData, framework) {
  const frameworkGuide = E2E_FRAMEWORKS[framework] || E2E_FRAMEWORKS.playwright;

  const pageLines = siteData.pages
    .map((p) => `- ${p.url} (${p.title}) - ${p.elementCount} elements`)
    .join('\n');
  const elementsJson = JSON.stringify(siteData.interactions.slice(0, 100), null, 2);

  return `You are a senior QA automation engineer. Generate comprehensive E2E tests for this website.

## Site Information
- URL: ${siteData.url}
- Pages found: ${siteData.pages.length}

## Pages
${pageLines}

## Interactive Elements
${elementsJson}

## Framework
${frameworkGuide}

## Instructions
Generate test files that cover:
1. Page navigation (all discovered pages load correctly)
2. Interactive elements (buttons click, forms submit)
3. Navigation flow (links work, nav elements route correctly)
4. Form validation (required fields, error states)
5. Responsive behavior (test at mobile and desktop viewports)

Output format - return ONLY a JSON array of files:
[
{
"name": "homepage.spec.ts",
"content": "// full test file content here"
}
]

Write real, runnable tests. Use descriptive test names. Add meaningful assertions.
Do NOT generate placeholder or skeleton tests.
Respond with ONLY the JSON array, no markdown code blocks.`;
}
373
+
374
/**
 * Assemble the LLM prompt for unit test generation for one source file.
 *
 * The source is embedded in a fenced block tagged with its extension, and the
 * expected output file name is derived from the source path. An unknown
 * framework falls back to the Jest guide.
 *
 * @param {{relativePath: string, content: string, ext: string}} source - Source file record
 * @param {string} framework - Key into UNIT_FRAMEWORKS
 * @returns {string} Prompt text
 */
function buildUnitTestPrompt(source, framework) {
  const frameworkGuide = UNIT_FRAMEWORKS[framework] || UNIT_FRAMEWORKS.jest;

  // Fence language tag: the extension without its leading dot.
  const fenceLanguage = source.ext.replace('.', '');
  const suggestedName = getTestFileName(source.relativePath, framework);

  return `You are a senior QA automation engineer. Generate comprehensive unit tests for this source file.

## Source File: ${source.relativePath}
\`\`\`${fenceLanguage}
${source.content}
\`\`\`

## Framework
${frameworkGuide}

## Instructions
Generate thorough unit tests that cover:
1. All exported functions/classes
2. Happy path for each function
3. Edge cases (null, undefined, empty, boundary values)
4. Error cases (invalid input, thrown errors)
5. Any async behavior (resolved/rejected promises)

Output format - return ONLY a JSON array of files:
[
{
"name": "${suggestedName}",
"content": "// full test file content here"
}
]

Write real, runnable tests with meaningful assertions.
Mock external dependencies where appropriate.
Do NOT generate placeholder or skeleton tests.
Respond with ONLY the JSON array, no markdown code blocks.`;
}
411
+
412
/**
 * Parse an LLM response into file objects.
 *
 * Accepts a raw string or an object with a `raw` string. A single surrounding
 * markdown code fence (with or without a language tag) is removed before
 * parsing. Recognised JSON shapes:
 *   - an array of files            -> returned as-is
 *   - an object with a `files` key -> its `files` value
 *   - any other object             -> wrapped in a one-element array
 * Anything else (invalid JSON, JSON scalars, missing response) falls back to a
 * single `generated.spec.ts` file holding the raw response text.
 *
 * @param {string|{raw?: string}|null|undefined} response - Provider response
 * @returns {Array<Object>} Parsed file entries
 */
function parseGeneratedFiles(response) {
  const raw = typeof response === 'string' ? response : (response && response.raw) || '';

  try {
    let text = String(raw).trim();

    // Strip only the outer fence; fences inside file content must survive.
    if (text.startsWith('```')) {
      text = text.replace(/^```[\w-]*\s*/, '').replace(/\s*```$/, '');
    }

    const parsed = JSON.parse(text);
    if (Array.isArray(parsed)) return parsed;
    if (parsed !== null && typeof parsed === 'object') {
      if (parsed.files) return parsed.files;
      return [parsed];
    }
    // JSON scalars (numbers, strings, null) are not usable file entries.
    throw new SyntaxError('Response JSON is not an array or object');
  } catch {
    // If we can't parse JSON, treat entire response as a single test file
    return [{ name: 'generated.spec.ts', content: raw }];
  }
}
434
+
435
/**
 * Write test files to disk.
 *
 * File names come from model output, so every name is validated before
 * anything is written: it must be a non-empty string and must resolve to a
 * path inside `outDir`. Names containing sub-directories are supported; the
 * missing directories are created.
 *
 * @param {Array<{name: string, content: string}>} files - Files to write
 * @param {string} outDir - Output directory (created if missing)
 * @throws {Error} When a file has no usable name or would land outside `outDir`
 */
function writeTestFiles(files, outDir) {
  const absOutDir = path.resolve(outDir);

  // Validate everything first so a bad entry cannot leave a partial write.
  const planned = files.map((file) => {
    if (!file || typeof file.name !== 'string' || file.name.trim() === '') {
      throw new Error('Generated file is missing a name');
    }

    const filePath = path.join(absOutDir, file.name);
    const relative = path.relative(absOutDir, filePath);
    const escapesOutDir =
      relative === '' ||
      relative === '..' ||
      relative.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relative);
    if (escapesOutDir) {
      throw new Error(`Refusing to write outside output directory: ${file.name}`);
    }

    return { filePath, content: file.content };
  });

  fs.mkdirSync(absOutDir, { recursive: true });

  for (const { filePath, content } of planned) {
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    fs.writeFileSync(filePath, content);
    console.log(` Written: ${filePath}`);
  }
}
450
+
451
/**
 * Derive the test file name for a source file.
 *
 * TypeScript sources (.ts/.tsx) map to `<base>.test.ts`; every other
 * extension maps to `<base>.test.js`. Only the base name is kept, the
 * directory part of the path is dropped.
 *
 * @param {string} sourcePath - Path of the source file
 * @param {string} [_framework] - Currently unused
 * @returns {string} Test file name
 */
function getTestFileName(sourcePath, _framework) {
  const typescriptExts = new Set(['.ts', '.tsx']);
  const sourceExt = path.extname(sourcePath);
  const stem = path.basename(sourcePath, sourceExt);

  if (typescriptExts.has(sourceExt)) {
    return `${stem}.test.ts`;
  }
  return `${stem}.test.js`;
}
460
+
461
// Framework guidance embedded into E2E prompts, keyed by framework name.
const E2E_FRAMEWORKS = {
  playwright: [
    'Use @playwright/test:',
    "- import { test, expect } from '@playwright/test'",
    '- Use page.goto(), page.click(), page.fill(), page.locator()',
    '- Use expect(page).toHaveTitle(), expect(locator).toBeVisible()',
    '- Use test.describe() for grouping',
    '- Use page.setViewportSize() for responsive tests',
  ].join('\n'),
};
469
+
470
// Framework guidance embedded into unit-test prompts, keyed by framework name.
const UNIT_FRAMEWORKS = {
  jest: [
    'Use Jest:',
    '- describe/it/expect syntax',
    '- jest.fn() for mocks',
    '- beforeEach/afterEach for setup',
    '- Use .toEqual, .toBe, .toThrow, .toBeNull etc.',
  ].join('\n'),
  vitest: [
    'Use Vitest:',
    "- import { describe, it, expect, vi } from 'vitest'",
    '- vi.fn() for mocks',
    '- Same assertion API as Jest',
  ].join('\n'),
  playwright: [
    'Use @playwright/test:',
    "- import { test, expect } from '@playwright/test'",
    '- Same assertion API but for component/integration tests',
  ].join('\n'),
};
484
+
485
+ module.exports = {
486
+ generateTests,
487
+ generateE2ETests,
488
+ generateUnitTests,
489
+ crawlSite,
490
+ readSourceFiles,
491
+ buildE2EPrompt,
492
+ buildUnitTestPrompt,
493
+ parseGeneratedFiles,
494
+ writeTestFiles,
495
+ };
package/src/index.js CHANGED
@@ -4,6 +4,7 @@ const fs = require('fs').promises;
4
4
  const { capturePage } = require('./capture');
5
5
  const { getProvider } = require('./providers');
6
6
  const { reviewPR, formatReviewMarkdown } = require('./review');
7
+ const { generateTests } = require('./generate');
7
8
 
8
9
  // Route to the right command
9
10
  const command = process.argv[2];
@@ -13,6 +14,11 @@ if (command === 'review') {
13
14
  console.error('\nError:', err.message);
14
15
  process.exit(1);
15
16
  });
17
+ } else if (command === 'generate') {
18
+ runGenerate().catch((err) => {
19
+ console.error('\nError:', err.message);
20
+ process.exit(1);
21
+ });
16
22
  } else {
17
23
  main();
18
24
  }
@@ -86,6 +92,65 @@ async function runReview() {
86
92
  }
87
93
  }
88
94
 
95
/**
 * Run test generation command
 * Usage: qai generate <url|file> [--out dir] [--framework playwright|jest|vitest]
 *                     [--pattern glob] [--dry-run] [--json]
 *
 * Parses the CLI arguments after the command name, prints a banner, runs
 * generateTests() and prints either the raw JSON result (--json) or a
 * human-readable summary. Exits with code 1 when no target is given or when a
 * flag that needs a value is not followed by one. Unknown flags are ignored.
 */
async function runGenerate() {
  const args = process.argv.slice(3);
  const options = {};

  const usage =
    'Usage: qai generate <url|file> [--out dir] [--framework playwright|jest|vitest] [--pattern glob] [--dry-run] [--json]';

  // Flags that consume the following argument, mapped to their option key.
  const valueFlags = new Map([
    ['--out', 'outDir'],
    ['--framework', 'framework'],
    ['--pattern', 'pattern'],
  ]);

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (valueFlags.has(arg)) {
      const value = args[i + 1];
      // A following flag is not a value: "--out --dry-run" must not write
      // into a directory literally named "--dry-run".
      if (!value || value.startsWith('--')) {
        console.error(`Missing value for ${arg}`);
        console.error(usage);
        process.exit(1);
      }
      options[valueFlags.get(arg)] = value;
      i++;
    } else if (arg === '--dry-run') {
      options.dryRun = true;
    } else if (arg === '--json') {
      options.json = true;
    } else if (!arg.startsWith('--')) {
      options.target = arg;
    }
  }

  if (!options.target) {
    console.error(usage);
    process.exit(1);
  }

  // NOTE(review): the banner and the progress lines from generateTests() go
  // to stdout, so --json output is not a pure JSON document - confirm whether
  // consumers expect that.
  console.log('='.repeat(60));
  console.log('qai generate');
  console.log('='.repeat(60));
  console.log(`Target: ${options.target}`);
  console.log(`Framework: ${options.framework || 'auto'}`);
  console.log(
    `Output: ${options.dryRun ? 'stdout (dry run)' : options.outDir || './tests/generated'}`,
  );
  console.log('='.repeat(60));

  const startTime = Date.now();
  const result = await generateTests(options);
  const duration = ((Date.now() - startTime) / 1000).toFixed(1);

  if (options.json) {
    console.log(JSON.stringify(result, null, 2));
  } else {
    console.log('\n' + '='.repeat(60));
    console.log('Generation Summary');
    console.log('='.repeat(60));
    console.log(`Mode: ${result.mode}`);
    console.log(`Tests generated: ${result.testsGenerated}`);
    console.log(`Files: ${result.files.join(', ')}`);
    console.log(`Duration: ${duration}s`);
    console.log('='.repeat(60));
  }
}
153
+
89
154
  async function main() {
90
155
  const startTime = Date.now();
91
156
 
@@ -55,6 +55,19 @@ class AnthropicProvider extends BaseProvider {
55
55
  return this.parseResponse(responseText);
56
56
  }
57
57
 
58
+ async generateTests(prompt) {
59
+ const response = await this.client.messages.create({
60
+ model: this.model,
61
+ max_tokens: 8192,
62
+ messages: [{ role: 'user', content: prompt }],
63
+ });
64
+
65
+ return response.content
66
+ .filter((block) => block.type === 'text')
67
+ .map((block) => block.text)
68
+ .join('\n');
69
+ }
70
+
58
71
  async reviewCode(diff, context, options = {}) {
59
72
  const prompt = this.buildReviewPrompt(diff, context, options);
60
73
 
@@ -30,6 +30,16 @@ class BaseProvider {
30
30
  throw new Error('reviewCode() must be implemented by subclass');
31
31
  }
32
32
 
33
+ /**
34
+ * Generate tests from a prompt
35
+ * @param {string} prompt - The generation prompt
36
+ * @returns {Promise<string>} Raw LLM response (JSON array of files)
37
+ */
38
+ // eslint-disable-next-line no-unused-vars
39
+ async generateTests(prompt) {
40
+ throw new Error('generateTests() must be implemented by subclass');
41
+ }
42
+
33
43
  /**
34
44
  * Build the analysis prompt with focus-specific guidance
35
45
  */
@@ -37,6 +37,13 @@ class GeminiProvider extends BaseProvider {
37
37
 
38
38
  return this.parseResponse(responseText);
39
39
  }
40
+ async generateTests(prompt) {
41
+ const model = this.genAI.getGenerativeModel({ model: this.model });
42
+ const result = await model.generateContent([{ text: prompt }]);
43
+ const response = await result.response;
44
+ return response.text();
45
+ }
46
+
40
47
  async reviewCode(diff, context, options = {}) {
41
48
  const prompt = this.buildReviewPrompt(diff, context, options);
42
49
  const model = this.genAI.getGenerativeModel({ model: this.model });
@@ -44,6 +44,26 @@ class OllamaProvider extends BaseProvider {
44
44
  const data = await response.json();
45
45
  return this.parseResponse(data.response || '');
46
46
  }
47
+ async generateTests(prompt) {
48
+ const response = await fetch(`${this.baseUrl}/api/generate`, {
49
+ method: 'POST',
50
+ headers: { 'Content-Type': 'application/json' },
51
+ body: JSON.stringify({
52
+ model: this.model,
53
+ prompt,
54
+ stream: false,
55
+ options: { temperature: 0.1 },
56
+ }),
57
+ });
58
+
59
+ if (!response.ok) {
60
+ throw new Error(`Ollama request failed: ${response.status} ${response.statusText}`);
61
+ }
62
+
63
+ const data = await response.json();
64
+ return data.response || '';
65
+ }
66
+
47
67
  async reviewCode(diff, context, options = {}) {
48
68
  const prompt = this.buildReviewPrompt(diff, context, options);
49
69
 
@@ -49,6 +49,16 @@ class OpenAIProvider extends BaseProvider {
49
49
  const responseText = response.choices[0]?.message?.content || '';
50
50
  return this.parseResponse(responseText);
51
51
  }
52
+ async generateTests(prompt) {
53
+ const response = await this.client.chat.completions.create({
54
+ model: this.model,
55
+ max_tokens: 8192,
56
+ messages: [{ role: 'user', content: prompt }],
57
+ });
58
+
59
+ return response.choices[0]?.message?.content || '';
60
+ }
61
+
52
62
  async reviewCode(diff, context, options = {}) {
53
63
  const prompt = this.buildReviewPrompt(diff, context, options);
54
64