great-cto 2.31.0 → 2.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,213 +0,0 @@
1
- /**
2
- * Loads YAML rule files from rules/*.yaml.
3
- *
4
- * We avoid a YAML dependency by parsing the simple subset we use ourselves —
5
- * each rule file is a list of dash-prefixed entries with key/value lines.
6
- * If we ever need real YAML (anchors, complex nesting), we'll add `yaml` as
7
- * a dep then.
8
- *
9
- * User-defined rules are loaded from ~/.great_cto/guardrails.yml and merged
10
- * with the built-in rules. User rules use the same YAML format but include
11
- * an optional `action: block | audit | redact` field.
12
- */
13
- import { readdirSync, readFileSync, existsSync } from 'node:fs';
14
- import { join, dirname } from 'node:path';
15
- import { homedir } from 'node:os';
16
- import { fileURLToPath } from 'node:url';
17
- const __dirname = dirname(fileURLToPath(import.meta.url));
18
/**
 * Locate the directory that holds the built-in rule files.
 *
 * Candidates are probed in this order, relative to this module's directory:
 *   1. ../../agentshield-rules
 *   2. ../../../agentshield-rules
 *   3. ../rules
 *   4. ../../rules
 *
 * @returns {string} The first candidate that exists on disk. When none of
 *   them exists the first candidate is returned as-is (nothing is created).
 */
function defaultRulesDir() {
    const relativeTo = (levelsUp, leaf) => join(__dirname, ...Array(levelsUp).fill('..'), leaf);
    const searchPath = [
        relativeTo(2, 'agentshield-rules'),
        relativeTo(3, 'agentshield-rules'),
        relativeTo(1, 'rules'),
        relativeTo(2, 'rules'),
    ];
    return searchPath.find((dir) => existsSync(dir)) ?? searchPath[0];
}
38
/**
 * Load every rule available to the scanner.
 *
 * Reads each `.yaml` / `.yml` file found directly inside `rulesDir`, parses it
 * with `parseRulesFile`, and appends whatever `loadUserRules()` returns.
 *
 * @param {string} [rulesDir] Directory containing the rule files; defaults to
 *   the result of `defaultRulesDir()`.
 * @returns {object[]} Rules from the directory followed by user-defined rules.
 * @throws {Error} When `rulesDir` does not exist, or a rule file fails to parse.
 */
export function loadRules(rulesDir = defaultRulesDir()) {
    if (!existsSync(rulesDir)) {
        throw new Error(`agentshield: rules directory not found: ${rulesDir}`);
    }
    const isYaml = (name) => name.endsWith('.yaml') || name.endsWith('.yml');
    const fromDirectory = readdirSync(rulesDir)
        .filter((name) => isYaml(name))
        .flatMap((name) => parseRulesFile(readFileSync(join(rulesDir, name), 'utf8'), name));
    // User rules always come last, after every rule read from the directory.
    return [...fromDirectory, ...loadUserRules()];
}
53
/**
 * Path of the user-level guardrails file: `<home>/.great_cto/guardrails.yml`.
 *
 * Only the home-directory location is returned. This function performs no
 * fallback to a project-local `.great_cto/guardrails.yml` and does not check
 * whether the file exists.
 *
 * @returns {string} The guardrails path under the current user's home directory.
 */
export function userGuardrailsPath() {
    return join(homedir(), '.great_cto', 'guardrails.yml');
}
60
/**
 * Read user-defined guardrail rules.
 *
 * A missing file yields an empty list without any output. A file that cannot
 * be read or parsed is reported through `console.warn` and also yields an
 * empty list, so a broken user file never throws out of this function.
 *
 * @param {string} [path] File to read; defaults to `userGuardrailsPath()`.
 * @returns {object[]} Parsed rules, each copied with `userDefined: true` added.
 */
export function loadUserRules(path) {
    const source = path ?? userGuardrailsPath();
    if (!existsSync(source)) {
        return [];
    }
    let userRules = [];
    try {
        // Tag every rule so downstream code can tell user rules from built-ins.
        userRules = parseRulesFile(readFileSync(source, 'utf8'), source)
            .map((rule) => ({ ...rule, userDefined: true }));
    }
    catch (e) {
        console.warn(`agentshield: warning — failed to load user guardrails from ${source}: ${e.message}`);
    }
    return userRules;
}
81
/**
 * Parse a rules file written in the minimal YAML subset used for rules:
 *
 *   - id: PI-001
 *     scanner: prompt-injection
 *     title: "Untrusted user input concatenated into system prompt"
 *     severity: critical
 *     owasp: "LLM01:2025 — Prompt Injection"
 *     description: |
 *       ...
 *     remediation: |
 *       ...
 *     patterns:
 *       - 'system\s*[:=]\s*["`].*\$\{.*\}'
 *     file_globs:
 *       - "**\/*.ts"
 *       - "**\/*.py"
 *     negate:
 *       - "// agentshield:ignore"
 *
 * Handling notes:
 *   - Lines whose first non-blank character is `#` are dropped before parsing.
 *   - A leading UTF-8 BOM and CRLF line endings are normalised first, so files
 *     saved by Windows editors parse the same as LF files.
 *   - Entries are split on a `-` followed by whitespace at column 0.
 *
 * @param {string} text Raw file contents.
 * @param {string} filename Used only to label error messages.
 * @returns {object[]} One rule object per list entry, in file order.
 * @throws {Error} When an entry lacks a required field; the message names the
 *   file and includes the offending block, and the original error is attached
 *   as `cause`.
 */
export function parseRulesFile(text, filename) {
    const lines = text
        .replace(/^\uFEFF/, '') // a BOM would otherwise hide the first "- " marker
        .split(/\r?\n/) // a trailing "\r" would otherwise defeat the key/value regex
        .filter((l) => !/^\s*#/.test(l));
    const stripped = lines.join('\n');
    // Splitting removes the two-character "- " marker from each entry's first
    // key, so pad it back with two spaces to line it up with the other keys.
    const blocks = stripped.split(/^-\s/m)
        .filter((b) => b.trim() && /^\s*[a-z_]+:/m.test(b))
        .map((b) => '  ' + b);
    const rules = [];
    for (const block of blocks) {
        try {
            rules.push(parseBlock(block, filename));
        }
        catch (e) {
            throw new Error(`agentshield: failed to parse rule in ${filename}: ${e.message}\n--- block ---\n${block}`, { cause: e });
        }
    }
    return rules;
}
/**
 * Trim a scalar and remove one pair of surrounding quotes, but only when the
 * first and last characters are the same quote character. Unbalanced quotes
 * are kept because they are part of the value (e.g. a regex ending in `"`).
 *
 * @param {string} value Raw scalar text.
 * @returns {string} The trimmed, unquoted scalar.
 */
function unquoteScalar(value) {
    const trimmed = value.trim();
    if (trimmed.length >= 2) {
        const quote = trimmed[0];
        if ((quote === '"' || quote === "'") && trimmed[trimmed.length - 1] === quote) {
            return trimmed.slice(1, -1);
        }
    }
    return trimmed;
}
/**
 * Parse one list entry (already stripped of its leading "- ") into a rule.
 *
 * Three value forms are recognised on keys at the block's base indent:
 *   - `key: value`          → scalar (surrounding matching quotes removed)
 *   - `key: |` (or |+ / |-) → block scalar made of the deeper-indented lines
 *   - `key:`                → list made of the deeper-indented `- item` lines
 *
 * @param {string} block Text of a single rule entry.
 * @param {string} filename Used only to label error messages.
 * @returns {object} Rule with id, scanner, title, severity, owasp, description,
 *   remediation, patterns, file_globs, negate and action. `action` is
 *   `undefined` unless it is `block`, `audit` or `redact`.
 * @throws {Error} When a required field is missing.
 */
function parseBlock(block, filename) {
    const lines = block.split(/\r?\n/);
    const out = {};
    let currentKey = null;
    let currentList = null;
    let blockScalarLines = null;
    // Base indent = the smallest indent of any "key:" line in the block.
    let baseIndent = Infinity;
    for (const raw of lines) {
        const m = raw.match(/^( *)[a-z_]+:/);
        if (m && m[1].length < baseIndent) {
            baseIndent = m[1].length;
        }
    }
    if (baseIndent === Infinity) {
        baseIndent = 0;
    }
    for (const raw of lines) {
        const isBlank = raw.trim() === '';
        if (blockScalarLines !== null) {
            if (isBlank) {
                // Keep paragraph breaks; leading/trailing blanks are trimmed
                // when the scalar is finalised.
                blockScalarLines.push('');
                continue;
            }
            const indent = raw.match(/^ */)[0].length;
            if (indent > baseIndent) {
                blockScalarLines.push(raw.slice(baseIndent + 2)); // strip baseIndent + 2
                continue;
            }
            out[currentKey] = blockScalarLines.join('\n').trim();
            blockScalarLines = null;
            // fall through: this line starts a new key
        }
        if (isBlank) {
            continue;
        }
        if (currentList !== null) {
            const indent = raw.match(/^ */)[0].length;
            if (/^\s*-\s+/.test(raw) && indent > baseIndent) {
                currentList.push(unquoteScalar(raw.replace(/^\s*-\s+/, '')));
                continue;
            }
            out[currentKey] = currentList;
            currentList = null;
            // fall through: this line starts a new key
        }
        const kvMatch = raw.match(/^( *)([a-z_]+):\s*(.*)$/);
        if (!kvMatch || kvMatch[1].length !== baseIndent) {
            continue; // not a key at the base indent
        }
        const key = kvMatch[2];
        // Trailing whitespace must not defeat the `|` / empty-value checks.
        const valRaw = kvMatch[3].trim();
        currentKey = key;
        if (valRaw === '|' || valRaw === '|+' || valRaw === '|-') {
            blockScalarLines = [];
        }
        else if (valRaw === '') {
            currentList = [];
        }
        else {
            out[key] = unquoteScalar(valRaw);
        }
    }
    // Flush whichever multi-line value was still open at the end of the block.
    if (currentList !== null) {
        out[currentKey] = currentList;
    }
    if (blockScalarLines !== null) {
        out[currentKey] = blockScalarLines.join('\n').trim();
    }
    for (const required of ['id', 'scanner', 'title', 'severity', 'description', 'remediation', 'patterns']) {
        if (out[required] === undefined) {
            throw new Error(`missing required field "${required}" in rule (block from ${filename})\nparsed: ${JSON.stringify(out)}`);
        }
    }
    const action = out.action;
    return {
        id: out.id,
        scanner: out.scanner,
        title: out.title,
        severity: out.severity,
        owasp: out.owasp,
        description: out.description,
        remediation: out.remediation,
        patterns: out.patterns,
        file_globs: out.file_globs,
        negate: out.negate,
        action: (action === 'block' || action === 'audit' || action === 'redact') ? action : undefined,
    };
}
@@ -1,80 +0,0 @@
1
- /**
2
- * SARIF 2.1.0 output for GitHub Code Scanning.
3
- *
4
- * https://docs.github.com/en/code-security/code-scanning/integrating-with-code-scanning/sarif-support-for-code-scanning
5
- */
6
/** Maps a rule severity to the SARIF `level` reported for it. */
const SEVERITY_TO_LEVEL = {
    critical: 'error',
    high: 'error',
    medium: 'warning',
    low: 'note',
    info: 'note',
};
/** Level used for severities missing from SEVERITY_TO_LEVEL (SARIF's own default). */
const DEFAULT_SARIF_LEVEL = 'warning';
/**
 * Resolve the SARIF level for a severity string.
 *
 * Uses an own-property check so that unknown severities, and names inherited
 * from Object.prototype such as "constructor", fall back to the default
 * instead of yielding `undefined` or a function.
 *
 * @param {string} severity Rule severity.
 * @returns {string} `error`, `warning` or `note`.
 */
function sarifLevel(severity) {
    return Object.prototype.hasOwnProperty.call(SEVERITY_TO_LEVEL, severity)
        ? SEVERITY_TO_LEVEL[severity]
        : DEFAULT_SARIF_LEVEL;
}
/**
 * Convert a scan report into a SARIF 2.1.0 log object.
 *
 * The log has a single run. `tool.driver.rules` lists each rule referenced by
 * at least one finding exactly once, in order of first appearance; `results`
 * has one entry per finding, in report order.
 *
 * @param {{findings: Array<{rule: object, location: object, match: string}>}} report
 * @returns {object} A plain object ready for `JSON.stringify`.
 */
export function toSarif(report) {
    // Collect unique rules referenced by findings.
    const rulesById = new Map();
    for (const f of report.findings) {
        if (!rulesById.has(f.rule.id)) {
            rulesById.set(f.rule.id, toSarifRule(f.rule));
        }
    }
    return {
        $schema: 'https://docs.oasis-open.org/sarif/sarif/v2.1.0/cs01/schemas/sarif-schema-2.1.0.json',
        version: '2.1.0',
        runs: [
            {
                tool: {
                    driver: {
                        name: 'agentshield',
                        organization: 'great-cto',
                        informationUri: 'https://greatcto.systems/agentshield',
                        rules: [...rulesById.values()],
                    },
                },
                results: report.findings.map((f) => ({
                    ruleId: f.rule.id,
                    level: sarifLevel(f.rule.severity),
                    // The matched text is capped at 100 characters.
                    message: { text: `${f.rule.title} — ${f.match.slice(0, 100)}` },
                    locations: [
                        {
                            physicalLocation: {
                                // SARIF expects a URI reference, so OS-native
                                // backslash separators become forward slashes.
                                artifactLocation: { uri: f.location.file.replace(/\\/g, '/') },
                                region: {
                                    startLine: f.location.line,
                                    startColumn: f.location.column ?? 1,
                                    snippet: { text: f.location.snippet },
                                },
                            },
                        },
                    ],
                    properties: {
                        severity: f.rule.severity,
                        scanner: f.rule.scanner,
                        owasp: f.rule.owasp,
                    },
                })),
            },
        ],
    };
}
/**
 * Build the SARIF reporting descriptor for one rule.
 *
 * @param {object} rule Rule with id, title, description, remediation,
 *   severity and an optional owasp reference.
 * @returns {object} Entry for `tool.driver.rules`.
 */
function toSarifRule(rule) {
    const owaspNote = rule.owasp ? `\n\n_OWASP: ${rule.owasp}_` : '';
    return {
        id: rule.id,
        name: rule.title,
        shortDescription: { text: rule.title },
        fullDescription: { text: rule.description },
        // Encoded so that ids containing spaces or URL metacharacters still
        // produce a valid link; plain ids such as "PI-001" are unchanged.
        helpUri: `https://greatcto.systems/agentshield/rules/${encodeURIComponent(rule.id)}`,
        help: {
            text: `${rule.description}\n\nRemediation: ${rule.remediation}`,
            markdown: `**${rule.title}**\n\n${rule.description}\n\n**Remediation:** ${rule.remediation}${owaspNote}`,
        },
        defaultConfiguration: {
            level: sarifLevel(rule.severity),
        },
        properties: {
            severity: rule.severity,
            owasp: rule.owasp,
            tags: ['ai-security', rule.severity, ...(rule.owasp ? ['owasp-llm'] : [])],
        },
    };
}
@@ -1,244 +0,0 @@
1
- /**
2
- * Scanner orchestrator.
3
- *
4
- * Walks the filesystem (or an explicit file list), applies all loaded rules
5
- * to each file, and produces a ScanReport.
6
- *
7
- * Pure regex-based — no AST. This is intentional: AST-aware analysis is
8
- * fragile across languages and adds dependencies. Regex catches the
9
- * high-confidence patterns we care about (OWASP LLM Top 10).
10
- */
11
- import { readFileSync, readdirSync, statSync, existsSync } from 'node:fs';
12
- import { join, extname, relative, resolve } from 'node:path';
13
- import { severityRank } from './types.js';
14
- import { loadRules } from './rules-loader.js';
15
/** Lower-case file extensions (with leading dot) that the walker treats as scannable. */
const TEXT_EXTS = new Set([
    // JavaScript / TypeScript
    ...['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'],
    // Other programming languages
    ...['.py', '.go', '.rs', '.rb', '.java', '.kt'],
    // Documents and structured data
    ...['.md', '.mdx', '.yaml', '.yml', '.json'],
    // Configuration
    ...['.toml', '.ini', '.env'],
    // Shell
    ...['.sh', '.bash'],
]);
/**
 * Path patterns skipped by default. Each one matches a complete directory
 * segment, i.e. the name enclosed by `/` on both sides.
 */
const DEFAULT_EXCLUDE = [
    'node_modules',
    'dist',
    'build',
    '\\.git',
    '\\.next',
    '\\.venv',
    '__pycache__',
    'coverage',
].map((segment) => new RegExp(`/${segment}/`));
32
/**
 * Recursively yield the paths of scannable files under `root`.
 *
 * A file is yielded when its lower-cased extension is in TEXT_EXTS or its
 * name contains `.env` / `.envrc`, and its size is at most 1,000,000 bytes.
 * Entries whose path (with a trailing `/` appended) matches any `exclude`
 * pattern are skipped before they are stat'ed, so excluded directories are
 * never entered.
 *
 * Entries that cannot be stat'ed and directories that cannot be listed
 * (removed mid-scan, permission denied, ...) are skipped rather than aborting
 * the whole walk.
 *
 * @param {string} root Directory to walk.
 * @param {RegExp[]} exclude Patterns tested against `<path>/`.
 * @yields {string} Path of each matching file (`root` joined with the entry names).
 */
function* walk(root, exclude) {
    let entries;
    try {
        entries = readdirSync(root);
    }
    catch {
        // Same policy as the stat failure below: skip what cannot be read.
        return;
    }
    for (const entry of entries) {
        const full = join(root, entry);
        if (exclude.some((re) => re.test(full + '/'))) {
            continue;
        }
        let st;
        try {
            st = statSync(full);
        }
        catch {
            continue;
        }
        if (st.isDirectory()) {
            yield* walk(full, exclude);
        }
        else if (TEXT_EXTS.has(extname(full).toLowerCase()) || /\.(env|envrc)/.test(entry)) {
            // Skip very large files to keep the scan fast.
            if (st.size <= 1_000_000) {
                yield full;
            }
        }
    }
}
54
/**
 * Compiled glob regexes keyed by glob text. `null` marks a glob whose regex
 * failed to compile. The same few rule globs are tested against every file,
 * so compiling each one once avoids repeating the conversion per file.
 */
const GLOB_REGEX_CACHE = new Map();
/**
 * Translate one glob into a suffix-anchored RegExp.
 *
 * Supported syntax:
 *   - `**` followed by `/` → zero or more whole path segments
 *   - `**` elsewhere       → any characters, including `/`
 *   - `*`                  → any run of characters within one segment
 *   - `?`                  → exactly one character within one segment
 * Every other character is matched literally.
 *
 * @param {string} glob Glob pattern.
 * @returns {RegExp|null} The compiled regex, or null when compilation failed.
 */
function globToRegExp(glob) {
    if (GLOB_REGEX_CACHE.has(glob)) {
        return GLOB_REGEX_CACHE.get(glob);
    }
    // Character-by-character conversion avoids the substitution-order pitfalls
    // of chaining several replace() passes.
    let re = '';
    for (let i = 0; i < glob.length; i++) {
        const c = glob[i];
        if (c === '*' && glob[i + 1] === '*') {
            if (glob[i + 2] === '/') {
                // `**` consumes the trailing `/`, so `**` + `/x` also matches a bare `x`.
                re += '(?:.*\\/)?';
                i += 2;
            }
            else {
                re += '.*';
                i++;
            }
        }
        else if (c === '*') {
            re += '[^/]*';
        }
        else if (c === '?') {
            // Like `*`, a single-character wildcard must not cross a separator.
            re += '[^/]';
        }
        else if ('.+^${}()|[]\\'.includes(c)) {
            re += '\\' + c;
        }
        else {
            re += c;
        }
    }
    let compiled;
    try {
        // Suffix match: `src/foo.ts` matches a `src/`-relative glob regardless of cwd.
        compiled = new RegExp('(?:^|/)' + re + '$');
    }
    catch {
        compiled = null;
    }
    GLOB_REGEX_CACHE.set(glob, compiled);
    return compiled;
}
/**
 * Decide whether a file path is selected by a rule's glob list.
 *
 * @param {string} file Path to test; backslashes are treated as separators.
 * @param {string[]|string|undefined} globs Glob patterns. A missing or empty
 *   value matches every file; a single string is treated as a one-item list.
 * @returns {boolean} True when at least one glob matches the end of the path.
 */
function fileMatchesGlobs(file, globs) {
    if (!globs || globs.length === 0) {
        return true;
    }
    const globList = Array.isArray(globs) ? globs : [globs];
    // Normalise path separators for cross-platform matching.
    const normalized = file.replace(/\\/g, '/');
    return globList.some((g) => {
        const re = globToRegExp(g);
        return re !== null && re.test(normalized);
    });
}
103
/**
 * Compile pattern sources into multiline regular expressions.
 *
 * @param {string[]} patterns Regular-expression sources.
 * @returns {RegExp[]} One RegExp per source, each created with the `m` flag.
 * @throws {SyntaxError} When a source is not a valid regular expression.
 */
function compilePatterns(patterns) {
    const toMultilineRegExp = (source) => new RegExp(source, 'm');
    return patterns.map(toMultilineRegExp);
}
106
/**
 * Convert a character offset into a 1-based line and column.
 *
 * Only `\n` characters located strictly before `idx` count as line breaks.
 *
 * @param {string} text Text the offset refers to.
 * @param {number} idx Zero-based character offset.
 * @returns {{line: number, column: number}} 1-based position of the offset.
 */
function lineColAt(text, idx) {
    let line = 1;
    let lineStart = 0;
    // Hop from newline to newline instead of inspecting every character.
    let newlineAt = text.indexOf('\n');
    while (newlineAt !== -1 && newlineAt < idx) {
        line += 1;
        lineStart = newlineAt + 1;
        newlineAt = text.indexOf('\n', lineStart);
    }
    return { line, column: idx - lineStart + 1 };
}
117
/**
 * Extract the source line(s) surrounding a match for display.
 *
 * The excerpt runs from the start of the line containing `idx` to the first
 * newline at or after the end of the match (or the end of the text). It is
 * then trimmed and limited to 200 characters.
 *
 * @param {string} text Full file contents.
 * @param {number} idx Offset where the match starts.
 * @param {number} matchLen Length of the matched text.
 * @returns {string} Trimmed excerpt of at most 200 characters.
 */
function snippet(text, idx, matchLen) {
    const lineStart = text.lastIndexOf('\n', idx - 1) + 1;
    const nextBreak = text.indexOf('\n', idx + matchLen);
    const lineEnd = nextBreak === -1 ? text.length : nextBreak;
    const excerpt = text.slice(lineStart, lineEnd).trim();
    return excerpt.slice(0, 200);
}
124
/**
 * Apply every rule to one file's contents.
 *
 * For each rule, in order:
 *   1. skip it when the file does not match the rule's `file_globs`;
 *   2. skip it when any `negate` pattern matches anywhere in the content;
 *   3. otherwise report the first match of the first pattern that matches.
 * At most one finding is produced per rule per file, to keep output quiet.
 *
 * @param {string} file Path recorded in each finding's location.
 * @param {string} content File contents.
 * @param {object[]} rules Rules to evaluate.
 * @returns {Array<{rule: object, location: object, match: string}>} Findings
 *   in rule order.
 */
export function scanFile(file, content, rules) {
    const findings = [];
    for (const rule of rules) {
        if (!fileMatchesGlobs(file, rule.file_globs)) {
            continue;
        }
        const negators = rule.negate ? compilePatterns(rule.negate) : [];
        const suppressed = negators.some((negator) => negator.test(content));
        if (suppressed) {
            continue;
        }
        // Find the first pattern that matches anywhere in the file.
        let hit = null;
        for (const pattern of compilePatterns(rule.patterns)) {
            hit = pattern.exec(content);
            if (hit) {
                break;
            }
        }
        if (hit === null) {
            continue;
        }
        const position = lineColAt(content, hit.index);
        findings.push({
            rule,
            location: {
                file,
                line: position.line,
                column: position.column,
                snippet: snippet(content, hit.index, hit[0].length),
            },
            match: hit[0],
        });
    }
    return findings;
}
152
/**
 * Run a scan and build the report.
 *
 * Steps: load the rules, narrow them by `options.scanners` and
 * `options.minSeverity`, build the file list, scan each file, then sort the
 * findings from most to least severe (ties ordered by file path).
 *
 * Failures to load rules, a missing root, an unreadable root or tree, and
 * unreadable files are reported in the returned `errors` array instead of
 * being thrown.
 *
 * @param {string} root Directory or single file to scan; ignored when
 *   `options.files` is given.
 * @param {object} [options]
 * @param {string[]} [options.scanners] Keep only rules whose `scanner` is listed.
 * @param {string} [options.minSeverity] Keep only rules at or above this severity.
 * @param {string[]} [options.exclude] Extra exclusion patterns; each string is
 *   compiled as a regular expression and applied while walking a directory.
 * @param {string[]} [options.files] Explicit file list used instead of walking `root`.
 * @param {number} [options.maxFindings] Stop scanning once this many findings
 *   were collected; the report never contains more than this many.
 * @returns {{startedAt: string, durationMs: number, filesScanned: number,
 *   rulesEvaluated: number, findings: object[], errors: string[]}}
 * @throws {SyntaxError} When an `options.exclude` entry is not a valid
 *   regular expression.
 */
export function scan(root, options = {}) {
    const start = Date.now();
    const startedAt = new Date().toISOString();
    const errors = [];
    const findings = [];
    // Every exit path returns the same report shape.
    const buildReport = (filesScanned, rulesEvaluated) => ({
        startedAt,
        durationMs: Date.now() - start,
        filesScanned,
        rulesEvaluated,
        findings,
        errors,
    });
    let rules;
    try {
        rules = loadRules();
    }
    catch (e) {
        errors.push(e.message);
        return buildReport(0, 0);
    }
    // Filter scanners
    if (options.scanners && options.scanners.length > 0) {
        const allowed = new Set(options.scanners);
        rules = rules.filter((r) => allowed.has(r.scanner));
    }
    // Filter min severity
    if (options.minSeverity) {
        const minRank = severityRank(options.minSeverity);
        rules = rules.filter((r) => severityRank(r.severity) >= minRank);
    }
    // Build file list
    const exclude = [
        ...DEFAULT_EXCLUDE,
        ...(options.exclude || []).map((g) => new RegExp(g)),
    ];
    let files;
    if (options.files) {
        files = options.files.map((f) => resolve(f));
    }
    else {
        if (!existsSync(root)) {
            errors.push(`root not found: ${root}`);
            return buildReport(0, rules.length);
        }
        try {
            // Allow root to be a single file
            const absoluteRoot = resolve(root);
            files = statSync(absoluteRoot).isFile()
                ? [absoluteRoot]
                : [...walk(absoluteRoot, exclude)];
        }
        catch (e) {
            // The root can vanish or be unreadable between the existence check
            // and the walk; report that like every other scan failure.
            errors.push(`${root}: ${e.message}`);
            return buildReport(0, rules.length);
        }
    }
    // Scan
    let filesScanned = 0;
    const cwd = process.cwd();
    for (const file of files) {
        let content;
        try {
            content = readFileSync(file, 'utf8');
        }
        catch (e) {
            errors.push(`${file}: ${e.message}`);
            continue;
        }
        filesScanned++;
        const rel = relative(cwd, file) || file;
        findings.push(...scanFile(rel, content, rules));
        if (options.maxFindings && findings.length >= options.maxFindings) {
            break;
        }
    }
    // Sort findings: critical→info, then by file
    findings.sort((a, b) => {
        const sev = severityRank(b.rule.severity) - severityRank(a.rule.severity);
        if (sev !== 0) {
            return sev;
        }
        return a.location.file.localeCompare(b.location.file);
    });
    // The last file scanned can push the total past the cap. Trim after
    // sorting so the most severe findings are the ones that are kept.
    if (options.maxFindings && findings.length > options.maxFindings) {
        findings.splice(options.maxFindings);
    }
    return buildReport(filesScanned, rules.length);
}
@@ -1,10 +0,0 @@
1
- /**
2
- * Core types for @great-cto/agentshield.
3
- *
4
- * A scan produces a list of `Finding` objects. Each finding cites a `Rule`
5
- * (loaded from rules/*.yaml) and locates the offending code via `Location`.
6
- */
7
/** Severity names ordered from least to most severe. */
export const SEVERITY_ORDER = ['info', 'low', 'medium', 'high', 'critical'];
/**
 * Rank a severity by its position in SEVERITY_ORDER.
 *
 * @param {string} s Severity name.
 * @returns {number} 0 for `info` up to 4 for `critical`; -1 when `s` is not a
 *   known severity.
 */
export function severityRank(s) {
    return SEVERITY_ORDER.findIndex((level) => level === s);
}