codegate-ai 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +61 -25
  2. package/dist/cli.d.ts +1 -1
  3. package/dist/cli.js +59 -41
  4. package/dist/commands/scan-command/helpers.d.ts +6 -1
  5. package/dist/commands/scan-command/helpers.js +46 -1
  6. package/dist/commands/scan-command.js +49 -55
  7. package/dist/commands/scan-content-command.d.ts +16 -0
  8. package/dist/commands/scan-content-command.js +61 -0
  9. package/dist/config/suppression-policy.d.ts +14 -0
  10. package/dist/config/suppression-policy.js +81 -0
  11. package/dist/config.d.ts +5 -0
  12. package/dist/config.js +29 -3
  13. package/dist/layer2-static/advisories/agent-components.json +62 -0
  14. package/dist/layer2-static/detectors/advisory-intelligence.d.ts +7 -0
  15. package/dist/layer2-static/detectors/advisory-intelligence.js +170 -0
  16. package/dist/layer2-static/detectors/command-exec.js +6 -0
  17. package/dist/layer2-static/detectors/rule-file.js +5 -0
  18. package/dist/layer2-static/engine.d.ts +4 -1
  19. package/dist/layer2-static/engine.js +97 -0
  20. package/dist/layer2-static/rule-engine.d.ts +1 -1
  21. package/dist/layer2-static/rule-engine.js +1 -13
  22. package/dist/layer2-static/rule-pack-loader.d.ts +10 -0
  23. package/dist/layer2-static/rule-pack-loader.js +187 -0
  24. package/dist/layer3-dynamic/command-builder.d.ts +1 -0
  25. package/dist/layer3-dynamic/command-builder.js +44 -2
  26. package/dist/layer3-dynamic/local-text-analysis.d.ts +9 -1
  27. package/dist/layer3-dynamic/local-text-analysis.js +12 -27
  28. package/dist/layer3-dynamic/meta-agent.d.ts +1 -2
  29. package/dist/layer3-dynamic/meta-agent.js +3 -6
  30. package/dist/layer3-dynamic/prompt-templates/local-text-analysis.md +33 -21
  31. package/dist/layer3-dynamic/prompt-templates/security-analysis.md +11 -1
  32. package/dist/layer3-dynamic/prompt-templates/tool-poisoning.md +9 -1
  33. package/dist/layer3-dynamic/toxic-flow.js +6 -0
  34. package/dist/pipeline.js +9 -8
  35. package/dist/report/finding-fingerprint.d.ts +5 -0
  36. package/dist/report/finding-fingerprint.js +47 -0
  37. package/dist/reporter/markdown.js +25 -3
  38. package/dist/reporter/sarif.js +2 -0
  39. package/dist/reporter/terminal.js +25 -0
  40. package/dist/scan-target/fetch-plan.d.ts +8 -0
  41. package/dist/scan-target/fetch-plan.js +30 -0
  42. package/dist/scan-target/staging.js +60 -5
  43. package/dist/scan.js +3 -0
  44. package/dist/types/finding.d.ts +9 -0
  45. package/package.json +3 -1
package/README.md CHANGED
@@ -108,6 +108,7 @@ See the [Configuration](#configuration) section for full settings and examples.
108
108
  | Command | Purpose |
109
109
  | ------------------------ | ---------------------------------------------------------------------- |
110
110
  | `codegate scan [target]` | Scan a directory, file, or URL target for AI tool config risks. |
111
+ | `codegate scan-content` | Scan inline JSON, YAML, TOML, Markdown, or text content. |
111
112
  | `codegate run <tool>` | Scan current directory, then launch selected AI tool if policy allows. |
112
113
  | `codegate skills [...]` | Wrap `npx skills` and preflight-scan `skills add` targets. |
113
114
  | `codegate clawhub [...]` | Wrap `npx clawhub` and preflight-scan `clawhub install` targets. |
@@ -150,6 +151,28 @@ codegate scan . --remediate --dry-run --patch
150
151
  codegate scan . --reset-state
151
152
  ```
152
153
 
154
+ ## `scan-content` Command
155
+
156
+ `codegate scan-content <content...>` scans inline content directly from the command line. It is useful when you want to inspect JSON, YAML, TOML, Markdown, or plain text before writing it to disk or installing it into a tool configuration.
157
+
158
+ Use `--type` to declare the content format:
159
+
160
+ | Type | Purpose |
161
+ | ---------- | -------------------------------------------------------------------- |
162
+ | `json` | Parse JSON input and run the static scanner on the parsed structure. |
163
+ | `yaml` | Parse YAML input and run the static scanner on the parsed structure. |
164
+ | `toml` | Parse TOML input and run the static scanner on the parsed structure. |
165
+ | `markdown` | Analyze Markdown instruction text as a rule surface. |
166
+ | `text` | Analyze plain text as a rule surface. |
167
+
168
+ Examples:
169
+
170
+ ```bash
171
+ codegate scan-content '{"mcpServers":{"bad":{"command":"bash"}}}' --type json
172
+ codegate scan-content '# Suspicious instructions' --type markdown
173
+ codegate scan-content 'echo hello' --type text
174
+ ```
175
+
153
176
  ## `run` Command
154
177
 
155
178
  `codegate run <tool>` runs scan-first wrapper mode.
@@ -199,6 +222,7 @@ Behavior:
199
222
  - Dangerous findings block execution (fail-closed).
200
223
  - Warning-level findings can still require confirmation unless `--cg-force` is provided.
201
224
  - Non-install subcommands (for example `skills find` or `clawhub search`) are passed through without preflight scanning.
225
+ - Wrapper scans honor the same config policy controls as `codegate scan`, including `suppress_findings`, `suppression_rules`, `rule_pack_paths`, `allowed_rules`, and `skip_rules`.
202
226
 
203
227
  Wrapper flags (consumed by CodeGate, not forwarded):
204
228
 
@@ -299,33 +323,38 @@ codegate init
299
323
  - List values are merged and de-duplicated across levels.
300
324
  - `trusted_directories` is global-only; project config cannot set it.
301
325
  - `blocked_commands` is merged with defaults; defaults are always retained.
326
+ - `rule_pack_paths`, `allowed_rules`, `skip_rules`, `suppress_findings`, and `suppression_rules` merge across global and project config.
302
327
 
303
328
  ### Full Configuration Reference
304
329
 
305
- | Key | Type | Allowed Values | Default |
306
- | -------------------------------- | ---------------- | --------------------------------------------------------------------------- | -------------------------------------------------- |
307
- | `severity_threshold` | string | `critical`, `high`, `medium`, `low`, `info` | `high` |
308
- | `auto_proceed_below_threshold` | boolean | `true`, `false` | `true` |
309
- | `output_format` | string | `terminal`, `json`, `sarif`, `markdown`, `html` | `terminal` |
310
- | `scan_state_path` | string | file path | `~/.codegate/scan-state.json` |
311
- | `scan_user_scope` | boolean | `true`, `false` | `true` |
312
- | `tui.enabled` | boolean | `true`, `false` | `true` |
313
- | `tui.colour_scheme` | string | free string (currently `default`) | `default` |
314
- | `tui.compact_mode` | boolean | `true`, `false` | `false` |
315
- | `tool_discovery.preferred_agent` | string | practical values: `claude`, `claude-code`, `codex`, `codex-cli`, `opencode` | `claude` |
316
- | `tool_discovery.agent_paths` | object | map of agent key -> binary path | `{}` |
317
- | `tool_discovery.skip_tools` | array of strings | tool keys to skip in discovery/selection | `[]` |
318
- | `trusted_directories` | array of strings | directory paths | `[]` |
319
- | `blocked_commands` | array of strings | command names | `["bash","sh","curl","wget","nc","python","node"]` |
320
- | `known_safe_mcp_servers` | array of strings | package/server identifiers | prefilled |
321
- | `known_safe_formatters` | array of strings | formatter names | prefilled |
322
- | `known_safe_lsp_servers` | array of strings | lsp server names | prefilled |
323
- | `known_safe_hooks` | array of strings | relative hook paths such as `.git/hooks/pre-commit` | `[]` |
324
- | `unicode_analysis` | boolean | `true`, `false` | `true` |
325
- | `check_ide_settings` | boolean | `true`, `false` | `true` |
326
- | `owasp_mapping` | boolean | `true`, `false` | `true` |
327
- | `trusted_api_domains` | array of strings | domain names | `[]` |
328
- | `suppress_findings` | array of strings | finding IDs/fingerprints | `[]` |
330
+ | Key | Type | Allowed Values | Default |
331
+ | -------------------------------- | ---------------- | -------------------------------------------------------------------------------------------- | -------------------------------------------------- |
332
+ | `severity_threshold` | string | `critical`, `high`, `medium`, `low`, `info` | `high` |
333
+ | `auto_proceed_below_threshold` | boolean | `true`, `false` | `true` |
334
+ | `output_format` | string | `terminal`, `json`, `sarif`, `markdown`, `html` | `terminal` |
335
+ | `scan_state_path` | string | file path | `~/.codegate/scan-state.json` |
336
+ | `scan_user_scope` | boolean | `true`, `false` | `true` |
337
+ | `tui.enabled` | boolean | `true`, `false` | `true` |
338
+ | `tui.colour_scheme` | string | free string (currently `default`) | `default` |
339
+ | `tui.compact_mode` | boolean | `true`, `false` | `false` |
340
+ | `tool_discovery.preferred_agent` | string | practical values: `claude`, `claude-code`, `codex`, `codex-cli`, `opencode` | `claude` |
341
+ | `tool_discovery.agent_paths` | object | map of agent key -> binary path | `{}` |
342
+ | `tool_discovery.skip_tools` | array of strings | tool keys to skip in discovery/selection | `[]` |
343
+ | `trusted_directories` | array of strings | directory paths | `[]` |
344
+ | `blocked_commands` | array of strings | command names | `["bash","sh","curl","wget","nc","python","node"]` |
345
+ | `known_safe_mcp_servers` | array of strings | package/server identifiers | prefilled |
346
+ | `known_safe_formatters` | array of strings | formatter names | prefilled |
347
+ | `known_safe_lsp_servers` | array of strings | lsp server names | prefilled |
348
+ | `known_safe_hooks` | array of strings | relative hook paths such as `.git/hooks/pre-commit` | `[]` |
349
+ | `unicode_analysis` | boolean | `true`, `false` | `true` |
350
+ | `check_ide_settings` | boolean | `true`, `false` | `true` |
351
+ | `owasp_mapping` | boolean | `true`, `false` | `true` |
352
+ | `trusted_api_domains` | array of strings | domain names | `[]` |
353
+ | `suppress_findings` | array of strings | finding IDs/fingerprints | `[]` |
354
+ | `suppression_rules` | array of objects | rule match objects with `rule_id`, `file_path`, `severity`, `category`, `cwe`, `fingerprint` | `[]` |
355
+ | `rule_pack_paths` | array of strings | extra rule pack files or directories | `[]` |
356
+ | `allowed_rules` | array of strings | rule IDs to keep after loading | `[]` |
357
+ | `skip_rules` | array of strings | rule IDs to drop after loading | `[]` |
329
358
 
330
359
  ### Default Config Example
331
360
 
@@ -359,7 +388,11 @@ codegate init
359
388
  "check_ide_settings": true,
360
389
  "owasp_mapping": true,
361
390
  "trusted_api_domains": [],
362
- "suppress_findings": []
391
+ "suppress_findings": [],
392
+ "suppression_rules": [],
393
+ "rule_pack_paths": [],
394
+ "allowed_rules": [],
395
+ "skip_rules": []
363
396
  }
364
397
  ```
365
398
 
@@ -371,6 +404,9 @@ Configuration notes:
371
404
  - `unicode_analysis=false` disables hidden-unicode findings in Layer 2 rule-file scanning and Layer 3 tool-description scanning. Other rule-file heuristics remain enabled.
372
405
  - `check_ide_settings=false` disables `IDE_SETTINGS` findings.
373
406
  - `owasp_mapping=false` keeps detection behavior unchanged and emits empty `owasp` arrays in reports.
407
+ - `suppression_rules` applies all listed criteria with AND semantics. If a criterion is omitted, it is ignored.
408
+ - `rule_pack_paths` can point to extra JSON rule-pack files or directories of JSON rule packs.
409
+ - `allowed_rules` and `skip_rules` control which loaded rule IDs remain active after rule-pack loading.
374
410
 
375
411
  ## Output Formats
376
412
 
package/dist/cli.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import { Command } from "commander";
3
3
  import { type CodeGateConfig, type ResolveConfigOptions } from "./config.js";
4
- import { type ResourceFetchResult } from "./layer3-dynamic/resource-fetcher.js";
4
+ import type { ResourceFetchResult } from "./layer3-dynamic/resource-fetcher.js";
5
5
  import type { LocalTextAnalysisTarget } from "./layer3-dynamic/local-text-analysis.js";
6
6
  import { type DeepScanResource } from "./pipeline.js";
7
7
  import { type ScanDiscoveryCandidate, type ScanDiscoveryContext } from "./scan.js";
package/dist/cli.js CHANGED
@@ -8,8 +8,6 @@ import { pathToFileURL } from "node:url";
8
8
  import { Command, Option } from "commander";
9
9
  import { DEFAULT_CONFIG, OUTPUT_FORMATS, resolveEffectiveConfig, } from "./config.js";
10
10
  import { APP_NAME } from "./index.js";
11
- import { fetchResourceMetadata, } from "./layer3-dynamic/resource-fetcher.js";
12
- import { acquireToolDescriptions } from "./layer3-dynamic/tool-description-acquisition.js";
13
11
  import { runSandboxCommand } from "./layer3-dynamic/sandbox.js";
14
12
  import { loadKnowledgeBase } from "./layer1-discovery/knowledge-base.js";
15
13
  import { createScanDiscoveryContext, discoverDeepScanResources, discoverDeepScanResourcesFromContext, discoverLocalTextAnalysisTargetsFromContext, runScanEngine, } from "./scan.js";
@@ -20,6 +18,7 @@ import { executeWrapperRun } from "./wrapper.js";
20
18
  import { runRemediation as runRemediationWorkflow, } from "./layer4-remediation/remediation-runner.js";
21
19
  import { undoLatestSession } from "./commands/undo.js";
22
20
  import { executeScanCommand } from "./commands/scan-command.js";
21
+ import { executeScanContentCommand, SCAN_CONTENT_TYPES, } from "./commands/scan-content-command.js";
23
22
  import { executeSkillsWrapper, launchSkillsPassthrough, } from "./commands/skills-wrapper.js";
24
23
  import { executeClawhubWrapper, launchClawhubPassthrough, } from "./commands/clawhub-wrapper.js";
25
24
  import { promptDeepAgentSelection, promptDeepScanConsent, promptMetaAgentCommandConsent, promptRemediationConsent, promptSkillSelection, } from "./cli-prompts.js";
@@ -49,25 +48,6 @@ export function isDirectCliInvocation(importMetaUrl, argv1, deps = {}) {
49
48
  return false;
50
49
  }
51
50
  }
52
- function mapAcquisitionFailure(status, error) {
53
- if (status === "auth_failure" ||
54
- status === "timeout" ||
55
- status === "network_error" ||
56
- status === "command_error") {
57
- return {
58
- status,
59
- attempts: 1,
60
- elapsedMs: 0,
61
- error,
62
- };
63
- }
64
- return {
65
- status: "network_error",
66
- attempts: 1,
67
- elapsedMs: 0,
68
- error: error ?? "tool description acquisition failed",
69
- };
70
- }
71
51
  async function runMetaAgentCommandWithSandbox(context) {
72
52
  const commandResult = await runSandboxCommand({
73
53
  command: context.command.command,
@@ -180,27 +160,22 @@ const defaultCliDeps = {
180
160
  includeUserScope: config?.scan_user_scope === true,
181
161
  }),
182
162
  discoverLocalTextTargets: (_scanTarget, _config, discoveryContext) => discoveryContext ? discoverLocalTextAnalysisTargetsFromContext(discoveryContext) : [],
183
- // Keep the default CLI dependency layer as a thin bridge from user-facing commands into the scan engine.
163
+ // Deep resource execution never makes outbound network calls.
164
+ // Connecting to URLs found in scanned config files is a security risk:
165
+ // the endpoint could be malicious (crafted responses, SSRF, IP logging).
166
+ // Instead, we record the URL as metadata for the agent to analyze.
184
167
  executeDeepResource: async (resource) => {
185
- if (resource.request.kind === "http" || resource.request.kind === "sse") {
186
- const acquisition = await acquireToolDescriptions({
187
- serverId: resource.id,
188
- transport: resource.request.kind,
189
- url: resource.request.locator,
190
- });
191
- if (acquisition.status === "ok") {
192
- return {
193
- status: "ok",
194
- attempts: 1,
195
- elapsedMs: 0,
196
- metadata: {
197
- tools: acquisition.tools,
198
- },
199
- };
200
- }
201
- return mapAcquisitionFailure(acquisition.status, acquisition.error);
202
- }
203
- return fetchResourceMetadata(resource.request);
168
+ return {
169
+ status: "ok",
170
+ attempts: 0,
171
+ elapsedMs: 0,
172
+ metadata: {
173
+ resource_id: resource.id,
174
+ resource_kind: resource.request.kind,
175
+ resource_url: resource.request.locator,
176
+ note: "URL recorded for analysis without making outbound connections.",
177
+ },
178
+ };
204
179
  },
205
180
  launchSkills: (args, cwd) => launchSkillsPassthrough(args, cwd),
206
181
  launchClawhub: (args, cwd) => launchClawhubPassthrough(args, cwd),
@@ -340,6 +315,47 @@ function addScanCommand(program, version, deps) {
340
315
  }
341
316
  });
342
317
  }
318
+ function addScanContentCommand(program, version, deps) {
319
+ program
320
+ .command("scan-content <content...>")
321
+ .description("Scan inline content for AI tool config risks")
322
+ .addOption(new Option("--type <type>", "content type")
323
+ .choices([...SCAN_CONTENT_TYPES])
324
+ .makeOptionMandatory())
325
+ .addHelpText("after", renderExampleHelp([
326
+ 'codegate scan-content \'{"mcpServers":{"bad":{"command":"bash"}}}\' --type json',
327
+ "codegate scan-content '# Suspicious instructions' --type markdown",
328
+ "codegate scan-content 'echo hello' --type text",
329
+ ]))
330
+ .action(async (contentParts, options) => {
331
+ try {
332
+ const content = (contentParts ?? []).join(" ");
333
+ const type = options.type;
334
+ if (!type) {
335
+ throw new Error("Missing required option: --type");
336
+ }
337
+ const config = deps.resolveConfig({
338
+ scanTarget: deps.cwd(),
339
+ });
340
+ await executeScanContentCommand({
341
+ version,
342
+ cwd: deps.cwd(),
343
+ content,
344
+ type,
345
+ config,
346
+ }, {
347
+ stdout: deps.stdout,
348
+ stderr: deps.stderr,
349
+ setExitCode: deps.setExitCode,
350
+ });
351
+ }
352
+ catch (error) {
353
+ const message = error instanceof Error ? error.message : String(error);
354
+ deps.stderr(`Scan content failed: ${message}`);
355
+ deps.setExitCode(3);
356
+ }
357
+ });
358
+ }
343
359
  function addRunCommand(program, version, deps) {
344
360
  program
345
361
  .command("run <tool>")
@@ -594,11 +610,13 @@ export function createCli(version = packageJson.version ?? "0.0.0-dev", deps = d
594
610
  "codegate scan .",
595
611
  "codegate scan https://github.com/owner/repo",
596
612
  "codegate scan https://github.com/owner/repo/blob/main/skills/security-review/SKILL.md",
613
+ 'codegate scan-content \'{"mcpServers":{"bad":{"command":"bash"}}}\' --type json',
597
614
  "codegate skills add owner/repo --skill security-review",
598
615
  "codegate clawhub install security-auditor",
599
616
  "codegate run claude",
600
617
  ]));
601
618
  addScanCommand(program, version, deps);
619
+ addScanContentCommand(program, version, deps);
602
620
  addSkillsCommand(program, version, deps);
603
621
  addClawhubCommand(program, version, deps);
604
622
  addRunCommand(program, version, deps);
package/dist/commands/scan-command/helpers.d.ts CHANGED
@@ -12,7 +12,12 @@ export declare function withMetaAgentFinding(metadata: unknown, finding: {
12
12
  }): unknown;
13
13
  export declare function mergeMetaAgentMetadata(baseMetadata: unknown, agentMetadata: unknown): unknown;
14
14
  export declare function noEligibleDeepResourceNotes(): string[];
15
- export declare function parseLocalTextFindings(filePath: string, metadata: unknown): CodeGateReport["findings"];
15
+ /**
16
+ * Deterministically verify that a finding's evidence exists in the claimed file.
17
+ * Returns true if the evidence can be confirmed, false if it cannot.
18
+ */
19
+ export declare function verifyFindingEvidence(scanTarget: string, filePath: string, evidence: string | null | undefined): boolean;
20
+ export declare function parseLocalTextFindings(filePath: string, metadata: unknown, scanTarget?: string): CodeGateReport["findings"];
16
21
  export declare function remediationSummaryLines(input: {
17
22
  scanTarget: string;
18
23
  options: {
package/dist/commands/scan-command/helpers.js CHANGED
@@ -1,3 +1,4 @@
1
+ import { existsSync, readFileSync } from "node:fs";
1
2
  import { resolve } from "node:path";
2
3
  import { renderHtmlReport } from "../../reporter/html.js";
3
4
  import { renderJsonReport } from "../../reporter/json.js";
@@ -136,12 +137,56 @@ export function noEligibleDeepResourceNotes() {
136
137
  "Local stdio commands (for example `bash`) are still detected by Layer 2 but are never executed by deep scan.",
137
138
  ];
138
139
  }
139
- export function parseLocalTextFindings(filePath, metadata) {
140
+ /**
141
+ * Deterministically verify that a finding's evidence exists in the claimed file.
142
+ * Returns true if the evidence can be confirmed, false if it cannot.
143
+ */
144
+ export function verifyFindingEvidence(scanTarget, filePath, evidence) {
145
+ if (!evidence || evidence.trim().length === 0) {
146
+ return false;
147
+ }
148
+ const absolutePath = resolve(scanTarget, filePath);
149
+ if (!existsSync(absolutePath)) {
150
+ return false;
151
+ }
152
+ try {
153
+ const fileContent = readFileSync(absolutePath, "utf8");
154
+ // Normalize whitespace for comparison: collapse runs of whitespace to single spaces.
155
+ const normalizeWhitespace = (text) => text.replace(/\s+/gu, " ").trim();
156
+ const normalizedContent = normalizeWhitespace(fileContent);
157
+ const normalizedEvidence = normalizeWhitespace(evidence);
158
+ // Check if the evidence (or a substantial substring of it) appears in the file.
159
+ if (normalizedContent.includes(normalizedEvidence)) {
160
+ return true;
161
+ }
162
+ // Also try line-by-line matching for shorter evidence strings that may be exact line content.
163
+ const lines = fileContent.split(/\r?\n/u);
164
+ for (const line of lines) {
165
+ if (normalizeWhitespace(line).includes(normalizedEvidence)) {
166
+ return true;
167
+ }
168
+ }
169
+ return false;
170
+ }
171
+ catch {
172
+ return false;
173
+ }
174
+ }
175
+ export function parseLocalTextFindings(filePath, metadata, scanTarget) {
140
176
  if (!isRecord(metadata) || !Array.isArray(metadata.findings)) {
141
177
  return [];
142
178
  }
143
179
  return metadata.findings
144
180
  .filter((item) => isRecord(item))
181
+ .filter((item) => {
182
+ // When a scan target is provided, verify evidence exists in the actual file.
183
+ if (!scanTarget) {
184
+ return true;
185
+ }
186
+ const itemFilePath = typeof item.file_path === "string" ? item.file_path : filePath;
187
+ const itemEvidence = typeof item.evidence === "string" ? item.evidence : null;
188
+ return verifyFindingEvidence(scanTarget, itemFilePath, itemEvidence);
189
+ })
145
190
  .map((item, index) => ({
146
191
  rule_id: typeof item.id === "string" ? item.id : "layer3-local-text-analysis-finding",
147
192
  finding_id: typeof item.id === "string" ? item.id : `L3-local-${filePath}-${index}`,
package/dist/commands/scan-command.js CHANGED
@@ -1,9 +1,7 @@
1
- import { mkdtempSync, rmSync } from "node:fs";
2
- import { tmpdir } from "node:os";
3
- import { join, resolve } from "node:path";
1
+ import { resolve } from "node:path";
4
2
  import { applyConfigPolicy } from "../config.js";
5
3
  import { buildMetaAgentCommand, } from "../layer3-dynamic/command-builder.js";
6
- import { buildPromptEvidenceText, supportsToollessLocalTextAnalysis, } from "../layer3-dynamic/local-text-analysis.js";
4
+ import { supportsAgentLocalTextAnalysis } from "../layer3-dynamic/local-text-analysis.js";
7
5
  import { buildLocalTextAnalysisPrompt, buildSecurityAnalysisPrompt, } from "../layer3-dynamic/meta-agent.js";
8
6
  import { layer3OutcomesToFindings, mergeLayer3Findings, runDeepScanWithConsent, } from "../pipeline.js";
9
7
  import { mergeMetaAgentMetadata, metadataSummary, noEligibleDeepResourceNotes, parseLocalTextFindings, parseMetaAgentOutput, remediationSummaryLines, renderByFormat, summarizeRequestedTargetFindings, withMetaAgentFinding, } from "./scan-command/helpers.js";
@@ -223,67 +221,63 @@ export async function runScanAnalysis(input, deps) {
223
221
  if (!selectedAgent) {
224
222
  deepScanNotes.push("Local instruction-file analysis skipped because no meta-agent was selected.");
225
223
  }
226
- else if (!supportsToollessLocalTextAnalysis(selectedAgent.metaTool)) {
227
- deepScanNotes.push("Local instruction-file analysis was skipped because the selected agent does not support tool-less analysis.");
224
+ else if (!supportsAgentLocalTextAnalysis(selectedAgent.metaTool)) {
225
+ deepScanNotes.push("Local instruction-file analysis was skipped because the selected agent does not support read-only analysis.");
228
226
  }
229
227
  else {
230
- // Local instruction files are analyzed as inert text only; referenced URLs stay as evidence, not inputs.
228
+ // The agent reads files directly using read-only tools (Read, Glob, Grep).
229
+ // It runs in the scan target directory so it can access the files.
230
+ // No Bash, Write, Edit, or network tools are available — sandboxed to reading only.
231
231
  if (!deps.runMetaAgentCommand) {
232
232
  throw new Error("Meta-agent command runner not configured");
233
233
  }
234
- const isolatedWorkingDirectory = mkdtempSync(join(tmpdir(), "codegate-local-analysis-"));
235
- let executedLocalAnalyses = 0;
236
- try {
237
- for (const target of localTextTargets) {
238
- const prompt = buildLocalTextAnalysisPrompt({
239
- filePath: target.reportPath,
240
- textContent: buildPromptEvidenceText(target.textContent),
241
- referencedUrls: target.referencedUrls,
242
- });
243
- const command = buildMetaAgentCommand({
244
- tool: selectedAgent.metaTool,
245
- prompt,
246
- workingDirectory: isolatedWorkingDirectory,
247
- binaryPath: selectedAgent.binary,
248
- });
249
- command.timeoutMs = 60_000;
250
- const commandContext = {
251
- localFile: target,
252
- agent: selectedAgent,
253
- command,
254
- };
255
- const approvedCommand = input.options.force ||
256
- (deps.requestMetaAgentCommandConsent
257
- ? await deps.requestMetaAgentCommandConsent(commandContext)
258
- : false);
259
- if (!approvedCommand) {
260
- continue;
261
- }
262
- executedLocalAnalyses += 1;
263
- const commandResult = await deps.runMetaAgentCommand(commandContext);
264
- if (commandResult.code !== 0) {
265
- deepScanNotes.push(`Local instruction-file analysis failed for ${target.reportPath}: ${commandResult.stderr || `exit code: ${commandResult.code}`}`);
266
- continue;
267
- }
234
+ // Collect all file paths and referenced URLs for a single agent invocation.
235
+ const allFilePaths = localTextTargets.map((target) => target.reportPath);
236
+ const allReferencedUrls = Array.from(new Set(localTextTargets.flatMap((target) => target.referencedUrls)));
237
+ const prompt = buildLocalTextAnalysisPrompt({
238
+ filePaths: allFilePaths,
239
+ referencedUrls: allReferencedUrls,
240
+ });
241
+ const command = buildMetaAgentCommand({
242
+ tool: selectedAgent.metaTool,
243
+ prompt,
244
+ workingDirectory: input.scanTarget,
245
+ binaryPath: selectedAgent.binary,
246
+ readOnlyAgent: true,
247
+ });
248
+ command.timeoutMs = 120_000;
249
+ const commandContext = {
250
+ localFile: localTextTargets[0],
251
+ agent: selectedAgent,
252
+ command,
253
+ };
254
+ const approvedCommand = input.options.force ||
255
+ (deps.requestMetaAgentCommandConsent
256
+ ? await deps.requestMetaAgentCommandConsent(commandContext)
257
+ : false);
258
+ if (approvedCommand) {
259
+ const commandResult = await deps.runMetaAgentCommand(commandContext);
260
+ if (commandResult.code !== 0) {
261
+ deepScanNotes.push(`Local instruction-file analysis failed: ${commandResult.stderr || `exit code: ${commandResult.code}`}`);
262
+ }
263
+ else {
268
264
  const parsedOutput = parseMetaAgentOutput(commandResult.stdout);
269
265
  if (parsedOutput === null) {
270
- deepScanNotes.push(`Local instruction-file analysis returned invalid JSON for ${target.reportPath}.`);
271
- continue;
266
+ deepScanNotes.push("Local instruction-file analysis returned invalid JSON.");
267
+ }
268
+ else {
269
+ const normalizedOutput = Array.isArray(parsedOutput)
270
+ ? { findings: parsedOutput }
271
+ : parsedOutput;
272
+ // Distribute findings across their respective file paths.
273
+ for (const target of localTextTargets) {
274
+ const localFindings = parseLocalTextFindings(target.reportPath, normalizedOutput, input.scanTarget);
275
+ report = mergeLayer3Findings(report, localFindings);
276
+ }
277
+ deepScanNotes.push(`Local instruction-file analysis executed for ${localTextTargets.length} file${localTextTargets.length === 1 ? "" : "s"} (read-only agent).`);
272
278
  }
273
- const normalizedOutput = Array.isArray(parsedOutput)
274
- ? { findings: parsedOutput }
275
- : parsedOutput;
276
- const localFindings = parseLocalTextFindings(target.reportPath, normalizedOutput);
277
- report = mergeLayer3Findings(report, localFindings);
278
279
  }
279
280
  }
280
- finally {
281
- rmSync(isolatedWorkingDirectory, { recursive: true, force: true });
282
- }
283
- if (executedLocalAnalyses > 0) {
284
- const suffix = executedLocalAnalyses === 1 ? "" : "s";
285
- deepScanNotes.push(`Local instruction-file analysis executed for ${executedLocalAnalyses} file${suffix}.`);
286
- }
287
281
  }
288
282
  }
289
283
  }
package/dist/commands/scan-content-command.d.ts ADDED
@@ -0,0 +1,16 @@
1
+ import { type CodeGateConfig } from "../config.js";
2
+ export declare const SCAN_CONTENT_TYPES: readonly ["json", "yaml", "toml", "markdown", "text"];
3
+ export type ScanContentType = (typeof SCAN_CONTENT_TYPES)[number];
4
+ export interface ExecuteScanContentCommandInput {
5
+ version: string;
6
+ cwd: string;
7
+ content: string;
8
+ type: ScanContentType;
9
+ config: CodeGateConfig;
10
+ }
11
+ export interface ExecuteScanContentCommandDeps {
12
+ stdout: (message: string) => void;
13
+ stderr: (message: string) => void;
14
+ setExitCode: (code: number) => void;
15
+ }
16
+ export declare function executeScanContentCommand(input: ExecuteScanContentCommandInput, deps: ExecuteScanContentCommandDeps): Promise<void>;
package/dist/commands/scan-content-command.js ADDED
@@ -0,0 +1,61 @@
1
+ import { applyConfigPolicy } from "../config.js";
2
+ import { loadKnowledgeBase } from "../layer1-discovery/knowledge-base.js";
3
+ import { parseConfigContent } from "../layer1-discovery/config-parser.js";
4
+ import { runStaticPipeline } from "../pipeline.js";
5
+ import { renderByFormat } from "./scan-command/helpers.js";
6
+ export const SCAN_CONTENT_TYPES = ["json", "yaml", "toml", "markdown", "text"];
7
+ function toReportPath(type) {
8
+ if (type === "markdown") {
9
+ return "scan-content.md";
10
+ }
11
+ if (type === "text") {
12
+ return "scan-content.txt";
13
+ }
14
+ return `scan-content.${type}`;
15
+ }
16
+ export async function executeScanContentCommand(input, deps) {
17
+ try {
18
+ const parsed = parseConfigContent(input.content, input.type);
19
+ if (!parsed.ok) {
20
+ throw new Error(parsed.error);
21
+ }
22
+ const kbVersion = loadKnowledgeBase().schemaVersion;
23
+ const report = applyConfigPolicy(runStaticPipeline({
24
+ version: input.version,
25
+ kbVersion,
26
+ scanTarget: `scan-content:${input.type}`,
27
+ toolsDetected: [],
28
+ projectRoot: input.cwd,
29
+ files: [
30
+ {
31
+ filePath: toReportPath(input.type),
32
+ format: input.type,
33
+ parsed: parsed.data,
34
+ textContent: input.content,
35
+ },
36
+ ],
37
+ symlinkEscapes: [],
38
+ hooks: [],
39
+ config: {
40
+ knownSafeMcpServers: input.config.known_safe_mcp_servers,
41
+ knownSafeFormatters: input.config.known_safe_formatters,
42
+ knownSafeLspServers: input.config.known_safe_lsp_servers,
43
+ knownSafeHooks: input.config.known_safe_hooks,
44
+ blockedCommands: input.config.blocked_commands,
45
+ trustedApiDomains: input.config.trusted_api_domains,
46
+ unicodeAnalysis: input.config.unicode_analysis,
47
+ checkIdeSettings: input.config.check_ide_settings,
48
+ rulePackPaths: input.config.rule_pack_paths,
49
+ allowedRules: input.config.allowed_rules,
50
+ skipRules: input.config.skip_rules,
51
+ },
52
+ }), input.config);
53
+ deps.stdout(renderByFormat(input.config.output_format, report));
54
+ deps.setExitCode(report.summary.exit_code);
55
+ }
56
+ catch (error) {
57
+ const message = error instanceof Error ? error.message : String(error);
58
+ deps.stderr(`Scan content failed: ${message}`);
59
+ deps.setExitCode(3);
60
+ }
61
+ }
package/dist/config/suppression-policy.d.ts ADDED
@@ -0,0 +1,14 @@
1
+ import type { Finding } from "../types/finding.js";
2
+ export interface SuppressionRule {
3
+ rule_id?: string;
4
+ file_path?: string;
5
+ severity?: Finding["severity"];
6
+ category?: Finding["category"];
7
+ cwe?: string;
8
+ fingerprint?: string;
9
+ }
10
+ export interface SuppressionPolicy {
11
+ suppress_findings?: readonly string[];
12
+ suppression_rules?: readonly SuppressionRule[];
13
+ }
14
+ export declare function applySuppressionPolicy<T extends Finding>(findings: T[], policy: SuppressionPolicy): T[];