npm - trickle-cli - Versions diffs - 0.1.189 → 0.1.191 - Mend

trickle-cli 0.1.189 → 0.1.191

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/commands/eval.d.ts +1 -0
package/dist/commands/eval.js +17 -1
package/dist/commands/security.d.ts +1 -6
package/dist/commands/security.js +95 -1
package/dist/index.js +1 -0
package/package.json +1 -1
package/src/commands/eval.ts +19 -2
package/src/commands/security.ts +101 -3
package/src/index.ts +1 -0

package/dist/commands/eval.d.ts CHANGED Viewed

@@ -13,4 +13,5 @@
  */
 export declare function evalCommand(opts: {
     json?: boolean;
+    failUnder?: string;
 }): void;

package/dist/commands/eval.js CHANGED Viewed

@@ -76,7 +76,15 @@ function evalCommand(opts) {
     }
     const result = scoreRun(agentEvents, llmCalls, errors, mcpCalls);
     if (opts.json) {
-        console.log(JSON.stringify(result, null, 2));
+        const threshold = opts.failUnder ? parseInt(opts.failUnder, 10) : undefined;
+        const output = {
+            ...result,
+            ...(threshold !== undefined ? { threshold, passed: result.overallScore >= threshold } : {}),
+        };
+        console.log(JSON.stringify(output, null, 2));
+        if (threshold !== undefined && result.overallScore < threshold) {
+            process.exit(1);
+        }
         return;
     }
     // Pretty print
@@ -104,6 +112,14 @@ function evalCommand(opts) {
         }
     }
     console.log('');
+    // CI mode: exit with non-zero if score below threshold
+    if (opts.failUnder) {
+        const threshold = parseInt(opts.failUnder, 10);
+        if (!isNaN(threshold) && result.overallScore < threshold) {
+            console.log(chalk_1.default.red(`  FAIL: Score ${result.overallScore} is below threshold ${threshold}`));
+            process.exit(1);
+        }
+    }
 }
 function printDimension(name, dim) {
     const bar = renderBar(dim.score);

package/dist/commands/security.d.ts CHANGED Viewed

@@ -21,12 +21,7 @@ interface SecurityFinding {
 }
 export interface SecurityResult {
     findings: SecurityFinding[];
-    scanned: {
-        variables: number;
-        queries: number;
-        logs: number;
-        observations: number;
-    };
+    scanned: Record<string, number>;
     summary: {
         critical: number;
         warning: number;

package/dist/commands/security.js CHANGED Viewed

@@ -154,6 +154,90 @@ function runSecurityScan(opts) {
         if (o.sampleOutput)
             findings.push(...scanValue(o.sampleOutput, 'function_output', `${o.module}.${o.functionName}`));
     }
+    // ── Agent Security: The "Lethal Trifecta" ──
+    // Scan LLM calls for prompt injection and data exfiltration
+    const llmCalls = readJsonl(path.join(trickleDir, 'llm.jsonl'));
+    for (const c of llmCalls) {
+        // Prompt injection patterns in LLM inputs
+        const input = String(c.inputPreview || '').toLowerCase();
+        const INJECTION_PATTERNS = [
+            { pattern: /ignore\s+(all\s+)?previous\s+instructions/i, name: 'Instruction override' },
+            { pattern: /you\s+are\s+now\s+a\s+/i, name: 'Role hijacking' },
+            { pattern: /system\s*:\s*you\s+are/i, name: 'System prompt injection' },
+            { pattern: /\bdo\s+not\s+follow\s+(any|the)\s+(previous|above)/i, name: 'Instruction bypass' },
+            { pattern: /forget\s+(all|everything|your)\s+(previous|prior|instructions)/i, name: 'Memory wipe attempt' },
+            { pattern: /pretend\s+you\s+(are|have)\s+(no|unrestricted)/i, name: 'Jailbreak attempt' },
+        ];
+        for (const inj of INJECTION_PATTERNS) {
+            if (inj.pattern.test(c.inputPreview || '') || inj.pattern.test(c.systemPrompt || '')) {
+                findings.push({
+                    severity: 'critical', category: 'prompt_injection',
+                    message: `${inj.name} detected in LLM input`,
+                    source: 'llm_call', location: c.model || 'unknown',
+                    evidence: (c.inputPreview || '').substring(0, 100),
+                });
+                break;
+            }
+        }
+        // Secrets in LLM outputs (data exfiltration)
+        const output = String(c.outputPreview || '');
+        if (output) {
+            const outputFindings = scanValue(output, 'llm_output', `${c.provider}/${c.model}`);
+            for (const f of outputFindings) {
+                f.category = 'data_exfiltration';
+                f.message = `LLM output contains ${f.message.toLowerCase()}`;
+                findings.push(f);
+            }
+        }
+        // Secrets in LLM inputs
+        const inputStr = String(c.inputPreview || '');
+        if (inputStr) {
+            const inputFindings = scanValue(inputStr, 'llm_input', `${c.provider}/${c.model}`);
+            for (const f of inputFindings) {
+                f.message = `Secret passed to LLM: ${f.message}`;
+                findings.push(f);
+            }
+        }
+    }
+    // Scan agent events for unauthorized tool calls
+    const agentEvents = readJsonl(path.join(trickleDir, 'agents.jsonl'));
+    const toolErrors = agentEvents.filter(e => e.event === 'tool_error');
+    const toolStarts = agentEvents.filter(e => e.event === 'tool_start');
+    // Detect privilege escalation: agent calling dangerous tools
+    const DANGEROUS_TOOLS = ['Bash', 'bash', 'shell', 'exec', 'eval', 'rm', 'sudo', 'chmod', 'kill'];
+    for (const t of toolStarts) {
+        const toolName = String(t.tool || '');
+        if (DANGEROUS_TOOLS.some(d => toolName.toLowerCase().includes(d.toLowerCase()))) {
+            // Check if tool input contains dangerous commands
+            const toolInput = String(t.toolInput || '').toLowerCase();
+            if (toolInput.includes('rm -rf') || toolInput.includes('sudo') || toolInput.includes('chmod 777') ||
+                toolInput.includes('curl') && toolInput.includes('|') || toolInput.includes('wget') && toolInput.includes('|')) {
+                findings.push({
+                    severity: 'critical', category: 'privilege_escalation',
+                    message: `Agent executed dangerous command via ${toolName}`,
+                    source: 'agent_tool', location: t.framework || 'agent',
+                    evidence: (t.toolInput || '').substring(0, 100),
+                });
+            }
+        }
+    }
+    // Scan MCP tool calls for secrets in args/responses
+    const mcpCalls = readJsonl(path.join(trickleDir, 'mcp.jsonl'));
+    for (const m of mcpCalls) {
+        if (m.args) {
+            const argsStr = typeof m.args === 'string' ? m.args : JSON.stringify(m.args);
+            const argsFindings = scanValue(argsStr, 'mcp_tool_args', `MCP: ${m.tool}`);
+            findings.push(...argsFindings);
+        }
+        if (m.resultPreview) {
+            const resultFindings = scanValue(m.resultPreview, 'mcp_tool_result', `MCP: ${m.tool}`);
+            for (const f of resultFindings) {
+                f.category = 'data_exfiltration';
+                f.message = `MCP tool response contains ${f.message.toLowerCase()}`;
+                findings.push(f);
+            }
+        }
+    }
     // Deduplicate
     const seen = new Set();
     const deduped = findings.filter(f => {
@@ -168,6 +252,9 @@ function runSecurityScan(opts) {
         warning: deduped.filter(f => f.severity === 'warning').length,
         info: deduped.filter(f => f.severity === 'info').length,
     };
+    scanned.llmCalls = llmCalls.length;
+    scanned.agentEvents = agentEvents.length;
+    scanned.mcpCalls = mcpCalls.length;
     const result = { findings: deduped, scanned, summary };
     if (opts?.json) {
         console.log(JSON.stringify(result, null, 2));
@@ -177,7 +264,14 @@ function runSecurityScan(opts) {
     console.log('');
     console.log(chalk_1.default.bold('  trickle security'));
     console.log(chalk_1.default.gray('  ' + '─'.repeat(50)));
-    console.log(chalk_1.default.gray(`  Scanned: ${scanned.variables} vars, ${scanned.queries} queries, ${scanned.logs} logs, ${scanned.observations} functions`));
+    const scanParts = [`${scanned.variables} vars`, `${scanned.queries} queries`, `${scanned.logs} logs`, `${scanned.observations} functions`];
+    if (scanned.llmCalls)
+        scanParts.push(`${scanned.llmCalls} LLM calls`);
+    if (scanned.agentEvents)
+        scanParts.push(`${scanned.agentEvents} agent events`);
+    if (scanned.mcpCalls)
+        scanParts.push(`${scanned.mcpCalls} MCP calls`);
+    console.log(chalk_1.default.gray(`  Scanned: ${scanParts.join(', ')}`));
     if (deduped.length === 0) {
         console.log(chalk_1.default.green('  No security issues found. ✓'));
     }

package/dist/index.js CHANGED Viewed

@@ -918,6 +918,7 @@ program
     .command("eval")
     .description("Score agent runs on reliability — completion, errors, cost efficiency, tool reliability, latency")
     .option("--json", "Output raw JSON for CI integration")
+    .option("--fail-under <score>", "Exit with code 1 if overall score is below this threshold (0-100, for CI)")
     .action(async (opts) => {
     const { evalCommand } = await Promise.resolve().then(() => __importStar(require("./commands/eval")));
     evalCommand(opts);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "trickle-cli",
-  "version": "0.1.189",
+  "version": "0.1.191",
   "description": "CLI for trickle runtime type observability",
   "bin": {
     "trickle": "dist/index.js"

package/src/commands/eval.ts CHANGED Viewed

@@ -36,7 +36,7 @@ function readJsonl(fp: string): any[] {
     .map(l => { try { return JSON.parse(l); } catch { return null; } }).filter(Boolean);
 }
-export function evalCommand(opts: { json?: boolean }): void {
+export function evalCommand(opts: { json?: boolean; failUnder?: string }): void {
   const dir = process.env.TRICKLE_LOCAL_DIR || path.join(process.cwd(), '.trickle');
   const agentEvents = readJsonl(path.join(dir, 'agents.jsonl'));
   const llmCalls = readJsonl(path.join(dir, 'llm.jsonl'));
@@ -51,7 +51,15 @@ export function evalCommand(opts: { json?: boolean }): void {
   const result = scoreRun(agentEvents, llmCalls, errors, mcpCalls);
   if (opts.json) {
-    console.log(JSON.stringify(result, null, 2));
+    const threshold = opts.failUnder ? parseInt(opts.failUnder, 10) : undefined;
+    const output = {
+      ...result,
+      ...(threshold !== undefined ? { threshold, passed: result.overallScore >= threshold } : {}),
+    };
+    console.log(JSON.stringify(output, null, 2));
+    if (threshold !== undefined && result.overallScore < threshold) {
+      process.exit(1);
+    }
     return;
   }
@@ -85,6 +93,15 @@ export function evalCommand(opts: { json?: boolean }): void {
   }
   console.log('');
+  // CI mode: exit with non-zero if score below threshold
+  if (opts.failUnder) {
+    const threshold = parseInt(opts.failUnder, 10);
+    if (!isNaN(threshold) && result.overallScore < threshold) {
+      console.log(chalk.red(`  FAIL: Score ${result.overallScore} is below threshold ${threshold}`));
+      process.exit(1);
+    }
+  }
 }
 function printDimension(name: string, dim: { score: number; detail: string }): void {

package/src/commands/security.ts CHANGED Viewed

@@ -82,14 +82,14 @@ function scanValue(value: unknown, source: string, location: string): SecurityFi
 export interface SecurityResult {
   findings: SecurityFinding[];
-  scanned: { variables: number; queries: number; logs: number; observations: number };
+  scanned: Record<string, number>;
   summary: { critical: number; warning: number; info: number };
 }
 export function runSecurityScan(opts?: { dir?: string; json?: boolean }): SecurityResult {
   const trickleDir = opts?.dir || process.env.TRICKLE_LOCAL_DIR || path.join(process.cwd(), '.trickle');
   const findings: SecurityFinding[] = [];
-  const scanned = { variables: 0, queries: 0, logs: 0, observations: 0 };
+  const scanned: Record<string, number> = { variables: 0, queries: 0, logs: 0, observations: 0 };
   // Scan variables
   const variables = readJsonl(path.join(trickleDir, 'variables.jsonl'));
@@ -133,6 +133,97 @@ export function runSecurityScan(opts?: { dir?: string; json?: boolean }): Securi
     if (o.sampleOutput) findings.push(...scanValue(o.sampleOutput, 'function_output', `${o.module}.${o.functionName}`));
   }
+  // ── Agent Security: The "Lethal Trifecta" ──
+  // Scan LLM calls for prompt injection and data exfiltration
+  const llmCalls = readJsonl(path.join(trickleDir, 'llm.jsonl'));
+  for (const c of llmCalls) {
+    // Prompt injection patterns in LLM inputs
+    const input = String(c.inputPreview || '').toLowerCase();
+    const INJECTION_PATTERNS = [
+      { pattern: /ignore\s+(all\s+)?previous\s+instructions/i, name: 'Instruction override' },
+      { pattern: /you\s+are\s+now\s+a\s+/i, name: 'Role hijacking' },
+      { pattern: /system\s*:\s*you\s+are/i, name: 'System prompt injection' },
+      { pattern: /\bdo\s+not\s+follow\s+(any|the)\s+(previous|above)/i, name: 'Instruction bypass' },
+      { pattern: /forget\s+(all|everything|your)\s+(previous|prior|instructions)/i, name: 'Memory wipe attempt' },
+      { pattern: /pretend\s+you\s+(are|have)\s+(no|unrestricted)/i, name: 'Jailbreak attempt' },
+    ];
+    for (const inj of INJECTION_PATTERNS) {
+      if (inj.pattern.test(c.inputPreview || '') || inj.pattern.test(c.systemPrompt || '')) {
+        findings.push({
+          severity: 'critical', category: 'prompt_injection',
+          message: `${inj.name} detected in LLM input`,
+          source: 'llm_call', location: c.model || 'unknown',
+          evidence: (c.inputPreview || '').substring(0, 100),
+        });
+        break;
+      }
+    }
+    // Secrets in LLM outputs (data exfiltration)
+    const output = String(c.outputPreview || '');
+    if (output) {
+      const outputFindings = scanValue(output, 'llm_output', `${c.provider}/${c.model}`);
+      for (const f of outputFindings) {
+        f.category = 'data_exfiltration';
+        f.message = `LLM output contains ${f.message.toLowerCase()}`;
+        findings.push(f);
+      }
+    }
+    // Secrets in LLM inputs
+    const inputStr = String(c.inputPreview || '');
+    if (inputStr) {
+      const inputFindings = scanValue(inputStr, 'llm_input', `${c.provider}/${c.model}`);
+      for (const f of inputFindings) {
+        f.message = `Secret passed to LLM: ${f.message}`;
+        findings.push(f);
+      }
+    }
+  }
+  // Scan agent events for unauthorized tool calls
+  const agentEvents = readJsonl(path.join(trickleDir, 'agents.jsonl'));
+  const toolErrors = agentEvents.filter(e => e.event === 'tool_error');
+  const toolStarts = agentEvents.filter(e => e.event === 'tool_start');
+  // Detect privilege escalation: agent calling dangerous tools
+  const DANGEROUS_TOOLS = ['Bash', 'bash', 'shell', 'exec', 'eval', 'rm', 'sudo', 'chmod', 'kill'];
+  for (const t of toolStarts) {
+    const toolName = String(t.tool || '');
+    if (DANGEROUS_TOOLS.some(d => toolName.toLowerCase().includes(d.toLowerCase()))) {
+      // Check if tool input contains dangerous commands
+      const toolInput = String(t.toolInput || '').toLowerCase();
+      if (toolInput.includes('rm -rf') || toolInput.includes('sudo') || toolInput.includes('chmod 777') ||
+          toolInput.includes('curl') && toolInput.includes('|') || toolInput.includes('wget') && toolInput.includes('|')) {
+        findings.push({
+          severity: 'critical', category: 'privilege_escalation',
+          message: `Agent executed dangerous command via ${toolName}`,
+          source: 'agent_tool', location: t.framework || 'agent',
+          evidence: (t.toolInput || '').substring(0, 100),
+        });
+      }
+    }
+  }
+  // Scan MCP tool calls for secrets in args/responses
+  const mcpCalls = readJsonl(path.join(trickleDir, 'mcp.jsonl'));
+  for (const m of mcpCalls) {
+    if (m.args) {
+      const argsStr = typeof m.args === 'string' ? m.args : JSON.stringify(m.args);
+      const argsFindings = scanValue(argsStr, 'mcp_tool_args', `MCP: ${m.tool}`);
+      findings.push(...argsFindings);
+    }
+    if (m.resultPreview) {
+      const resultFindings = scanValue(m.resultPreview, 'mcp_tool_result', `MCP: ${m.tool}`);
+      for (const f of resultFindings) {
+        f.category = 'data_exfiltration';
+        f.message = `MCP tool response contains ${f.message.toLowerCase()}`;
+        findings.push(f);
+      }
+    }
+  }
   // Deduplicate
   const seen = new Set<string>();
   const deduped = findings.filter(f => {
@@ -148,6 +239,9 @@ export function runSecurityScan(opts?: { dir?: string; json?: boolean }): Securi
     info: deduped.filter(f => f.severity === 'info').length,
   };
+  scanned.llmCalls = llmCalls.length;
+  scanned.agentEvents = agentEvents.length;
+  scanned.mcpCalls = mcpCalls.length;
   const result: SecurityResult = { findings: deduped, scanned, summary };
   if (opts?.json) {
@@ -159,7 +253,11 @@ export function runSecurityScan(opts?: { dir?: string; json?: boolean }): Securi
   console.log('');
   console.log(chalk.bold('  trickle security'));
   console.log(chalk.gray('  ' + '─'.repeat(50)));
-  console.log(chalk.gray(`  Scanned: ${scanned.variables} vars, ${scanned.queries} queries, ${scanned.logs} logs, ${scanned.observations} functions`));
+  const scanParts = [`${scanned.variables} vars`, `${scanned.queries} queries`, `${scanned.logs} logs`, `${scanned.observations} functions`];
+  if (scanned.llmCalls) scanParts.push(`${scanned.llmCalls} LLM calls`);
+  if (scanned.agentEvents) scanParts.push(`${scanned.agentEvents} agent events`);
+  if (scanned.mcpCalls) scanParts.push(`${scanned.mcpCalls} MCP calls`);
+  console.log(chalk.gray(`  Scanned: ${scanParts.join(', ')}`));
   if (deduped.length === 0) {
     console.log(chalk.green('  No security issues found. ✓'));

package/src/index.ts CHANGED Viewed

@@ -951,6 +951,7 @@ program
   .command("eval")
   .description("Score agent runs on reliability — completion, errors, cost efficiency, tool reliability, latency")
   .option("--json", "Output raw JSON for CI integration")
+  .option("--fail-under <score>", "Exit with code 1 if overall score is below this threshold (0-100, for CI)")
   .action(async (opts) => {
     const { evalCommand } = await import("./commands/eval");
     evalCommand(opts);