trickle-cli 0.1.189 → 0.1.191

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,4 +13,5 @@
13
13
  */
14
14
  export declare function evalCommand(opts: {
15
15
  json?: boolean;
16
+ failUnder?: string;
16
17
  }): void;
@@ -76,7 +76,15 @@ function evalCommand(opts) {
76
76
  }
77
77
  const result = scoreRun(agentEvents, llmCalls, errors, mcpCalls);
78
78
  if (opts.json) {
79
- console.log(JSON.stringify(result, null, 2));
79
+ const threshold = opts.failUnder ? parseInt(opts.failUnder, 10) : undefined;
80
+ const output = {
81
+ ...result,
82
+ ...(threshold !== undefined ? { threshold, passed: result.overallScore >= threshold } : {}),
83
+ };
84
+ console.log(JSON.stringify(output, null, 2));
85
+ if (threshold !== undefined && result.overallScore < threshold) {
86
+ process.exit(1);
87
+ }
80
88
  return;
81
89
  }
82
90
  // Pretty print
@@ -104,6 +112,14 @@ function evalCommand(opts) {
104
112
  }
105
113
  }
106
114
  console.log('');
115
+ // CI mode: exit with non-zero if score below threshold
116
+ if (opts.failUnder) {
117
+ const threshold = parseInt(opts.failUnder, 10);
118
+ if (!isNaN(threshold) && result.overallScore < threshold) {
119
+ console.log(chalk_1.default.red(` FAIL: Score ${result.overallScore} is below threshold ${threshold}`));
120
+ process.exit(1);
121
+ }
122
+ }
107
123
  }
108
124
  function printDimension(name, dim) {
109
125
  const bar = renderBar(dim.score);
@@ -21,12 +21,7 @@ interface SecurityFinding {
21
21
  }
22
22
  export interface SecurityResult {
23
23
  findings: SecurityFinding[];
24
- scanned: {
25
- variables: number;
26
- queries: number;
27
- logs: number;
28
- observations: number;
29
- };
24
+ scanned: Record<string, number>;
30
25
  summary: {
31
26
  critical: number;
32
27
  warning: number;
@@ -154,6 +154,90 @@ function runSecurityScan(opts) {
154
154
  if (o.sampleOutput)
155
155
  findings.push(...scanValue(o.sampleOutput, 'function_output', `${o.module}.${o.functionName}`));
156
156
  }
157
+ // ── Agent Security: The "Lethal Trifecta" ──
158
+ // Scan LLM calls for prompt injection and data exfiltration
159
+ const llmCalls = readJsonl(path.join(trickleDir, 'llm.jsonl'));
160
+ for (const c of llmCalls) {
161
+ // Prompt injection patterns in LLM inputs
162
+ const input = String(c.inputPreview || '').toLowerCase();
163
+ const INJECTION_PATTERNS = [
164
+ { pattern: /ignore\s+(all\s+)?previous\s+instructions/i, name: 'Instruction override' },
165
+ { pattern: /you\s+are\s+now\s+a\s+/i, name: 'Role hijacking' },
166
+ { pattern: /system\s*:\s*you\s+are/i, name: 'System prompt injection' },
167
+ { pattern: /\bdo\s+not\s+follow\s+(any|the)\s+(previous|above)/i, name: 'Instruction bypass' },
168
+ { pattern: /forget\s+(all|everything|your)\s+(previous|prior|instructions)/i, name: 'Memory wipe attempt' },
169
+ { pattern: /pretend\s+you\s+(are|have)\s+(no|unrestricted)/i, name: 'Jailbreak attempt' },
170
+ ];
171
+ for (const inj of INJECTION_PATTERNS) {
172
+ if (inj.pattern.test(c.inputPreview || '') || inj.pattern.test(c.systemPrompt || '')) {
173
+ findings.push({
174
+ severity: 'critical', category: 'prompt_injection',
175
+ message: `${inj.name} detected in LLM input`,
176
+ source: 'llm_call', location: c.model || 'unknown',
177
+ evidence: (c.inputPreview || '').substring(0, 100),
178
+ });
179
+ break;
180
+ }
181
+ }
182
+ // Secrets in LLM outputs (data exfiltration)
183
+ const output = String(c.outputPreview || '');
184
+ if (output) {
185
+ const outputFindings = scanValue(output, 'llm_output', `${c.provider}/${c.model}`);
186
+ for (const f of outputFindings) {
187
+ f.category = 'data_exfiltration';
188
+ f.message = `LLM output contains ${f.message.toLowerCase()}`;
189
+ findings.push(f);
190
+ }
191
+ }
192
+ // Secrets in LLM inputs
193
+ const inputStr = String(c.inputPreview || '');
194
+ if (inputStr) {
195
+ const inputFindings = scanValue(inputStr, 'llm_input', `${c.provider}/${c.model}`);
196
+ for (const f of inputFindings) {
197
+ f.message = `Secret passed to LLM: ${f.message}`;
198
+ findings.push(f);
199
+ }
200
+ }
201
+ }
202
+ // Scan agent events for unauthorized tool calls
203
+ const agentEvents = readJsonl(path.join(trickleDir, 'agents.jsonl'));
204
+ const toolErrors = agentEvents.filter(e => e.event === 'tool_error');
205
+ const toolStarts = agentEvents.filter(e => e.event === 'tool_start');
206
+ // Detect privilege escalation: agent calling dangerous tools
207
+ const DANGEROUS_TOOLS = ['Bash', 'bash', 'shell', 'exec', 'eval', 'rm', 'sudo', 'chmod', 'kill'];
208
+ for (const t of toolStarts) {
209
+ const toolName = String(t.tool || '');
210
+ if (DANGEROUS_TOOLS.some(d => toolName.toLowerCase().includes(d.toLowerCase()))) {
211
+ // Check if tool input contains dangerous commands
212
+ const toolInput = String(t.toolInput || '').toLowerCase();
213
+ if (toolInput.includes('rm -rf') || toolInput.includes('sudo') || toolInput.includes('chmod 777') ||
214
+ toolInput.includes('curl') && toolInput.includes('|') || toolInput.includes('wget') && toolInput.includes('|')) {
215
+ findings.push({
216
+ severity: 'critical', category: 'privilege_escalation',
217
+ message: `Agent executed dangerous command via ${toolName}`,
218
+ source: 'agent_tool', location: t.framework || 'agent',
219
+ evidence: (t.toolInput || '').substring(0, 100),
220
+ });
221
+ }
222
+ }
223
+ }
224
+ // Scan MCP tool calls for secrets in args/responses
225
+ const mcpCalls = readJsonl(path.join(trickleDir, 'mcp.jsonl'));
226
+ for (const m of mcpCalls) {
227
+ if (m.args) {
228
+ const argsStr = typeof m.args === 'string' ? m.args : JSON.stringify(m.args);
229
+ const argsFindings = scanValue(argsStr, 'mcp_tool_args', `MCP: ${m.tool}`);
230
+ findings.push(...argsFindings);
231
+ }
232
+ if (m.resultPreview) {
233
+ const resultFindings = scanValue(m.resultPreview, 'mcp_tool_result', `MCP: ${m.tool}`);
234
+ for (const f of resultFindings) {
235
+ f.category = 'data_exfiltration';
236
+ f.message = `MCP tool response contains ${f.message.toLowerCase()}`;
237
+ findings.push(f);
238
+ }
239
+ }
240
+ }
157
241
  // Deduplicate
158
242
  const seen = new Set();
159
243
  const deduped = findings.filter(f => {
@@ -168,6 +252,9 @@ function runSecurityScan(opts) {
168
252
  warning: deduped.filter(f => f.severity === 'warning').length,
169
253
  info: deduped.filter(f => f.severity === 'info').length,
170
254
  };
255
+ scanned.llmCalls = llmCalls.length;
256
+ scanned.agentEvents = agentEvents.length;
257
+ scanned.mcpCalls = mcpCalls.length;
171
258
  const result = { findings: deduped, scanned, summary };
172
259
  if (opts?.json) {
173
260
  console.log(JSON.stringify(result, null, 2));
@@ -177,7 +264,14 @@ function runSecurityScan(opts) {
177
264
  console.log('');
178
265
  console.log(chalk_1.default.bold(' trickle security'));
179
266
  console.log(chalk_1.default.gray(' ' + '─'.repeat(50)));
180
- console.log(chalk_1.default.gray(` Scanned: ${scanned.variables} vars, ${scanned.queries} queries, ${scanned.logs} logs, ${scanned.observations} functions`));
267
+ const scanParts = [`${scanned.variables} vars`, `${scanned.queries} queries`, `${scanned.logs} logs`, `${scanned.observations} functions`];
268
+ if (scanned.llmCalls)
269
+ scanParts.push(`${scanned.llmCalls} LLM calls`);
270
+ if (scanned.agentEvents)
271
+ scanParts.push(`${scanned.agentEvents} agent events`);
272
+ if (scanned.mcpCalls)
273
+ scanParts.push(`${scanned.mcpCalls} MCP calls`);
274
+ console.log(chalk_1.default.gray(` Scanned: ${scanParts.join(', ')}`));
181
275
  if (deduped.length === 0) {
182
276
  console.log(chalk_1.default.green(' No security issues found. ✓'));
183
277
  }
package/dist/index.js CHANGED
@@ -918,6 +918,7 @@ program
918
918
  .command("eval")
919
919
  .description("Score agent runs on reliability — completion, errors, cost efficiency, tool reliability, latency")
920
920
  .option("--json", "Output raw JSON for CI integration")
921
+ .option("--fail-under <score>", "Exit with code 1 if overall score is below this threshold (0-100, for CI)")
921
922
  .action(async (opts) => {
922
923
  const { evalCommand } = await Promise.resolve().then(() => __importStar(require("./commands/eval")));
923
924
  evalCommand(opts);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "trickle-cli",
3
- "version": "0.1.189",
3
+ "version": "0.1.191",
4
4
  "description": "CLI for trickle runtime type observability",
5
5
  "bin": {
6
6
  "trickle": "dist/index.js"
@@ -36,7 +36,7 @@ function readJsonl(fp: string): any[] {
36
36
  .map(l => { try { return JSON.parse(l); } catch { return null; } }).filter(Boolean);
37
37
  }
38
38
 
39
- export function evalCommand(opts: { json?: boolean }): void {
39
+ export function evalCommand(opts: { json?: boolean; failUnder?: string }): void {
40
40
  const dir = process.env.TRICKLE_LOCAL_DIR || path.join(process.cwd(), '.trickle');
41
41
  const agentEvents = readJsonl(path.join(dir, 'agents.jsonl'));
42
42
  const llmCalls = readJsonl(path.join(dir, 'llm.jsonl'));
@@ -51,7 +51,15 @@ export function evalCommand(opts: { json?: boolean }): void {
51
51
  const result = scoreRun(agentEvents, llmCalls, errors, mcpCalls);
52
52
 
53
53
  if (opts.json) {
54
- console.log(JSON.stringify(result, null, 2));
54
+ const threshold = opts.failUnder ? parseInt(opts.failUnder, 10) : undefined;
55
+ const output = {
56
+ ...result,
57
+ ...(threshold !== undefined ? { threshold, passed: result.overallScore >= threshold } : {}),
58
+ };
59
+ console.log(JSON.stringify(output, null, 2));
60
+ if (threshold !== undefined && result.overallScore < threshold) {
61
+ process.exit(1);
62
+ }
55
63
  return;
56
64
  }
57
65
 
@@ -85,6 +93,15 @@ export function evalCommand(opts: { json?: boolean }): void {
85
93
  }
86
94
 
87
95
  console.log('');
96
+
97
+ // CI mode: exit with non-zero if score below threshold
98
+ if (opts.failUnder) {
99
+ const threshold = parseInt(opts.failUnder, 10);
100
+ if (!isNaN(threshold) && result.overallScore < threshold) {
101
+ console.log(chalk.red(` FAIL: Score ${result.overallScore} is below threshold ${threshold}`));
102
+ process.exit(1);
103
+ }
104
+ }
88
105
  }
89
106
 
90
107
  function printDimension(name: string, dim: { score: number; detail: string }): void {
@@ -82,14 +82,14 @@ function scanValue(value: unknown, source: string, location: string): SecurityFi
82
82
 
83
83
  export interface SecurityResult {
84
84
  findings: SecurityFinding[];
85
- scanned: { variables: number; queries: number; logs: number; observations: number };
85
+ scanned: Record<string, number>;
86
86
  summary: { critical: number; warning: number; info: number };
87
87
  }
88
88
 
89
89
  export function runSecurityScan(opts?: { dir?: string; json?: boolean }): SecurityResult {
90
90
  const trickleDir = opts?.dir || process.env.TRICKLE_LOCAL_DIR || path.join(process.cwd(), '.trickle');
91
91
  const findings: SecurityFinding[] = [];
92
- const scanned = { variables: 0, queries: 0, logs: 0, observations: 0 };
92
+ const scanned: Record<string, number> = { variables: 0, queries: 0, logs: 0, observations: 0 };
93
93
 
94
94
  // Scan variables
95
95
  const variables = readJsonl(path.join(trickleDir, 'variables.jsonl'));
@@ -133,6 +133,97 @@ export function runSecurityScan(opts?: { dir?: string; json?: boolean }): Securi
133
133
  if (o.sampleOutput) findings.push(...scanValue(o.sampleOutput, 'function_output', `${o.module}.${o.functionName}`));
134
134
  }
135
135
 
136
+ // ── Agent Security: The "Lethal Trifecta" ──
137
+
138
+ // Scan LLM calls for prompt injection and data exfiltration
139
+ const llmCalls = readJsonl(path.join(trickleDir, 'llm.jsonl'));
140
+ for (const c of llmCalls) {
141
+ // Prompt injection patterns in LLM inputs
142
+ const input = String(c.inputPreview || '').toLowerCase();
143
+ const INJECTION_PATTERNS = [
144
+ { pattern: /ignore\s+(all\s+)?previous\s+instructions/i, name: 'Instruction override' },
145
+ { pattern: /you\s+are\s+now\s+a\s+/i, name: 'Role hijacking' },
146
+ { pattern: /system\s*:\s*you\s+are/i, name: 'System prompt injection' },
147
+ { pattern: /\bdo\s+not\s+follow\s+(any|the)\s+(previous|above)/i, name: 'Instruction bypass' },
148
+ { pattern: /forget\s+(all|everything|your)\s+(previous|prior|instructions)/i, name: 'Memory wipe attempt' },
149
+ { pattern: /pretend\s+you\s+(are|have)\s+(no|unrestricted)/i, name: 'Jailbreak attempt' },
150
+ ];
151
+ for (const inj of INJECTION_PATTERNS) {
152
+ if (inj.pattern.test(c.inputPreview || '') || inj.pattern.test(c.systemPrompt || '')) {
153
+ findings.push({
154
+ severity: 'critical', category: 'prompt_injection',
155
+ message: `${inj.name} detected in LLM input`,
156
+ source: 'llm_call', location: c.model || 'unknown',
157
+ evidence: (c.inputPreview || '').substring(0, 100),
158
+ });
159
+ break;
160
+ }
161
+ }
162
+
163
+ // Secrets in LLM outputs (data exfiltration)
164
+ const output = String(c.outputPreview || '');
165
+ if (output) {
166
+ const outputFindings = scanValue(output, 'llm_output', `${c.provider}/${c.model}`);
167
+ for (const f of outputFindings) {
168
+ f.category = 'data_exfiltration';
169
+ f.message = `LLM output contains ${f.message.toLowerCase()}`;
170
+ findings.push(f);
171
+ }
172
+ }
173
+
174
+ // Secrets in LLM inputs
175
+ const inputStr = String(c.inputPreview || '');
176
+ if (inputStr) {
177
+ const inputFindings = scanValue(inputStr, 'llm_input', `${c.provider}/${c.model}`);
178
+ for (const f of inputFindings) {
179
+ f.message = `Secret passed to LLM: ${f.message}`;
180
+ findings.push(f);
181
+ }
182
+ }
183
+ }
184
+
185
+ // Scan agent events for unauthorized tool calls
186
+ const agentEvents = readJsonl(path.join(trickleDir, 'agents.jsonl'));
187
+ const toolErrors = agentEvents.filter(e => e.event === 'tool_error');
188
+ const toolStarts = agentEvents.filter(e => e.event === 'tool_start');
189
+
190
+ // Detect privilege escalation: agent calling dangerous tools
191
+ const DANGEROUS_TOOLS = ['Bash', 'bash', 'shell', 'exec', 'eval', 'rm', 'sudo', 'chmod', 'kill'];
192
+ for (const t of toolStarts) {
193
+ const toolName = String(t.tool || '');
194
+ if (DANGEROUS_TOOLS.some(d => toolName.toLowerCase().includes(d.toLowerCase()))) {
195
+ // Check if tool input contains dangerous commands
196
+ const toolInput = String(t.toolInput || '').toLowerCase();
197
+ if (toolInput.includes('rm -rf') || toolInput.includes('sudo') || toolInput.includes('chmod 777') ||
198
+ toolInput.includes('curl') && toolInput.includes('|') || toolInput.includes('wget') && toolInput.includes('|')) {
199
+ findings.push({
200
+ severity: 'critical', category: 'privilege_escalation',
201
+ message: `Agent executed dangerous command via ${toolName}`,
202
+ source: 'agent_tool', location: t.framework || 'agent',
203
+ evidence: (t.toolInput || '').substring(0, 100),
204
+ });
205
+ }
206
+ }
207
+ }
208
+
209
+ // Scan MCP tool calls for secrets in args/responses
210
+ const mcpCalls = readJsonl(path.join(trickleDir, 'mcp.jsonl'));
211
+ for (const m of mcpCalls) {
212
+ if (m.args) {
213
+ const argsStr = typeof m.args === 'string' ? m.args : JSON.stringify(m.args);
214
+ const argsFindings = scanValue(argsStr, 'mcp_tool_args', `MCP: ${m.tool}`);
215
+ findings.push(...argsFindings);
216
+ }
217
+ if (m.resultPreview) {
218
+ const resultFindings = scanValue(m.resultPreview, 'mcp_tool_result', `MCP: ${m.tool}`);
219
+ for (const f of resultFindings) {
220
+ f.category = 'data_exfiltration';
221
+ f.message = `MCP tool response contains ${f.message.toLowerCase()}`;
222
+ findings.push(f);
223
+ }
224
+ }
225
+ }
226
+
136
227
  // Deduplicate
137
228
  const seen = new Set<string>();
138
229
  const deduped = findings.filter(f => {
@@ -148,6 +239,9 @@ export function runSecurityScan(opts?: { dir?: string; json?: boolean }): Securi
148
239
  info: deduped.filter(f => f.severity === 'info').length,
149
240
  };
150
241
 
242
+ scanned.llmCalls = llmCalls.length;
243
+ scanned.agentEvents = agentEvents.length;
244
+ scanned.mcpCalls = mcpCalls.length;
151
245
  const result: SecurityResult = { findings: deduped, scanned, summary };
152
246
 
153
247
  if (opts?.json) {
@@ -159,7 +253,11 @@ export function runSecurityScan(opts?: { dir?: string; json?: boolean }): Securi
159
253
  console.log('');
160
254
  console.log(chalk.bold(' trickle security'));
161
255
  console.log(chalk.gray(' ' + '─'.repeat(50)));
162
- console.log(chalk.gray(` Scanned: ${scanned.variables} vars, ${scanned.queries} queries, ${scanned.logs} logs, ${scanned.observations} functions`));
256
+ const scanParts = [`${scanned.variables} vars`, `${scanned.queries} queries`, `${scanned.logs} logs`, `${scanned.observations} functions`];
257
+ if (scanned.llmCalls) scanParts.push(`${scanned.llmCalls} LLM calls`);
258
+ if (scanned.agentEvents) scanParts.push(`${scanned.agentEvents} agent events`);
259
+ if (scanned.mcpCalls) scanParts.push(`${scanned.mcpCalls} MCP calls`);
260
+ console.log(chalk.gray(` Scanned: ${scanParts.join(', ')}`));
163
261
 
164
262
  if (deduped.length === 0) {
165
263
  console.log(chalk.green(' No security issues found. ✓'));
package/src/index.ts CHANGED
@@ -951,6 +951,7 @@ program
951
951
  .command("eval")
952
952
  .description("Score agent runs on reliability — completion, errors, cost efficiency, tool reliability, latency")
953
953
  .option("--json", "Output raw JSON for CI integration")
954
+ .option("--fail-under <score>", "Exit with code 1 if overall score is below this threshold (0-100, for CI)")
954
955
  .action(async (opts) => {
955
956
  const { evalCommand } = await import("./commands/eval");
956
957
  evalCommand(opts);