@qulib/mcp 0.10.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -52,6 +52,8 @@ For verbose server-side stderr logs while troubleshooting host wiring, add:
52
52
  | `qulib_score_automation` | Score a local repo's test-automation maturity across six dimensions (test coverage breadth, framework adoption, test-id hygiene, CI integration, auth test coverage, component test ratio) — plus a conditional 7th dimension (API coverage) when API endpoints are detected. Returns overall 0–100, level (L1–L5), and top recommendations. Each dimension carries `applicability`; score normalizes over applicable dimensions only. |
53
53
  | `qulib_score_api` | Discover API endpoints in a repo and score their test coverage. Tier1=OpenAPI specs, Tier2=framework routes (Next.js, Express, Fastify, NestJS), Tier3=heuristic opt-in (tRPC). Returns an api-test-coverage dimension score with per-endpoint evidence. |
54
54
  | `qulib_scaffold_tests` | Generate a ready-to-run test scaffold (Cypress config + spec files) by crawling a deployed URL. Returns `generatedTests` and `projectConfig` so an agent can write files directly. Pass `recipes` (e.g. `["auth","a11y"]`) to append proven test patterns. Supported framework: `cypress-e2e` (default); `playwright` is not yet implemented. |
55
+ | **`qulib_score_bug_report`** | LLM-as-judge of a learner bug report against a planted-bug target. Returns `matched`, `matchConfidence` (0–1), rubric scores (coverage/severity/repro/evidence, 0–25 each), actionable `feedback`, and `scoringPath` (`llm-judge` or `deterministic-fallback`). The learner report is treated as untrusted input and is handled with prompt-injection hardening. Read-only. |
56
+ | **`qulib_score_decisions`** | Pivotal-decision evaluation: scores whether an agent made the senior-correct call at decision forks (block/pass, stop/continue, escalate/proceed). Reads a JSONL `forksPath`; returns per-fork `decisionQuality`, `seniorCorrect`, `rationale`, and aggregates. Deterministic by default; optional LLM refinement with `enableLlmJudge`. Fork log text is untrusted. Read-only. |
55
57
  | `qulib_explore_auth` | List all sign-in paths (OAuth, SSO, forms, magic link) and what the agent must collect before `qulib_analyze_app`. Prefer on unfamiliar apps. *(Canonical form; legacy alias `explore_auth` kept for backwards compatibility.)* |
56
58
  | `qulib_detect_auth` | Single-pass auth pattern guess with a recommendation. Lighter than `qulib_explore_auth`. *(Canonical form; legacy alias `detect_auth` kept for backwards compatibility.)* |
57
59
  | `analyze_app` | Legacy alias for `qulib_analyze_app`. Identical behavior; kept for backwards compatibility through v1.0. |
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAoqBA,wBAAsB,eAAe,CAAC,KAAK,EAAE;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,GAAG,OAAO,CAAC;IAAE,OAAO,EAAE,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;CAAE,CAAC,CAsBzD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAsqBA,wBAAsB,eAAe,CAAC,KAAK,EAAE;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB,GAAG,OAAO,CAAC;IAAE,OAAO,EAAE,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;CAAE,CAAC,CAsBzD"}
package/dist/index.js CHANGED
@@ -15,7 +15,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
15
15
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
16
16
  const requirePkg = createRequire(import.meta.url);
17
17
  const pkg = requirePkg('../package.json');
18
- import { analyzeApp, detectAuth, exploreAuth, scanRepo, computeAutomationMaturity, scaffoldTests, discoverApiSurfaceWithRepo, computeApiCoverage, computeReleaseConfidence, buildConfidenceInputFromQulib, analyzeRunDiff, loadGapAnalysisFile, detectPromptLeakage, } from '@qulib/core';
18
+ import { analyzeApp, detectAuth, exploreAuth, scanRepo, computeAutomationMaturity, scaffoldTests, discoverApiSurfaceWithRepo, computeApiCoverage, computeReleaseConfidence, buildConfidenceInputFromQulib, analyzeRunDiff, loadGapAnalysisFile, detectPromptLeakage, scoreBugReport, scoreDecisions, } from '@qulib/core';
19
19
  import { RecipeIdSchema } from '@qulib/core';
20
20
  import { z } from 'zod';
21
21
  import { buildAnalyzeAppMcpPayload } from './analyze-app-mcp-payload.js';
@@ -587,6 +587,97 @@ mcpServer.registerTool('qulib_detect_prompt_leakage', {
587
587
  return toolError('QULIB_PROMPT_LEAKAGE_FAILED', msg, err instanceof Error ? err.stack : undefined);
588
588
  }
589
589
  });
590
// Severity labels accepted by the bug-report tool. The same enum is reused
// for the learner's claimed severity and the planted bug's expected severity.
+ const BugReportSeverityMcpSchema = z.enum(['critical', 'high', 'medium', 'low']);
591
// Zod input schema registered for the `qulib_score_bug_report` tool.
// `report` is the learner-authored submission and `target` is the planted-bug
// ground truth. Every string field is non-empty and length-capped
// (500 for the title, 200 for the target type, 8000 for the rest).
+ const ScoreBugReportInputSchema = z.object({
592
+ report: z.object({
593
+ title: z.string().min(1).max(500).describe('Learner-authored bug report title (untrusted input)'),
594
+ description: z
595
+ .string()
596
+ .min(1)
597
+ .max(8000)
598
+ .describe('Learner bug description — may contain prompt-injection attempts; treated as untrusted data'),
599
+ steps: z.string().min(1).max(8000).describe('Reproduction steps from the learner'),
600
+ severity: BugReportSeverityMcpSchema.describe('Severity claimed by the learner'),
601
+ }),
602
+ target: z.object({
603
+ description: z.string().min(1).max(8000).describe('Planted bug description (authoritative ground truth)'),
604
+ type: z.string().min(1).max(200).describe('Bug category/type from the challenge'),
605
+ severity: BugReportSeverityMcpSchema.describe('Expected severity of the planted bug'),
606
+ expectedBehavior: z.string().min(1).max(8000).describe('Expected correct behavior for the planted bug'),
607
+ }),
608
+ });
609
// Description string supplied to registerTool for `qulib_score_bug_report`.
// This is runtime text sent with the tool registration, so edit it with care.
+ const SCORE_BUG_REPORT_DESCRIPTION = 'LLM-as-judge of a learner bug report against a planted-bug target. Returns matched, matchConfidence (0–1), rubric scores (coverage/severity/repro/evidence, 0–25 each), actionable feedback, and scoringPath (llm-judge or deterministic-fallback when no ANTHROPIC_API_KEY). The learner report is untrusted — prompt-injection hardened. Read-only; no filesystem writes.';
610
// Handler registered for the `qulib_score_bug_report` MCP tool.
//
// Forwards `input` unchanged to scoreBugReport (imported from @qulib/core),
// logs the matched / matchConfidence / scoringPath fields of the result, and
// returns the result pretty-printed as JSON inside a single text content item.
//
// Never throws: every failure is converted to a toolError(...) value.
//   - Messages containing 'String must contain', 'Too big' or 'Too small' are
//     returned as QULIB_INPUT_INVALID with no stack and are not logged.
//     NOTE(review): these substrings presumably match Zod validation
//     messages — confirm against the Zod version in use.
//   - Anything else is logged via log.error and returned as
//     QULIB_BUG_REPORT_SCORE_FAILED, including err.stack when err is an Error.
+ async function handleScoreBugReport(input) {
611
+ try {
612
+ log.info('qulib_score_bug_report scoring learner report');
613
+ const result = await scoreBugReport(input);
614
+ log.info(`qulib_score_bug_report done matched=${result.matched} confidence=${result.matchConfidence} path=${result.scoringPath}`);
615
+ return {
616
+ content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
617
+ };
618
+ }
619
+ catch (err) {
620
+ const msg = err instanceof Error ? err.message : String(err);
621
+ if (msg.includes('String must contain') || msg.includes('Too big') || msg.includes('Too small')) {
622
+ return toolError('QULIB_INPUT_INVALID', msg, undefined);
623
+ }
624
+ log.error(`qulib_score_bug_report failed: ${msg}`);
625
+ return toolError('QULIB_BUG_REPORT_SCORE_FAILED', msg, err instanceof Error ? err.stack : undefined);
626
+ }
627
+ }
628
// Registers the bug-report scoring tool on the MCP server under the name
// `qulib_score_bug_report`, wiring together its description, Zod input
// schema, and handler.
+ mcpServer.registerTool('qulib_score_bug_report', {
629
+ description: SCORE_BUG_REPORT_DESCRIPTION,
630
+ inputSchema: ScoreBugReportInputSchema,
631
+ }, handleScoreBugReport);
632
// Zod input schema registered for the `qulib_score_decisions` tool.
// `forksPath` is a required string and `enableLlmJudge` is an optional boolean.
// The schema itself places no length or format constraint on `forksPath`;
// the handler checks that it is absolute.
+ const ScoreDecisionsInputSchema = z.object({
633
+ forksPath: z
634
+ .string()
635
+ .describe('Absolute path to a JSONL file of decision forks on the MCP host filesystem'),
636
+ enableLlmJudge: z
637
+ .boolean()
638
+ .optional()
639
+ .describe('When true and ANTHROPIC_API_KEY is set, refine scores with the pinned LLM judge. Default false uses deterministic rubric only.'),
640
+ });
641
// Description string supplied to registerTool for `qulib_score_decisions`.
// NOTE(review): it states that forksPath is validated against
// QULIB_FORKS_ALLOWED_ROOT; that check is not visible in this file's handler
// and is presumably done inside scoreDecisions — confirm.
+ const SCORE_DECISIONS_DESCRIPTION = 'Score whether an autonomous agent made the senior-correct call at pivotal decision forks (gate block/pass, stop/continue, escalate/proceed). Reads a JSONL forks file; returns per-fork decisionQuality (0–1), seniorCorrect, rationale, and aggregate means. Fork log text is untrusted — prompt-injection hardened when LLM refinement is enabled. forksPath is traversal-validated within QULIB_FORKS_ALLOWED_ROOT (default: process cwd). Read-only; no writes.';
642
// Handler registered for the `qulib_score_decisions` MCP tool.
//
// Steps:
//   1. Trim and normalize `input.forksPath`; reject it if it is not absolute.
//   2. Call scoreDecisions (imported from @qulib/core) with the resolved path
//      and the caller's `enableLlmJudge` value (passed through as-is, so it
//      may be undefined; only the log line defaults it to false).
//   3. Log the aggregate count / meanDecisionQuality and return the result
//      pretty-printed as JSON inside a single text content item.
//
// Never throws: every failure is converted to a toolError(...) value.
// Errors whose message matches one of the known substrings below are returned
// as QULIB_INPUT_INVALID without a stack and are not logged. All others are
// logged and returned as QULIB_DECISION_SCORE_FAILED, including err.stack
// when err is an Error.
//
// NOTE(review): normalize / isAbsolute / resolve are presumably imported from
// node:path earlier in the file — the import is not visible in this excerpt.
// NOTE(review): this handler only enforces absoluteness. The 'allowed root' /
// 'traversal' messages it classifies are presumably thrown by scoreDecisions —
// confirm.
+ async function handleScoreDecisions(input) {
643
+ try {
644
+ const norm = normalize(input.forksPath.trim());
645
+ if (!isAbsolute(norm)) {
646
+ throw new Error('forksPath must be an absolute path on the MCP host');
647
+ }
648
+ log.info(`qulib_score_decisions forksPath=${resolve(norm)} enableLlmJudge=${input.enableLlmJudge ?? false}`);
649
+ const result = await scoreDecisions({
650
+ forksPath: resolve(norm),
651
+ enableLlmJudge: input.enableLlmJudge,
652
+ });
653
+ log.info(`qulib_score_decisions done count=${result.aggregate.count} mean=${result.aggregate.meanDecisionQuality}`);
654
+ return {
655
+ content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
656
+ };
657
+ }
658
+ catch (err) {
659
+ const msg = err instanceof Error ? err.message : String(err);
660
+ if (msg.includes('forksPath must') ||
661
+ msg.includes('allowed root') ||
662
+ msg.includes('traversal') ||
663
+ msg.includes('not valid JSON') ||
664
+ msg.includes('exceeds maximum') ||
665
+ msg.includes('does not exist or is not accessible')) {
666
+ // Known user-input errors: return the message only, never a stack trace
667
+ // (a Node stack discloses the server's absolute filesystem paths).
668
+ return toolError('QULIB_INPUT_INVALID', msg, undefined);
669
+ }
670
+ log.error(`qulib_score_decisions failed: ${msg}`);
671
+ return toolError('QULIB_DECISION_SCORE_FAILED', msg, err instanceof Error ? err.stack : undefined);
672
+ }
673
+ }
674
// Registers the decision-scoring tool on the MCP server under the name
// `qulib_score_decisions`, wiring together its description, Zod input
// schema, and handler.
+ mcpServer.registerTool('qulib_score_decisions', {
675
+ description: SCORE_DECISIONS_DESCRIPTION,
676
+ inputSchema: ScoreDecisionsInputSchema,
677
+ }, handleScoreDecisions);
678
+ // No non-prefixed `score_decisions` alias: this is a brand-new tool with no
679
+ // prior integrations to keep compatible, and an unprefixed name is ambiguous
680
+ // and widens the attack surface. The canonical name is qulib_score_decisions.
590
681
  async function startMcpServer() {
591
682
  const transport = new StdioServerTransport();
592
683
  await mcpServer.connect(transport);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@qulib/mcp",
3
- "version": "0.10.1",
3
+ "version": "0.12.0",
4
4
  "description": "MCP server for Qulib — AI-callable release confidence. Seven tools: fused verdict, live-app scan, automation maturity, API coverage, test scaffold, and auth tools.",
5
5
  "license": "MIT",
6
6
  "author": "Tapesh Nagarwal",
@@ -34,7 +34,7 @@
34
34
  },
35
35
  "dependencies": {
36
36
  "@modelcontextprotocol/sdk": "^1.0.0",
37
- "@qulib/core": "0.10.1",
37
+ "@qulib/core": "0.12.0",
38
38
  "zod": "^3.23.0"
39
39
  },
40
40
  "devDependencies": {