npm - @jayarrowz/mcp-arsr - Versions diffs - 1.0.0 - Mend

@jayarrowz/mcp-arsr 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/.gitattributes +2 -0
package/Dockerfile +19 -0
package/LICENSE +21 -0
package/README.md +125 -0
package/dist/src/index.d.ts +1 -0
package/dist/src/index.js +325 -0
package/dist/src/schemas/tools.d.ts +85 -0
package/dist/src/schemas/tools.js +100 -0
package/dist/src/services/llm.d.ts +56 -0
package/dist/src/services/llm.js +361 -0
package/dist/src/types.d.ts +53 -0
package/dist/src/types.js +7 -0
package/glama.json +6 -0
package/package.json +31 -0
package/smithery.yaml +13 -0
package/src/index.ts +395 -0
package/src/schemas/tools.ts +118 -0
package/src/services/llm.ts +480 -0
package/src/types.ts +67 -0
package/tsconfig.json +16 -0

package/src/index.ts ADDED Viewed

@@ -0,0 +1,395 @@
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
+import express from "express";
+import {
+  DraftInputSchema,
+  DecomposeInputSchema,
+  ScoreInputSchema,
+  RetrieveInputSchema,
+  ReviseInputSchema,
+  ContinueInputSchema,
+} from "./schemas/tools.js";
+import type {
+  DraftInput,
+  DecomposeInput,
+  ScoreInput,
+  RetrieveInput,
+  ReviseInput,
+  ContinueInput,
+} from "./schemas/tools.js";
+import {
+  generateDraft,
+  decomposeClaims,
+  scoreClaims,
+  retrieveEvidence,
+  reviseDraft,
+  shouldContinue,
+} from "./services/llm.js";
+import { DEFAULT_CONFIG } from "./types.js";
+// Shared MCP server instance; every ARSR tool in this file registers itself on it.
+// NOTE(review): the reported version is "0.1.0" while this diff is for package
+// version 1.0.0 — confirm which value is intended.
+const server = new McpServer({ name: "arsr-mcp-server", version: "0.1.0" });
+// Tool: arsr_draft_response — delegates to generateDraft and returns the draft
+// text together with its refusal flag, both as pretty-printed JSON text and as
+// structured content.
+server.registerTool(
+  "arsr_draft_response",
+  {
+    title: "Draft Response",
+    description: `Generate an initial candidate response to a user query. This is the first step in the ARSR refinement loop.
+The draft is generated by an inner LLM and may contain inaccuracies — that's expected. Subsequent tools (decompose, score, retrieve, revise) will iteratively correct it.
+Args:
+  - query (string): The user's question to answer
+  - context (string, optional): Additional context or constraints
+Returns:
+  { "draft": "The generated response text", "is_refusal": false }`,
+    inputSchema: DraftInputSchema,
+    annotations: {
+      readOnlyHint: true,
+      destructiveHint: false,
+      idempotentHint: false,
+      openWorldHint: true,
+    },
+  },
+  async (params: DraftInput) => {
+    try {
+      const generated = await generateDraft(params.query, params.context);
+      // Re-pack only the two documented fields of the generateDraft result.
+      const output = { draft: generated.draft, is_refusal: generated.is_refusal };
+      const text = JSON.stringify(output, null, 2);
+      return { content: [{ type: "text", text }], structuredContent: output };
+    } catch (error) {
+      // Failures are reported as a tool-level error result instead of being rethrown.
+      const reason = error instanceof Error ? error.message : String(error);
+      return {
+        isError: true,
+        content: [{
+          type: "text",
+          text: `Error generating draft: ${reason}. Ensure ANTHROPIC_API_KEY is set.`,
+        }],
+      };
+    }
+  }
+);
+// Tool: arsr_decompose_claims — delegates to decomposeClaims and returns the
+// resulting claim list plus its length.
+server.registerTool(
+  "arsr_decompose_claims",
+  {
+    title: "Decompose Claims",
+    description: `Split a draft response into individually verifiable atomic claims.
+Each claim is a single factual statement that can be independently fact-checked. Opinions, hedges, and meta-commentary are excluded.
+If is_refusal is true (from arsr_draft_response output) and original_query is provided, claims will be extracted from the query instead of the draft.
+Args:
+  - draft (string): The response text to decompose
+  - original_query (string, optional): The user's original question, used as fallback if draft is a refusal
+  - is_refusal (boolean, optional): Whether the draft was classified as a refusal by arsr_draft_response
+Returns:
+  { "claims": [{ "id": "c1", "text": "The claim as a statement", "source_span": "exact quote from draft" }] }`,
+    inputSchema: DecomposeInputSchema,
+    annotations: {
+      readOnlyHint: true,
+      destructiveHint: false,
+      idempotentHint: true,
+      openWorldHint: false,
+    },
+  },
+  async (params: DecomposeInput) => {
+    try {
+      // An omitted is_refusal flag is treated as "not a refusal".
+      const refusal = params.is_refusal ?? false;
+      const claims = await decomposeClaims(params.draft, params.original_query, refusal);
+      // The payload carries a `count` alongside the claims themselves.
+      const output = { claims, count: claims.length };
+      const text = JSON.stringify(output, null, 2);
+      return { content: [{ type: "text", text }], structuredContent: output };
+    } catch (error) {
+      const reason = error instanceof Error ? error.message : String(error);
+      return {
+        isError: true,
+        content: [{ type: "text", text: `Error decomposing claims: ${reason}` }],
+      };
+    }
+  }
+);
+// Tool: arsr_score_uncertainty — delegates to scoreClaims and returns the scored
+// claims plus a summary (total, average confidence rounded to 3 decimals, number
+// of claims below DEFAULT_CONFIG.confidence_threshold, and the threshold itself).
+server.registerTool(
+  "arsr_score_uncertainty",
+  {
+    title: "Score Claim Uncertainty",
+    description: `Estimate confidence for each claim using semantic entropy and consistency analysis.
+Each claim receives a confidence score (0-1) and entropy score (0-1). Low confidence / high entropy indicates the claim should be fact-checked.
+Args:
+  - claims (array): Claims to score, each with { id, text, source_span }
+  - n_samples (number, optional): Number of rephrasings for entropy (default: 3)
+Returns:
+  { "scored": [{ "id": "c1", "text": "...", "confidence": 0.92, "entropy": 0.15, "method": "semantic_entropy" }] }
+Use the confidence_threshold (default 0.85) to filter which claims need evidence retrieval.`,
+    inputSchema: ScoreInputSchema,
+    annotations: {
+      readOnlyHint: true,
+      destructiveHint: false,
+      idempotentHint: false, // Stochastic
+      openWorldHint: false,
+    },
+  },
+  async (params: ScoreInput) => {
+    try {
+      // NOTE(review): params.n_samples is validated by ScoreInputSchema and
+      // documented above, but it is not forwarded to scoreClaims here — confirm
+      // whether scoreClaims should receive it.
+      const scored = await scoreClaims(params.claims);
+      const total = scored.length;
+      const confidenceSum = scored.reduce((sum, claim) => sum + claim.confidence, 0);
+      // Guard the division: with no scored claims the average would be NaN,
+      // which serialises to `null` in the text payload and leaks a raw NaN into
+      // structuredContent. Report 0 for the empty case instead.
+      const avgConfidence = total > 0 ? confidenceSum / total : 0;
+      const threshold = DEFAULT_CONFIG.confidence_threshold;
+      const lowConfCount = scored.filter((claim) => claim.confidence < threshold).length;
+      const output = {
+        scored,
+        summary: {
+          total_claims: total,
+          avg_confidence: Math.round(avgConfidence * 1000) / 1000,
+          low_confidence_count: lowConfCount,
+          threshold,
+        },
+      };
+      return {
+        content: [{ type: "text", text: JSON.stringify(output, null, 2) }],
+        structuredContent: output,
+      };
+    } catch (error) {
+      // Failures are reported as a tool-level error result instead of being rethrown.
+      return {
+        isError: true,
+        content: [{
+          type: "text",
+          text: `Error scoring claims: ${error instanceof Error ? error.message : String(error)}`,
+        }],
+      };
+    }
+  }
+);
+// Tool: arsr_retrieve_evidence — delegates to retrieveEvidence and returns the
+// evidence list plus a per-stance tally.
+server.registerTool(
+  "arsr_retrieve_evidence",
+  {
+    title: "Retrieve Evidence",
+    description: `Fetch evidence for low-confidence claims using uncertainty-guided retrieval.
+For each claim, the inner LLM generates smart search queries (adversarial by default — designed to DISPROVE the claim), executes web searches, and evaluates each result's stance (supports/contradicts/neutral).
+Args:
+  - claims_to_check (array): Low-confidence ScoredClaims to investigate
+  - strategy (string, optional): "adversarial" (default), "confirmatory", or "balanced"
+Returns:
+  { "evidence": [{ "claim_id": "c1", "docs": [...], "overall_stance": "contradicted", "summary": "..." }] }
+IMPORTANT: Only pass claims with confidence BELOW the threshold. Do not waste budget on high-confidence claims.`,
+    inputSchema: RetrieveInputSchema,
+    annotations: {
+      readOnlyHint: true,
+      destructiveHint: false,
+      idempotentHint: false,
+      openWorldHint: true,
+    },
+  },
+  async (params: RetrieveInput) => {
+    try {
+      // Whatever `method` string the caller sent is overwritten with the
+      // "semantic_entropy" literal before the claims are handed on.
+      const claims = params.claims_to_check.map((claim) => ({
+        ...claim,
+        method: "semantic_entropy" as const,
+      }));
+      const evidence = await retrieveEvidence(claims, params.strategy ?? "adversarial");
+
+      // Tally stances in one pass; anything that is not supported, contradicted
+      // or mixed ends up in the `insufficient` remainder.
+      let supported = 0;
+      let contradicted = 0;
+      let mixed = 0;
+      for (const item of evidence) {
+        if (item.overall_stance === "supported") {
+          supported += 1;
+        } else if (item.overall_stance === "contradicted") {
+          contradicted += 1;
+        } else if (item.overall_stance === "mixed") {
+          mixed += 1;
+        }
+      }
+
+      const output = {
+        evidence,
+        summary: {
+          claims_checked: evidence.length,
+          supported,
+          contradicted,
+          mixed,
+          insufficient: evidence.length - supported - contradicted - mixed,
+        },
+      };
+      const text = JSON.stringify(output, null, 2);
+      return { content: [{ type: "text", text }], structuredContent: output };
+    } catch (error) {
+      const reason = error instanceof Error ? error.message : String(error);
+      return {
+        isError: true,
+        content: [{ type: "text", text: `Error retrieving evidence: ${reason}` }],
+      };
+    }
+  }
+);
+// Tool: arsr_revise_response — delegates to reviseDraft and returns its result
+// unchanged, both as pretty-printed JSON text and as structured content.
+server.registerTool(
+  "arsr_revise_response",
+  {
+    title: "Revise Response",
+    description: `Rewrite the draft integrating evidence findings. Corrects contradicted claims, hedges mixed claims, and flags irreconcilable conflicts.
+IMPORTANT: If the draft was a refusal/non-answer (is_refusal from arsr_draft_response), pass original_query and is_refusal. The revision engine will then generate a COMPLETELY NEW answer from the evidence instead of trying to edit a non-answer.
+Args:
+  - draft (string): The current draft to revise
+  - evidence (array): Evidence from retrieve_evidence
+  - scored (array): Scored claims from score_uncertainty
+  - original_query (string, optional): The user's original question, critical if draft was a refusal
+  - is_refusal (boolean, optional): Whether the draft was classified as a refusal by arsr_draft_response
+Returns:
+  {
+    "revised": "The corrected response text",
+    "changes": [{ "claim_id": "c1", "action": "corrected", "original": "...", "revised": "...", "reason": "..." }],
+    "conflicts": [{ "claim_id": "c2", "description": "Sources disagree about..." }]
+  }`,
+    inputSchema: ReviseInputSchema,
+    annotations: {
+      readOnlyHint: true,
+      destructiveHint: false,
+      idempotentHint: false,
+      openWorldHint: false,
+    },
+  },
+  async (params: ReviseInput) => {
+    try {
+      // Whatever `method` string the caller sent is overwritten with the
+      // "semantic_entropy" literal before the claims are handed on.
+      const scoredClaims = params.scored.map((claim) => ({
+        ...claim,
+        method: "semantic_entropy" as const,
+      }));
+      // An omitted is_refusal flag is treated as "not a refusal".
+      const refusal = params.is_refusal ?? false;
+      const result = await reviseDraft(
+        params.draft,
+        params.evidence,
+        scoredClaims,
+        params.original_query,
+        refusal
+      );
+      const text = JSON.stringify(result, null, 2);
+      return { content: [{ type: "text", text }], structuredContent: result };
+    } catch (error) {
+      const reason = error instanceof Error ? error.message : String(error);
+      return {
+        isError: true,
+        content: [{ type: "text", text: `Error revising response: ${reason}` }],
+      };
+    }
+  }
+);
+// Tool: arsr_should_continue — delegates the stop/continue decision to
+// shouldContinue, filling in DEFAULT_CONFIG values for omitted options.
+server.registerTool(
+  "arsr_should_continue",
+  {
+    title: "Should Continue",
+    description: `Decide whether to run another refinement iteration or finalize the response.
+Uses three stopping criteria:
+1. Budget: stop if iteration >= max_iterations
+2. Threshold: stop if ALL claims exceed confidence_threshold
+3. Convergence: stop if average confidence didn't improve by ≥0.02
+Args:
+  - iteration (number): Current iteration (1-based)
+  - scored (array): Current confidence scores [{ id, confidence, entropy }]
+  - budget (number, optional): Max iterations (default: 3)
+  - confidence_threshold (number, optional): Target confidence (default: 0.85)
+  - previous_avg_confidence (number | null, optional): Prior iteration's avg confidence
+Returns:
+  { "decision": "continue" | "stop", "reason": "Explanation of why" }`,
+    inputSchema: ContinueInputSchema,
+    annotations: {
+      readOnlyHint: true,
+      destructiveHint: false,
+      idempotentHint: true,
+      openWorldHint: false,
+    },
+  },
+  async (params: ContinueInput) => {
+    try {
+      // The input only carries id/confidence/entropy, so the remaining claim
+      // fields are padded with empty strings and a fixed method literal.
+      const scoredClaims = params.scored.map((score) => ({
+        ...score,
+        text: "",
+        source_span: "",
+        method: "semantic_entropy" as const,
+      }));
+      const budget = params.budget ?? DEFAULT_CONFIG.max_iterations;
+      const threshold = params.confidence_threshold ?? DEFAULT_CONFIG.confidence_threshold;
+      const previousAvg = params.previous_avg_confidence ?? null;
+      const result = await shouldContinue(
+        params.iteration,
+        scoredClaims,
+        budget,
+        threshold,
+        previousAvg
+      );
+      const text = JSON.stringify(result, null, 2);
+      return { content: [{ type: "text", text }], structuredContent: result };
+    } catch (error) {
+      const reason = error instanceof Error ? error.message : String(error);
+      return {
+        isError: true,
+        content: [{ type: "text", text: `Error in continue decision: ${reason}` }],
+      };
+    }
+  }
+);
+/**
+ * Connects the shared server to a stdio transport and logs a startup line.
+ *
+ * The log goes to stderr — presumably so that stdout stays reserved for the
+ * stdio transport's own traffic (confirm against the SDK).
+ */
+async function runStdio(): Promise<void> {
+  const stdioTransport = new StdioServerTransport();
+  await server.connect(stdioTransport);
+  console.error("ARSR MCP Server running on stdio");
+}
+/**
+ * Starts an Express app that serves the MCP server over streamable HTTP.
+ *
+ * Each POST to /mcp gets its own StreamableHTTPServerTransport (no session id
+ * generator, JSON responses enabled) which is closed when the response closes.
+ * The listening port comes from the PORT environment variable, falling back
+ * to 3001.
+ *
+ * @throws Error if PORT is set to something that is not an integer in 0-65535.
+ */
+async function runHTTP(): Promise<void> {
+  const app = express();
+  app.use(express.json());
+  app.post("/mcp", async (req, res) => {
+    const transport = new StreamableHTTPServerTransport({
+      sessionIdGenerator: undefined,
+      enableJsonResponse: true,
+    });
+    res.on("close", () => transport.close());
+    try {
+      await server.connect(transport);
+      await transport.handleRequest(req, res, req.body);
+    } catch (error) {
+      // Without this guard a rejected connect/handleRequest becomes an
+      // unhandled promise rejection and the client never receives a response.
+      console.error("Error handling MCP request:", error);
+      if (!res.headersSent) {
+        res.status(500).json({
+          jsonrpc: "2.0",
+          error: { code: -32603, message: "Internal server error" },
+          id: null,
+        });
+      }
+    }
+  });
+  // Explicit radix, and reject garbage such as PORT=abc instead of passing NaN
+  // on to listen(). `||` is kept so an empty PORT still falls back to 3001.
+  const port = Number.parseInt(process.env.PORT || "3001", 10);
+  if (!Number.isInteger(port) || port < 0 || port > 65535) {
+    throw new Error(`Invalid PORT value: ${process.env.PORT}`);
+  }
+  app.listen(port, () => {
+    console.error(`ARSR MCP Server running on http://localhost:${port}/mcp`);
+  });
+}
+// Entry point: TRANSPORT=http selects the HTTP runner; any other value (or none)
+// selects stdio. A startup failure is logged and the process exits with code 1.
+const transport = process.env.TRANSPORT || "stdio";
+const startServer = transport === "http" ? runHTTP : runStdio;
+startServer().catch((error) => {
+  console.error("Server error:", error);
+  process.exit(1);
+});

package/src/schemas/tools.ts ADDED Viewed

@@ -0,0 +1,118 @@
+import { z } from "zod";
+/** Input for arsr_draft_response: a non-empty query plus optional context. Unknown keys are rejected. */
+export const DraftInputSchema = z
+  .object({
+    query: z.string().min(1, "Query must not be empty").describe("The user's question or request to answer"),
+    context: z.string().optional().describe("Optional additional context, prior conversation, or domain constraints"),
+  })
+  .strict();
+/** Parsed shape of {@link DraftInputSchema}. */
+export type DraftInput = z.infer<typeof DraftInputSchema>;
+/**
+ * Input for arsr_decompose_claims: a non-empty draft, plus the optional
+ * original query and refusal flag. Unknown keys are rejected.
+ */
+export const DecomposeInputSchema = z
+  .object({
+    draft: z.string().min(1, "Draft must not be empty").describe("The candidate response text to decompose into atomic claims"),
+    original_query: z.string().optional().describe("The user's original question. If the draft is a refusal/non-answer, claims will be extracted from this query instead."),
+    is_refusal: z.boolean().optional().describe("Whether the draft was classified as a refusal/non-answer by arsr_draft_response. If true (and original_query is provided), claims are extracted from the query instead of the draft."),
+  })
+  .strict();
+/** Parsed shape of {@link DecomposeInputSchema}. */
+export type DecomposeInput = z.infer<typeof DecomposeInputSchema>;
+/**
+ * Input for arsr_score_uncertainty: at least one claim, plus an optional
+ * sample count between 1 and 10. Unknown keys are rejected.
+ */
+export const ScoreInputSchema = z.object({
+  claims: z.array(z.object({
+    id: z.string().describe("Claim identifier (e.g. 'c1')"),
+    text: z.string().describe("The claim as a standalone factual statement"),
+    source_span: z.string().describe("The exact substring from the original draft"),
+  })).min(1, "At least one claim required")
+    .describe("Array of atomic claims to score"),
+  // `.default(3)` on its own already lets callers omit the field. The previous
+  // trailing `.optional()` wrapped the default, so in Zod 3 an omitted value
+  // short-circuited to `undefined` and the default was never applied.
+  n_samples: z.number()
+    .int().min(1).max(10).default(3)
+    .describe("Number of rephrasings for semantic entropy (default: 3)"),
+}).strict();
+/** Parsed shape of {@link ScoreInputSchema}. */
+export type ScoreInput = z.infer<typeof ScoreInputSchema>;
+/**
+ * Input for arsr_retrieve_evidence: at least one scored claim, plus an
+ * optional retrieval strategy. Unknown keys are rejected.
+ */
+export const RetrieveInputSchema = z.object({
+  claims_to_check: z.array(z.object({
+    id: z.string(),
+    text: z.string(),
+    source_span: z.string(),
+    confidence: z.number(),
+    entropy: z.number(),
+    method: z.string(),
+  })).min(1, "At least one claim required")
+    .describe("Low-confidence claims to gather evidence for"),
+  // `.default("adversarial")` on its own already lets callers omit the field.
+  // The previous trailing `.optional()` wrapped the default, so in Zod 3 an
+  // omitted value short-circuited to `undefined` and the default never applied.
+  strategy: z.enum(["adversarial", "confirmatory", "balanced"])
+    .default("adversarial")
+    .describe("Retrieval strategy: 'adversarial' generates counter-queries, 'confirmatory' seeks support, 'balanced' does both"),
+}).strict();
+/** Parsed shape of {@link RetrieveInputSchema}. */
+export type RetrieveInput = z.infer<typeof RetrieveInputSchema>;
+// One retrieved document attached to a claim's evidence.
+const reviseEvidenceDocSchema = z.object({
+  title: z.string(),
+  url: z.string(),
+  snippet: z.string(),
+  stance: z.enum(["supports", "contradicts", "neutral", "unclear"]),
+});
+// Evidence bundle for a single claim.
+const reviseEvidenceSchema = z.object({
+  claim_id: z.string(),
+  claim_text: z.string(),
+  docs: z.array(reviseEvidenceDocSchema),
+  overall_stance: z.enum(["supported", "contradicted", "mixed", "insufficient"]),
+  summary: z.string(),
+});
+// A claim together with its confidence/entropy scores.
+const reviseScoredClaimSchema = z.object({
+  id: z.string(),
+  text: z.string(),
+  source_span: z.string(),
+  confidence: z.number(),
+  entropy: z.number(),
+  method: z.string(),
+});
+/**
+ * Input for arsr_revise_response: a non-empty draft, the evidence and scored
+ * claims, plus the optional original query and refusal flag. Unknown top-level
+ * keys are rejected.
+ */
+export const ReviseInputSchema = z
+  .object({
+    draft: z.string().min(1).describe("The current draft text to revise"),
+    evidence: z.array(reviseEvidenceSchema).describe("Evidence gathered for each claim"),
+    scored: z.array(reviseScoredClaimSchema).describe("The scored claims from the uncertainty step"),
+    original_query: z.string().optional().describe("The user's original question. If the draft was a refusal, this is used to generate a new answer from evidence."),
+    is_refusal: z.boolean().optional().describe("Whether the draft was classified as a refusal/non-answer by arsr_draft_response. If true, a new response is generated from evidence instead of revising the refusal."),
+  })
+  .strict();
+/** Parsed shape of {@link ReviseInputSchema}. */
+export type ReviseInput = z.infer<typeof ReviseInputSchema>;
+/**
+ * Input for arsr_should_continue: the 1-based iteration number, the current
+ * per-claim scores, and optional budget / threshold / previous-average
+ * settings. Unknown keys are rejected.
+ *
+ * The three defaulted fields use `.default(...)` without a trailing
+ * `.optional()`: a default already lets callers omit the field, whereas
+ * wrapping it in `.optional()` made Zod 3 return `undefined` for an omitted
+ * value and never apply the default.
+ */
+export const ContinueInputSchema = z.object({
+  iteration: z.number()
+    .int().min(1)
+    .describe("Current iteration number (1-based)"),
+  scored: z.array(z.object({
+    id: z.string(),
+    confidence: z.number(),
+    entropy: z.number(),
+  })).describe("Current confidence scores for all claims"),
+  budget: z.number()
+    .int().min(1).max(10).default(3)
+    .describe("Maximum iterations allowed (default: 3)"),
+  confidence_threshold: z.number()
+    .min(0).max(1).default(0.85)
+    .describe("Stop when all claims exceed this confidence (default: 0.85)"),
+  previous_avg_confidence: z.number()
+    .nullable().default(null)
+    .describe("Average confidence from previous iteration for convergence detection"),
+}).strict();
+/** Parsed shape of {@link ContinueInputSchema}. */
+export type ContinueInput = z.infer<typeof ContinueInputSchema>;