npm - skilltest - Versions diffs - 0.6.0 → 0.7.0 - Mend

skilltest 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/README.md CHANGED

@@ -8,11 +8,15 @@ The testing framework for Agent Skills. Lint, test triggering, and evaluate your
 `skilltest` is a standalone CLI for the Agent Skills ecosystem (spec: https://agentskills.io). Think of it as pytest for skills.
+The repository itself uses a fast Vitest suite for offline unit and integration
+coverage of the parser, linters, trigger math, config resolution, reporters,
+and linter orchestration.
 ## Demo
 GIF coming soon.
-![skilltest demo placeholder](https://via.placeholder.com/1200x420?text=skilltest+demo+gif+coming+soon)
+<!-- ![skilltest demo placeholder](https://via.placeholder.com/1200x420?text=skilltest+demo+gif+coming+soon) -->
 ## Why skilltest?
@@ -375,6 +379,8 @@ jobs:
         with:
           node-version: "20"
       - run: npm ci
+      - run: npm run lint
+      - run: npm run test
       - run: npm run build
       - run: npx skilltest lint path/to/skill --json
 ```
@@ -410,11 +416,15 @@ jobs:
 ```bash
 npm install
 npm run lint
+npm run test
 npm run build
 node dist/index.js --help
 ```
-Smoke tests:
+`npm test` runs the Vitest suite. The tests are offline and do not call model
+providers.
+Manual CLI smoke tests:
 ```bash
 node dist/index.js lint test-fixtures/sample-skill/

package/dist/index.js CHANGED

@@ -581,24 +581,6 @@ function runContentChecks(context) {
       message: "No obvious vague placeholder phrasing found."
     });
   }
-  if (context.frontmatter.rawFrontmatter && /[<>]/.test(context.frontmatter.rawFrontmatter)) {
-    issues.push({
-      id: "content.frontmatter-angle-brackets",
-      checkId: "content:angle-brackets",
-      title: "Frontmatter Angle Brackets",
-      status: "warn",
-      message: "Frontmatter contains angle bracket characters (< or >), which can be misinterpreted in some agents.",
-      suggestion: "Remove XML-like tags from frontmatter values when possible."
-    });
-  } else {
-    issues.push({
-      id: "content.frontmatter-angle-brackets",
-      checkId: "content:angle-brackets",
-      title: "Frontmatter Angle Brackets",
-      status: "pass",
-      message: "No angle bracket tokens detected in frontmatter."
-    });
-  }
   const secretsIssue = buildSecretsIssue(context);
   if (secretsIssue) {
     issues.push(secretsIssue);
@@ -951,6 +933,24 @@ function runFrontmatterChecks(context) {
       message: "license field is present."
     });
   }
+  if (context.frontmatter.rawFrontmatter && /[<>]/.test(context.frontmatter.rawFrontmatter)) {
+    issues.push({
+      id: "frontmatter.angle-brackets",
+      checkId: "frontmatter:angle-brackets",
+      title: "Frontmatter Angle Brackets",
+      status: "warn",
+      message: "Frontmatter contains angle bracket characters (< or >), which can be misinterpreted in some agents.",
+      suggestion: "Remove XML-like tags from frontmatter values when possible."
+    });
+  } else {
+    issues.push({
+      id: "frontmatter.angle-brackets",
+      checkId: "frontmatter:angle-brackets",
+      title: "Frontmatter Angle Brackets",
+      status: "pass",
+      message: "No angle bracket tokens detected in frontmatter."
+    });
+  }
   if (description && description.trim() !== "" && !descriptionLooksActionable(description)) {
     issues.push({
       id: "frontmatter.description.triggerability",
@@ -2314,12 +2314,13 @@ function extractJsonObject(raw) {
   }
   throw new Error("Grader did not return a JSON object.");
 }
-async function gradeResponse(options) {
-  const assertionList = options.assertions && options.assertions.length > 0 ? options.assertions : [
-    "The response follows the skill instructions faithfully.",
-    "The response is well-structured and actionable.",
-    "The response addresses the user prompt directly."
-  ];
+var DEFAULT_ASSERTIONS = [
+  "The response follows the skill instructions faithfully.",
+  "The response is well-structured and actionable.",
+  "The response addresses the user prompt directly."
+];
+function buildGraderPrompts(options) {
+  const assertions = options.assertions && options.assertions.length > 0 ? options.assertions : DEFAULT_ASSERTIONS;
   const systemPrompt = [
     "You are a strict evaluator for agent skill outputs.",
     "Assess each assertion and return JSON only.",
@@ -2336,15 +2337,26 @@ async function gradeResponse(options) {
     options.modelResponse,
     "",
     "Assertions to evaluate:",
-    assertionList.map((assertion, index) => `${index + 1}. ${assertion}`).join("\n")
+    assertions.map((assertion, index) => `${index + 1}. ${assertion}`).join("\n")
   ].join("\n");
-  const raw = await options.provider.sendMessage(systemPrompt, userPrompt, { model: options.model });
+  return {
+    assertions,
+    systemPrompt,
+    userPrompt
+  };
+}
+function parseGraderOutput(raw) {
   const parsed = graderOutputSchema.safeParse(extractJsonObject(raw));
   if (!parsed.success) {
     throw new Error(`Failed to parse grader output: ${parsed.error.issues[0]?.message ?? "invalid grader JSON"}`);
   }
   return parsed.data.assertions;
 }
+async function gradeResponse(options) {
+  const prompts = buildGraderPrompts(options);
+  const raw = await options.provider.sendMessage(prompts.systemPrompt, prompts.userPrompt, { model: options.model });
+  return parseGraderOutput(raw);
+}
 // src/utils/concurrency.ts
 async function pMap(items, fn, concurrency) {
@@ -2499,6 +2511,7 @@ var triggerQuerySchema = z4.object({
   should_trigger: z4.boolean()
 });
 var triggerQueryArraySchema = z4.array(triggerQuerySchema);
+var triggerNumQueriesSchema = z4.number().int().min(2).refine((value) => value % 2 === 0, "numQueries must be an even number.");
 var FAKE_SKILLS = [
   { name: "code-review", description: "Reviews code changes for bugs, regressions, and maintainability issues." },
   { name: "api-tester", description: "Designs and runs REST API tests, validating status codes and response shapes." },
@@ -2539,6 +2552,9 @@ function shuffle(values, rng) {
 function sample(values, count, rng) {
   return shuffle(values, rng).slice(0, Math.max(0, Math.min(count, values.length)));
 }
+function validateNumQueries(numQueries) {
+  return triggerNumQueriesSchema.parse(numQueries);
+}
 function parseJsonArrayFromModelOutput(raw) {
   const trimmed = raw.trim();
   if (trimmed.startsWith("[") && trimmed.endsWith("]")) {
@@ -2553,6 +2569,7 @@ function parseJsonArrayFromModelOutput(raw) {
   throw new Error("Model did not return a JSON array.");
 }
 async function generateQueriesWithModel(skill, provider, model, numQueries) {
+  validateNumQueries(numQueries);
   const shouldTriggerCount = Math.floor(numQueries / 2);
   const shouldNotTriggerCount = numQueries - shouldTriggerCount;
   const systemPrompt = [
@@ -2597,6 +2614,29 @@ function parseDecision(rawResponse, skillNames) {
   }
   return "unrecognized";
 }
+function prepareTriggerQueries(skill, queries, seed) {
+  const rng = createRng(seed);
+  return queries.map((testQuery) => {
+    const fakeCount = 5 + Math.floor(rng() * 5);
+    const fakeSkills = sample(FAKE_SKILLS, fakeCount, rng);
+    const allSkills = shuffle(
+      [
+        ...fakeSkills,
+        {
+          name: skill.frontmatter.name,
+          description: skill.frontmatter.description
+        }
+      ],
+      rng
+    );
+    return {
+      testQuery,
+      fakeSkills,
+      allSkills,
+      skillListText: allSkills.map((entry) => `- ${entry.name}: ${entry.description}`).join("\n")
+    };
+  });
+}
 function calculateMetrics(skillName, cases) {
   let truePositives = 0;
   let trueNegatives = 0;
@@ -2649,28 +2689,9 @@ function buildSuggestions(metrics) {
   return suggestions;
 }
 async function runTriggerTest(skill, options) {
-  const rng = createRng(options.seed);
   const queries = options.queries && options.queries.length > 0 ? triggerQueryArraySchema.parse(options.queries) : await generateQueriesWithModel(skill, options.provider, options.model, options.numQueries);
   const skillName = skill.frontmatter.name;
-  const preparedQueries = queries.map((testQuery) => {
-    const fakeCount = 5 + Math.floor(rng() * 5);
-    const fakeSkills = sample(FAKE_SKILLS, fakeCount, rng);
-    const allSkills = shuffle([
-      ...fakeSkills,
-      {
-        name: skill.frontmatter.name,
-        description: skill.frontmatter.description
-      }
-    ], rng);
-    const skillListText = allSkills.map((entry) => `- ${entry.name}: ${entry.description}`).join("\n");
-    return {
-      testQuery,
-      fakeCount,
-      fakeSkills,
-      allSkills,
-      skillListText
-    };
-  });
+  const preparedQueries = prepareTriggerQueries(skill, queries, options.seed);
   const systemPrompt = [
     "You are selecting one skill to activate for a user query.",
     "Choose the single best matching skill name from the provided list, or 'none' if no skill is a good fit.",