npm - workflow-ai - Versions diffs - 1.0.62 → 1.0.63 - Mend

workflow-ai 1.0.62 → 1.0.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/README.md +61 -0
package/agent-templates/CLAUDE.md.tpl +2 -0
package/agent-templates/QWEN.md.tpl +2 -0
package/package.json +2 -1
package/src/init.mjs +5 -4
package/src/lib/agent-spawner.mjs +338 -0
package/src/runner.mjs +15 -14
package/src/scripts/get-next-test-id.js +94 -0
package/src/scripts/migrate-backlog-to-tests.js +406 -0
package/src/scripts/run-skill-tests.js +1491 -0
package/src/scripts/scan-fixtures-for-secrets.js +248 -0
package/src/scripts/tests/timeout-cascade.test.js +28 -0
package/templates/plan-template.md +1 -0

package/README.md CHANGED Viewed

@@ -121,6 +121,67 @@ Skills are stored globally in `~/.workflow/skills/` and linked into projects via
 Use `workflow eject <skill>` to copy a skill into the project for customization.
+## Skill regression tests
+Трёхуровневая система тестирования скилов для проверки качества AI-агентов.
+### Три слоя тестирования
+| Level | Name | Description |
+|-------|------|-------------|
+| L0 | Static | Базовая проверка синтаксиса и структуры: YAML-валидация, проверка обязательных полей, линтер |
+| L1 | Deterministic | Детерминированные тесты: эталонные входные данные → ожидаемый результат (strict match) |
+| L2 | Rubric | Гибкая оценка по критериям: scorer выставляет баллы на основе качества результата |
+### Структура директорий
+```
+src/skills/<name>/tests/
+├── index.yaml      # Метаданные тестов, список test cases
+├── cases/          # Входные данные для тестов
+│   └── <case-id>/
+│       └── input.yaml
+├── fixtures/       # Ожидаемые выходные данные (для L1)
+│   └── <case-id>/
+│       └── expected.yaml
+└── rubrics/        # Критерии оценки (для L2)
+    └── <case-id>/
+        └── rubric.yaml
+```
+### Запуск тестов
+```bash
+npm run test:skills
+```
+### CLI-флаги
+| Flag | Description |
+|------|-------------|
+| `--skill <name>` | Запустить тесты только для указанного скила |
+| `--relevant` | Запустить только тесты, соответствующие изменённым файлам |
+| `--establish-baseline` | Запустить тесты и сохранить результаты как baseline |
+| `--baseline-ref <ref>` | Использовать конкретный baseline (коммит, тег) |
+| `--yes` | Автоматически подтверждать все действия |
+### Verdict-режимы
+| Mode | Description |
+|------|-------------|
+| `no-baseline` | Первый запуск — результаты сохраняются как baseline без сравнения |
+| `no-regression` | Сравнение с baseline — тест считается пройденным, если результат не хуже baseline |
+### Принцип git write
+Runner и коуч **не выполняют git write-операций**. Все изменения в кодовой базе делает исключительно пользователь. Runner только анализирует и рекомендует, но не коммитит.
+### First run on a new project
+1. Запустить тесты с флагом `--establish-baseline`
+2. Проверить результаты: красные тесты — ожидаемы для нового проекта
+3. Зафиксировать baseline: добавить каталог `current/` в индекс и закоммитить его отдельным baseline-коммитом (`git add current/ && git commit -m "baseline"`)
 ## Scripts
 Scripts are stored globally in `~/.workflow/scripts/` and linked as a single junction into `.workflow/src/scripts/`.

package/agent-templates/CLAUDE.md.tpl CHANGED Viewed

@@ -21,6 +21,8 @@
 | Выбор следующей задачи | `node .workflow/src/scripts/pick-next-task.js` |
 | Перемещение готовых в ready | `node .workflow/src/scripts/move-to-ready.js` |
+**Регрессионное тестирование скилов:** `node .workflow/src/scripts/run-skill-tests.js --skill <name>`. Подробности — в `.workflow/src/skills/<name>/tests/index.yaml`.
 ### Кастомизация (eject)
 | Действие | Команда |

package/agent-templates/QWEN.md.tpl CHANGED Viewed

@@ -21,6 +21,8 @@
 | Выбор следующей задачи | `node .workflow/src/scripts/pick-next-task.js` |
 | Перемещение готовых в ready | `node .workflow/src/scripts/move-to-ready.js` |
+**Регрессионное тестирование скилов:** `node .workflow/src/scripts/run-skill-tests.js --skill <name>`. Подробности — в `.workflow/src/skills/<name>/tests/index.yaml`.
 ### Кастомизация (eject)
 | Действие | Команда |

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "workflow-ai",
-  "version": "1.0.62",
+  "version": "1.0.63",
   "description": "AI Agent Workflow Coordinator — kanban-based pipeline for AI coding agents",
   "type": "module",
   "bin": {
@@ -32,6 +32,7 @@
   },
   "scripts": {
     "test": "node --test src/tests/*.test.mjs",
+    "test:skills": "node src/scripts/run-skill-tests.js --all",
     "release": "npm version patch && npm publish"
   },
   "dependencies": {

package/src/init.mjs CHANGED Viewed

@@ -357,7 +357,7 @@ export function initProject(targetPath = process.cwd(), options = {}) {
     errors: []
   };
-  // Step 1: Create .workflow/ structure (15 directories)
+  // Step 1: Create .workflow/ structure (16 directories)
   const directories = [
     'tickets/backlog',
     'tickets/ready',
@@ -370,13 +370,14 @@ export function initProject(targetPath = process.cwd(), options = {}) {
     'reports',
     'logs',
     'templates',
-    'src/skills'
+    'src/skills',
+    'tests/skills'
   ];
   for (const dir of directories) {
     ensureDir(join(workflowRoot, dir));
   }
-  result.steps.push('Created .workflow/ directory structure (15 directories)');
+  result.steps.push('Created .workflow/ directory structure (16 directories)');
   // Step 2: Ensure global dir and create skill junctions
   const globalDir = getGlobalDir();

package/src/lib/agent-spawner.mjs ADDED Viewed

@@ -0,0 +1,338 @@
+#!/usr/bin/env node
+import { spawn, execSync } from 'child_process';
+import path from 'path';
+/**
+ * Parser for the structured result an agent prints to stdout.
+ *
+ * The preferred format is a block delimited by two `---RESULT---` markers that
+ * contains `key: value` lines (a key with an empty value starts an indented
+ * multi-line value). When no such block exists, `fallbackParse` scans the raw
+ * output for a `status:` line and, failing that, for success/error keywords.
+ */
+const ResultParser = {
+  // Lower-case synonyms an agent may report, mapped to the canonical status.
+  STATUS_ALIASES: {
+    pass: 'passed',
+    approved: 'passed',
+    success: 'passed',
+    succeeded: 'passed',
+    ok: 'passed',
+    accepted: 'passed',
+    lgtm: 'passed',
+    fixed: 'passed',
+    resolved: 'passed',
+    fail: 'failed',
+    rejected: 'failed',
+    denied: 'failed',
+    not_passed: 'failed',
+    err: 'error',
+    crash: 'error',
+    timeout: 'error',
+  },
+  /**
+   * Maps a status synonym (case-insensitive) to its canonical form.
+   * @param {string} status - Status text reported by the agent.
+   * @returns {string} Canonical status, or the input unchanged when it is not an alias.
+   */
+  normalizeStatus(status) {
+    if (typeof status !== 'string') {
+      return status;
+    }
+    const lower = status.toLowerCase();
+    // Own-property check: a plain lookup would resolve names such as
+    // "constructor" or "toString" through Object.prototype.
+    const isAlias = Object.prototype.hasOwnProperty.call(ResultParser.STATUS_ALIASES, lower);
+    return isAlias ? ResultParser.STATUS_ALIASES[lower] : status;
+  },
+  /**
+   * Extracts the result block from agent output.
+   * @param {string} output - Full stdout of the agent.
+   * @param {string} stageId - Stage identifier, forwarded to the fallback parser.
+   * @returns {{status: string, data: object, raw: string, parsed: boolean}}
+   *   `parsed` is true only when a marker-delimited block was found.
+   */
+  parse(output, stageId) {
+    const marker = '---RESULT---';
+    // Prefer markers that stand alone on a line and take the LAST such pair,
+    // the same rule the runner's parser applies: the marker text may also occur
+    // earlier inside logs or titles, and those occurrences must not open a block.
+    const linePositions = Array.from(output.matchAll(/^---RESULT---\s*$/gm), (m) => m.index);
+    let startIdx;
+    let endIdx;
+    if (linePositions.length >= 2) {
+      [startIdx, endIdx] = linePositions.slice(-2);
+    } else {
+      // Backward-compatible fallback: first pair of markers anywhere in the text.
+      startIdx = output.indexOf(marker);
+      endIdx = startIdx !== -1 ? output.indexOf(marker, startIdx + marker.length) : -1;
+    }
+    if (startIdx !== -1 && endIdx !== -1) {
+      const resultBlock = output.substring(startIdx + marker.length, endIdx).trim();
+      const data = ResultParser.parseResultBlock(resultBlock);
+      const normalizedStatus = ResultParser.normalizeStatus(data.status || 'default');
+      return {
+        status: normalizedStatus,
+        data: data.data || {},
+        raw: output,
+        parsed: true
+      };
+    }
+    return ResultParser.fallbackParse(output, stageId);
+  },
+  /**
+   * Parses the text between the two markers into a status and a key/value map.
+   * @param {string} block - Block content without the markers.
+   * @returns {{status: string, data: Object<string, string>}} `status` is
+   *   'default' when the block has no non-empty `status:` line.
+   */
+  parseResultBlock(block) {
+    // Accept both LF and CRLF: a trailing "\r" would otherwise stop the
+    // `key: value` pattern below from matching at all.
+    const lines = block.split(/\r?\n/);
+    const data = {};
+    let status = 'default';
+    let currentKey = null;
+    let multilineValue = null;
+    // Stores the pending multi-line value (if any) under its key.
+    const flushMultiline = () => {
+      if (currentKey !== null && multilineValue !== null) {
+        data[currentKey] = multilineValue.replace(/\n$/, '');
+        currentKey = null;
+        multilineValue = null;
+      }
+    };
+    for (const line of lines) {
+      const topLevelMatch = line.match(/^([^:\s][^:]*):\s*(.*)$/);
+      if (topLevelMatch) {
+        flushMultiline();
+        const key = topLevelMatch[1].trim();
+        const value = topLevelMatch[2].trim();
+        if (value !== '') {
+          if (key === 'status') {
+            status = value;
+          } else {
+            data[key] = value;
+          }
+        } else {
+          // Empty value: the following indented/blank lines belong to this key.
+          currentKey = key;
+          multilineValue = '';
+        }
+      } else if (currentKey !== null && (line.startsWith(' ') || line.startsWith('\t') || line === '')) {
+        multilineValue += line + '\n';
+      } else if (currentKey !== null) {
+        // A non-indented line that is not `key: value` ends the multi-line value.
+        flushMultiline();
+      }
+    }
+    flushMultiline();
+    return { status, data };
+  },
+  /**
+   * Best-effort parse for output without a marker-delimited block.
+   * @param {string} output - Full stdout of the agent.
+   * @param {string} stageId - Stage identifier (not used by this implementation).
+   * @returns {{status: string, data: object, raw: string, parsed: boolean}}
+   *   Always reports `parsed: false`.
+   */
+  fallbackParse(output, stageId) {
+    const lines = output.split('\n');
+    let status = 'default';
+    const extractedData = {};
+    let inResultSection = false;
+    for (const line of lines) {
+      const trimmedLine = line.trim();
+      const statusMatch = trimmedLine.match(/^(?:status|Status):\s*(\w+)/i);
+      if (statusMatch) {
+        status = statusMatch[1];
+        inResultSection = true;
+        continue;
+      }
+      if (inResultSection) {
+        // After a status line, every further `word: value` line is collected as data.
+        const dataMatch = trimmedLine.match(/^(\w+):\s*(.+)$/i);
+        if (dataMatch && dataMatch[1].toLowerCase() !== 'status') {
+          extractedData[dataMatch[1]] = dataMatch[2];
+        }
+      }
+    }
+    if (status === 'default') {
+      const lowerOutput = output.toLowerCase();
+      if (lowerOutput.includes('completed') || lowerOutput.includes('success') || lowerOutput.includes('done')) {
+        // Success keywords leave the status at 'default'; only the hint is recorded.
+        extractedData._inferred = 'success_keywords';
+      } else if (lowerOutput.includes('error') || lowerOutput.includes('failed')) {
+        status = 'error';
+        extractedData._inferred = 'error_keywords';
+      }
+    }
+    const normalizedStatus = ResultParser.normalizeStatus(status);
+    return {
+      status: normalizedStatus,
+      data: extractedData,
+      raw: output,
+      parsed: false
+    };
+  }
+};
+/**
+ * Runs an agent CLI as a child process, streams its output to the console and
+ * resolves with the parsed result.
+ *
+ * @param {{command: string, args?: string[], workdir?: string}} agentConfig
+ *   Command to run, its base arguments and an optional working directory
+ *   (resolved against `projectRoot`).
+ * @param {string} prompt - Prompt text; appended as the last argument, or written
+ *   to stdin when the command runs through the Windows shell and the prompt is
+ *   multi-line.
+ * @param {object} [options]
+ * @param {number} [options.timeout=300] - Timeout in seconds.
+ * @param {object|null} [options.logger] - Optional logger; `info`, `warn`,
+ *   `error`, `timeout` and `cliCall` are called on it.
+ * @param {{parse: Function}} [options.resultParser] - Parser applied to the raw stdout.
+ * @param {string} [options.stageId='unknown'] - Identifier used in log and error messages.
+ * @param {string|null} [options.skillId] - When set, shown in the logged command
+ *   line in place of the prompt.
+ * @param {string} [options.projectRoot] - Base directory for `agentConfig.workdir`.
+ * @param {{current: any}|null} [options.currentChildRef] - Receives the child
+ *   process while it runs and `null` once it has closed.
+ * @returns {Promise<{status: string, output: string, stderr: string, result: object,
+ *   exitCode: number, parsed: boolean, durationMs: number}>}
+ *   Rejects on timeout, on a spawn error, and on a non-zero exit code unless a
+ *   result block with a non-default status was parsed (error `code` is
+ *   'NON_ZERO_EXIT' in that case).
+ */
+export async function spawnAgent(agentConfig, prompt, options = {}) {
+  const {
+    timeout = 300,
+    logger = null,
+    resultParser = ResultParser,
+    stageId = 'unknown',
+    skillId = null,
+    projectRoot = process.cwd(),
+    currentChildRef = null
+  } = options;
+  return new Promise((resolve, reject) => {
+    const args = [...(agentConfig.args ?? [])];
+    const useShell = process.platform === 'win32' && agentConfig.command !== 'node';
+    // Multi-line prompts go through stdin when a shell is involved
+    // (presumably because newlines do not survive shell argument passing).
+    const useStdin = useShell && prompt.includes('\n');
+    if (!useStdin) {
+      args.push(prompt);
+    }
+    if (logger) {
+      // The prompt is the last argument only when it was actually pushed above;
+      // in stdin mode nothing must be cut off the real argument list.
+      const argsWithoutPrompt = useStdin ? args : args.slice(0, -1);
+      const displayArgs = skillId ? [...argsWithoutPrompt, skillId] : args;
+      logger.info(`RUN ${agentConfig.command} ${displayArgs.join(' ')}`, stageId);
+      const promptLines = prompt.split('\n').filter((l) => l.trim());
+      for (const line of promptLines.slice(1)) {
+        logger.info(`  ${line}`, stageId);
+      }
+    }
+    const startTime = Date.now();
+    const child = spawn(agentConfig.command, args, {
+      cwd: path.resolve(projectRoot, agentConfig.workdir || '.'),
+      stdio: ['pipe', 'pipe', 'pipe'],
+      shell: useShell
+    });
+    if (currentChildRef) {
+      currentChildRef.current = child;
+    }
+    // Without a listener, a write error on stdin (e.g. the child exits before
+    // reading the prompt) would surface as an unhandled 'error' event.
+    child.stdin.on('error', (err) => {
+      if (logger) {
+        logger.warn(`stdin write failed: ${err.message}`, stageId);
+      }
+    });
+    if (useStdin) {
+      child.stdin.write(prompt);
+    }
+    child.stdin.end();
+    // Decode at the stream level so a multi-byte UTF-8 character split across
+    // two chunks is not turned into replacement characters.
+    child.stdout.setEncoding('utf8');
+    child.stderr.setEncoding('utf8');
+    let stdout = '';
+    let stderr = '';
+    let timedOut = false;
+    const timeoutId = setTimeout(() => {
+      timedOut = true;
+      if (process.platform === 'win32' && child.pid) {
+        // Best effort: taskkill /T also terminates the process tree; failures are ignored.
+        try { execSync(`taskkill /pid ${child.pid} /T /F`, { stdio: 'pipe' }); } catch {}
+      } else {
+        child.kill('SIGTERM');
+      }
+      if (logger) {
+        logger.timeout(stageId, timeout);
+      }
+      reject(new Error(`Stage "${stageId}" timed out after ${timeout}s`));
+    }, timeout * 1000);
+    let stdoutBuffer = '';
+    let agentText = '';
+    // Echoes text to the console and keeps it for the OUTPUT log section.
+    const emitText = (text) => {
+      process.stdout.write(text);
+      agentText += text;
+    };
+    // Handles one stdout line: JSON stream events contribute their text,
+    // anything else is passed through verbatim.
+    const handleLine = (line) => {
+      if (!line.trim()) return;
+      let event;
+      try {
+        event = JSON.parse(line);
+      } catch {
+        emitText(line + '\n');
+        return;
+      }
+      if (event === null || typeof event !== 'object') {
+        // A bare JSON scalar (e.g. "123") is ordinary output, not a stream event.
+        emitText(line + '\n');
+        return;
+      }
+      if (event.type === 'content_block_delta' && event.delta?.text) {
+        emitText(event.delta.text);
+      } else if (event.type === 'assistant' && Array.isArray(event.message?.content)) {
+        for (const block of event.message.content) {
+          if (block?.type === 'text' && block.text) {
+            emitText(block.text);
+          }
+        }
+      }
+    };
+    child.stdout.on('data', (chunk) => {
+      stdout += chunk;
+      stdoutBuffer += chunk;
+      const lines = stdoutBuffer.split('\n');
+      // The last element is an incomplete line; keep it for the next chunk.
+      stdoutBuffer = lines.pop();
+      lines.forEach(handleLine);
+    });
+    child.stderr.on('data', (chunk) => {
+      stderr += chunk;
+      process.stderr.write(chunk);
+    });
+    child.on('close', (code) => {
+      if (currentChildRef) {
+        currentChildRef.current = null;
+      }
+      clearTimeout(timeoutId);
+      const durationMs = Date.now() - startTime;
+      // Flush a final line that was not newline-terminated, using the same
+      // handling as streamed lines.
+      if (stdoutBuffer.trim()) {
+        handleLine(stdoutBuffer);
+      }
+      process.stdout.write('\n');
+      // The promise was already rejected by the timeout handler.
+      if (timedOut) return;
+      if (logger) {
+        logger.cliCall(agentConfig.command, args, code);
+        const trimmedOutput = agentText.trim();
+        if (trimmedOutput) {
+          logger.info(`OUTPUT ↓`, stageId);
+          for (const line of trimmedOutput.split('\n')) {
+            logger.info(`  ${line}`, stageId);
+          }
+          logger.info(`OUTPUT ↑`, stageId);
+        }
+        if (stderr.trim()) {
+          logger.warn(`STDERR ↓`, stageId);
+          for (const line of stderr.trim().split('\n')) {
+            logger.warn(`  ${line}`, stageId);
+          }
+          logger.warn(`STDERR ↑`, stageId);
+        }
+      }
+      const result = resultParser.parse(stdout, stageId);
+      if (code !== 0 && result.parsed && result.status && result.status !== 'default') {
+        // An explicit result block wins over the exit code.
+        if (logger) {
+          logger.warn(
+            `Agent exited with code ${code}, but RESULT was parsed (status: ${result.status}). Using parsed result.`,
+            stageId
+          );
+        }
+      } else if (code !== 0) {
+        const err = new Error(`Agent exited with code ${code}`);
+        err.code = 'NON_ZERO_EXIT';
+        err.exitCode = code;
+        err.stderr = stderr;
+        if (logger) {
+          logger.error(`Agent exited with code ${code}`, stageId);
+          if (stderr.trim()) {
+            for (const line of stderr.trim().split('\n')) {
+              logger.error(`  stderr: ${line}`, stageId);
+            }
+          }
+        }
+        reject(err);
+        return;
+      }
+      resolve({
+        status: result.status || 'default',
+        output: stdout,
+        stderr: stderr,
+        result: result.data || {},
+        exitCode: code,
+        parsed: result.parsed,
+        durationMs
+      });
+    });
+    child.on('error', (err) => {
+      clearTimeout(timeoutId);
+      if (!timedOut) {
+        if (logger) {
+          logger.error(`CLI error: ${err.message}`, stageId);
+        }
+        reject(err);
+      }
+    });
+  });
+}
+// Public API: ResultParser as a named export, plus a default-export object
+// bundling spawnAgent and ResultParser.
+export { ResultParser };
+export default { spawnAgent, ResultParser };

package/src/runner.mjs CHANGED Viewed

@@ -429,10 +429,19 @@ class ResultParser {
   parse(output, stageId) {
     const marker = '---RESULT---';
-    // Ищем ПОСЛЕДНЮЮ пару маркеров: printResult всегда печатается в конце скрипта,
-    // а маркер может случайно встретиться в логах до него (напр. в заголовке тикета).
-    const endIdx = output.lastIndexOf(marker);
-    const startIdx = endIdx !== -1 ? output.lastIndexOf(marker, endIdx - 1) : -1;
+    // Ищем маркеры ТОЛЬКО на отдельных строках (printResult выводит их на своих строках).
+    // Берём последнюю пару: маркер может случайно встретиться в логах/заголовках тикетов
+    // до финального блока (напр. "title: ... ---RESULT--- ..."). Regex ^---RESULT---$
+    // с multi-line флагом отсеивает такие вхождения.
+    const lineMarkerRegex = /^---RESULT---\s*$/gm;
+    const markerPositions = [];
+    let m;
+    while ((m = lineMarkerRegex.exec(output)) !== null) {
+      markerPositions.push(m.index);
+    }
+    const endIdx = markerPositions.length >= 2 ? markerPositions[markerPositions.length - 1] : -1;
+    const startIdx = markerPositions.length >= 2 ? markerPositions[markerPositions.length - 2] : -1;
     if (startIdx !== -1 && endIdx !== -1 && startIdx !== endIdx) {
       // Найдены маркеры — парсим структурированный блок
@@ -922,16 +931,8 @@ class StageExecutor {
       };
     }
-    // Курсор = (attempt - 1), clamped
-    const cursor = attempt - 1;
-    if (cursor >= compatible.length) {
-      return {
-        blocked: 'attempts_exhausted',
-        reason: `Attempt ${attempt} exceeds compatible agents list length (${compatible.length})`,
-        attempt,
-        triedAgents: compatible
-      };
-    }
+    // Курсор = (attempt - 1) % length — ротация по кругу
+    const cursor = (attempt - 1) % compatible.length;
     const agentId = compatible[cursor];
     // Клонируем stage с подменой instructions (для agents_by_type override)

package/src/scripts/get-next-test-id.js ADDED Viewed

@@ -0,0 +1,94 @@
+#!/usr/bin/env node
+/**
+ * get-next-test-id.js - Генератор ID для тест-кейсов
+ *
+ * Usage:
+ *   node get-next-test-id.js --skill coach
+ *   Output:
+ *   ---RESULT---
+ *   next_id: TC-COACH-002
+ *   ---RESULT---
+ */
+import fs from "fs";
+import path from "path";
+import { findProjectRoot } from "workflow-ai/lib/find-root.mjs";
+import { printResult } from "workflow-ai/lib/utils.mjs";
+// Project root, resolved once when the script loads; findMaxNumber builds its
+// search directories relative to it.
+const PROJECT_DIR = findProjectRoot();
+/**
+ * Reads the skill name from the command line.
+ *
+ * Accepts `--skill <name>` and `--skill=<name>`; when the flag is given more
+ * than once the last occurrence wins.
+ *
+ * @returns {string|null} The skill name, or null when no value was supplied.
+ */
+function parseArgs() {
+  const inlinePrefix = "--skill=";
+  const args = process.argv.slice(2);
+  let skill = null;
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i];
+    if (arg === "--skill" && i + 1 < args.length) {
+      skill = args[i + 1];
+      // Skip the value so it is not inspected as a flag itself.
+      i++;
+    } else if (arg.startsWith(inlinePrefix)) {
+      const value = arg.slice(inlinePrefix.length);
+      // An empty `--skill=` carries no value and is ignored.
+      if (value) {
+        skill = value;
+      }
+    }
+  }
+  return skill;
+}
+/**
+ * Finds the highest test-case number already used for a skill.
+ *
+ * Looks in `<project>/src/skills/<skill>/tests/cases` and
+ * `<project>/.workflow/tests/skills/<skill>/cases` (missing directories are
+ * skipped) for `TC-<SKILL>-<n>.yaml` files and for `TC-<SKILL>-<n>` directories.
+ * Names are matched case-insensitively.
+ *
+ * @param {string} skillLower - Skill name used in the directory paths.
+ * @param {string} skillUpper - Skill name used inside the test-case ID.
+ * @returns {number} The largest number found, or 0 when there is none.
+ */
+function findMaxNumber(skillLower, skillUpper) {
+  // The skill name is user input: escape it so characters such as "+" or "."
+  // are matched literally instead of being read as regex syntax.
+  const escapedSkill = skillUpper.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+  const fileRegex = new RegExp(`^TC-${escapedSkill}-(\\d+)\\.yaml$`, "i");
+  // Per-case directories (cases/<case-id>/...) reserve their number as well.
+  const dirRegex = new RegExp(`^TC-${escapedSkill}-(\\d+)$`, "i");
+  const dirs = [
+    path.join(PROJECT_DIR, "src", "skills", skillLower, "tests", "cases"),
+    path.join(PROJECT_DIR, ".workflow", "tests", "skills", skillLower, "cases"),
+  ];
+  let maxNum = 0;
+  for (const dir of dirs) {
+    if (!fs.existsSync(dir)) {
+      continue;
+    }
+    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
+      let match = null;
+      if (entry.isFile()) {
+        match = entry.name.match(fileRegex);
+      } else if (entry.isDirectory()) {
+        match = entry.name.match(dirRegex);
+      }
+      if (match) {
+        maxNum = Math.max(maxNum, Number.parseInt(match[1], 10));
+      }
+    }
+  }
+  return maxNum;
+}
+/**
+ * Renders a test-case number as text, left-padded with zeros to three characters.
+ * Longer numbers are returned unpadded.
+ * @param {number} num - Test-case number.
+ * @returns {string} Padded number, e.g. "007".
+ */
+function formatNumber(num) {
+  const digits = num.toString();
+  return digits.padStart(3, "0");
+}
+/**
+ * Script entry point: prints the next free test-case ID for the skill given
+ * via `--skill`, in the form `TC-<SKILL>-<NNN>`.
+ *
+ * Without a skill name it prints usage to stderr, reports an error result and
+ * exits with code 1.
+ */
+function main() {
+  const skill = parseArgs();
+  if (!skill) {
+    console.error("Usage:");
+    console.error("  node get-next-test-id.js --skill <name>");
+    printResult({
+      status: "error",
+      error: "Missing required argument: --skill <name>",
+    });
+    process.exit(1);
+  }
+  const skillLower = skill.toLowerCase();
+  // Hyphens are kept as they are; only the letter case changes for the ID.
+  const skillUpper = skill.toUpperCase();
+  const nextNum = findMaxNumber(skillLower, skillUpper) + 1;
+  const nextId = `TC-${skillUpper}-${formatNumber(nextNum)}`;
+  printResult({ status: "success", next_id: nextId });
+}
+main();