npm - aiox-core - Versions diffs - 5.0.2 → 5.0.3 - Mend

aiox-core 5.0.2 → 5.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (547) hide show

package/pro/squads/squad-creator-pro/scripts/create-agent-preflight.py ADDED Viewed

@@ -0,0 +1,243 @@
+#!/usr/bin/env python3
+"""
+create-agent-preflight.py - Deterministic preflight checks for create-agent task
+Purpose: Execute all deterministic validations BEFORE LLM engagement
+Usage: python3 create-agent-preflight.py --squad <squad_name> [--specialist <slug>] [--sources <path>] [--agent-name <name>] [--squads-path <dir>] [--format json|text]
+Output: JSON with validation results
+Deterministic checks (no LLM needed):
+1. Squad exists at squads/{squad_name}/
+2. Squad has config.yaml
+3. Squad has agents/ directory
+4. If specialist: check local sources exist
+5. Count source files and lines
+6. Validate naming conventions
+"""
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+from datetime import datetime
def count_lines(file_path: Path) -> int:
    """Return the number of lines in *file_path*, or 0 if it cannot be read.

    The file is decoded as UTF-8 with undecodable bytes dropped, so binary
    or mis-encoded content never raises a decoding error.

    Args:
        file_path: Path of the file to measure.

    Returns:
        The line count; 0 for an empty, missing, or unreadable path.
    """
    try:
        with open(file_path, encoding="utf-8", errors="ignore") as handle:
            return sum(1 for _ in handle)
    except (OSError, ValueError):
        # Best-effort by design: a missing file, a directory, a permission
        # problem (OSError) or an unusable path string (ValueError) counts as
        # zero lines. Programming errors are no longer silently swallowed.
        return 0
def validate_squad(squad_name: str, squads_path: str = "squads") -> dict:
    """Report whether a squad directory exists and which standard parts it has.

    Args:
        squad_name: Directory name of the squad inside *squads_path*.
        squads_path: Directory that contains all squads.

    Returns:
        A dict with the keys ``squad_name``, ``squad_path``, ``exists``,
        ``has_config``, ``has_agents_dir``, ``has_readme``, ``agent_count``
        and ``existing_agents`` (stems of ``agents/*.md``, sorted).
    """
    squad_path = Path(squads_path) / squad_name
    agents_dir = squad_path / "agents"

    # is_dir() rather than exists(): a stray *file* with the squad's name or
    # named "agents" must not be reported as a usable directory.
    has_agents_dir = agents_dir.is_dir()

    # Sorted so the payload is stable; glob order depends on the filesystem.
    existing_agents = (
        sorted(entry.stem for entry in agents_dir.glob("*.md"))
        if has_agents_dir
        else []
    )

    return {
        "squad_name": squad_name,
        "squad_path": str(squad_path),
        "exists": squad_path.is_dir(),
        "has_config": (squad_path / "config.yaml").exists(),
        "has_agents_dir": has_agents_dir,
        "has_readme": (squad_path / "README.md").exists(),
        "agent_count": len(existing_agents),
        "existing_agents": existing_agents,
    }
def check_local_sources(specialist_slug: str, sources_path: str = None) -> dict:
    """Locate local knowledge sources for a specialist and size them up.

    Searches *sources_path* first (when given), then the default locations
    ``outputs/minds/<slug>/sources``, ``outputs/minds/<slug>/analysis`` and
    ``squads/*/minds/<slug>``, all relative to the current working directory.
    Every ``.md``, ``.txt``, ``.yaml`` and ``.json`` file below a directory
    that exists is counted once, even when search roots overlap.

    Args:
        specialist_slug: Slug used to build the default search paths.
        sources_path: Optional extra directory, checked before the defaults.

    Returns:
        A dict with ``specialist_slug``, ``sources_found`` (True as soon as
        any searched directory exists), ``total_files``, ``total_lines``,
        ``coverage_estimate`` (0, 20, 40, 70 or 90, derived from the total
        line count), ``paths_checked`` and ``files`` (``path``/``lines``
        entries).
    """
    from glob import glob

    search_paths = [
        f"outputs/minds/{specialist_slug}/sources",
        f"outputs/minds/{specialist_slug}/analysis",
        f"squads/*/minds/{specialist_slug}",
    ]
    if sources_path:
        search_paths.insert(0, sources_path)

    result = {
        "specialist_slug": specialist_slug,
        "sources_found": False,
        "total_files": 0,
        "total_lines": 0,
        "coverage_estimate": 0,
        "paths_checked": [],
        "files": []
    }

    # Resolved paths already counted. Without this, a custom --sources path
    # that equals or contains a default location inflates the totals.
    seen = set()

    for search_pattern in search_paths:
        if "*" in search_pattern:
            # Sorted: glob order is filesystem-dependent.
            candidates = [Path(match) for match in sorted(glob(search_pattern))]
        else:
            candidates = [Path(search_pattern)]

        for path in candidates:
            result["paths_checked"].append(str(path))
            if not path.is_dir():
                continue
            result["sources_found"] = True
            for ext in ("*.md", "*.txt", "*.yaml", "*.json"):
                for file in sorted(path.rglob(ext)):
                    # A directory called e.g. "notes.md" is not a source file.
                    if not file.is_file():
                        continue
                    identity = file.resolve()
                    if identity in seen:
                        continue
                    seen.add(identity)
                    lines = count_lines(file)
                    result["files"].append({"path": str(file), "lines": lines})
                    result["total_files"] += 1
                    result["total_lines"] += lines

    # Map the total line count onto a coarse coverage estimate; the first
    # threshold reached (largest first) wins, and 0 lines stays at 0.
    for minimum_lines, estimate in ((5000, 90), (2000, 70), (500, 40), (1, 20)):
        if result["total_lines"] >= minimum_lines:
            result["coverage_estimate"] = estimate
            break

    return result
def validate_agent_name(name: str) -> dict:
    """Check an agent name against the kebab-case / snake_case conventions.

    A valid name starts with a lowercase letter and consists of lowercase
    alphanumeric segments joined by single hyphens (kebab-case) or single
    underscores (snake_case).

    Args:
        name: Proposed agent name.

    Returns:
        A dict with ``name``, ``is_kebab_case``, ``is_snake_case``, ``valid``
        and ``suggested``. ``suggested`` is a kebab-case alternative for an
        invalid name, or ``None`` when the name is already valid or no valid
        alternative can be derived from it.
    """
    import re

    kebab_pattern = r'[a-z][a-z0-9]*(-[a-z0-9]+)*'
    snake_pattern = r'[a-z][a-z0-9]*(_[a-z0-9]+)*'

    # fullmatch, not match with "$": "$" also matches just before a trailing
    # newline, which would accept names such as "my-agent\n".
    is_kebab = re.fullmatch(kebab_pattern, name) is not None
    is_snake = re.fullmatch(snake_pattern, name) is not None
    valid = is_kebab or is_snake

    suggested = None
    if not valid:
        # Lowercase, collapse every run of other characters into one hyphen,
        # then drop hyphens left dangling at either end.
        candidate = re.sub(r'[^a-z0-9]+', '-', name.lower()).strip('-')
        # Only offer the candidate if it would itself pass validation. An
        # empty or digit-leading result is not a usable suggestion.
        if re.fullmatch(kebab_pattern, candidate):
            suggested = candidate

    return {
        "name": name,
        "is_kebab_case": is_kebab,
        "is_snake_case": is_snake,
        "valid": valid,
        "suggested": suggested,
    }
def _print_text_report(result: dict, squad_name: str, specialist: str) -> None:
    """Print the human-readable (``--format text``) form of a preflight result."""
    squad = result["squad_validation"]
    print("=== CREATE-AGENT PREFLIGHT ===")
    print(f"Status: {result['overall_status']}")
    print(f"\nSquad: {squad_name}")
    print(f"  Exists: {'Yes' if squad['exists'] else 'No'}")
    print(f"  Agents: {squad['agent_count']}")

    sources = result["sources_validation"]
    if sources:
        print(f"\nSources for {specialist}:")
        print(f"  Found: {'Yes' if sources['sources_found'] else 'No'}")
        print(f"  Files: {sources['total_files']}")
        print(f"  Lines: {sources['total_lines']}")
        print(f"  Coverage: {sources['coverage_estimate']}%")

    for heading, key in (
        ("BLOCKERS", "blockers"),
        ("WARNINGS", "warnings"),
        ("RECOMMENDATIONS", "recommendations"),
    ):
        if result[key]:
            print(f"\n{heading}:")
            for item in result[key]:
                print(f"  - {item}")


def main():
    """Run the create-agent preflight checks from the command line.

    Parses the CLI arguments, validates the target squad, optionally checks
    local sources for a specialist and a proposed agent name, prints the
    result as JSON or text, and exits with status 0 when the overall status
    is ``READY`` and 1 otherwise. Only a missing squad blocks; everything
    else is reported as a warning with a recommendation.
    """
    parser = argparse.ArgumentParser(description="Preflight checks for create-agent")
    parser.add_argument("--squad", help="Target squad name")
    parser.add_argument("--pack", dest="legacy_pack", help="Legacy alias for --squad")
    parser.add_argument("--specialist", help="Specialist slug (optional)")
    parser.add_argument("--sources", help="Custom sources path")
    parser.add_argument("--agent-name", help="Proposed agent name to validate")
    parser.add_argument("--squads-path", default="squads", help="Path to squads directory")
    parser.add_argument("--format", choices=["json", "text"], default="json")
    args = parser.parse_args()

    squad_name = args.squad or args.legacy_pack
    if not squad_name:
        parser.error("Missing required argument: --squad (or legacy --pack)")

    squad_validation = validate_squad(squad_name, args.squads_path)
    result = {
        "timestamp": datetime.now().isoformat(),
        "squad_validation": squad_validation,
        # Legacy key kept for backward compatibility; same object as above.
        "pack_validation": squad_validation,
        "sources_validation": None,
        "name_validation": None,
        "overall_status": "READY",
        "blockers": [],
        "warnings": [],
        "recommendations": []
    }

    # Check squad
    if not squad_validation["exists"]:
        # Report the path that was actually checked. It differs from
        # "squads/<name>" whenever --squads-path is given.
        result["blockers"].append(
            f"Squad '{squad_name}' does not exist at {squad_validation['squad_path']}/"
        )
        result["overall_status"] = "BLOCKED"
    elif not squad_validation["has_config"]:
        result["warnings"].append("Squad missing config.yaml")

    # Check sources if specialist provided
    if args.specialist:
        sources = check_local_sources(args.specialist, args.sources)
        result["sources_validation"] = sources
        if not sources["sources_found"]:
            result["warnings"].append(f"No local sources found for '{args.specialist}'")
            result["recommendations"].append("Run *auto-acquire-sources or provide sources manually")
        elif sources["coverage_estimate"] < 50:
            result["warnings"].append(f"Low source coverage ({sources['coverage_estimate']}%)")
            result["recommendations"].append("Consider gathering more sources before extraction")

    # Validate agent name if provided
    if args.agent_name:
        name_check = validate_agent_name(args.agent_name)
        result["name_validation"] = name_check
        if not name_check["valid"]:
            result["warnings"].append(f"Agent name '{args.agent_name}' doesn't follow conventions")
            # Skip the recommendation when no usable suggestion was derived
            # (e.g. the name contained no letters or digits at all).
            if name_check["suggested"]:
                result["recommendations"].append(f"Suggested name: {name_check['suggested']}")

    # Output
    if args.format == "json":
        print(json.dumps(result, indent=2))
    else:
        _print_text_report(result, squad_name, args.specialist)

    # Exit code based on status
    sys.exit(0 if result["overall_status"] == "READY" else 1)


if __name__ == "__main__":
    main()

package/pro/squads/squad-creator-pro/scripts/cross-provider/compare-results.js ADDED Viewed

@@ -0,0 +1,281 @@
+#!/usr/bin/env node
+/**
+ * Cross-Provider Results Comparator
+ *
+ * Compara resultados de Opus (baseline) vs modelo candidato.
+ * Gera relatório de qualificação automático.
+ * Usa paths do squad-config.yaml (zero hardcoded paths).
+ *
+ * Usage:
+ *   node compare-results.js --task extract-knowledge --baseline opus --candidate glm5
+ */
+const fs = require('fs');
+const path = require('path');
+// Load config (auto-detects project root)
+const config = require('../lib/config-loader');
// Filesystem locations used by this script, taken from the loaded config
// module instead of being hardcoded here.
const PATHS = {
  // Root directory under which results are looked up as <task>/<model>/.
  outputDir: config.paths.llmTests
};
+// ============================================================================
+// RESULT LOADER
+// ============================================================================
/**
 * Load the most recent run file for a task/model pair.
 *
 * Looks in <outputDir>/<taskName>/<modelName> for files named
 * `run-*.yaml` and picks the one that sorts last by name.
 *
 * @param {string} taskName  Task directory name.
 * @param {string} modelName Model directory name.
 * @returns {{file: string, content: string, parsed: object}|null}
 *   The file name, its raw text and its parsed fields, or null when the
 *   directory does not exist or holds no run files.
 */
function loadLatestResult(taskName, modelName) {
  const resultsDir = path.join(PATHS.outputDir, taskName, modelName);
  if (!fs.existsSync(resultsDir)) return null;

  const runFiles = [];
  for (const entry of fs.readdirSync(resultsDir)) {
    if (entry.startsWith('run-') && entry.endsWith('.yaml')) {
      runFiles.push(entry);
    }
  }
  if (runFiles.length === 0) {
    return null;
  }

  // Ascending name sort; the last entry is the newest run.
  runFiles.sort();
  const latest = runFiles[runFiles.length - 1];
  const content = fs.readFileSync(path.join(resultsDir, latest), 'utf-8');
  const parsed = parseYaml(content);
  return { file: latest, content, parsed };
}
/**
 * Extract the fields this tool needs from a run-result YAML file.
 *
 * This is a targeted regex scan, not a YAML parser. Metadata is read only
 * from the part of the file before the `output: |` block, so text inside the
 * model output can never be mistaken for a metric. Key names must match
 * exactly (`prompt:` does not match `system_prompt:`).
 *
 * @param {string} content Raw file text (LF or CRLF line endings).
 * @returns {{task: (string|undefined), model: (string|undefined),
 *   task_hash: (string|undefined), latency: (number|null),
 *   cost: (number|null), tokens: {prompt: (number|null),
 *   completion: (number|null)}, output: (string|null)}}
 */
function parseYaml(content) {
  // Normalise CRLF so the `output: |` marker and the dedent below also work
  // on files written with Windows line endings.
  const text = content.replace(/\r\n/g, '\n');

  // The output block is expected to run to the end of the file.
  const outputMatch = text.match(/(?<![\w-])output: \|\n([\s\S]+)$/);
  const header = outputMatch ? text.slice(0, outputMatch.index) : text;

  // First capture group of `<key>: <valuePattern>` in the header, where the
  // key is not the tail of a longer key; undefined when absent.
  const field = (key, valuePattern) => {
    const found = header.match(new RegExp(`(?<![\\w-])${key}: ${valuePattern}`));
    return found ? found[1] : undefined;
  };
  const quoted = '"([^"]+)"';
  const toFloat = raw => (raw === undefined ? null : parseFloat(raw));
  const toInt = raw => (raw === undefined ? null : parseInt(raw, 10));

  const result = {};
  result.task = field('task', quoted);
  result.model = field('model', quoted);
  result.task_hash = field('task_hash', quoted);
  result.latency = toFloat(field('latency_seconds', '([\\d.]+)'));
  result.cost = toFloat(field('cost_usd', '([\\d.]+)'));
  result.tokens = {
    prompt: toInt(field('prompt', '(\\d+)')),
    completion: toInt(field('completion', '(\\d+)'))
  };
  // Strip the two-space block-scalar indent from every output line.
  result.output = outputMatch ? outputMatch[1].replace(/^  /gm, '').trim() : null;
  return result;
}
+// ============================================================================
+// COMPARISON METRICS
+// ============================================================================
/**
 * Compute candidate-vs-baseline metrics from two parsed results.
 *
 * Each ratio is candidate / baseline. A ratio is NaN when either value is
 * missing or the baseline is not positive, so every threshold check on it
 * evaluates to false instead of a missing value passing as "100% faster".
 *
 * @param {object} baseline  Parsed baseline result (latency, cost, tokens, output).
 * @param {object} candidate Parsed candidate result, same shape.
 * @returns {{latency: object, cost: object, tokens: object, content: object}}
 *   `latency` and `cost` carry baseline, candidate, ratio and a label
 *   (`improvement` / `savings`); `content` carries the output line counts
 *   and their ratio.
 */
function compare(baseline, candidate) {
  const ratioOf = (cand, base) =>
    (typeof cand === 'number' && typeof base === 'number' && base > 0)
      ? cand / base
      : NaN;

  // Label the change in the direction it actually went; the percentage is
  // always shown as a positive number.
  const describe = (ratio, betterWord, worseWord) => {
    if (Number.isNaN(ratio)) return 'N/A';
    const percent = (1 - ratio) * 100;
    return percent >= 0
      ? `${percent.toFixed(1)}% ${betterWord}`
      : `${Math.abs(percent).toFixed(1)}% ${worseWord}`;
  };

  const latencyRatio = ratioOf(candidate.latency, baseline.latency);
  const costRatio = ratioOf(candidate.cost, baseline.cost);

  // A missing output counts as 0 lines.
  const baselineLines = baseline.output?.split('\n').length || 0;
  const candidateLines = candidate.output?.split('\n').length || 0;

  return {
    latency: {
      baseline: baseline.latency,
      candidate: candidate.latency,
      ratio: latencyRatio,
      improvement: describe(latencyRatio, 'faster', 'slower')
    },
    cost: {
      baseline: baseline.cost,
      candidate: candidate.cost,
      ratio: costRatio,
      savings: describe(costRatio, 'cheaper', 'more expensive')
    },
    tokens: {
      baseline: baseline.tokens,
      candidate: candidate.tokens
    },
    content: {
      baseline_lines: baselineLines,
      candidate_lines: candidateLines,
      // NaN rather than Infinity when the baseline produced no output.
      ratio: baselineLines > 0 ? candidateLines / baselineLines : NaN
    }
  };
}
+// ============================================================================
+// REPORT GENERATOR
+// ============================================================================
/**
 * Render the Markdown qualification report for one task.
 *
 * @param {string} taskName   Task name shown in the header.
 * @param {object} baseline   Parsed baseline result (model, task_hash, latency, cost, output).
 * @param {object} candidate  Parsed candidate result, same shape.
 * @param {object} comparison Metrics object with `latency`, `cost` and `content` entries.
 * @returns {string} The full report text. Metrics that are missing or not
 *   computable are shown as `N/A`.
 */
function generateReport(taskName, baseline, candidate, comparison) {
  const timestamp = new Date().toISOString();

  // Formatters that fall back to 'N/A' instead of printing "undefineds",
  // "$undefined" or "NaN%" when a metric is absent.
  const seconds = value => (typeof value === 'number' ? `${value.toFixed(1)}s` : 'N/A');
  const dollars = value => (typeof value === 'number' ? `$${value.toFixed(4)}` : 'N/A');
  const percent = ratio => (Number.isFinite(ratio) ? `${(ratio * 100).toFixed(0)}%` : 'N/A');

  // First 2000 characters of an output, with a marker when more was cut.
  const excerpt = output =>
    `${output?.slice(0, 2000) || 'N/A'}${output?.length > 2000 ? '\n... (truncated)' : ''}`;

  const contentShare = percent(comparison.content.ratio);

  let report = `# Cross-Provider Qualification Report
**Task:** ${taskName}
**Baseline:** ${baseline.model} (hash: ${baseline.task_hash})
**Candidate:** ${candidate.model}
**Date:** ${timestamp.split('T')[0]}
---
## Performance Comparison
| Metric | ${baseline.model} | ${candidate.model} | Delta |
|--------|----------|-----------|-------|
| Latency | ${seconds(baseline.latency)} | ${seconds(candidate.latency)} | ${comparison.latency.improvement} |
| Cost | ${dollars(baseline.cost)} | ${dollars(candidate.cost)} | ${comparison.cost.savings} |
| Output Lines | ${comparison.content.baseline_lines} | ${comparison.content.candidate_lines} | ${contentShare} |
---
## Qualification Metrics
### Speed
- **${comparison.latency.ratio < 1 ? '✅' : '❌'} Latency:** ${comparison.latency.improvement}
### Cost
- **${comparison.cost.ratio < 0.5 ? '✅' : '⚠️'} Savings:** ${comparison.cost.savings}
### Content Completeness
- **${comparison.content.ratio > 0.8 ? '✅' : '❌'} Output Volume:** ${contentShare} of baseline
---
## Manual Review Required
- [ ] **Anti-Invention:** Zero unsourced claims?
- [ ] **Accuracy:** Citations correct?
- [ ] **Completeness:** All key items extracted?
- [ ] **Format:** Valid YAML/MD structure?
- [ ] **PT-BR Quality:** Natural Portuguese?
---
## Recommendation
`;

  const speedOK = comparison.latency.ratio < 1;
  const costOK = comparison.cost.ratio < 0.5;
  // NOTE(review): the recommendation accepts > 0.7 while the "Output Volume"
  // check above requires > 0.8, so a ratio in between shows a failed check
  // yet can still be "LIKELY QUALIFIED". Confirm whether that is intended.
  const sizeOK = comparison.content.ratio > 0.7;

  if (speedOK && costOK && sizeOK) {
    report += `**LIKELY QUALIFIED** ✅
Candidate shows:
- ${comparison.latency.improvement}
- ${comparison.cost.savings}
- ${contentShare} output completeness
Proceed with manual quality review.`;
  } else {
    report += `**NEEDS REVIEW** ⚠️
Potential issues:
${!speedOK ? '- Slower than baseline\n' : ''}${!costOK ? '- Limited cost savings\n' : ''}${!sizeOK ? '- Significantly less output\n' : ''}
Review output quality before qualifying.`;
  }

  report += `
---
## Raw Outputs
### ${baseline.model} Output
\`\`\`yaml
${excerpt(baseline.output)}
\`\`\`
### ${candidate.model} Output
\`\`\`yaml
${excerpt(candidate.output)}
\`\`\`
`;
  return report;
}
+// ============================================================================
+// CLI
+// ============================================================================
/**
 * CLI entry point: load the latest baseline and candidate results for a
 * task, compare them, write `qualification-report.md` into the candidate's
 * result directory and print a short summary.
 *
 * Exits with status 0 after printing help, and with status 1 when required
 * arguments or result files are missing.
 */
async function main() {
  const args = process.argv.slice(2);
  if (args.includes('--help') || args.length === 0) {
    console.log(`
Cross-Provider Results Comparator
Usage:
  node compare-results.js --task <name> --baseline <model> --candidate <model>
Options:
  --task       Task name
  --baseline   Baseline model (default: opus)
  --candidate  Candidate model (e.g., glm5)
Paths (from squad-config.yaml):
  Output: ${PATHS.outputDir}
Example:
  node compare-results.js --task extract-knowledge --candidate glm5
    `);
    process.exit(0);
  }

  // Value following `--<name>`, or the default when the flag is absent, is
  // the last argument, or is directly followed by another flag. The next
  // flag must not be taken as this flag's value.
  const getArg = (name, defaultValue = null) => {
    const idx = args.indexOf(`--${name}`);
    if (idx === -1) return defaultValue;
    const value = args[idx + 1];
    return value === undefined || value.startsWith('--') ? defaultValue : value;
  };

  const taskName = getArg('task');
  const baselineModel = getArg('baseline', 'opus');
  const candidateModel = getArg('candidate');
  if (!taskName || !candidateModel) {
    console.error('❌ Missing required arguments. Use --help for usage.');
    process.exit(1);
  }

  console.log(`\n📊 Comparing ${taskName}: ${baselineModel} vs ${candidateModel}\n`);
  const baseline = loadLatestResult(taskName, baselineModel);
  const candidate = loadLatestResult(taskName, candidateModel);
  if (!baseline) {
    console.error(`❌ No baseline results found for ${taskName}/${baselineModel}`);
    process.exit(1);
  }
  if (!candidate) {
    console.error(`❌ No candidate results found for ${taskName}/${candidateModel}`);
    process.exit(1);
  }
  console.log(`✓ Loaded baseline: ${baseline.file}`);
  console.log(`✓ Loaded candidate: ${candidate.file}`);

  const comparison = compare(baseline.parsed, candidate.parsed);
  const report = generateReport(taskName, baseline.parsed, candidate.parsed, comparison);

  // The candidate directory exists at this point: its result was just
  // loaded from it.
  const reportPath = path.join(PATHS.outputDir, taskName, candidateModel, 'qualification-report.md');
  fs.writeFileSync(reportPath, report);
  console.log(`\n📄 Report saved: ${reportPath}`);

  const rule = '='.repeat(50);
  console.log(`\n${rule}`);
  console.log('Quick Summary:');
  console.log(rule);
  console.log(`Latency: ${comparison.latency.improvement}`);
  console.log(`Cost:    ${comparison.cost.savings}`);
  console.log(`Output:  ${(comparison.content.ratio * 100).toFixed(0)}% of baseline`);
}

// main is async: report a failure and exit non-zero instead of leaving an
// unhandled promise rejection.
main().catch(err => {
  console.error(err);
  process.exit(1);
});