npm - create-walle - Versions diffs - 0.9.21 → 0.9.23 - Mend

create-walle 0.9.21 → 0.9.23

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (500) hide show

package/template/wall-e/eval/fixtures/wall-e-subset/eval/scorer.js DELETED Viewed

@@ -1,73 +0,0 @@
-'use strict';
-/**
- * Simplified scoring with 3 dimensions:
- *   correctness (weight 0.5), efficiency (weight 0.3), quality (weight 0.2)
- */
-const DIMENSIONS = {
-  correctness: { weight: 0.5, min: 0, max: 1 },
-  efficiency:  { weight: 0.3, min: 0, max: 1 },
-  quality:     { weight: 0.2, min: 0, max: 1 },
-};
-/**
- * Clamp a value between min and max.
- */
-function clamp(val, min, max) {
-  return Math.max(min, Math.min(max, val));
-}
-/**
- * Compute a weighted composite score from individual dimension scores.
- * @param {object} scores - { correctness, efficiency, quality } each 0-1
- * @returns {object} { correctness, efficiency, quality, composite }
- */
-function score(scores) {
-  const result = {};
-  let composite = 0;
-  for (const [dim, config] of Object.entries(DIMENSIONS)) {
-    const raw = scores[dim] ?? 0;
-    const clamped = clamp(raw, config.min, config.max);
-    result[dim] = Math.round(clamped * 1000) / 1000;
-    composite += clamped * config.weight;
-  }
-  result.composite = Math.round(composite * 1000) / 1000;
-  return result;
-}
-/**
- * Score based on trait matching.
- * @param {string[]} expectedTraits - traits the benchmark expects
- * @param {string[]} foundTraits - traits detected in the response
- * @returns {object} dimension scores
- */
-function scoreFromTraits(expectedTraits, foundTraits) {
-  if (!expectedTraits || expectedTraits.length === 0) {
-    return score({ correctness: 0, efficiency: 0, quality: 0 });
-  }
-  const expected = new Set(expectedTraits);
-  const found = new Set(foundTraits);
-  let matched = 0;
-  for (const t of expected) {
-    if (found.has(t)) matched++;
-  }
-  const ratio = matched / expected.size;
-  // Bonus for no extra spurious traits
-  const spurious = [...found].filter(t => !expected.has(t)).length;
-  const efficiencyPenalty = Math.min(spurious * 0.1, 0.3);
-  return score({
-    correctness: ratio,
-    efficiency: Math.max(0, ratio - efficiencyPenalty),
-    quality: ratio >= 0.8 ? 1.0 : ratio,
-  });
-}
-module.exports = { score, scoreFromTraits, DIMENSIONS, clamp };

package/template/wall-e/eval/fixtures/wall-e-subset/eval/test.js DELETED Viewed

@@ -1,134 +0,0 @@
-'use strict';
-const { describe, it } = require('node:test');
-const assert = require('node:assert/strict');
-const path = require('path');
-const { score, scoreFromTraits, clamp, DIMENSIONS } = require('./scorer');
-const { detectTraits, loadBenchmarks, loadAll, TRAIT_MATCHERS } = require('./benchmarks');
-const { Aggregator } = require('./aggregator');
-const { runBenchmark, createSandbox, cleanSandbox } = require('./runner');
-// ---- Scorer tests ----
-describe('scorer', () => {
-  it('should compute weighted composite correctly', () => {
-    const result = score({ correctness: 1.0, efficiency: 1.0, quality: 1.0 });
-    assert.equal(result.composite, 1.0);
-  });
-  it('should handle zero scores', () => {
-    const result = score({ correctness: 0, efficiency: 0, quality: 0 });
-    assert.equal(result.composite, 0);
-  });
-  it('should clamp values to 0-1 range', () => {
-    const result = score({ correctness: 1.5, efficiency: -0.2, quality: 0.5 });
-    assert.equal(result.correctness, 1.0);
-    assert.equal(result.efficiency, 0);
-    assert.equal(result.quality, 0.5);
-  });
-  it('should score from traits correctly', () => {
-    const result = scoreFromTraits(
-      ['has-function', 'has-error-handling'],
-      ['has-function', 'has-error-handling']
-    );
-    assert.equal(result.correctness, 1.0);
-    assert.ok(result.composite > 0.8);
-  });
-  it('should penalize missing traits', () => {
-    const result = scoreFromTraits(
-      ['has-function', 'has-error-handling', 'has-test'],
-      ['has-function']
-    );
-    assert.ok(result.correctness < 0.5);
-  });
-});
-// ---- Benchmarks tests ----
-describe('benchmarks', () => {
-  it('should detect traits in code text', () => {
-    const code = `
-      const add = (a, b) => a + b;
-      try { add(1,2); } catch(e) {}
-      for (const x of items) {}
-    `;
-    const traits = detectTraits(code);
-    assert.ok(traits.includes('has-function'));
-    assert.ok(traits.includes('has-error-handling'));
-    assert.ok(traits.includes('has-loop'));
-  });
-  it('should load coding benchmarks from JSON', () => {
-    const benchmarks = loadBenchmarks(path.join(__dirname, 'benchmarks', 'coding.json'));
-    assert.ok(benchmarks.length >= 5);
-    assert.ok(benchmarks[0].prompt);
-    assert.ok(benchmarks[0].expectedTraits);
-  });
-  it('should load all benchmarks with category tags', () => {
-    const all = loadAll(path.join(__dirname, 'benchmarks'));
-    assert.ok(all.length >= 8);
-    const categories = new Set(all.map(b => b.category));
-    assert.ok(categories.has('coding'));
-    assert.ok(categories.has('chat'));
-  });
-});
-// ---- Aggregator tests ----
-describe('aggregator', () => {
-  it('should compute rolling average', () => {
-    const agg = new Aggregator(10);
-    agg.add({ model: 'test', scores: { composite: 0.8, correctness: 0.9, efficiency: 0.7, quality: 0.8 } });
-    agg.add({ model: 'test', scores: { composite: 0.6, correctness: 0.7, efficiency: 0.5, quality: 0.6 } });
-    const avg = agg.average('test');
-    assert.equal(avg.count, 2);
-    assert.equal(avg.avgComposite, 0.7);
-  });
-  it('should respect window size', () => {
-    const agg = new Aggregator(3);
-    for (let i = 0; i < 5; i++) {
-      agg.add({ model: 'a', scores: { composite: i * 0.2 } });
-    }
-    assert.equal(agg.size, 3);
-  });
-  it('should compute trend', () => {
-    const agg = new Aggregator(100);
-    // First half: low scores
-    for (let i = 0; i < 10; i++) {
-      agg.add({ model: 'm', scores: { composite: 0.3 } });
-    }
-    // Second half: high scores
-    for (let i = 0; i < 10; i++) {
-      agg.add({ model: 'm', scores: { composite: 0.9 } });
-    }
-    const t = agg.trend('m');
-    assert.ok(t > 0, 'trend should be positive (improving)');
-  });
-});
-// ---- Runner tests ----
-describe('runner', () => {
-  it('should run a benchmark with a mock agent', async () => {
-    const benchmark = {
-      id: 'test-1',
-      prompt: 'Write a function that adds two numbers',
-      expectedTraits: ['has-function'],
-    };
-    const mockAgent = async (prompt) => {
-      return 'function add(a, b) { return a + b; }';
-    };
-    const result = await runBenchmark(benchmark, mockAgent);
-    assert.equal(result.benchmarkId, 'test-1');
-    assert.ok(result.detectedTraits.includes('has-function'));
-    assert.ok(result.scores.correctness > 0);
-  });
-});

package/template/wall-e/eval/fixtures/wall-e-subset/llm/client.js DELETED Viewed

@@ -1,99 +0,0 @@
-'use strict';
-/**
- * Mock LLM client that returns canned responses.
- * No external dependencies.
- */
-const CANNED_RESPONSES = {
-  'default': 'function hello() { return "Hello, world!"; }',
-  'code': `
-const process = require('node:process');
-/**
- * Process items with error handling.
- */
-async function processItems(items) {
-  const results = [];
-  try {
-    for (const item of items) {
-      if (!item || typeof item !== 'object') {
-        throw new Error('Invalid item');
-      }
-      results.push({ ...item, processed: true });
-    }
-  } catch (err) {
-    console.error('Processing failed:', err.message);
-    return [];
-  }
-  return results;
-}
-module.exports = { processItems };
-`,
-  'chat': 'Here is a brief explanation with an example:\n\nif (condition) { /* do something */ } else { /* fallback */ }',
-  'test': `
-const assert = require('node:assert');
-function test(name, fn) {
-  try {
-    fn();
-    console.log('PASS:', name);
-  } catch (e) {
-    console.error('FAIL:', name, e.message);
-  }
-}
-test('add', () => {
-  assert.strictEqual(1 + 1, 2);
-});
-`,
-};
-class MockLLMClient {
-  /**
-   * @param {object} [options]
-   * @param {string} [options.provider] - provider name
-   * @param {string} [options.model] - model name
-   * @param {object} [options.customResponses] - override canned responses
-   */
-  constructor(options = {}) {
-    this.provider = options.provider || 'mock';
-    this.model = options.model || 'mock-v1';
-    this.responses = { ...CANNED_RESPONSES, ...(options.customResponses || {}) };
-    this.callLog = [];
-  }
-  /**
-   * Generate a response for a prompt.
-   * Selects a canned response based on keyword matching.
-   * @param {string} prompt
-   * @returns {Promise<string>}
-   */
-  async generate(prompt) {
-    this.callLog.push({ prompt, timestamp: Date.now() });
-    const lower = prompt.toLowerCase();
-    if (lower.includes('test') || lower.includes('assert')) return this.responses.test;
-    if (lower.includes('function') || lower.includes('write') || lower.includes('create')) return this.responses.code;
-    if (lower.includes('explain') || lower.includes('describe')) return this.responses.chat;
-    return this.responses.default;
-  }
-  /**
-   * Get the number of calls made.
-   */
-  get callCount() {
-    return this.callLog.length;
-  }
-  /**
-   * Reset call log.
-   */
-  reset() {
-    this.callLog = [];
-  }
-}
-module.exports = { MockLLMClient, CANNED_RESPONSES };

package/template/wall-e/eval/fixtures/wall-e-subset/llm/providers.js DELETED Viewed

@@ -1,63 +0,0 @@
-'use strict';
-/**
- * Provider registry for LLM backends.
- * Simplified version — no actual API calls.
- */
-const PROVIDERS = {
-  anthropic: {
-    name: 'anthropic',
-    models: ['claude-sonnet', 'claude-haiku'],
-    defaultModel: 'claude-haiku',
-    endpoint: 'https://api.anthropic.com/v1/messages',
-  },
-  ollama: {
-    name: 'ollama',
-    models: ['gemma4:e4b', 'llama3.2'],
-    defaultModel: 'gemma4:e4b',
-    endpoint: 'http://localhost:11434/api/generate',
-  },
-};
-/**
- * Get a provider by name.
- * @param {string} name
- * @returns {object|null}
- */
-function getProvider(name) {
-  return PROVIDERS[name] || null;
-}
-/**
- * List all registered provider names.
- * @returns {string[]}
- */
-function listProviders() {
-  return Object.keys(PROVIDERS);
-}
-/**
- * Register a new provider.
- * @param {string} name
- * @param {object} config - { models, defaultModel, endpoint }
- */
-function registerProvider(name, config) {
-  PROVIDERS[name] = { name, ...config };
-}
-/**
- * Get all models across all providers.
- * @returns {object[]} - [{ provider, model }]
- */
-function allModels() {
-  const result = [];
-  for (const [provName, prov] of Object.entries(PROVIDERS)) {
-    for (const model of prov.models) {
-      result.push({ provider: provName, model });
-    }
-  }
-  return result;
-}
-module.exports = { getProvider, listProviders, registerProvider, allModels, PROVIDERS };

package/template/wall-e/eval/fixtures/wall-e-subset/llm/test.js DELETED Viewed

@@ -1,70 +0,0 @@
-'use strict';
-const { describe, it } = require('node:test');
-const assert = require('node:assert/strict');
-const { MockLLMClient } = require('./client');
-const { getProvider, listProviders, registerProvider, allModels } = require('./providers');
-describe('MockLLMClient', () => {
-  it('should return code response for code prompts', async () => {
-    const client = new MockLLMClient();
-    const resp = await client.generate('Write a function to sort an array');
-    assert.ok(resp.includes('function') || resp.includes('const'));
-    assert.equal(client.callCount, 1);
-  });
-  it('should return chat response for explanation prompts', async () => {
-    const client = new MockLLMClient();
-    const resp = await client.generate('Explain how closures work');
-    assert.ok(resp.includes('explanation') || resp.includes('example'));
-  });
-  it('should support custom responses', async () => {
-    const client = new MockLLMClient({
-      customResponses: { default: 'custom output' },
-    });
-    const resp = await client.generate('something random');
-    assert.equal(resp, 'custom output');
-  });
-  it('should track call log and allow reset', async () => {
-    const client = new MockLLMClient();
-    await client.generate('prompt 1');
-    await client.generate('prompt 2');
-    assert.equal(client.callCount, 2);
-    client.reset();
-    assert.equal(client.callCount, 0);
-  });
-});
-describe('providers', () => {
-  it('should list built-in providers', () => {
-    const providers = listProviders();
-    assert.ok(providers.includes('anthropic'));
-    assert.ok(providers.includes('ollama'));
-  });
-  it('should get provider by name', () => {
-    const prov = getProvider('anthropic');
-    assert.ok(prov);
-    assert.equal(prov.name, 'anthropic');
-    assert.ok(prov.models.length > 0);
-  });
-  it('should return null for unknown provider', () => {
-    assert.equal(getProvider('nonexistent'), null);
-  });
-  it('should register a new provider', () => {
-    registerProvider('test-prov', { models: ['t1'], defaultModel: 't1', endpoint: 'http://test' });
-    const prov = getProvider('test-prov');
-    assert.ok(prov);
-    assert.deepEqual(prov.models, ['t1']);
-  });
-  it('should list all models across providers', () => {
-    const models = allModels();
-    assert.ok(models.length >= 4);
-    assert.ok(models.some(m => m.provider === 'anthropic'));
-  });
-});

package/template/wall-e/eval/fixtures/wall-e-subset/package.json DELETED Viewed

@@ -1,10 +0,0 @@
-{
-  "name": "wall-e-eval-subset",
-  "version": "1.0.0",
-  "description": "Simplified/sanitized copy of Wall-E eval system for dogfooding",
-  "main": "test.js",
-  "scripts": {
-    "test": "node test.js"
-  },
-  "license": "MIT"
-}

package/template/wall-e/eval/fixtures/wall-e-subset/test.js DELETED Viewed

@@ -1,86 +0,0 @@
-'use strict';
-/**
- * Root test runner — loads all test files.
- * Run with: node test.js
- *
- * Uses Node's built-in test runner (node:test).
- */
-// Load all test suites
-require('./eval/test');
-require('./llm/test');
-// Brain tests inline
-const { describe, it } = require('node:test');
-const assert = require('node:assert/strict');
-const { Brain } = require('./brain');
-describe('Brain', () => {
-  it('should insert and query results', () => {
-    const brain = new Brain();
-    brain.insert({ benchmarkId: 'b1', model: 'test-model', scores: { composite: 0.8 } });
-    brain.insert({ benchmarkId: 'b2', model: 'test-model', scores: { composite: 0.6 } });
-    brain.insert({ benchmarkId: 'b1', model: 'other-model', scores: { composite: 0.9 } });
-    const results = brain.query({ model: 'test-model' });
-    assert.equal(results.length, 2);
-  });
-  it('should filter by benchmarkId', () => {
-    const brain = new Brain();
-    brain.insert({ benchmarkId: 'b1', model: 'a', scores: { composite: 0.5 } });
-    brain.insert({ benchmarkId: 'b2', model: 'a', scores: { composite: 0.7 } });
-    const results = brain.query({ benchmarkId: 'b1' });
-    assert.equal(results.length, 1);
-    assert.equal(results[0].benchmarkId, 'b1');
-  });
-  it('should filter by minScore', () => {
-    const brain = new Brain();
-    brain.insert({ benchmarkId: 'b1', model: 'a', scores: { composite: 0.3 } });
-    brain.insert({ benchmarkId: 'b2', model: 'a', scores: { composite: 0.8 } });
-    const results = brain.query({ minScore: 0.5 });
-    assert.equal(results.length, 1);
-    assert.equal(results[0].scores.composite, 0.8);
-  });
-  it('should compute stats for a model', () => {
-    const brain = new Brain();
-    brain.insert({ benchmarkId: 'b1', model: 'm1', scores: { composite: 0.6 } });
-    brain.insert({ benchmarkId: 'b2', model: 'm1', scores: { composite: 0.8 } });
-    const stats = brain.stats('m1');
-    assert.equal(stats.count, 2);
-    assert.equal(stats.avgComposite, 0.7);
-    assert.equal(stats.best, 0.8);
-    assert.equal(stats.worst, 0.6);
-  });
-  it('should handle metadata', () => {
-    const brain = new Brain();
-    brain.setMeta('version', '1.0');
-    assert.equal(brain.getMeta('version'), '1.0');
-    assert.equal(brain.getMeta('missing'), undefined);
-  });
-  it('should clear all data', () => {
-    const brain = new Brain();
-    brain.insert({ benchmarkId: 'b1', model: 'a', scores: { composite: 0.5 } });
-    brain.setMeta('k', 'v');
-    brain.clear();
-    assert.equal(brain.query().length, 0);
-    assert.equal(brain.getMeta('k'), undefined);
-  });
-  it('should respect limit in query', () => {
-    const brain = new Brain();
-    for (let i = 0; i < 10; i++) {
-      brain.insert({ benchmarkId: `b${i}`, model: 'a', scores: { composite: i * 0.1 } });
-    }
-    const results = brain.query({ limit: 3 });
-    assert.equal(results.length, 3);
-  });
-});