npm - crewswarm - Versions diffs - 0.9.4 → 1.0.0 - Mend

crewswarm 0.9.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

package/lib/engines/rt-envelope.mjs CHANGED Viewed

@@ -1068,11 +1068,14 @@ export async function handleRealtimeEnvelope(envelope, client, bridge) {
       /and nothing else\b/i.test(prompt || "");
     // Append original task spec for self-verification (LangChain pattern)
-    // Skip strict-output prompts where any extra text would violate the task.
+    // Skip strict-output prompts and trivial/empty replies where the reminder adds noise.
+    const replyStripped = (reply || "").replace(/\s+/g, " ").trim();
+    const isTrivialReply = replyStripped.length < 50 || /^\(completed\)$/i.test(replyStripped);
     if (
       reply &&
       prompt &&
       !requestsExactReply &&
+      !isTrivialReply &&
       !reply.includes("[ORIGINAL TASK]")
     ) {
       const taskSpecReminder = `\n\n---\n**[ORIGINAL TASK]:**\n${prompt.slice(0, 500)}${prompt.length > 500 ? "..." : ""}\n\nDoes your implementation address ALL requirements above?`;

package/lib/memory/relevance-scorer.mjs ADDED Viewed

@@ -0,0 +1,199 @@
+/**
+ * Memory Relevance Scorer
+ *
+ * Score memories by relevance to a query using:
+ * 1. Recency     — newer memories score higher (exponential decay exp(-days/30): 30-day time constant, ~21-day half-life)
+ * 2. Frequency   — memories accessed more often score higher (normalised log)
+ * 3. Keyword     — TF-IDF-like scoring against query terms (length-based weighting: longer query tokens count more)
+ * 4. Context     — memories from the same project/agent/session score higher
+ *
+ * Pure functions only — zero I/O, zero dependencies.
+ */
+// ─── Internal helpers ────────────────────────────────────────────────────────
+/**
+ * Tokenise a string into lowercase tokens of length >= 2.
+ *
+ * Every character other than a-z, 0-9, whitespace, `_` and `-` is replaced by a
+ * space before splitting on whitespace, so tokens may contain underscores and
+ * hyphens (e.g. "half-life", "snake_case") and any other character, including
+ * non-ASCII letters, acts as a separator.
+ * Falsy or non-string input yields an empty array.
+ *
+ * @param {string} text
+ * @returns {string[]} tokens in order of appearance (duplicates are kept)
+ */
+function tokenise(text) {
+  if (!text || typeof text !== 'string') return [];
+  return text
+    .toLowerCase()
+    .replace(/[^a-z0-9\s_-]/g, ' ')
+    .split(/\s+/)
+    .filter(t => t.length >= 2);
+}
+// ─── Individual scoring components ──────────────────────────────────────────
+/**
+ * Recency score: exponential decay exp(-ageInDays / 30).
+ * Returns 1.0 for brand-new memories, approaching 0 for very old ones.
+ *
+ * The 30-day constant is the e-folding time (score ≈ 0.37 at 30 days), so the
+ * effective half-life is 30 · ln 2 ≈ 20.8 days rather than 30.
+ * Timestamps later than nowMs are clamped to an age of zero and score 1.0.
+ * A null/undefined timestamp, or one that does not parse to a valid date,
+ * scores 0.
+ *
+ * @param {string|number|Date} timestamp - ISO string, epoch ms, or Date
+ * @param {number} nowMs - current epoch ms (injectable for testing)
+ * @returns {number} [0, 1]
+ */
+export function computeRecency(timestamp, nowMs = Date.now()) {
+  if (timestamp == null) return 0;
+  const createdAt = timestamp instanceof Date
+    ? timestamp.getTime()
+    : new Date(timestamp).getTime();
+  if (Number.isNaN(createdAt)) return 0;
+  const daysSince = Math.max(0, (nowMs - createdAt) / (1000 * 60 * 60 * 24));
+  return Math.exp(-daysSince / 30);
+}
+/**
+ * Frequency score: log-normalised access count relative to a max,
+ * i.e. min(1, log(1 + count) / log(1 + max)).
+ *
+ * Inputs are coerced with Number(). A negative or non-numeric accessCount is
+ * treated as 0. A maxAccessCount that is 0 or non-numeric falls back to 100;
+ * other values below 1 are raised to 1. Counts above the max are capped at a
+ * score of 1.
+ *
+ * @param {number} accessCount
+ * @param {number} maxAccessCount - upper bound for normalisation (default 100)
+ * @returns {number} [0, 1]
+ */
+export function computeFrequency(accessCount, maxAccessCount = 100) {
+  const count = Math.max(0, Number(accessCount) || 0);
+  const maxCount = Math.max(1, Number(maxAccessCount) || 100);
+  return Math.min(1, Math.log(1 + count) / Math.log(1 + maxCount));
+}
+/**
+ * Keyword match score: weighted share of query tokens that also occur in the
+ * memory content.
+ *
+ * Both strings are tokenised with tokenise(); a query token counts as matched
+ * only when the identical token appears in the content (no stemming or
+ * substring matching). Each query token is weighted by log(1 + token length),
+ * so longer query words are weighted more heavily as a stand-in for rarity;
+ * no term-frequency or corpus statistics are involved. A token repeated in the
+ * query contributes its weight once per occurrence.
+ * Returns 0 when either side produces no tokens.
+ *
+ * @param {string} content - memory content
+ * @param {string} query
+ * @returns {number} [0, 1] matched weight divided by total query weight
+ */
+export function computeKeywordMatch(content, query) {
+  const queryTokens = tokenise(query);
+  const contentTokens = tokenise(content);
+  if (queryTokens.length === 0 || contentTokens.length === 0) return 0;
+  const contentSet = new Set(contentTokens);
+  // Accumulate the total query weight and the part of it found in the content
+  let weightedMatch = 0;
+  let totalWeight = 0;
+  for (const token of queryTokens) {
+    // IDF proxy: weight grows with the log of token length (longer = rarer heuristic)
+    const weight = Math.log(1 + token.length);
+    totalWeight += weight;
+    if (contentSet.has(token)) {
+      weightedMatch += weight;
+    }
+  }
+  if (totalWeight === 0) return 0; // defensive: tokens have length >= 2, so every weight is positive
+  return weightedMatch / totalWeight;
+}
+/**
+ * Context match score: bonus points for shared project / agent / session.
+ *
+ * Adds 0.5 for a matching projectId, 0.3 for a matching agentId and 0.2 for a
+ * matching sessionId. An id only counts when it is truthy on both sides and
+ * strictly equal (===), so a missing or empty id never matches.
+ * Returns 0 when memory or context is falsy.
+ *
+ * @param {object} memory - memory object with optional projectId, agentId, sessionId
+ * @param {object} context - { projectId?, agentId?, sessionId? }
+ * @returns {number} [0, 1]
+ */
+export function computeContextMatch(memory, context = {}) {
+  if (!memory || !context) return 0;
+  let score = 0;
+  if (context.projectId && memory.projectId &&
+      context.projectId === memory.projectId) {
+    score += 0.5;
+  }
+  if (context.agentId && memory.agentId &&
+      context.agentId === memory.agentId) {
+    score += 0.3;
+  }
+  if (context.sessionId && memory.sessionId &&
+      context.sessionId === memory.sessionId) {
+    score += 0.2;
+  }
+  // Cap at 1.0 (the three bonuses add up to at most 1.0, so this is a safeguard)
+  return Math.min(1, score);
+}
+// ─── Public API ──────────────────────────────────────────────────────────────
+/**
+ * Score a single memory object for relevance to a query + context.
+ *
+ * The result is a fixed weighted sum of the four component scores:
+ *   0.30 * recency + 0.20 * frequency + 0.35 * keyword + 0.15 * context
+ * Each component lies in [0, 1] and the weights add up to 1.
+ *
+ * Expected memory shape (all fields optional except content):
+ * {
+ *   content:     string,
+ *   timestamp:   string|number|Date,   // ISO or epoch ms
+ *   accessCount: number,
+ *   projectId:   string,
+ *   agentId:     string,
+ *   sessionId:   string,
+ * }
+ *
+ * A falsy memory scores 0. A memory without a timestamp gets a recency
+ * component of 0, and a missing content or accessCount is treated as '' or 0.
+ *
+ * @param {object} memory
+ * @param {string} query
+ * @param {object} [context]   - { projectId?, agentId?, sessionId? }
+ * @param {object} [opts]
+ * @param {number} [opts.nowMs]            - override current time (for testing); defaults to Date.now()
+ * @param {number} [opts.maxAccessCount]   - normalisation ceiling for frequency; defaults to 100
+ * @returns {number} weighted relevance score in [0, 1]
+ */
+export function scoreMemory(memory, query, context = {}, opts = {}) {
+  if (!memory) return 0;
+  const nowMs = opts.nowMs != null ? opts.nowMs : Date.now();
+  const maxAccessCount = opts.maxAccessCount != null ? opts.maxAccessCount : 100;
+  const recencyScore  = computeRecency(memory.timestamp, nowMs);
+  const frequencyScore = computeFrequency(memory.accessCount || 0, maxAccessCount);
+  const keywordScore  = computeKeywordMatch(memory.content || '', query);
+  const contextScore  = computeContextMatch(memory, context);
+  return (
+    0.30 * recencyScore  +
+    0.20 * frequencyScore +
+    0.35 * keywordScore  +
+    0.15 * contextScore
+  );
+}
+/**
+ * Rank an array of memories by relevance and return the top N.
+ *
+ * Each memory is shallow-copied and the copy receives a `relevanceScore`
+ * property from scoreMemory(); the input array and its objects are not
+ * modified. Copies are sorted by descending score and then truncated.
+ * A non-array or empty input returns an empty array.
+ * A maxResults below 1 is raised to 1, so a non-empty input yields at least
+ * one result for any numeric maxResults.
+ *
+ * @param {object[]} memories
+ * @param {string}   query
+ * @param {object}   [context]   - { projectId?, agentId?, sessionId? }
+ * @param {number}   [maxResults=10]
+ * @param {object}   [opts]      - forwarded to scoreMemory
+ * @returns {object[]} sorted slice with relevanceScore attached
+ */
+export function rankMemories(memories, query, context = {}, maxResults = 10, opts = {}) {
+  if (!Array.isArray(memories) || memories.length === 0) return [];
+  return memories
+    .map(m => ({ ...m, relevanceScore: scoreMemory(m, query, context, opts) }))
+    .sort((a, b) => b.relevanceScore - a.relevanceScore)
+    .slice(0, Math.max(1, maxResults));
+}
+/**
+ * Derive the max accessCount from a collection of memories.
+ * Useful for caller-side normalisation when passing opts.maxAccessCount.
+ *
+ * A missing or falsy accessCount counts as 0, and the result is never below 0.
+ * A non-array or empty input returns 0.
+ * NOTE(review): a null/undefined element makes `m.accessCount` throw here,
+ * whereas scoreMemory() tolerates falsy memories — confirm callers never pass
+ * sparse collections.
+ *
+ * @param {object[]} memories
+ * @returns {number} largest accessCount found, or 0
+ */
+export function maxAccessCount(memories) {
+  if (!Array.isArray(memories) || memories.length === 0) return 0;
+  return memories.reduce((max, m) => Math.max(max, m.accessCount || 0), 0);
+}

package/lib/memory/shared-adapter.mjs CHANGED Viewed

@@ -12,6 +12,7 @@ import fs from 'node:fs';
 import path from 'node:path';
 import os from 'node:os';
 import { createRequire } from 'node:module';
+import { rankMemories, maxAccessCount } from './relevance-scorer.mjs';
 const require = createRequire(import.meta.url);
@@ -124,41 +125,106 @@ export function rememberFact(agentId, content, options = {}) {
 /**
  * Recall memory context for a task using MemoryBroker (blends AgentKeeper + AgentMemory + Collections).
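+ * ENHANCED: Applies relevance scoring (recency + frequency + keyword + context); access metadata (accessCount, lastAccessed) is set on in-memory copies of the hits for scoring only and is not written back here.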
  * ENHANCED: Also includes relevant conversation history from project messages.
  * @param {string} projectDir - Project directory
  * @param {string} query - Task description or search query
- * @param {object} options - { maxResults?, includeDocs?, includeCode?, pathHints?, preferSuccessful?, userId?, projectId? }
+ * @param {object} options - { maxResults?, includeDocs?, includeCode?, pathHints?, preferSuccessful?, userId?, projectId?, agentId?, sessionId? }
  * @returns {Promise<string>} - Formatted context block
  */
 export async function recallMemoryContext(projectDir, query, options = {}) {
   const broker = getMemoryBroker(projectDir, { crewId: options.crewId || 'crew-lead' });
   let memoryContext = '';
   // Get standard memory (AgentKeeper + AgentMemory + Collections)
-  // NEW: Filter for success-weighted, high-signal artifacts only
+  // Fetch a larger candidate set so the relevance ranker has room to reorder
   if (broker) {
-    memoryContext = await broker.recallAsContext(query, {
-      maxResults: options.maxResults || 5,
-      includeDocs: options.includeDocs !== false,
-      includeCode: Boolean(options.includeCode),
-      pathHints: options.pathHints || [],
-      preferSuccessful: options.preferSuccessful !== false,
-      // NEW: Quality filters to prevent context contamination
-      minScore: 0.7,  // Only high-confidence matches
-      excludeFailed: true,  // Filter out failed tasks
-      excludeErrors: true,  // Filter out error-only entries
-      excludeTimeouts: true  // Filter out timeout failures
-    });
+    const maxResults = options.maxResults || 5;
+    const candidateLimit = Math.max(maxResults * 3, 15);
+    // Pull raw structured hits when available, fall back to formatted context
+    let rawHits = null;
+    if (typeof broker.recall === 'function') {
+      try {
+        rawHits = await broker.recall(query, {
+          maxResults: candidateLimit,
+          includeDocs: options.includeDocs !== false,
+          includeCode: Boolean(options.includeCode),
+          pathHints: options.pathHints || [],
+          preferSuccessful: options.preferSuccessful !== false,
+          minScore: 0.7,
+          excludeFailed: true,
+          excludeErrors: true,
+          excludeTimeouts: true
+        });
+      } catch {
+        rawHits = null;
+      }
+    }
+    if (rawHits && Array.isArray(rawHits) && rawHits.length > 0) {
+      // Build a relevance context for the ranker
+      const scoringContext = {
+        projectId: options.projectId,
+        agentId:   options.agentId,
+        sessionId: options.sessionId
+      };
+      // Normalise hits to the shape scoreMemory expects
+      const now = Date.now();
+      const normalised = rawHits.map(hit => ({
+        ...hit,
+        content:     hit.text || hit.content || '',
+        timestamp:   hit.metadata?.timestamp || hit.timestamp || new Date(now - 86400000).toISOString(),
+        accessCount: (hit.accessCount || 0) + 1,  // count this retrieval
+        lastAccessed: new Date(now).toISOString(),
+        projectId:   hit.metadata?.projectId || hit.projectId,
+        agentId:     hit.metadata?.agentId   || hit.agentId,
+        sessionId:   hit.metadata?.sessionId || hit.sessionId
+      }));
+      const scoringOpts = { nowMs: now, maxAccessCount: maxAccessCount(normalised) };
+      const ranked = rankMemories(normalised, query, scoringContext, maxResults, scoringOpts);
+      // Re-serialise to context string (mirrors broker.recallAsContext format)
+      if (ranked.length > 0) {
+        const lines = ranked.map((hit, i) => {
+          const score = hit.relevanceScore.toFixed(3);
+          const source = hit.source || hit.metadata?.source || 'memory';
+          const title  = hit.title  || hit.metadata?.title  || `Result ${i + 1}`;
+          return `[${source}] ${title} (relevance: ${score})\n${hit.content}`;
+        });
+        memoryContext = lines.join('\n\n---\n\n');
+      }
+    } else {
+      // Fallback: broker doesn't expose raw hits — use formatted context as-is
+      try {
+        memoryContext = await broker.recallAsContext(query, {
+          maxResults: options.maxResults || 5,
+          includeDocs: options.includeDocs !== false,
+          includeCode: Boolean(options.includeCode),
+          pathHints: options.pathHints || [],
+          preferSuccessful: options.preferSuccessful !== false,
+          // Quality filters to prevent context contamination
+          minScore: 0.7,
+          excludeFailed: true,
+          excludeErrors: true,
+          excludeTimeouts: true
+        });
+      } catch {
+        memoryContext = '';
+      }
+    }
   }
   // ENHANCEMENT: Add relevant conversation history from project messages
   // This lets agents see past discussions about the same topic
   if (options.projectId) {
     try {
       const ragModule = await getProjectMessagesRag();
       const conversationContext = ragModule?.getConversationContext(options.projectId, query, 3);
       if (conversationContext) {
         memoryContext += (memoryContext ? '\n\n' : '') + conversationContext;
       }
@@ -167,7 +233,7 @@ export async function recallMemoryContext(projectDir, query, options = {}) {
       console.warn('[shared-adapter] Failed to load conversation context:', e.message);
     }
   }
   return memoryContext;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "crewswarm",
-  "version": "0.9.4",
+  "version": "1.0.0",
   "description": "Local-first multi-agent orchestration platform — coordinate AI coding agents, LLMs, and tools from a single dashboard",
   "type": "module",
   "license": "MIT",
@@ -8,6 +8,10 @@
     "type": "git",
     "url": "https://github.com/crewswarm/crewswarm.git"
   },
+  "homepage": "https://crewswarm.ai",
+  "bugs": {
+    "url": "https://github.com/crewswarm/crewswarm/issues"
+  },
   "keywords": [
     "ai",
     "agents",
@@ -110,6 +114,7 @@
     "vibe:start": "cd apps/vibe && NODE_DISABLE_COMPILE_CACHE=1 npm start",
     "vibe:watch": "NODE_DISABLE_COMPILE_CACHE=1 node apps/vibe/watch-server.mjs",
     "vibe:full": "bash scripts/start-studio-full.sh",
+    "test:playwright": "npx playwright test --reporter=line",
     "test:e2e:vibe": "node node_modules/playwright/cli.js test --config=playwright.config.js",
     "test:e2e:vibe:headed": "node node_modules/playwright/cli.js test --config=playwright.config.js --headed",
     "crew-lead": "node crew-lead.mjs",
@@ -128,10 +133,12 @@
     "release:check": "bash scripts/release-check.sh",
     "test:report": "node scripts/test-report-summary.mjs",
     "test:rerun": "node scripts/test-rerun.mjs",
-    "test:stale": "node scripts/test-rerun.mjs --stale"
+    "test:stale": "node scripts/test-rerun.mjs --stale",
+    "typecheck": "tsc -p tsconfig.json"
   },
   "devDependencies": {
     "@playwright/test": "^1.58.2",
-    "puppeteer-core": "^24.40.0"
+    "puppeteer-core": "^24.40.0",
+    "typescript": "^5.9.3"
   }
 }