npm - autosnippet - Versions diffs - 2.18.0 → 2.19.0 - Mend

autosnippet 2.18.0 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dashboard/dist/assets/{icons-C6kshpB1.js → icons-C7FN32VL.js} +1 -1
package/dashboard/dist/assets/index-D8dCXLzr.js +129 -0
package/dashboard/dist/index.html +2 -2
package/lib/external/ai/AiProvider.js +42 -11
package/lib/external/ai/providers/ClaudeProvider.js +4 -2
package/lib/external/ai/providers/GoogleGeminiProvider.js +66 -8
package/lib/external/ai/providers/OpenAiProvider.js +48 -2
package/lib/external/mcp/handlers/bootstrap.js +1 -2
package/lib/http/HttpServer.js +4 -0
package/lib/http/routes/candidates.js +405 -0
package/lib/http/routes/search.js +113 -0
package/lib/infrastructure/vector/Chunker.js +3 -8
package/lib/infrastructure/vector/JsonVectorAdapter.js +2 -9
package/lib/service/candidate/SimilarityService.js +7 -35
package/lib/service/chat/ChatAgent.js +28 -686
package/lib/service/chat/ContextWindow.js +87 -3
package/lib/service/chat/ConversationStore.js +3 -4
package/lib/service/chat/ProjectSemanticMemory.js +9 -14
package/lib/service/chat/ReasoningLayer.js +10 -54
package/lib/service/chat/ToolRegistry.js +0 -52
package/lib/service/chat/tools.js +7 -6
package/lib/service/cursor/TokenBudget.js +4 -21
package/lib/service/search/CrossEncoderReranker.js +163 -0
package/lib/service/search/RetrievalFunnel.js +9 -36
package/lib/service/skills/SignalCollector.js +28 -28
package/lib/shared/similarity.js +101 -0
package/lib/shared/token-utils.js +46 -0
package/package.json +1 -1
package/dashboard/dist/assets/index-9byoG7kd.js +0 -129

package/lib/service/skills/SignalCollector.js CHANGED Viewed

@@ -205,8 +205,8 @@ export class SignalCollector {
       this.#logger.debug('[SignalCollector] invoking ChatAgent for analysis...');
       const { reply, toolCalls } = await this.#chatAgent.execute(prompt, { history: [], source: 'system' });
-      // 4. 解析 AI 响应
-      const parsed = this.#parseAiResponse(reply);
+      // 4. 解析 AI 响应 — 使用 AiProvider.extractJSON 统一 structured output 解析
+      const parsed = this.#parseStructuredReply(reply);
       const suggestions = parsed.suggestions || [];
       // 5. 过滤已推送
@@ -444,47 +444,47 @@ ${JSON.stringify(signals.codeChanges, null, 2)}
   }
   // ═══════════════════════════════════════════════════════
-  //  AI 响应解析
+  //  AI 响应解析 — 统一使用 AiProvider.extractJSON (Structured Output)
   // ═══════════════════════════════════════════════════════
-  #parseAiResponse(reply) {
-    if (!reply) return { suggestions: [], nextIntervalMinutes: null, summary: '' };
+  /**
+   * Extracts structured JSON from a ChatAgent ReAct reply
+   *
+   * Priority chain:
+   *   1. AiProvider.extractJSON (the original note credits it with markdown cleanup, truncation repair, trailing-comma handling, etc.; its implementation is not shown here)
+   *   2. Fallback: a one-line JSON object among the last 5 lines (matches the prompt's "output JSON on the last line" format)
+   *
+   * @param {string} reply — reply text from ChatAgent.execute()
+   * @returns {{ suggestions: Array, nextIntervalMinutes: number|null, summary: string }}
+   */
+  #parseStructuredReply(reply) {
+    const defaultResult = { suggestions: [], nextIntervalMinutes: null, summary: '' };
+    if (!reply) return defaultResult;
     try {
-      // Strategy 1: try to parse JSON from the last line, scanning lines from the last one backwards
+      // Strategy 1: unified parsing via AiProvider.extractJSON. NOTE(review): a throw here lands in the outer catch, so strategy 2 is skipped
+      const aiProvider = this.#chatAgent?.aiProvider;
+      if (aiProvider && typeof aiProvider.extractJSON === 'function') {
+        const obj = aiProvider.extractJSON(reply, '{', '}');
+        if (obj && Array.isArray(obj.suggestions)) return obj;
+      }
+      // Strategy 2: fallback — look for a one-line JSON object in the last 5 lines (matches the prompt instruction)
       const lines = reply.trim().split('\n');
-      for (let i = lines.length - 1; i >= 0; i--) {
+      for (let i = lines.length - 1; i >= Math.max(0, lines.length - 5); i--) {
         const line = lines[i].trim();
         if (line.startsWith('{') && line.endsWith('}')) {
           try {
             const obj = JSON.parse(line);
             if (obj.suggestions) return obj;
-          } catch { /* keep trying */ }
+          } catch { /* continue */ }
         }
       }
-      // Strategy 2: try to parse from a ```json ... ``` block
-      const codeBlockMatch = reply.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
-      if (codeBlockMatch) {
-        try {
-          const obj = JSON.parse(codeBlockMatch[1].trim());
-          if (obj.suggestions) return obj;
-        } catch { /* fallthrough */ }
-      }
-      // Strategy 3: try to find any JSON object
-      const jsonMatch = reply.match(/\{[\s\S]*"suggestions"\s*:\s*\[[\s\S]*\][\s\S]*\}/);
-      if (jsonMatch) {
-        try {
-          const obj = JSON.parse(jsonMatch[0]);
-          if (obj.suggestions) return obj;
-        } catch { /* fallthrough */ }
-      }
     } catch {
-      this.#logger.warn('[SignalCollector] failed to parse AI response');
+      this.#logger.warn('[SignalCollector] failed to parse structured reply');
     }
-    return { suggestions: [], nextIntervalMinutes: null, summary: '' };
+    return defaultResult;
   }
   // ═══════════════════════════════════════════════════════

package/lib/shared/similarity.js ADDED Viewed

@@ -0,0 +1,101 @@
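+/**
+ * similarity — shared similarity helpers
+ *
+ * All text/vector similarity calculations in the project go through this module:
+ *   - jaccardSimilarity: Jaccard coefficient over token sets
+ *   - cosineSimilarity:  cosine similarity of two vectors
+ *   - textSimilarity:    high-level text similarity (Jaccard + optional substring bonus)
+ *   - tokenizeForSimilarity: general-purpose n-gram tokenizer (bigrams by default) for similarity use
+ *
+ * @module shared/similarity
+ */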
+/**
+ * Tokenizes text into whole words plus character n-grams, for similarity scoring.
+ *
+ * Lowercases the text, replaces every run of characters outside a-z, 0-9, U+4E00-U+9FFF and U+3400-U+4DBF with a space, then splits on whitespace.
+ * Each resulting word of length >= n contributes itself and all of its n-character slices; NOTE(review): shorter words (e.g. a lone CJK character when n = 2) contribute nothing, although the original comment promised single-character CJK tokens.
+ *
+ * @param {string} text — raw input text; a falsy value yields an empty Set
+ * @param {number} [n=2] — n-gram length
+ * @returns {Set<string>} the collected tokens
+ */
+export function tokenizeForSimilarity(text, n = 2) {
+  if (!text) return new Set();
+  const lower = text.toLowerCase().replace(/[^a-z0-9\u4e00-\u9fff\u3400-\u4dbf]+/g, ' ').trim();
+  const tokens = new Set();
+  const words = lower.split(/\s+/);
+  for (const w of words) {
+    if (w.length >= n) tokens.add(w); // keep the whole word as a token
+    for (let i = 0; i <= w.length - n; i++) {
+      tokens.add(w.slice(i, i + n)); // sliding window of n characters
+    }
+  }
+  return tokens;
+}
+/**
+ * Jaccard similarity — |A ∩ B| / |A ∪ B|
+ *
+ * @param {Set<string>} a — token set A
+ * @param {Set<string>} b — token set B
+ * @returns {number} 0.0 - 1.0; 0 when either set is missing or empty
+ */
+export function jaccardSimilarity(a, b) {
+  if ((!a || a.size === 0) && (!b || b.size === 0)) return 0;
+  if (!a || a.size === 0 || !b || b.size === 0) return 0; // also covers the both-empty case checked above
+  let intersection = 0;
+  const smaller = a.size <= b.size ? a : b; // iterate the smaller set, look up in the larger one
+  const larger = a.size <= b.size ? b : a;
+  for (const t of smaller) {
+    if (larger.has(t)) intersection++;
+  }
+  const union = a.size + b.size - intersection; // |A ∪ B| = |A| + |B| - |A ∩ B|
+  return union > 0 ? intersection / union : 0;
+}
+/**
+ * Cosine similarity — dot product / (||a|| * ||b||)
+ *
+ * @param {number[]} a — vector A
+ * @param {number[]} b — vector B
+ * @returns {number} 0.0 - 1.0 when no component is negative; 0 for missing, empty or length-mismatched vectors and for a zero norm
+ */
+export function cosineSimilarity(a, b) {
+  if (!a || !b || a.length !== b.length || a.length === 0) return 0;
+  let dotProduct = 0, normA = 0, normB = 0;
+  for (let i = 0; i < a.length; i++) {
+    dotProduct += a[i] * b[i];
+    normA += a[i] * a[i]; // accumulates the squared norm; the square root is taken once after the loop
+    normB += b[i] * b[i];
+  }
+  const denom = Math.sqrt(normA) * Math.sqrt(normB);
+  return denom > 0 ? dotProduct / denom : 0; // a zero vector would otherwise divide by zero
+}
+/**
+ * High-level text similarity — Jaccard over tokenizeForSimilarity tokens, plus an optional substring-containment bonus
+ *
+ * @param {string} textA — text A
+ * @param {string} textB — text B
+ * @param {object} [opts]
+ * @param {number} [opts.n=2] — n-gram length passed to tokenizeForSimilarity
+ * @param {boolean} [opts.substringBonus=false] — when true, adds 0.3 (capped at 1.0) if one lowercased text contains the other
+ * @returns {number} 0.0 - 1.0
+ */
+export function textSimilarity(textA, textB, opts = {}) {
+  const { n = 2, substringBonus = false } = opts;
+  const tokensA = tokenizeForSimilarity(textA, n);
+  const tokensB = tokenizeForSimilarity(textB, n);
+  let sim = jaccardSimilarity(tokensA, tokensB);
+  if (substringBonus) {
+    const lowerA = (textA || '').toLowerCase();
+    const lowerB = (textB || '').toLowerCase();
+    if (lowerA && lowerB && (lowerA.includes(lowerB) || lowerB.includes(lowerA))) { // both must be non-empty: an empty string is contained in every string
+      sim = Math.min(1.0, sim + 0.3);
+    }
+  }
+  return sim;
+}

package/lib/shared/token-utils.js ADDED Viewed

@@ -0,0 +1,46 @@
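+/**
+ * token-utils — shared token-count estimation helpers
+ *
+ * All token estimation in the project goes through this module, so that call sites do not each use a different chars-per-token ratio.
+ *
+ * Algorithm: CJK characters count as ~2 chars/token, ASCII characters as ~4 chars/token.
+ * These ratios approximate mainstream tokenizers (tiktoken / SentencePiece):
+ *   - GPT-4 tokenizer: English ~4 chars/token, Chinese ~1.5 chars/token
+ *   - Gemini (SentencePiece): similar ratios
+ *   - The values are meant to be conservative (over- rather than under-estimate); note that 2 chars/token for CJK is below the ~1.5 chars/token figure above, so CJK text is under-estimated relative to it
+ *
+ * @module shared/token-utils
+ */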
+/**
+ * Estimates the number of tokens in a text
+ *
+ * @param {string} text — text to estimate; a falsy value yields 0
+ * @returns {number} estimated token count (rounded up)
+ */
+export function estimateTokens(text) {
+  if (!text) return 0;
+  let tokens = 0;
+  for (const ch of text) {
+    // Meant for CJK ideographs, their extensions and common symbol blocks; the test is only "first UTF-16 code unit above 0x2E80", so everything above it (surrogate pairs included) takes this branch
+    if (ch.charCodeAt(0) > 0x2e80) {
+      tokens += 0.5; // ~2 chars per token for CJK
+    } else {
+      tokens += 0.25; // ~4 chars per token for English/ASCII
+    }
+  }
+  return Math.ceil(tokens);
+}
+/**
+ * Fast estimate — a flat 3.5 chars/token with no CJK distinction (the original note calls this the pure-ASCII fast path)
+ *
+ * Intended, per the original note, for English-only text or mixed-language text where precision is not needed (e.g. ContextWindow's internal compression threshold).
+ *
+ * @param {string} text — text to estimate; a falsy value yields 0
+ * @returns {number} text.length / 3.5, rounded up
+ */
+export function estimateTokensFast(text) {
+  if (!text) return 0;
+  return Math.ceil(text.length / 3.5);
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "autosnippet",
-  "version": "2.18.0",
+  "version": "2.19.0",
   "description": "AutoSnippet - 连接开发者、AI 与项目知识库的工具",
   "type": "module",
   "main": "lib/bootstrap.js",