npm - @witty-ai/skill-insight - Versions diffs - 0.5.0-beta → 0.6.0-beta - Mend

@witty-ai/skill-insight 0.5.0-beta → 0.6.0-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (223) hide show

package/.next/standalone/.next/server/chunks/src_98433cb8._.js ADDED Viewed

@@ -0,0 +1,175 @@
+module.exports=[50374,e=>{"use strict";function t(e){if(!e||!Array.isArray(e)||0===e.length)return[];if(e.some(e=>e&&(e.requestMessages||e.responseMessage)))return e;let t=[],n=[],s=e=>{if(0===e.length)return;let n=-1;for(let t=e.length-1;t>=0;t--)if("assistant"===e[t].role){n=t;break}-1!==n?t.push({requestMessages:e.slice(0,n),responseMessage:e[n]}):t.push({requestMessages:e,responseMessage:null})};for(let t of e)t&&("user"===(t.role||"unknown")&&n.length>0&&(s(n),n=[]),n.push(t));return s(n),t}function n(e){let t=new Set,n=[],s=/^[a-zA-Z0-9_\-\.]+$/,i=e=>{if(e)for(let i of e.tool_calls||e.toolCalls||[]){let e=(i?.function?.name??i?.name??"").toLowerCase(),l=i?.function?.arguments??i?.arguments??"";try{let i="string"==typeof l?JSON.parse(l):l;if("skill"===e||"load_skill"===e){let e=i?.name??i?.skill_name??i?.skillName??i?.skill;if(null!=e&&String(e).trim()){let l=String(e).trim().replace(/^['"]+|['"]+$/g,"");if(s.test(l)&&!t.has(l)){t.add(l);let e=i?.version!=null?Number(i.version):null;n.push({name:l,version:null===e||isNaN(e)?null:e})}}continue}if("task"===e){let e=i?.load_skills??i?.loadSkills??i?.skills??[];if(Array.isArray(e))for(let i of e){let e="string"==typeof i?i:i?.name??i?.skill??i?.skill_name??i?.skillName;if(null==e||!String(e).trim())continue;let l=String(e).trim().replace(/^['"]+|['"]+$/g,"");if(!s.test(l)||t.has(l))continue;t.add(l);let r="object"==typeof i?i?.version:null,o=null!=r?Number(r):null;n.push({name:l,version:null===o||isNaN(o)?null:o})}}}catch{}}};for(let t of e)for(let e of(i(t.responseMessage),t.requestMessages||[]))"assistant"===e.role&&i(e);return n}function s(e){let t=new Set,n=[],s=e=>{if(e&&Array.isArray(e))for(let s of e){if(s?.type!=="tool_use")continue;let e=(s?.name||"").toLowerCase();if("skill"!==e&&"load_skill"!==e)continue;let i=s.input,l=i?.skill??i?.skill_name??i?.skillName??i?.name;if(null==l||!String(l).trim())continue;let r=String(l).trim().replace(/^['"]+|['"]+$/g,"");if(/^[a-zA-Z0-9_\-\.]+$/.test(r)&&!t.has(r)){t.add(r);let e=i?.version!=null?Number(i.version):null;n.push({name:r,version:null===e||isNaN(e)?null:e})}}};for(let t of e)if(t.responseMessage?.content&&s(t.responseMessage.content),t.requestMessages)for(let e of t.requestMessages)"assistant"===e.role&&e.content&&s(e.content);return n}function i(e){let t=new Set,n=[],s=e=>{if(e&&Array.isArray(e))for(let s of e){if(s?.type!=="toolCall")continue;let e=(s?.name||"").toLowerCase();if("skill"!==e&&"load_skill"!==e)continue;let i=s?.arguments,l=i?.skill??i?.skill_name??i?.skillName??i?.name;if(null==l||!String(l).trim())continue;let r=String(l).trim().replace(/^['"]+|['"]+$/g,"");if(/^[a-zA-Z0-9_\-\.]+$/.test(r)&&!t.has(r)){t.add(r);let e=i?.version!=null?Number(i.version):null;n.push({name:r,version:null===e||isNaN(e)?null:e})}}};for(let t of e)if(t.responseMessage?.content&&s(t.responseMessage.content),t.requestMessages)for(let e of t.requestMessages)"assistant"===e.role&&e.content&&s(e.content);return n}e.s(["extractSkillsWithVersionsFromClaudeSession",()=>s,"extractSkillsWithVersionsFromOpenClawSession",()=>i,"extractSkillsWithVersionsFromOpencodeSession",()=>n,"normalizeInteractions",()=>t])},62414,e=>{"use strict";function t(e,t,n,s,i){let l=i?`Reference Skill Definition (Using this skill as the context for evaluation):
+${i}
+`:"";return`
+You are an objective and strict judge. Your task is to evaluate a "User Answer" against a set of weighted criteria for a given "User Query".
+${l}User Query: ${e}
+User Answer: ${t}
+Evaluation Criteria (Score strictly based on these weighted items):
+1. Root Causes (Must identify these issues):
+${n.map(e=>`   - [ID: ${e.id}] [Weight: ${e.weight}] ${e.content}`).join("\n")||"   (None)"}
+2. Key Actions (Must perform these actions):
+${s.map(e=>`   - [ID: ${e.id}] [Weight: ${e.weight}] ${e.content}`).join("\n")||"   (None)"}
+Evaluation Steps:
+1. For each item listed above (marked with [ID: ...]), determine the degree of match (0.0 to 1.0).
+   - 0.0 = Not mentioned or completely wrong.
+   - 0.5 = Partially mentioned or vague.
+   - 1.0 = Clearly and correctly addressed.
+   **CRITICAL**: For Key Actions involving specific operations (e.g., "backup", "modify", "restart"), you must find EXPLICT EVIDENCE in the User Answer that these actions were performed (checking/reading is NOT the same as backing up).
+2. Provide a brief explanation (in Chinese) for your evaluation of each item.
+3. If a Reference Skill Definition is provided, consider whether the answer aligns with the skill's capabilities and instructions, but primarily score based on the specific Root Causes and Key Actions listed above.
+Respond ONLY with a JSON object in the following format:
+{
+  "evaluations": [
+    { "id": "RC-0", "match_score": 0.5, "explanation": "此处用中文简要解释评分理由..." },
+    { "id": "KA-0", "match_score": 1.0, "explanation": "..." }
+    ...
+  ]
+}
+`}e.s(["generateJudgePrompt",()=>t])},33526,e=>{"use strict";let t=`
+你是一个专业的 Skill 优化分析师。你的任务是：
+1. 分析 Agent 在某个评分项上扣分的原因
+2. 判断该扣分是否可以通过优化 Skill 定义来解决
+3. 检查 Agent 的实际执行过程是否符合 Skill 中定义的步骤
+## 判断标准
+**是 Skill 问题的情况**：
+- Skill 中完全没有提到该知识点或操作步骤
+- Skill 中的描述不够清晰或不够详细，导致 Agent 无法正确理解
+- Skill 中缺少关键的参数说明、工具使用方法或注意事项
+- Skill 中的信息过时或与实际情况不符
+- Agent 的执行步骤与 Skill 定义不一致，但 Skill 的指导本身存在问题（如步骤顺序不合理、缺少前置条件说明等）
+**不是 Skill 问题的情况**：
+- Skill 中已经清楚地包含了相关信息，但 Agent 没有正确使用（模型能力问题）
+- 该评分项要求的是通用知识，不在 Skill 的职责范围内
+- 该评分项涉及的是运行时环境问题（网络、权限等）
+- Agent 的执行过程偏离了 Skill 的指导，但 Skill 的指导本身是正确和清晰的
+## 分析步骤
+1. **阅读 Skill 定义**：理解 Skill 规定的操作步骤和知识要点
+2. **查看交互历史**：观察 Agent 实际执行了哪些操作
+3. **对比分析**：
+   - Agent 的执行是否遵循了 Skill 的指导？
+   - 如果没有遵循，是因为 Skill 不清晰，还是 Agent 自身的问题？
+   - 评分项要求的内容在 Skill 中是否有足够的覆盖？
+4. **得出结论**：判断是否是 Skill 问题，并给出改进建议
+## 输出格式
+请以 JSON 格式返回：
+{
+    "id": "评分项ID",
+    "is_skill_issue": true或false,
+    "reasoning": "使用中文详细解释判断依据，需要：1) 引用 Skill 定义中的相关内容 2) 说明 Agent 执行过程中的表现 3) 解释为什么是/不是 Skill 问题",
+    "improvement_suggestion": "如果是 Skill 问题，给出具体的改进建议，说明应该在 Skill 中添加或修改什么内容；否则留空或不填"
+}
+`;function n(e,n,s,i,l){return`${t}
+---
+## 分析材料
+### [当前 Skill 定义]
+\`\`\`
+${e}
+\`\`\`
+### [用户问题]
+${s}
+### [Agent 执行过程 - 完整交互历史]
+\`\`\`
+${l}
+\`\`\`
+### [Agent 最终回答]
+${i}
+### [待分析的评分项]
+- **ID**: ${n.id}
+- **类型**: ${"root_cause"===n.type?"Root Cause (根因分析)":"Key Action (关键操作)"}
+- **评分标准**: ${n.content}
+- **得分**: ${(100*n.match_score).toFixed(0)}% (满分 100%)
+- **扣分原因**: ${n.explanation}
+- **权重**: ${n.weight}
+---
+请根据以上材料，判断此评分项的扣分是否可以通过优化 Skill 定义来改善，并输出 JSON 结果：`}e.s(["SKILL_ISSUE_ANALYSIS_PROMPT",0,t,"generateSkillIssuePrompt",()=>n])},44087,e=>{"use strict";let t=`
+你是一位专家级的日志分析师。你的任务是分析用户与 AI 助手之间的对话历史（包括工具输出），并提取任何“中间故障”或“异常过程”。
+“中间故障”或“异常过程”定义如下：
+1. **工具执行错误**：代理尝试运行工具（例如 bash 命令、python 脚本）但失败了（非零退出代码、堆栈跟踪、错误消息）。
+2. **逻辑/推理修正**：代理意识到自己犯了错误并明确纠正自己（例如，“我犯了一个错误...”，“之前的方法失败了...”）。
+3. **超时/卡住**：代理提到等待太久或进程卡住。
+4. **无效参数**：代理尝试使用带有无效参数的工具并被系统拒绝。
+你将获得包含完整对话历史的最后一次交互内容。
+逐步分析历史记录。对于发现的每个故障，提取：
+- failure_type: (Tool Error / Reasoning Error / Timeout / Invalid Usage)
+- description: 用中文简要总结出了什么问题。
+- context: 导致失败的具体命令或推理内容。
+- recovery: 代理如何尝试恢复（如果有）。
+仅以以下 JSON 格式响应：
+{
+  "failures": [
+    {
+      "failure_type": "Tool Error",
+      "description": "无法安装包 'xyz'",
+      "context": "pip install xyz",
+      "recovery": "代理尝试改用 apt-get。"
+    },
+    ...
+  ]
+}
+如果未发现故障，请返回：
+{
+  "failures": []
+}
+`;function n(e){return`
+${t}
+Conversation History:
+${e}
+`}e.s(["FAILURE_EXTRACTION_PROMPT",0,t,"generateFailureAnalysisPrompt",()=>n])},18868,e=>{"use strict";let t=`
+你是一位专家级的日志分析师。你的任务是分析用户与 AI 助手之间的完整对话历史，并精准提取本次会话的**最终执行结果**。
+**提取规则：**
+1. **必须完整保留**：如果最终结果分散在多个连续的回复中（例如先输出长篇详细报告，紧接着输出总结摘要），必须将它们**全部提取并按顺序拼接**，形成一个完整的报告。
+2. **剔除过程噪声**：只提取最终交付给用户的有效信息（如分析报告、代码方案、问题解答）。必须剔除之前的思考过程、工具调用命令（如 bash/python）、工具执行日志、自我纠正的尝试等中间过程。
+3. **保持原样**：提取的内容必须**完全保持原样（Verbatim）**，不要进行总结、改写或缩减。包括 Markdown 格式（标题、列表、代码块）都要保留。
+4. **识别头部**：通常最终结果会以明确的标题开始（如 "# 分析报告"、"## 结论"），或者直接给出用户请求的答案。
+**输入数据：**
+完整的对话历史记录。
+**输出格式：**
+请直接返回提取后的最终结果内容。不要包含 JSON 格式，不要包含 "Here is the result:" 等废话，由于内容可能包含 Markdown，请确保输出是纯文本格式的 Markdown 内容。
+`;function n(e){return`
+${t}
+**对话历史：**
+${e}
+**请提取最终结果：**
+`}e.s(["EXTRACTION_PROMPT",0,t,"generateExtractionPrompt",()=>n])},85874,e=>e.a(async(t,n)=>{try{var s=e.i(22734);e.i(89228);var i=e.i(88045),l=e.i(14747),r=e.i(74008),o=e.i(22965),a=e.i(50374),c=t([o]);[o]=c.then?(await c)():c;let k=l.default.join(process.cwd(),"data","model_debug.jsonl");function u(e,t,n){try{let i=l.default.dirname(k);s.default.existsSync(i)||s.default.mkdirSync(i,{recursive:!0});let r={timestamp:new Date().toISOString(),stage:e,input:t,output:n};s.default.appendFileSync(k,JSON.stringify(r)+"\n")}catch(e){console.error("Log error",e)}}async function f(e){let t=await (0,o.getActiveConfig)(e);if(!t)return{client:null,model:null};let n=t.apiKey||"no-api-key-required",s=t.baseUrl||"https://api.deepseek.com",{customFetch:l}=(0,r.getProxyConfig)();return{client:new i.OpenAI({apiKey:n,baseURL:s,fetch:l}),model:t.model||"deepseek-chat"}}async function g(t,n,s,i){let{client:l,model:r}=await f(i);if(!l||!l.apiKey)return console.warn("LLM Evaluation disabled or missing config. Skipping."),{is_correct:!1,score:0,reason:"请在首页左上角的设置中配置 LLM"};try{let i=n.root_causes||[],o=n.key_actions||[],a=i.map((e,t)=>({id:`RC-${t}`,...e})),c=o.map((e,t)=>({id:`KA-${t}`,...e})),{generateJudgePrompt:f}=e.r(62414),g=f(t,s,a,c,n.skill_definition),m=await l.chat.completions.create({messages:[{role:"user",content:g}],model:r});console.log(`[Judge API Debug] Model: ${r}. Received response choices:`,m?.choices?.length);let d=m.choices?.[0]?.message?.content;if(u("result_evaluation",{prompt:g},{raw_output:d}),!d)throw console.error("\n[Judge API Error 🚨] LLM content is empty or undefined!"),console.error(">>> Full LLM Response:"),console.error(JSON.stringify(m,null,2)),console.error("<<<\n"),Error("No content from evaluation model");let h=d.trim(),p=h.match(/```(?:json)?\\s*([\\s\\S]*?)\\s*```/i);if(p)h=p[1];else{let e=h.indexOf("{"),t=h.lastIndexOf("}");-1!==e&&-1!==t&&t>=e&&(h=h.substring(e,t+1))}let y=JSON.parse(h).evaluations||[],k=0,S=0,A=[];a.forEach(e=>{let t=y.find(t=>t.id===e.id),n=t?Math.max(0,Math.min(1,Number(t.match_score))):0,s=t?.explanation||"未找到评分结果";k+=n*e.weight,S+=e.weight,A.push(`1. **Root Cause** [${e.content.replace(/\n/g," ")}]: ${(100*n).toFixed(0)}% match. ${s} (Weight: ${e.weight})`)}),c.forEach(e=>{let t=y.find(t=>t.id===e.id),n=t?Math.max(0,Math.min(1,Number(t.match_score))):0,s=t?.explanation||"未找到评分结果";k+=n*e.weight,S+=e.weight,A.push(`2. **Key Action** [${e.content.replace(/\n/g," ")}]: ${(100*n).toFixed(0)}% match. ${s} (Weight: ${e.weight})`)});let $=0;$=S>0?k/S:0;let _=A.map(e=>{let t=e.match(/(\d+)% match/),n=e.match(/\(Weight: ([\d\.]+)\)/);if(t&&n){let e=(parseInt(t[1])/100).toFixed(1),s=parseFloat(n[1]).toFixed(1);return`${e}*${s}`}return null}).filter(e=>e).join(" + "),w=A.map(e=>{let t=e.match(/\(Weight: ([\d\.]+)\)/);return t?parseFloat(t[1]).toFixed(1):null}).filter(e=>e).join(" + "),N=`**Calculation**: (${_}) / (${w}) = ${k.toFixed(2)} / ${S.toFixed(2)} = ${$.toFixed(2)}`;A.push("",N);let x=A.join("\n");return{is_correct:$>=.8,score:$,reason:x}}catch(e){return console.error("LLM Judgment Error:",e),{is_correct:!1,score:0,reason:"Judgment API failed"}}}async function m(t,n,s,i,l,r){console.log("[ItemAttribution] Starting analysis...");let{client:o,model:a}=await f(r);if(!o||!t||!n)return console.warn(`[ItemAttribution] ✗ Early return: client=${o?"present":"absent"}, skillDef=${t?"present":"absent"}, judgmentReason=${n?"present":"absent"}`),[];let c=function(e){let t=[];if(!e)return t;let n=e.split("\n"),s={rc:0,ka:0};for(let e of n){let n=e.match(/\*\*Root Cause\*\*\s*\[(.*?)\]\s*.*?:\s*(\d+)%\s*match\.\s*(.+?)\s*\(Weight:\s*([\d.]+)\)/);if(n){t.push({id:`RC-${s.rc++}`,type:"root_cause",content:n[1].replace(/\.{3}$/,""),match_score:parseInt(n[2])/100,explanation:n[3].trim(),weight:parseFloat(n[4])});continue}let i=e.match(/\*\*Key Action\*\*\s*\[(.*?)\]\s*.*?:\s*(\d+)%\s*match\.\s*(.+?)\s*\(Weight:\s*([\d.]+)\)/);i&&t.push({id:`KA-${s.ka++}`,type:"key_action",content:i[1].replace(/\.{3}$/,""),match_score:parseInt(i[2])/100,explanation:i[3].trim(),weight:parseFloat(i[4])})}return t}(n);console.log(`[ItemAttribution] Parsed ${c.length} evaluation items from judgment reason`);let g=c.filter(e=>e.match_score<1);if(console.log(`[ItemAttribution] ${g.length} items need analysis (score < 100%)`),0===g.length)return console.log(`[ItemAttribution] ✗ No imperfect items found, returning empty array`),[];let{generateSkillIssuePrompt:m}=e.r(33526),d=[];for(let e of g)try{let n=m(t,e,s,i,l),r=await o.chat.completions.create({messages:[{role:"user",content:n}],model:a}),c=r.choices?.[0]?.message?.content;if(!c){console.error(`
+[SkillAnalysis API Error 🚨] LLM content is empty for item: ${e.id}`),console.error(">>> Full LLM Response:"),console.error(JSON.stringify(r,null,2)),console.error("<<<\n");continue}u("skill_issue_analysis",{item_id:e.id,prompt:n},{raw_output:c});let f=c.trim(),g=f.match(/```(?:json)?\\s*([\\s\\S]*?)\\s*```/i);if(g)f=g[1];else{let e=f.indexOf("{"),t=f.lastIndexOf("}");-1!==e&&-1!==t&&t>=e&&(f=f.substring(e,t+1))}let h=JSON.parse(f);!0===h.is_skill_issue?(d.push({...e,is_skill_issue:!0,reasoning:h.reasoning??"",improvement_suggestion:h.improvement_suggestion}),console.log(`[SkillAnalysis] ${e.id}: IS Skill Issue - ${h.reasoning?.substring(0,50)}...`)):console.log(`[SkillAnalysis] ${e.id}: NOT Skill Issue - ${h.reasoning?.substring(0,50)}...`)}catch(t){console.error(`[SkillAnalysis] Error analyzing ${e.id}:`,t.message),u("skill_issue_analysis_error",{item_id:e.id},{error:t.message})}return d}async function d(t,n,s,i,l,r,o,a){let c=p(t);if(!c||0===c.length)return{failures:[]};try{let{client:t,model:g}=await f(a);if(!t||!t.apiKey)return console.warn("LLM Analysis disabled. Skipping."),{failures:[],skill_issues:[]};let d="",h=c[c.length-1];(h.requestMessages||[]).forEach(e=>{let t="";"string"==typeof e.content?t=e.content:Array.isArray(e.content)&&(t=JSON.stringify(e.content)),d+=`[${(e.role||"UNKNOWN").toUpperCase()}]: ${t}
+`});let p=h.responseMessage;if(p){let e="";"string"==typeof p.content?e=p.content:Array.isArray(p.content)&&(e=JSON.stringify(p.content)),d+=`[ASSISTANT]: ${e}
+`}else h.debug_raw_stream&&(d+=`[SYSTEM/TOOL OUTPUTS]: (Check raw logs for full details)
+`);let{generateFailureAnalysisPrompt:y}=e.r(44087),k=y(d),S=await t.chat.completions.create({messages:[{role:"user",content:k}],model:g}),A=S.choices?.[0]?.message?.content;if(!A)return console.error("\n[Failure Analysis API Error 🚨] LLM content is empty!"),console.error(">>> Full LLM Response:"),console.error(JSON.stringify(S,null,2)),console.error("<<<\n"),{failures:[]};u("failure_analysis",{prompt:k,history_length:d.length},{raw_output:A});let $=A.trim(),_=$.match(/```(?:json)?\\s*([\\s\\S]*?)\\s*```/i);if(_)$=_[1];else{let e=$.indexOf("{"),t=$.lastIndexOf("}");-1!==e&&-1!==t&&t>=e&&($=$.substring(e,t+1))}let w=JSON.parse($).failures||[],N=[];if(console.log(`[SkillAnalysis] Checking: skillName=${n||"none"}, skillDef=${s?"present":"absent"}, failuresCount=${w.length}, answerScore=${i}`),n&&s){console.log(`[SkillAnalysis] ✓ Skill name and definition found`);let e=o||"(见交互历史)",t=r||"(未知)";console.log(`[SkillAnalysis] Conditions check: answerScore=${i}, judgmentReason=${l?"present":"absent"}, history=${d?"present":"absent"}`),void 0!==i&&i<1&&l&&d?(console.log(`[SkillAnalysis] ✓ All conditions met. Score is imperfect (${i}). Analyzing which items are Skill issues...`),console.log(`[SkillAnalysis] Using: query=${t.substring(0,50)}..., answer=${e.substring(0,50)}..., history_len=${d.length}`),N=await m(s,l,t,e,d,a),console.log(`[SkillAnalysis] Analysis complete: ${N.length} items identified as Skill issues`)):void 0!==i&&i>=1?console.log(`[SkillAnalysis] ✗ Skipped: Perfect score (${i}). No Skill analysis needed.`):console.log(`[SkillAnalysis] ✗ Skipped: Conditions not met - answerScore=${i}, judgmentReason=${l?"present":"absent"}, history=${d?"present":"absent"}`)}else console.warn(`[SkillAnalysis] ✗ Skipped: Missing skillName (${n||"none"}) or skillDef (${s?"present":"absent"})`);return{failures:w,skill_issues:N.length>0?N:void 0}}catch(e){return console.error("Failure Analysis Error:",e),{failures:[]}}}function h(e){if(!e||"string"!=typeof e)return!1;try{let t=JSON.parse(e);if(t&&"object"==typeof t)return!0}catch(e){}return!1}function p(e){return(0,a.normalizeInteractions)(e)}async function y(t,n){let s=p(t);if(!s||0===s.length)return{query:"",skill:"",final_result:""};let i="",l="",r="";for(let e of s){let t=(e.requestMessages||[]).find(e=>"user"===e.role);if(t){let e="";if("string"==typeof t.content)e=t.content;else if(Array.isArray(t.content)){let n=t.content.filter(e=>"text"===e.type&&!e.text.trim().startsWith("<system-reminder>")&&!e.text.includes("[SUGGESTION MODE:"));n.length>0&&(e=n.map(e=>e.text).join("\n").trim())}if(e){let t=(e=e.replace(/<EXTRA_INFO>[\s\S]*?<\/EXTRA_INFO>/g,"")).trim();if("count"===t||"ping"===t||t.startsWith("Please write a 5-10 word title")||"hi"===t||"hello"===t&&s.length>2)continue;if(i||(i=t),i)break}}}for(let e=s.length-1;e>=0;e--){let t=s[e];if((t.requestMessages||[]).some(e=>"string"==typeof e.content?e.content.includes("[SUGGESTION MODE:"):!!Array.isArray(e.content)&&e.content.some(e=>e.text&&e.text.includes("[SUGGESTION MODE:"))))continue;let n=t.responseMessage,i="";if(n&&n.content&&"string"==typeof n.content)i=n.content;else if(n&&Array.isArray(n.content)){let e=n.content.find(e=>"text"===e.type);e&&(i=e.text)}if((!i||""===i.trim())&&t.debug_raw_stream)try{let e="";for(let n of t.debug_raw_stream)n.choices&&n.choices[0]&&n.choices[0].delta&&n.choices[0].delta.content&&(e+=n.choices[0].delta.content),"content_block_delta"===n.type&&n.delta&&"text_delta"===n.delta.type&&(e+=n.delta.text);e&&(i=e)}catch(e){}let r=[];if(t.requestMessages&&t.requestMessages.length>0)for(let e=t.requestMessages.length-1;e>=0;e--){let n=t.requestMessages[e];if("assistant"===n.role){let e="";if("string"==typeof n.content)e=n.content;else if(Array.isArray(n.content)){let t=n.content.find(e=>"text"===e.type);t&&(e=t.text)}if(e&&e.trim()){let t=e.trim();if(!h(t)&&(r.unshift(t),t.startsWith("#")||t.startsWith("##")||t.startsWith("###")))break}else break}else break}if(i&&i.trim()){let e=i.trim();!h(e)&&(r.push(e),e.startsWith("#")||e.startsWith("##")||e.startsWith("###"))}if(r.length>0){l=r.join("\n\n");break}}let o=new Set,a="";for(let e of s){(e.requestMessages||[]).forEach(e=>{"string"==typeof e.content?a+=e.content+"\n":Array.isArray(e.content)&&e.content.forEach(e=>{"text"===e.type&&(a+=e.text+"\n")})});let t=e.responseMessage;t&&("string"==typeof t.content?a+=t.content+"\n":Array.isArray(t.content)&&t.content.forEach(e=>{"text"===e.type&&(a+=e.text+"\n")}),t.tool_calls&&Array.isArray(t.tool_calls)&&t.tool_calls.forEach(e=>{let t=e.function?.name??e.name;if(t)if("skill"===t||"load_skill"===t)try{let t=e.function?.arguments??e.arguments,n="string"==typeof t?JSON.parse(t):t,s=n?.name??n?.skill_name??n?.skillName??n?.skill;s&&o.add(s)}catch(e){}else o.add(t)}),t.function_call&&t.function_call.name&&o.add(t.function_call.name))}let c=a.match(/(?:Loading skill|Load skill)[:\s]+([a-zA-Z0-9_\-\.]+)/i);c&&c[1]&&o.add(c[1].trim());let g=a.match(/Skill\s+([a-zA-Z0-9_\-\.]+)\s+loaded/i);g&&g[1]&&o.add(g[1].trim()),o.size>0&&(r=Array.from(o).join(", ")),console.log(`[Rule-Based Analysis] Query: ${i.substring(0,20)}..., Skill: ${r}, Result Length: ${l.length}`),u("extraction",{messages_summary:`Total ${s.length} interactions`,notes:"Scanned full history"},{query:i,skill:r,final_result:l});let m="";try{let{client:t,model:i}=await f(n);if(t&&t.apiKey){let n="";s.forEach(e=>{(e.requestMessages||[]).forEach(e=>{let t="";if("string"==typeof e.content)t=e.content;else if(Array.isArray(e.content)){let n=e.content.find(e=>"text"===e.type);n&&(t=n.text)}n+=`[${(e.role||"UNKNOWN").toUpperCase()}]: ${t}
+`});let t=e.responseMessage;if(t){let e="";if("string"==typeof t.content)e=t.content;else if(Array.isArray(t.content)){let n=t.content.find(e=>"text"===e.type);n&&(e=n.text)}n+=`[ASSISTANT]: ${e}
+`}});let{generateExtractionPrompt:l}=e.r(18868),r=l(n),o=(await t.chat.completions.create({messages:[{role:"user",content:r}],model:i,temperature:.1})).choices[0].message.content;o&&(m=o.trim(),u("result_extraction_llm",{history_length:n.length},{extracted:m}))}}catch(e){console.error("LLM Extraction Failed",e)}return m&&m.length>20?(console.log(`[Judge] LLM extraction preferred (Length: ${m.length} vs Rule: ${l.length})`),l=m):l&&console.log(`[Judge] Rule-Based extraction used (Length: ${l.length})`),{query:i,skill:r,final_result:l}}e.s(["analyzeFailures",()=>d,"analyzeSession",()=>y,"judgeAnswer",()=>g,"normalizeInteractions",()=>p]),n()}catch(e){n(e)}},!1)];
+//# sourceMappingURL=src_98433cb8._.js.map

package/.next/standalone/.next/server/chunks/src_lib_12408140._.js CHANGED Viewed

@@ -1,3 +1,3 @@
-module.exports=[11811,e=>{"use strict";var t=e.i(22734),l=e.i(14747);let i={"claude-opus-4-6":{inputTokenPrice:5,outputTokenPrice:25,cacheReadInputTokenPrice:.5,cacheCreationInputTokenPrice:6.25},"claude-sonnet-4-6":{inputTokenPrice:3,outputTokenPrice:15,cacheReadInputTokenPrice:.3,cacheCreationInputTokenPrice:3.75},"deepseek-chat":{inputTokenPrice:.28,outputTokenPrice:.42,cacheReadInputTokenPrice:.028},"deepseek-reasoner":{inputTokenPrice:.28,outputTokenPrice:.42,cacheReadInputTokenPrice:.028},"minimax-m2.5-free":{inputTokenPrice:0,outputTokenPrice:0}},n={"claude-opus-4-6":1e6,"claude-sonnet-4-6":1e6,"deepseek-chat":128e3,"deepseek-reasoner":128e3,"minimax-m2.5-free":196608},s=l.default.join(process.cwd(),"custom-models.json"),o={},r={},a=-1;function u(){try{let e=t.default.statSync(s).mtimeMs;if(e===a)return{pricing:o,contextWindows:r};let l=JSON.parse(t.default.readFileSync(s,"utf-8")),i={},n={};for(let[e,t]of Object.entries(l))e.startsWith("_")||("number"==typeof t.inputTokenPrice&&"number"==typeof t.outputTokenPrice&&(i[e]=t),"number"==typeof t.contextWindow&&(n[e]=t.contextWindow));o=i,r=n,a=e}catch(e){e instanceof SyntaxError&&console.warn("[model-config] Failed to parse custom-models.json:",e.message),o={},r={},a=-1}return{pricing:o,contextWindows:r}}function c(e,t){if(t[e])return t[e];for(let[l,i]of Object.entries(t).sort((e,t)=>t[0].length-e[0].length))if(e.startsWith(l))return i;return null}function d(e){let{pricing:t}=u(),l=c(e,t);if(l)return{pricing:l,source:"custom"};let n=c(e,i);return n?{pricing:n,source:"default"}:null}function k(e){let{contextWindows:t}=u(),l=c(e,t);if(null!=l)return{contextWindow:l,source:"custom"};let i=c(e,n);return null!=i?{contextWindow:i,source:"default"}:null}function _(e,t,l,i,n){let s=l.cacheReadInputTokenPrice??.1*l.inputTokenPrice,o=l.cacheCreationInputTokenPrice??1.25*l.inputTokenPrice;return(e*l.inputTokenPrice+(i??0)*s+(n??0)*o+t*l.outputTokenPrice)/1e6}e.s(["DEFAULT_CACHE_CREATION_RATIO",0,1.25,"DEFAULT_CACHE_READ_RATIO",0,.1,"calculateCost",()=>_,"getModelContextWindow",()=>k,"getModelPricing",()=>d])},85765,e=>e.a(async(t,l)=>{try{var i=e.i(22734),n=e.i(14747),s=e.i(31390),o=e.i(98043),r=e.i(11811),a=t([s,o]);function u(e){let t=e.trim(),l=[['"','"'],["'","'"],["“","”"],["‘","’"],["`","`"],["《","》"],["（","）"],["(",")"],["【","】"],["[","]"],["{","}"],["<",">"]];for(let e=0;e<6;e++){let e=t;for(let[e,i]of(t=t.trim(),l))t.startsWith(e)&&t.endsWith(i)&&t.length>=e.length+i.length+1&&(t=t.slice(e.length,-i.length));if(t===e)break}return t=(t=(t=t.replace(/[\s"'“”‘’`。.]/g,"")).replace(/^[\s.,，。!?！？;；:：、·…]+|[\s.,，。!?！？;；:：、·…]+$/g,"")).replace(/\s+/g," ").trim()}function c(e,t){if(!t)return;let l=u(t);if(!l)return;let i=e.filter(e=>e.query&&e.query.trim()).filter(e=>{let t=u(e.query);return!!t&&l.endsWith(t)});if(0!==i.length)return i.reduce((e,t)=>{let l=u(e.query).length;return u(t.query).length>l?t:e})}[s,o]=a.then?(await a)():a;let p=n.default.join(process.cwd(),"data"),f=n.default.join(p,"evaluation_result.json");async function d(e,t){let l={};if(e&&(l.OR=[{user:e},{user:null}]),!t?.query&&t?.taskId){let e=await o.db.findExecutionById(t.taskId);e&&e.query?(l.query=e.query,t.framework&&(l.framework=t.framework)):l.id=t.taskId}else t?.query&&(l.query=t.query,t.framework&&(l.framework=t.framework));t?.skill!==void 0&&(l.skill=t.skill),t?.skillVersion!==void 0&&(l.skillVersion=t.skillVersion);let i=await o.db.findExecutions(l,{timestamp:"desc"}),n=new Map;for(let e of i){let t=e.taskId||null;t&&(n.has(t)||n.set(t,[]),n.get(t).push(e))}let s=new Set;for(let[e,t]of n.entries()){if(1===t.length){s.add(t[0].id);continue}let l=t.find(t=>t.id===e);if(l){s.add(l.id);continue}let i=t.slice().sort((e,t)=>{let l=new Date(e.timestamp).getTime(),i=new Date(t.timestamp).getTime();if(i!==l)return i-l;let n=String(e.finalResult||"").length;return String(t.finalResult||"").length-n});s.add(i[0].id)}let a=i.filter(e=>!e.taskId||s.has(e.id));for(let[e,t]of n.entries())if(!(t.length<=1))for(let e of t)s.has(e.id)||o.db.deleteExecution(e.id).catch(()=>{});return a.map(e=>{let t=e.model??null,l=t?(0,r.getModelPricing)(t):null,i=l?.pricing??null,n=t&&null!=e.maxSingleCallTokens?(0,r.getModelContextWindow)(t):null;return{...e,upload_id:e.id,task_id:e.taskId||void 0,query:e.query||void 0,framework:e.framework||void 0,tokens:e.tokens||void 0,cost:i&&null!=e.inputTokens&&null!=e.outputTokens?(0,r.calculateCost)(e.inputTokens,e.outputTokens,i,e.cacheReadInputTokens??void 0,e.cacheCreationInputTokens??void 0):void 0,latency:e.latency||void 0,timestamp:e.timestamp?.toISOString?.()||e.timestamp,final_result:e.finalResult||void 0,skill:e.skill||void 0,skills:e.skills?JSON.parse(e.skills):void 0,invokedSkills:e.invokedSkills?JSON.parse(e.invokedSkills):void 0,is_skill_correct:e.isSkillCorrect||!1,is_answer_correct:e.isAnswerCorrect||!1,answer_score:void 0!==e.answerScore?e.answerScore:void 0,skill_score:void 0!==e.skillScore?e.skillScore:void 0,judgment_reason:e.judgmentReason||void 0,failures:e.failures?JSON.parse(e.failures):void 0,label:e.label??null,user:e.user??null,skill_issues:e.skillIssues?JSON.parse(e.skillIssues):[],skill_version:e.skillVersion??null,model:t,tool_call_count:e.toolCallCount??void 0,llm_call_count:e.llmCallCount??void 0,input_tokens:e.inputTokens??void 0,output_tokens:e.outputTokens??void 0,tool_call_error_count:e.toolCallErrorCount??void 0,cache_read_input_tokens:e.cacheReadInputTokens??void 0,cache_creation_input_tokens:e.cacheCreationInputTokens??void 0,max_single_call_tokens:e.maxSingleCallTokens??void 0,expected_skill_version:e.expectedSkillVersion??null,skill_recall_rate:e.skillRecallRate??null,context_window_pct:null!=e.maxSingleCallTokens&&n?Math.round(e.maxSingleCallTokens/n.contextWindow*1e3)/10:void 0,context_window_limit:n?.contextWindow,context_window_source:n?.source,cost_pricing:i?{inputTokenPrice:i.inputTokenPrice,outputTokenPrice:i.outputTokenPrice,cacheReadInputTokenPrice:i.cacheReadInputTokenPrice??i.inputTokenPrice*r.DEFAULT_CACHE_READ_RATIO,cacheCreationInputTokenPrice:i.cacheCreationInputTokenPrice??i.inputTokenPrice*r.DEFAULT_CACHE_CREATION_RATIO,source:l?.source??"default"}:null}})}async function k(e){let t={};return e&&(t.OR=[{user:e},{user:null}]),(await o.db.findConfigs(t)).map(e=>{let t=(t,l)=>{if(t)try{return JSON.parse(t)}catch(t){console.error(`[readConfig] Failed to parse ${l} for config ${e.id}:`,t);return}};return{id:e.id,query:e.query,skill:e.skill,skillVersion:e.skillVersion,expectedSkills:t(e.expectedSkills,"expectedSkills"),standard_answer:e.standardAnswer,root_causes:t(e.rootCauses,"rootCauses"),key_actions:t(e.keyActions,"keyActions"),parse_status:e.parseStatus||"completed"}})}async function _(e){let t=e.upload_id||e.task_id||crypto.randomUUID();if(e.task_id)try{let l={taskId:e.task_id};e.framework&&(l.framework=e.framework);let i=await o.db.findExecutions(l,{timestamp:"desc"});if(i&&i.length>0&&i[0]?.id){let l=i.find(t=>t.id===e.task_id),n=l&&l.id?l.id:i[0].id;n!==t&&(t=n)}}catch{}let l=null,n=await o.db.findExecutionById(t);n&&(l={...n,upload_id:n.id,task_id:n.taskId||void 0,query:n.query||void 0,framework:n.framework||void 0,tokens:n.tokens||void 0,cost:n.cost||void 0,latency:n.latency||void 0,timestamp:n.timestamp?.toISOString?.()||n.timestamp,final_result:n.finalResult||void 0,skill:n.skill||void 0,skills:n.skills?JSON.parse(n.skills):void 0,invokedSkills:n.invokedSkills?(()=>{try{return JSON.parse(n.invokedSkills)}catch{return}})():void 0,is_skill_correct:n.isSkillCorrect||!1,is_answer_correct:n.isAnswerCorrect||!1,answer_score:n.answerScore||void 0,skill_score:n.skillScore||void 0,judgment_reason:n.judgmentReason||void 0,failures:n.failures?JSON.parse(n.failures):void 0,skill_issues:n.skillIssues?JSON.parse(n.skillIssues):void 0,label:n.label||void 0,user:n.user||void 0,skill_version:n.skillVersion??void 0,expected_skill_version:n.expectedSkillVersion??null,skill_recall_rate:n.skillRecallRate??null,model:n.model||void 0,tool_call_count:n.toolCallCount??void 0,llm_call_count:n.llmCallCount??void 0,input_tokens:n.inputTokens??void 0,output_tokens:n.outputTokens??void 0,tool_call_error_count:n.toolCallErrorCount??void 0,cache_read_input_tokens:n.cacheReadInputTokens??void 0,cache_creation_input_tokens:n.cacheCreationInputTokens??void 0,max_single_call_tokens:n.maxSingleCallTokens??void 0});let r=l?{...l}:{},a=!!l;a||r.timestamp||e.timestamp?e.timestamp&&(r.timestamp=e.timestamp):r.timestamp=new Date().toISOString();let u=!!e.force_query_update,d="string"==typeof l?.query?l.query.trim():"",_="string"==typeof e.query?e.query.trim():"";if(r={...r,...e},d&&!u?r.query=d:!d&&_?r.query=_:"string"!=typeof r.query||r.query.trim()?"string"==typeof r.query&&(r.query=r.query.trim()):r.query=void 0,!r.upload_id&&r.task_id&&(r.upload_id=r.task_id),!r.task_id&&r.upload_id&&(r.task_id=r.upload_id),r.upload_id=t,(!r.label||!r.model||!r.user)&&r.task_id){let e=await o.db.findSessionByTaskId(r.task_id);e&&(!r.label&&e.label&&(r.label=e.label),!r.model&&e.model&&(r.model=e.model),!r.user&&e.user&&(r.user=e.user))}if(!r.user)try{let e=o.db.getClient();if("query"in e){let t=await e.query('SELECT username FROM "User" LIMIT 1');t.rows[0]&&(r.user=t.rows[0].username,console.log(`[Data-Service] Fallback resolved user for task ${r.task_id} to: ${r.user}`))}}catch(e){console.warn("[Data-Service] Fallback user lookup failed:",e)}let p=e.Token||e.token||e.tokens;void 0!==p&&(r.tokens=Number(p)),void 0!==e.tool_call_count&&(r.tool_call_count=Number(e.tool_call_count)),void 0!==e.llm_call_count&&(r.llm_call_count=Number(e.llm_call_count)),void 0!==e.input_tokens&&(r.input_tokens=Number(e.input_tokens)),void 0!==e.output_tokens&&(r.output_tokens=Number(e.output_tokens)),void 0!==e.tool_call_error_count&&(r.tool_call_error_count=Number(e.tool_call_error_count)),void 0!==e.cache_read_input_tokens&&(r.cache_read_input_tokens=Number(e.cache_read_input_tokens)),void 0!==e.cache_creation_input_tokens&&(r.cache_creation_input_tokens=Number(e.cache_creation_input_tokens)),void 0!==e.max_single_call_tokens&&(r.max_single_call_tokens=Number(e.max_single_call_tokens));let m="未找到匹配的评测配置",v=!1,y=r.is_answer_correct||!1,g=r.judgment_reason||m,w=await k(r.user);if(r.query&&w.length>0){let t=c(w,r.query);if(t){let i=Array.isArray(r.invokedSkills)?r.invokedSkills:[],n=(Array.isArray(r.skills)?r.skills:[]).map(e=>({name:e,version:null})),u=t.expectedSkills||[];if(u.length>0){let e=i.length>0?i:n;if(e.length>0){let t=0,l=u.filter(e=>e.skill?.trim()),i=l.map(e=>e.skill.trim()),n=new Map;if(i.length>0)try{for(let e of(await o.db.findSkills({name:{in:i},user:r.user||null})))n.set(e.name,e)}catch(e){console.error("[Judgment] Error fetching skills for version check:",e)}for(let i of l){let l=i.skill.trim(),s=i.version??null,o=e.find(e=>e.name===l);if(o){let e=!1;if(null===s)e=!0;else if(null!==o.version)e=o.version===s;else{let t=n.get(l);e=!!t&&(t.activeVersion||0)===s}e&&(t++,v||(v=!0))}}l.length>0&&(r.skill_recall_rate=t/l.length)}}if(r.is_skill_correct=v,void 0!==r.final_result){let i=!0;if(a&&!e.force_judgment&&l&&l.query===r.query&&l.final_result===r.final_result&&(i=!1),i&&!r.skip_evaluation){let e,l=(r.skill||t.skill||"").trim();if(l)try{let t=await o.db.findSkill(l,r.user||null);if(t){let l=t.activeVersion||0,i=t.versions?.find(e=>e.version===l);if(i&&i.content)e=i.content,r.skill_version=i.version;else if(t.versions&&t.versions.length>0){let l=t.versions[0];l&&l.content&&(e=l.content,r.skill_version=l.version)}}}catch(e){console.error("[Judgment] Error fetching skill definition:",e)}let i=await (0,s.judgeAnswer)(r.query||"",{standard_answer_example:t.standard_answer,root_causes:t.root_causes,key_actions:t.key_actions,skill_definition:e},r.final_result,r.user);y=i.is_correct,r.answer_score=i.score,g=i.reason||"Judged by Evaluation Model"}}}else a&&!e.force_judgment||r.answer_score||(y=!1,g=m,r.answer_score=null)}else r.query&&(!a||e.force_judgment)&&!r.answer_score&&(y=!1,g=m,r.answer_score=null);e.skip_evaluation&&(r.answer_score=null,g="结果评估中..."),r.is_skill_correct=v,r.is_answer_correct=y,r.judgment_reason=g;let h=Array.isArray(r.skills)&&r.skills.length>0?r.skills[0]:void 0;if(h){let e=function(){if(!i.default.existsSync(f))return{};try{return JSON.parse(i.default.readFileSync(f,"utf-8"))}catch{return{}}}()[h];e&&(r.skill_score=parseFloat(e))}if(r.skill&&void 0!==r.skill_version&&null!==r.skill_version?r.label=`${r.skill}-v${r.skill_version}`:r.skill?r.label=`${r.skill}-v1`:r.label="without-skill",await o.db.upsertExecution({where:{id:t},create:{id:t,taskId:r.task_id,query:r.query,framework:r.framework,tokens:r.tokens,cost:r.cost,latency:r.latency,timestamp:r.timestamp?new Date(r.timestamp):new Date,finalResult:r.final_result,skill:r.skill,skills:r.skills?JSON.stringify(r.skills):null,invokedSkills:r.invokedSkills?JSON.stringify(r.invokedSkills):null,isSkillCorrect:r.is_skill_correct,isAnswerCorrect:r.is_answer_correct,answerScore:r.answer_score,skillScore:r.skill_score,judgmentReason:r.judgment_reason,failures:r.failures?JSON.stringify(r.failures):null,skillIssues:r.skill_issues?JSON.stringify(r.skill_issues):null,label:r.label,user:r.user,skillVersion:r.skill_version,model:r.model,toolCallCount:r.tool_call_count,llmCallCount:r.llm_call_count,inputTokens:r.input_tokens,outputTokens:r.output_tokens,toolCallErrorCount:r.tool_call_error_count,skillRecallRate:r.skill_recall_rate,cacheReadInputTokens:r.cache_read_input_tokens,cacheCreationInputTokens:r.cache_creation_input_tokens,maxSingleCallTokens:r.max_single_call_tokens},update:{taskId:r.task_id,query:r.query,framework:r.framework,tokens:r.tokens,cost:r.cost,latency:r.latency,timestamp:r.timestamp?new Date(r.timestamp):new Date,finalResult:r.final_result,skill:r.skill,skills:r.skills?JSON.stringify(r.skills):null,invokedSkills:r.invokedSkills?JSON.stringify(r.invokedSkills):null,isSkillCorrect:r.is_skill_correct,isAnswerCorrect:r.is_answer_correct,answerScore:r.answer_score,skillScore:r.skill_score,judgmentReason:r.judgment_reason,failures:r.failures?JSON.stringify(r.failures):null,skillIssues:r.skill_issues?JSON.stringify(r.skill_issues):null,label:r.label,user:r.user,skillVersion:r.skill_version,model:r.model,toolCallCount:r.tool_call_count,llmCallCount:r.llm_call_count,inputTokens:r.input_tokens,outputTokens:r.output_tokens,toolCallErrorCount:r.tool_call_error_count,skillRecallRate:r.skill_recall_rate,cacheReadInputTokens:r.cache_read_input_tokens,cacheCreationInputTokens:r.cache_creation_input_tokens,maxSingleCallTokens:r.max_single_call_tokens}}),e.upload_id&&e.task_id&&e.upload_id!==t)try{let t=await o.db.findExecutionById(e.upload_id);t&&t.taskId===e.task_id&&await o.db.deleteExecution(e.upload_id)}catch{}if(r.task_id&&r.interactions){let e="string"==typeof r.interactions?(()=>{try{return JSON.parse(r.interactions)}catch{return[]}})():r.interactions,t=e;try{let l=await o.db.findSessionByTaskId(r.task_id),i=l?.interactions?(()=>{try{return JSON.parse(l.interactions)}catch{return[]}})():[];Array.isArray(i)&&i.length>0&&(t=!Array.isArray(e)||e.length<i.length?i:e.map((e,t)=>{let l=i[t],n=e?.content===""||e?.content==null,s="string"==typeof l?.content&&l.content.length>0;return n&&s&&l?.role===e?.role?{...e,content:l.content}:e}))}catch{}await o.db.upsertSession(r.task_id,{taskId:r.task_id,query:r.query,label:r.label,user:r.user,model:r.model,interactions:JSON.stringify(t)},{query:r.query,label:r.label,user:r.user,model:r.model,interactions:JSON.stringify(t)})}return{success:!0,record:r}}e.s(["findBestMatchConfig",()=>c,"readConfig",()=>k,"readRecords",()=>d,"saveExecutionRecord",()=>_]),l()}catch(e){l(e)}},!1)];
+module.exports=[11811,e=>{"use strict";var t=e.i(22734),l=e.i(14747);let i={"claude-opus-4-6":{inputTokenPrice:5,outputTokenPrice:25,cacheReadInputTokenPrice:.5,cacheCreationInputTokenPrice:6.25},"claude-sonnet-4-6":{inputTokenPrice:3,outputTokenPrice:15,cacheReadInputTokenPrice:.3,cacheCreationInputTokenPrice:3.75},"deepseek-chat":{inputTokenPrice:.28,outputTokenPrice:.42,cacheReadInputTokenPrice:.028},"deepseek-reasoner":{inputTokenPrice:.28,outputTokenPrice:.42,cacheReadInputTokenPrice:.028},"minimax-m2.5-free":{inputTokenPrice:0,outputTokenPrice:0}},n={"claude-opus-4-6":1e6,"claude-sonnet-4-6":1e6,"deepseek-chat":128e3,"deepseek-reasoner":128e3,"minimax-m2.5-free":196608},o=l.default.join(process.cwd(),"custom-models.json"),s={},r={},a=-1;function u(){try{let e=t.default.statSync(o).mtimeMs;if(e===a)return{pricing:s,contextWindows:r};let l=JSON.parse(t.default.readFileSync(o,"utf-8")),i={},n={};for(let[e,t]of Object.entries(l))e.startsWith("_")||("number"==typeof t.inputTokenPrice&&"number"==typeof t.outputTokenPrice&&(i[e]=t),"number"==typeof t.contextWindow&&(n[e]=t.contextWindow));s=i,r=n,a=e}catch(e){e instanceof SyntaxError&&console.warn("[model-config] Failed to parse custom-models.json:",e.message),s={},r={},a=-1}return{pricing:s,contextWindows:r}}function c(e,t){if(t[e])return t[e];for(let[l,i]of Object.entries(t).sort((e,t)=>t[0].length-e[0].length))if(e.startsWith(l))return i;return null}function d(e){let{pricing:t}=u(),l=c(e,t);if(l)return{pricing:l,source:"custom"};let n=c(e,i);return n?{pricing:n,source:"default"}:null}function k(e){let{contextWindows:t}=u(),l=c(e,t);if(null!=l)return{contextWindow:l,source:"custom"};let i=c(e,n);return null!=i?{contextWindow:i,source:"default"}:null}function _(e,t,l,i,n){let o=l.cacheReadInputTokenPrice??.1*l.inputTokenPrice,s=l.cacheCreationInputTokenPrice??1.25*l.inputTokenPrice;return(e*l.inputTokenPrice+(i??0)*o+(n??0)*s+t*l.outputTokenPrice)/1e6}e.s(["DEFAULT_CACHE_CREATION_RATIO",0,1.25,"DEFAULT_CACHE_READ_RATIO",0,.1,"calculateCost",()=>_,"getModelContextWindow",()=>k,"getModelPricing",()=>d])},85765,e=>e.a(async(t,l)=>{try{var i=e.i(22734),n=e.i(14747),o=e.i(85874),s=e.i(98043),r=e.i(11811),a=t([o,s]);function u(e){let t=e.trim(),l=[['"','"'],["'","'"],["“","”"],["‘","’"],["`","`"],["《","》"],["（","）"],["(",")"],["【","】"],["[","]"],["{","}"],["<",">"]];for(let e=0;e<6;e++){let e=t;for(let[e,i]of(t=t.trim(),l))t.startsWith(e)&&t.endsWith(i)&&t.length>=e.length+i.length+1&&(t=t.slice(e.length,-i.length));if(t===e)break}return t=(t=(t=t.replace(/[\s"'“”‘’`。.]/g,"")).replace(/^[\s.,，。!?！？;；:：、·…]+|[\s.,，。!?！？;；:：、·…]+$/g,"")).replace(/\s+/g," ").trim()}function c(e,t){if(!t)return;let l=u(t);if(!l)return;let i=e.filter(e=>e.query&&e.query.trim()).filter(e=>{let t=u(e.query);return!!t&&l.endsWith(t)});if(0!==i.length)return i.reduce((e,t)=>{let l=u(e.query).length;return u(t.query).length>l?t:e})}[o,s]=a.then?(await a)():a;let p=n.default.join(process.cwd(),"data"),f=n.default.join(p,"evaluation_result.json");async function d(e,t){let l={};if(e&&(l.OR=[{user:e},{user:null}]),!t?.query&&t?.taskId){let e=await s.db.findExecutionById(t.taskId);e&&e.query?(l.query=e.query,t.framework&&(l.framework=t.framework)):l.id=t.taskId}else t?.query&&(l.query=t.query,t.framework&&(l.framework=t.framework));t?.skill!==void 0&&(l.skill=t.skill),t?.skillVersion!==void 0&&(l.skillVersion=t.skillVersion);let i=await s.db.findExecutions(l,{timestamp:"desc"}),n=new Map;for(let e of i){let t=e.taskId||null;t&&(n.has(t)||n.set(t,[]),n.get(t).push(e))}let o=new Set;for(let[e,t]of n.entries()){if(1===t.length){o.add(t[0].id);continue}let l=t.find(t=>t.id===e);if(l){o.add(l.id);continue}let i=t.slice().sort((e,t)=>{let l=new Date(e.timestamp).getTime(),i=new Date(t.timestamp).getTime();if(i!==l)return i-l;let n=String(e.finalResult||"").length;return String(t.finalResult||"").length-n});o.add(i[0].id)}let a=i.filter(e=>!e.taskId||o.has(e.id));for(let[e,t]of n.entries())if(!(t.length<=1))for(let e of t)o.has(e.id)||s.db.deleteExecution(e.id).catch(()=>{});return a.map(e=>{let t=e.model??null,l=t?(0,r.getModelPricing)(t):null,i=l?.pricing??null,n=t&&null!=e.maxSingleCallTokens?(0,r.getModelContextWindow)(t):null;return{...e,upload_id:e.id,task_id:e.taskId||void 0,query:e.query||void 0,framework:e.framework||void 0,tokens:e.tokens||void 0,cost:i&&null!=e.inputTokens&&null!=e.outputTokens?(0,r.calculateCost)(e.inputTokens,e.outputTokens,i,e.cacheReadInputTokens??void 0,e.cacheCreationInputTokens??void 0):void 0,latency:e.latency||void 0,timestamp:e.timestamp?.toISOString?.()||e.timestamp,final_result:e.finalResult||void 0,skill:e.skill||void 0,skills:e.skills?JSON.parse(e.skills):void 0,invokedSkills:e.invokedSkills?JSON.parse(e.invokedSkills):void 0,is_skill_correct:e.isSkillCorrect||!1,is_answer_correct:e.isAnswerCorrect||!1,answer_score:void 0!==e.answerScore?e.answerScore:void 0,skill_score:void 0!==e.skillScore?e.skillScore:void 0,judgment_reason:e.judgmentReason||void 0,failures:e.failures?JSON.parse(e.failures):void 0,label:e.label??null,user:e.user??null,skill_issues:e.skillIssues?JSON.parse(e.skillIssues):[],skill_version:e.skillVersion??null,model:t,tool_call_count:e.toolCallCount??void 0,llm_call_count:e.llmCallCount??void 0,input_tokens:e.inputTokens??void 0,output_tokens:e.outputTokens??void 0,tool_call_error_count:e.toolCallErrorCount??void 0,cache_read_input_tokens:e.cacheReadInputTokens??void 0,cache_creation_input_tokens:e.cacheCreationInputTokens??void 0,max_single_call_tokens:e.maxSingleCallTokens??void 0,reasoning_tokens:e.reasoningTokens??void 0,expected_skill_version:e.expectedSkillVersion??null,skill_recall_rate:e.skillRecallRate??null,context_window_pct:null!=e.maxSingleCallTokens&&n?Math.round(e.maxSingleCallTokens/n.contextWindow*1e3)/10:void 0,context_window_limit:n?.contextWindow,context_window_source:n?.source,cost_pricing:i?{inputTokenPrice:i.inputTokenPrice,outputTokenPrice:i.outputTokenPrice,cacheReadInputTokenPrice:i.cacheReadInputTokenPrice??i.inputTokenPrice*r.DEFAULT_CACHE_READ_RATIO,cacheCreationInputTokenPrice:i.cacheCreationInputTokenPrice??i.inputTokenPrice*r.DEFAULT_CACHE_CREATION_RATIO,source:l?.source??"default"}:null}})}async function k(e){let t={};return e&&(t.OR=[{user:e},{user:null}]),(await s.db.findConfigs(t)).map(e=>{let t=(t,l)=>{if(t)try{return JSON.parse(t)}catch(t){console.error(`[readConfig] Failed to parse ${l} for config ${e.id}:`,t);return}};return{id:e.id,query:e.query,skill:e.skill,skillVersion:e.skillVersion,expectedSkills:t(e.expectedSkills,"expectedSkills"),standard_answer:e.standardAnswer,root_causes:t(e.rootCauses,"rootCauses"),key_actions:t(e.keyActions,"keyActions"),parse_status:e.parseStatus||"completed"}})}async function _(e){let t=e.upload_id||e.task_id||crypto.randomUUID();if(e.task_id)try{let l={taskId:e.task_id};e.framework&&(l.framework=e.framework);let i=await s.db.findExecutions(l,{timestamp:"desc"});if(i&&i.length>0&&i[0]?.id){let l=i.find(t=>t.id===e.task_id),n=l&&l.id?l.id:i[0].id;n!==t&&(t=n)}}catch{}let l=null,n=await s.db.findExecutionById(t);n&&(l={...n,upload_id:n.id,task_id:n.taskId||void 0,query:n.query||void 0,framework:n.framework||void 0,tokens:n.tokens||void 0,cost:n.cost||void 0,latency:n.latency||void 0,timestamp:n.timestamp?.toISOString?.()||n.timestamp,final_result:n.finalResult||void 0,skill:n.skill||void 0,skills:n.skills?JSON.parse(n.skills):void 0,invokedSkills:n.invokedSkills?(()=>{try{return JSON.parse(n.invokedSkills)}catch{return}})():void 0,is_skill_correct:n.isSkillCorrect||!1,is_answer_correct:n.isAnswerCorrect||!1,answer_score:n.answerScore||void 0,skill_score:n.skillScore||void 0,judgment_reason:n.judgmentReason||void 0,failures:n.failures?JSON.parse(n.failures):void 0,skill_issues:n.skillIssues?JSON.parse(n.skillIssues):void 0,label:n.label||void 0,user:n.user||void 0,skill_version:n.skillVersion??void 0,expected_skill_version:n.expectedSkillVersion??null,skill_recall_rate:n.skillRecallRate??null,model:n.model||void 0,tool_call_count:n.toolCallCount??void 0,llm_call_count:n.llmCallCount??void 0,input_tokens:n.inputTokens??void 0,output_tokens:n.outputTokens??void 0,tool_call_error_count:n.toolCallErrorCount??void 0,cache_read_input_tokens:n.cacheReadInputTokens??void 0,cache_creation_input_tokens:n.cacheCreationInputTokens??void 0,max_single_call_tokens:n.maxSingleCallTokens??void 0,reasoning_tokens:n.reasoningTokens??void 0});let r=l?{...l}:{},a=!!l;a||r.timestamp||e.timestamp?e.timestamp&&(r.timestamp=e.timestamp):r.timestamp=new Date().toISOString();let u=!!e.force_query_update,d="string"==typeof l?.query?l.query.trim():"",_="string"==typeof e.query?e.query.trim():"";if(r={...r,...e},d&&!u?r.query=d:!d&&_?r.query=_:"string"!=typeof r.query||r.query.trim()?"string"==typeof r.query&&(r.query=r.query.trim()):r.query=void 0,!r.upload_id&&r.task_id&&(r.upload_id=r.task_id),!r.task_id&&r.upload_id&&(r.task_id=r.upload_id),r.upload_id=t,(!r.label||!r.model||!r.user)&&r.task_id){let e=await s.db.findSessionByTaskId(r.task_id);e&&(!r.label&&e.label&&(r.label=e.label),!r.model&&e.model&&(r.model=e.model),!r.user&&e.user&&(r.user=e.user))}if(!r.user)try{let e=s.db.getClient();if("query"in e){let t=await e.query('SELECT username FROM "User" LIMIT 1');t.rows[0]&&(r.user=t.rows[0].username,console.log(`[Data-Service] Fallback resolved user for task ${r.task_id} to: ${r.user}`))}}catch(e){console.warn("[Data-Service] Fallback user lookup failed:",e)}let p=e.Token||e.token||e.tokens;void 0!==p&&(r.tokens=Number(p)),void 0!==e.tool_call_count&&(r.tool_call_count=Number(e.tool_call_count)),void 0!==e.llm_call_count&&(r.llm_call_count=Number(e.llm_call_count)),void 0!==e.input_tokens&&(r.input_tokens=Number(e.input_tokens)),void 0!==e.output_tokens&&(r.output_tokens=Number(e.output_tokens)),void 0!==e.tool_call_error_count&&(r.tool_call_error_count=Number(e.tool_call_error_count)),void 0!==e.cache_read_input_tokens&&(r.cache_read_input_tokens=Number(e.cache_read_input_tokens)),void 0!==e.cache_creation_input_tokens&&(r.cache_creation_input_tokens=Number(e.cache_creation_input_tokens)),void 0!==e.max_single_call_tokens&&(r.max_single_call_tokens=Number(e.max_single_call_tokens)),void 0!==e.reasoning_tokens&&(r.reasoning_tokens=Number(e.reasoning_tokens));let m="未找到匹配的评测配置",v=!1,g=r.is_answer_correct||!1,y=r.judgment_reason||m,w=await k(r.user);if(r.query&&w.length>0){let t=c(w,r.query);if(t){let i=Array.isArray(r.invokedSkills)?r.invokedSkills:[],n=(Array.isArray(r.skills)?r.skills:[]).map(e=>({name:e,version:null})),u=t.expectedSkills||[];if(u.length>0){let e=i.length>0?i:n;if(e.length>0){let t=0,l=u.filter(e=>e.skill?.trim()),i=l.map(e=>e.skill.trim()),n=new Map;if(i.length>0)try{for(let e of(await s.db.findSkills({name:{in:i},user:r.user||null})))n.set(e.name,e)}catch(e){console.error("[Judgment] Error fetching skills for version check:",e)}for(let i of l){let l=i.skill.trim(),o=i.version??null,s=e.find(e=>e.name===l);if(s){let e=!1;if(null===o)e=!0;else if(null!==s.version)e=s.version===o;else{let t=n.get(l);e=!!t&&(t.activeVersion||0)===o}e&&(t++,v||(v=!0))}}l.length>0&&(r.skill_recall_rate=t/l.length)}}if(r.is_skill_correct=v,void 0!==r.final_result){let i=!0;if(a&&!e.force_judgment&&l&&l.query===r.query&&l.final_result===r.final_result&&(i=!1),i&&!r.skip_evaluation){let e,l=(r.skill||t.skill||"").trim();if(l)try{let t=await s.db.findSkill(l,r.user||null);if(t){let l=t.activeVersion||0,i=t.versions?.find(e=>e.version===l);if(i&&i.content)e=i.content,r.skill_version=i.version;else if(t.versions&&t.versions.length>0){let l=t.versions[0];l&&l.content&&(e=l.content,r.skill_version=l.version)}}}catch(e){console.error("[Judgment] Error fetching skill definition:",e)}let i=await (0,o.judgeAnswer)(r.query||"",{standard_answer_example:t.standard_answer,root_causes:t.root_causes,key_actions:t.key_actions,skill_definition:e},r.final_result,r.user);g=i.is_correct,r.answer_score=i.score,y=i.reason||"Judged by Evaluation Model"}}}else a&&!e.force_judgment||r.answer_score||(g=!1,y=m,r.answer_score=null)}else r.query&&(!a||e.force_judgment)&&!r.answer_score&&(g=!1,y=m,r.answer_score=null);e.skip_evaluation&&(r.answer_score=null,y="结果评估中..."),r.is_skill_correct=v,r.is_answer_correct=g,r.judgment_reason=y;let h=Array.isArray(r.skills)&&r.skills.length>0?r.skills[0]:void 0;if(h){let e=function(){if(!i.default.existsSync(f))return{};try{return JSON.parse(i.default.readFileSync(f,"utf-8"))}catch{return{}}}()[h];e&&(r.skill_score=parseFloat(e))}if(r.skill&&void 0!==r.skill_version&&null!==r.skill_version?r.label=`${r.skill}-v${r.skill_version}`:r.skill?r.label=`${r.skill}-v1`:r.label="without-skill",await s.db.upsertExecution({where:{id:t},create:{id:t,taskId:r.task_id,query:r.query,framework:r.framework,tokens:r.tokens,cost:r.cost,latency:r.latency,timestamp:r.timestamp?new Date(r.timestamp):new Date,finalResult:r.final_result,skill:r.skill,skills:r.skills?JSON.stringify(r.skills):null,invokedSkills:r.invokedSkills?JSON.stringify(r.invokedSkills):null,isSkillCorrect:r.is_skill_correct,isAnswerCorrect:r.is_answer_correct,answerScore:r.answer_score,skillScore:r.skill_score,judgmentReason:r.judgment_reason,failures:r.failures?JSON.stringify(r.failures):null,skillIssues:r.skill_issues?JSON.stringify(r.skill_issues):null,label:r.label,user:r.user,skillVersion:r.skill_version,model:r.model,toolCallCount:r.tool_call_count,llmCallCount:r.llm_call_count,inputTokens:r.input_tokens,outputTokens:r.output_tokens,toolCallErrorCount:r.tool_call_error_count,skillRecallRate:r.skill_recall_rate,cacheReadInputTokens:r.cache_read_input_tokens,cacheCreationInputTokens:r.cache_creation_input_tokens,maxSingleCallTokens:r.max_single_call_tokens,reasoningTokens:r.reasoning_tokens},update:{taskId:r.task_id,query:r.query,framework:r.framework,tokens:r.tokens,cost:r.cost,latency:r.latency,timestamp:r.timestamp?new Date(r.timestamp):new Date,finalResult:r.final_result,skill:r.skill,skills:r.skills?JSON.stringify(r.skills):null,invokedSkills:r.invokedSkills?JSON.stringify(r.invokedSkills):null,isSkillCorrect:r.is_skill_correct,isAnswerCorrect:r.is_answer_correct,answerScore:r.answer_score,skillScore:r.skill_score,judgmentReason:r.judgment_reason,failures:r.failures?JSON.stringify(r.failures):null,skillIssues:r.skill_issues?JSON.stringify(r.skill_issues):null,label:r.label,user:r.user,skillVersion:r.skill_version,model:r.model,toolCallCount:r.tool_call_count,llmCallCount:r.llm_call_count,inputTokens:r.input_tokens,outputTokens:r.output_tokens,toolCallErrorCount:r.tool_call_error_count,skillRecallRate:r.skill_recall_rate,cacheReadInputTokens:r.cache_read_input_tokens,cacheCreationInputTokens:r.cache_creation_input_tokens,maxSingleCallTokens:r.max_single_call_tokens,reasoningTokens:r.reasoning_tokens}}),e.upload_id&&e.task_id&&e.upload_id!==t)try{let t=await s.db.findExecutionById(e.upload_id);t&&t.taskId===e.task_id&&await s.db.deleteExecution(e.upload_id)}catch{}if(r.task_id&&r.interactions){let e="string"==typeof r.interactions?(()=>{try{return JSON.parse(r.interactions)}catch{return[]}})():r.interactions,t=e;try{let l=await s.db.findSessionByTaskId(r.task_id),i=l?.interactions?(()=>{try{return JSON.parse(l.interactions)}catch{return[]}})():[];Array.isArray(i)&&i.length>0&&(t=!Array.isArray(e)||e.length<i.length?i:e.map((e,t)=>{let l=i[t],n=e?.content===""||e?.content==null,o="string"==typeof l?.content&&l.content.length>0;return n&&o&&l?.role===e?.role?{...e,content:l.content}:e}))}catch{}await s.db.upsertSession(r.task_id,{taskId:r.task_id,query:r.query,label:r.label,user:r.user,model:r.model,interactions:JSON.stringify(t)},{query:r.query,label:r.label,user:r.user,model:r.model,interactions:JSON.stringify(t)})}return{success:!0,record:r}}e.s(["findBestMatchConfig",()=>c,"readConfig",()=>k,"readRecords",()=>d,"saveExecutionRecord",()=>_]),l()}catch(e){l(e)}},!1)];
 //# sourceMappingURL=src_lib_12408140._.js.map