npm - railwise-darwin-x64 - Versions diffs - 1.2.29 - Mend

railwise-darwin-x64 1.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (380) hide show

package/bin/skill/mcp-builder/scripts/connections.py ADDED Viewed

@@ -0,0 +1,151 @@
+"""Lightweight connection handling for MCP servers."""
+from abc import ABC, abstractmethod
+from contextlib import AsyncExitStack
+from typing import Any
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.sse import sse_client
+from mcp.client.stdio import stdio_client
+from mcp.client.streamable_http import streamablehttp_client
+class MCPConnection(ABC):
+    """Base class for MCP server connections.
+    Subclasses choose the transport by implementing ``_create_context``. Used as
+    an async context manager, an instance opens that transport, wraps its
+    streams in a ``ClientSession`` and initializes the session. ``list_tools``
+    and ``call_tool`` are only usable while the context is open.
+    """
+    def __init__(self):
+        # Both attributes are set by __aenter__ and reset to None on exit.
+        self.session = None
+        self._stack = None
+    @abstractmethod
+    def _create_context(self):
+        """Create the connection context based on connection type."""
+    def _require_session(self):
+        """Return the active session, or raise RuntimeError if not connected."""
+        if self.session is None:
+            raise RuntimeError(
+                "MCP connection is not open; enter it with 'async with' before use"
+            )
+        return self.session
+    async def __aenter__(self):
+        """Initialize MCP server connection.
+        Returns:
+            This connection, with ``session`` ready for use.
+        Raises:
+            ValueError: If the transport context yields neither 2 nor 3 items.
+        Any failure while connecting unwinds whatever was already opened,
+        resets the instance to its unconnected state and re-raises.
+        """
+        self._stack = AsyncExitStack()
+        await self._stack.__aenter__()
+        try:
+            ctx = self._create_context()
+            result = await self._stack.enter_async_context(ctx)
+            # The transport yields (read, write) or (read, write, extra); any
+            # third element is ignored.
+            if len(result) == 2:
+                read, write = result
+            elif len(result) == 3:
+                read, write, _ = result
+            else:
+                raise ValueError(f"Unexpected context result: {result}")
+            session_ctx = ClientSession(read, write)
+            self.session = await self._stack.enter_async_context(session_ctx)
+            await self.session.initialize()
+            return self
+        except BaseException:
+            # Clear the attributes first so the instance never keeps a closed
+            # stack or a dead session, even if the unwind itself raises.
+            stack, self._stack, self.session = self._stack, None, None
+            await stack.__aexit__(None, None, None)
+            raise
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Clean up MCP server connection resources."""
+        if self._stack:
+            await self._stack.__aexit__(exc_type, exc_val, exc_tb)
+        self.session = None
+        self._stack = None
+    async def list_tools(self) -> list[dict[str, Any]]:
+        """Retrieve available tools from the MCP server.
+        Returns:
+            One dict per tool with keys ``name``, ``description`` and
+            ``input_schema``.
+        Raises:
+            RuntimeError: If the connection has not been opened.
+        """
+        response = await self._require_session().list_tools()
+        return [
+            {
+                "name": tool.name,
+                "description": tool.description,
+                "input_schema": tool.inputSchema,
+            }
+            for tool in response.tools
+        ]
+    async def call_tool(self, tool_name: str, arguments: dict[str, Any]) -> Any:
+        """Call a tool on the MCP server with provided arguments.
+        Returns:
+            The ``content`` attribute of the server's result.
+        Raises:
+            RuntimeError: If the connection has not been opened.
+        """
+        result = await self._require_session().call_tool(tool_name, arguments=arguments)
+        return result.content
+class MCPConnectionStdio(MCPConnection):
+    """MCP connection using standard input/output.
+    Args:
+        command: Command used to start the server.
+        args: Arguments for the command; omitted or empty means no arguments.
+        env: Environment mapping handed to ``StdioServerParameters``; ``None``
+            is forwarded unchanged.
+    """
+    def __init__(
+        self,
+        command: str,
+        args: list[str] | None = None,
+        env: dict[str, str] | None = None,
+    ):
+        super().__init__()
+        self.command = command
+        # Copy so later changes to the caller's list cannot alter this connection.
+        self.args = list(args) if args else []
+        self.env = env
+    def _create_context(self):
+        """Build the stdio client context from the stored command, args and env."""
+        return stdio_client(
+            StdioServerParameters(command=self.command, args=self.args, env=self.env)
+        )
+class MCPConnectionSSE(MCPConnection):
+    """MCP connection using Server-Sent Events.
+    Args:
+        url: Server URL passed to ``sse_client``.
+        headers: HTTP headers sent with the connection; omitted means none.
+    """
+    def __init__(self, url: str, headers: dict[str, str] | None = None):
+        super().__init__()
+        self.url = url
+        # Copy so later changes to the caller's dict cannot alter this connection.
+        self.headers = dict(headers) if headers else {}
+    def _create_context(self):
+        """Build the SSE client context from the stored URL and headers."""
+        return sse_client(url=self.url, headers=self.headers)
+class MCPConnectionHTTP(MCPConnection):
+    """MCP connection using Streamable HTTP.
+    Args:
+        url: Server URL passed to ``streamablehttp_client``.
+        headers: HTTP headers sent with the connection; omitted means none.
+    """
+    def __init__(self, url: str, headers: dict[str, str] | None = None):
+        super().__init__()
+        self.url = url
+        # Copy so later changes to the caller's dict cannot alter this connection.
+        self.headers = dict(headers) if headers else {}
+    def _create_context(self):
+        """Build the Streamable HTTP client context from the stored URL and headers."""
+        return streamablehttp_client(url=self.url, headers=self.headers)
+def create_connection(
+    transport: str,
+    command: str | None = None,
+    args: list[str] | None = None,
+    env: dict[str, str] | None = None,
+    url: str | None = None,
+    headers: dict[str, str] | None = None,
+) -> MCPConnection:
+    """Factory function to create the appropriate MCP connection.
+    Args:
+        transport: Connection type, matched case-insensitively: "stdio", "sse",
+            or "http" (also accepted as "streamable_http" / "streamable-http")
+        command: Command to run (stdio only)
+        args: Command arguments (stdio only)
+        env: Environment variables (stdio only)
+        url: Server URL (sse and http only)
+        headers: HTTP headers (sse and http only)
+    Returns:
+        MCPConnection instance (not yet connected)
+    Raises:
+        ValueError: If the transport is unknown, or the argument it requires
+            (command for stdio, url for sse/http) is missing or empty.
+    """
+    transport = transport.lower()
+    if transport == "stdio":
+        if not command:
+            raise ValueError("Command is required for stdio transport")
+        return MCPConnectionStdio(command=command, args=args, env=env)
+    if transport == "sse":
+        if not url:
+            raise ValueError("URL is required for sse transport")
+        return MCPConnectionSSE(url=url, headers=headers)
+    if transport in ("http", "streamable_http", "streamable-http"):
+        if not url:
+            raise ValueError("URL is required for http transport")
+        return MCPConnectionHTTP(url=url, headers=headers)
+    raise ValueError(f"Unsupported transport type: {transport}. Use 'stdio', 'sse', or 'http'")

package/bin/skill/mcp-builder/scripts/evaluation.py ADDED Viewed

@@ -0,0 +1,373 @@
+"""MCP Server Evaluation Harness
+This script evaluates MCP servers by running test questions against them using Claude.
+"""
+import argparse
+import asyncio
+import json
+import re
+import sys
+import time
+import traceback
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from typing import Any
+from anthropic import Anthropic
+from connections import create_connection
+# System prompt sent with every model request in agent_loop. The <summary>,
+# <feedback> and <response> tags it asks for are the ones evaluate_single_task
+# extracts from the final answer, so the tag names here must stay in sync.
+EVALUATION_PROMPT = """You are an AI assistant with access to tools.
+When given a task, you MUST:
+1. Use the available tools to complete the task
+2. Provide summary of each step in your approach, wrapped in <summary> tags
+3. Provide feedback on the tools provided, wrapped in <feedback> tags
+4. Provide your final response, wrapped in <response> tags
+Summary Requirements:
+- In your <summary> tags, you must explain:
+  - The steps you took to complete the task
+  - Which tools you used, in what order, and why
+  - The inputs you provided to each tool
+  - The outputs you received from each tool
+  - A summary for how you arrived at the response
+Feedback Requirements:
+- In your <feedback> tags, provide constructive feedback on the tools:
+  - Comment on tool names: Are they clear and descriptive?
+  - Comment on input parameters: Are they well-documented? Are required vs optional parameters clear?
+  - Comment on descriptions: Do they accurately describe what the tool does?
+  - Comment on any errors encountered during tool usage: Did the tool fail to execute? Did the tool return too many tokens?
+  - Identify specific areas for improvement and explain WHY they would help
+  - Be specific and actionable in your suggestions
+Response Requirements:
+- Your response should be concise and directly address what was asked
+- Always wrap your final response in <response> tags
+- If you cannot solve the task return <response>NOT_FOUND</response>
+- For numeric responses, provide just the number
+- For IDs, provide just the ID
+- For names or text, provide the exact text requested
+- Your response should go last"""
+def parse_evaluation_file(file_path: Path) -> list[dict[str, Any]]:
+    """Load question/answer pairs from an XML evaluation file.
+    Every ``qa_pair`` element below the root that has both a ``question`` and an
+    ``answer`` child contributes one dict; the text of each is whitespace-stripped
+    and missing text becomes an empty string. Pairs lacking either child are
+    skipped. Any error while reading or parsing is printed and yields ``[]``.
+    """
+    try:
+        root = ET.parse(file_path).getroot()
+        candidates = (
+            (node.find("question"), node.find("answer"))
+            for node in root.findall(".//qa_pair")
+        )
+        return [
+            {
+                "question": (question.text or "").strip(),
+                "answer": (answer.text or "").strip(),
+            }
+            for question, answer in candidates
+            if question is not None and answer is not None
+        ]
+    except Exception as e:
+        print(f"Error parsing evaluation file {file_path}: {e}")
+        return []
+def extract_xml_content(text: str | None, tag: str) -> str | None:
+    """Extract the content of the last ``<tag>...</tag>`` pair in ``text``.
+    Args:
+        text: Text to search; ``None`` or empty text yields ``None``.
+        tag: Tag name, matched literally (regex metacharacters are escaped).
+    Returns:
+        The whitespace-stripped content of the last match, or ``None`` if the
+        tag pair does not occur.
+    """
+    if not text:
+        # The agent loop can produce no text at all; treat that as "tag absent".
+        return None
+    name = re.escape(tag)
+    pattern = rf"<{name}>(.*?)</{name}>"
+    # DOTALL lets the tagged content span multiple lines.
+    matches = re.findall(pattern, text, re.DOTALL)
+    return matches[-1].strip() if matches else None
+def _stringify_tool_result(tool_result: Any) -> str:
+    """Render a tool result as the text placed in a tool_result block."""
+    if isinstance(tool_result, (dict, list)):
+        try:
+            return json.dumps(tool_result)
+        except (TypeError, ValueError):
+            # A container holding objects json cannot encode is still a
+            # successful call; fall back to str() instead of reporting it to
+            # the model as a tool failure.
+            return str(tool_result)
+    return str(tool_result)
+async def agent_loop(
+    client: Anthropic,
+    model: str,
+    question: str,
+    tools: list[dict[str, Any]],
+    connection: Any,
+) -> tuple[str | None, dict[str, Any]]:
+    """Run the agent loop with MCP tools.
+    Sends the question to the model and, for as long as the model stops with
+    ``tool_use``, executes every requested tool through ``connection`` and
+    feeds the results back.
+    Args:
+        client: Anthropic client used for ``messages.create``.
+        model: Model identifier passed to the API.
+        question: The user task.
+        tools: Tool definitions offered to the model.
+        connection: Object providing ``await call_tool(name, arguments)``.
+    Returns:
+        A tuple of the first text block of the final response (``None`` if it
+        has no text block) and per-tool metrics of the form
+        ``{tool_name: {"count": int, "durations": [seconds, ...]}}``.
+    """
+    messages = [{"role": "user", "content": question}]
+    tool_metrics: dict[str, Any] = {}
+    while True:
+        # The client call is blocking, so it runs in a worker thread.
+        response = await asyncio.to_thread(
+            client.messages.create,
+            model=model,
+            max_tokens=4096,
+            system=EVALUATION_PROMPT,
+            messages=messages,
+            tools=tools,
+        )
+        messages.append({"role": "assistant", "content": response.content})
+        if response.stop_reason != "tool_use":
+            break
+        tool_uses = [block for block in response.content if block.type == "tool_use"]
+        if not tool_uses:
+            # Nothing to execute despite the stop reason; avoid sending an
+            # empty user turn.
+            break
+        # Every tool_use block in the turn gets its own tool_result, all sent
+        # back together in a single user message.
+        tool_results = []
+        for tool_use in tool_uses:
+            tool_name = tool_use.name
+            started = time.perf_counter()
+            try:
+                tool_result = await connection.call_tool(tool_name, tool_use.input)
+                tool_response = _stringify_tool_result(tool_result)
+            except Exception as e:
+                # Deliberately broad: the failure is reported to the model so
+                # it can react and comment on it in its feedback.
+                tool_response = f"Error executing tool {tool_name}: {str(e)}\n"
+                tool_response += traceback.format_exc()
+            elapsed = time.perf_counter() - started
+            metrics = tool_metrics.setdefault(tool_name, {"count": 0, "durations": []})
+            metrics["count"] += 1
+            metrics["durations"].append(elapsed)
+            tool_results.append({
+                "type": "tool_result",
+                "tool_use_id": tool_use.id,
+                "content": tool_response,
+            })
+        messages.append({"role": "user", "content": tool_results})
+    response_text = next(
+        (block.text for block in response.content if hasattr(block, "text")),
+        None,
+    )
+    return response_text, tool_metrics
+async def evaluate_single_task(
+    client: Anthropic,
+    model: str,
+    qa_pair: dict[str, Any],
+    tools: list[dict[str, Any]],
+    connection: Any,
+    task_index: int,
+) -> dict[str, Any]:
+    """Evaluate a single QA pair with the given tools.
+    Runs the agent loop on the question, pulls the <response>, <summary> and
+    <feedback> sections out of the final text, and scores the answer by exact
+    string equality with the expected answer.
+    Args:
+        client: Anthropic client forwarded to the agent loop.
+        model: Model identifier forwarded to the agent loop.
+        qa_pair: Dict with ``question`` and ``answer`` keys.
+        tools: Tool definitions offered to the model.
+        connection: MCP connection used to execute tool calls.
+        task_index: Zero-based task position, used only for progress output.
+    Returns:
+        A dict with keys ``question``, ``expected``, ``actual``, ``score``
+        (0 or 1), ``total_duration`` (seconds), ``tool_calls``,
+        ``num_tool_calls``, ``summary`` and ``feedback``.
+    """
+    start_time = time.perf_counter()
+    print(f"Task {task_index + 1}: Running task with question: {qa_pair['question']}")
+    response, tool_metrics = await agent_loop(client, model, qa_pair["question"], tools, connection)
+    # The agent loop returns None when the final message has no text block;
+    # score that as "no answer" instead of crashing in the regex extraction.
+    response_text = response or ""
+    response_value = extract_xml_content(response_text, "response")
+    summary = extract_xml_content(response_text, "summary")
+    feedback = extract_xml_content(response_text, "feedback")
+    duration_seconds = time.perf_counter() - start_time
+    return {
+        "question": qa_pair["question"],
+        "expected": qa_pair["answer"],
+        "actual": response_value,
+        "score": int(response_value == qa_pair["answer"]) if response_value else 0,
+        "total_duration": duration_seconds,
+        "tool_calls": tool_metrics,
+        "num_tool_calls": sum(len(metrics["durations"]) for metrics in tool_metrics.values()),
+        "summary": summary,
+        "feedback": feedback,
+    }
+# Markdown header of the evaluation report. run_evaluation fills the
+# placeholders via str.format: correct, total, accuracy (a percentage),
+# average_duration_s, average_tool_calls and total_tool_calls.
+REPORT_HEADER = """
+# Evaluation Report
+## Summary
+- **Accuracy**: {correct}/{total} ({accuracy:.1f}%)
+- **Average Task Duration**: {average_duration_s:.2f}s
+- **Average Tool Calls per Task**: {average_tool_calls:.2f}
+- **Total Tool Calls**: {total_tool_calls}
+---
+"""
+# Markdown section rendered once per task and appended after the header.
+# run_evaluation passes tool_calls as a JSON string and substitutes "N/A" for
+# a missing actual answer, summary or feedback.
+TASK_TEMPLATE = """
+### Task {task_num}
+**Question**: {question}
+**Ground Truth Answer**: `{expected_answer}`
+**Actual Answer**: `{actual_answer}`
+**Correct**: {correct_indicator}
+**Duration**: {total_duration:.2f}s
+**Tool Calls**: {tool_calls}
+**Summary**
+{summary}
+**Feedback**
+{feedback}
+---
+"""
+async def run_evaluation(
+    eval_path: Path,
+    connection: Any,
+    model: str = "claude-3-7-sonnet-20250219",
+) -> str:
+    """Evaluate every task in ``eval_path`` and return a Markdown report.
+    Tools are listed from ``connection``, the QA pairs are parsed from the
+    file, and the tasks run one after another. The report is the summary
+    header followed by one section per task.
+    """
+    print("🚀 Starting Evaluation")
+    client = Anthropic()
+    tools = await connection.list_tools()
+    print(f"📋 Loaded {len(tools)} tools from MCP server")
+    qa_pairs = parse_evaluation_file(eval_path)
+    task_count = len(qa_pairs)
+    print(f"📋 Loaded {task_count} evaluation tasks")
+    results = []
+    for index, qa_pair in enumerate(qa_pairs):
+        print(f"Processing task {index + 1}/{task_count}")
+        outcome = await evaluate_single_task(client, model, qa_pair, tools, connection, index)
+        results.append(outcome)
+    completed = len(results)
+    correct = sum(r["score"] for r in results)
+    total_tool_calls = sum(r["num_tool_calls"] for r in results)
+    if completed:
+        accuracy = (correct / completed) * 100
+        average_duration_s = sum(r["total_duration"] for r in results) / completed
+        average_tool_calls = total_tool_calls / completed
+    else:
+        # No tasks ran: report zeros instead of dividing by zero.
+        accuracy = average_duration_s = average_tool_calls = 0
+    sections = [
+        REPORT_HEADER.format(
+            correct=correct,
+            total=completed,
+            accuracy=accuracy,
+            average_duration_s=average_duration_s,
+            average_tool_calls=average_tool_calls,
+            total_tool_calls=total_tool_calls,
+        )
+    ]
+    for task_num, (qa_pair, result) in enumerate(zip(qa_pairs, results), start=1):
+        sections.append(
+            TASK_TEMPLATE.format(
+                task_num=task_num,
+                question=qa_pair["question"],
+                expected_answer=qa_pair["answer"],
+                actual_answer=result["actual"] or "N/A",
+                correct_indicator="✅" if result["score"] else "❌",
+                total_duration=result["total_duration"],
+                tool_calls=json.dumps(result["tool_calls"], indent=2),
+                summary=result["summary"] or "N/A",
+                feedback=result["feedback"] or "N/A",
+            )
+        )
+    return "".join(sections)
+def parse_headers(header_list: list[str]) -> dict[str, str]:
+    """Turn 'Key: Value' strings into a header dictionary.
+    Each entry is split at its first colon and both sides are stripped of
+    surrounding whitespace. Entries without a colon are skipped with a printed
+    warning. A missing or empty list yields an empty dict.
+    """
+    parsed = {}
+    for entry in header_list or ():
+        name, colon, content = entry.partition(":")
+        if not colon:
+            print(f"Warning: Ignoring malformed header: {entry}")
+            continue
+        parsed[name.strip()] = content.strip()
+    return parsed
+def parse_env_vars(env_list: list[str]) -> dict[str, str]:
+    """Turn 'KEY=VALUE' strings into an environment dictionary.
+    Each entry is split at its first '=' and both sides are stripped of
+    surrounding whitespace. Entries without '=' are skipped with a printed
+    warning. A missing or empty list yields an empty dict.
+    """
+    parsed = {}
+    for entry in env_list or ():
+        name, equals, content = entry.partition("=")
+        if not equals:
+            print(f"Warning: Ignoring malformed environment variable: {entry}")
+            continue
+        parsed[name.strip()] = content.strip()
+    return parsed
+async def main():
+    """Command-line entry point: parse arguments, connect, evaluate, report.
+    Exits with status 1 when the evaluation file does not exist or the
+    transport arguments are invalid. The report is written to ``--output`` if
+    given, otherwise printed to stdout.
+    """
+    # The examples put the evaluation file first: -a, -e and -H accept one or
+    # more values, so a positional placed directly after them would be consumed
+    # as one of those values.
+    parser = argparse.ArgumentParser(
+        description="Evaluate MCP servers using test questions",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Evaluate a local stdio MCP server
+  python evaluation.py eval.xml -t stdio -c python -a my_server.py
+  # Evaluate an SSE MCP server
+  python evaluation.py eval.xml -t sse -u https://example.com/mcp -H "Authorization: Bearer token"
+  # Evaluate an HTTP MCP server with custom model
+  python evaluation.py eval.xml -t http -u https://example.com/mcp -m claude-3-5-sonnet-20241022
+        """,
+    )
+    parser.add_argument("eval_file", type=Path, help="Path to evaluation XML file")
+    parser.add_argument("-t", "--transport", choices=["stdio", "sse", "http"], default="stdio", help="Transport type (default: stdio)")
+    parser.add_argument("-m", "--model", default="claude-3-7-sonnet-20250219", help="Claude model to use (default: claude-3-7-sonnet-20250219)")
+    stdio_group = parser.add_argument_group("stdio options")
+    stdio_group.add_argument("-c", "--command", help="Command to run MCP server (stdio only)")
+    stdio_group.add_argument("-a", "--args", nargs="+", help="Arguments for the command (stdio only)")
+    stdio_group.add_argument("-e", "--env", nargs="+", help="Environment variables in KEY=VALUE format (stdio only)")
+    remote_group = parser.add_argument_group("sse/http options")
+    remote_group.add_argument("-u", "--url", help="MCP server URL (sse/http only)")
+    remote_group.add_argument("-H", "--header", nargs="+", dest="headers", help="HTTP headers in 'Key: Value' format (sse/http only)")
+    parser.add_argument("-o", "--output", type=Path, help="Output file for evaluation report (default: stdout)")
+    args = parser.parse_args()
+    if not args.eval_file.exists():
+        print(f"Error: Evaluation file not found: {args.eval_file}")
+        sys.exit(1)
+    headers = parse_headers(args.headers) if args.headers else None
+    env_vars = parse_env_vars(args.env) if args.env else None
+    try:
+        connection = create_connection(
+            transport=args.transport,
+            command=args.command,
+            args=args.args,
+            env=env_vars,
+            url=args.url,
+            headers=headers,
+        )
+    except ValueError as e:
+        print(f"Error: {e}")
+        sys.exit(1)
+    print(f"🔗 Connecting to MCP server via {args.transport}...")
+    async with connection:
+        print("✅ Connected successfully")
+        report = await run_evaluation(args.eval_file, connection, args.model)
+        if args.output:
+            # The report contains non-ASCII characters (✅/❌), so the encoding
+            # is fixed instead of depending on the platform default.
+            args.output.write_text(report, encoding="utf-8")
+            print(f"\n✅ Report saved to {args.output}")
+        else:
+            print("\n" + report)
+if __name__ == "__main__":
+    asyncio.run(main())

package/bin/skill/mcp-builder/scripts/example_evaluation.xml ADDED Viewed

@@ -0,0 +1,22 @@
+<evaluation>
+   <qa_pair>
+      <question>Calculate the compound interest on $10,000 invested at 5% annual interest rate, compounded monthly for 3 years. What is the final amount in dollars (rounded to 2 decimal places)?</question>
+      <answer>11614.72</answer>
+   </qa_pair>
+   <qa_pair>
+      <question>A projectile is launched at a 45-degree angle with an initial velocity of 50 m/s. Calculate the total distance (in meters) it has traveled from the launch point after 2 seconds, assuming g=9.8 m/s². Round to 2 decimal places.</question>
+      <answer>87.25</answer>
+   </qa_pair>
+   <qa_pair>
+      <question>A sphere has a volume of 500 cubic meters. Calculate its surface area in square meters. Round to 2 decimal places.</question>
+      <answer>304.65</answer>
+   </qa_pair>
+   <qa_pair>
+      <question>Calculate the population standard deviation of this dataset: [12, 15, 18, 22, 25, 30, 35]. Round to 2 decimal places.</question>
+      <answer>7.61</answer>
+   </qa_pair>
+   <qa_pair>
+      <question>Calculate the pH of a solution with a hydrogen ion concentration of 3.5 × 10^-5 M. Round to 2 decimal places.</question>
+      <answer>4.46</answer>
+   </qa_pair>
+</evaluation>

package/bin/skill/mcp-builder/scripts/requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ anthropic>=0.39.0
2	+ mcp>=1.1.0

package/bin/skill/monitoring-design/SKILL.md ADDED Viewed

@@ -0,0 +1,79 @@
+---
+name: monitoring-design
+description: 工程监测方案设计专业技能包。当需要编制地铁保护区监测、深基坑监测或结构健康监测方案时加载此技能，获取规范要求、测点布设原则和仪器选型指引。
+---
+## 适用场景
+- 编制地铁保护区第三方监测方案
+- 设计深基坑施工监测方案
+- 规划高层建筑变形监测系统
+- 制定自动化监测网络架构
+## 核心规范体系
+| 规范编号 | 规范名称 | 适用场景 |
+|---------|---------|---------|
+| GB 50911 | 城市轨道交通工程监测技术规范 | 地铁保护区监测 |
+| GB 50497 | 建筑基坑工程监测技术标准 | 深基坑监测 |
+| JGJ 8   | 建筑变形测量规范 | 建筑沉降/倾斜监测 |
+| GB 50026 | 工程测量标准 | 控制测量与地形测量 |
+| GB 55017 | 工程勘察通用规范（强制性） | 勘察全流程 |
+## 地铁保护区监测必测项目
+**一级保护区（距结构外边线0~5m）**：
+- 地铁结构沉降（每隔5~10m布点）
+- 地铁结构水平位移
+- 相邻建筑物倾斜与沉降
+- 地面沉降（影响范围内按网格布点）
+**二级保护区（5~15m）**：
+- 地面沉降
+- 地下管线位移（重要管线加密）
+- 周边建筑物沉降
+**监测频率**：
+- 正常施工期：1次/2天
+- 较大施工扰动期：1次/天
+- 报警后或特殊工况：2次/天及以上
+## 深基坑必测项目清单
+| 监测项目 | 测点布设间距 | 监测等级（一/二/三级） | 频率要求 |
+|---------|-----------|-------------------|--------|
+| 围护桩顶水平位移 | 20~50m | 必测/必测/必测 | 1次/天 |
+| 周边地表沉降 | 10~20m | 必测/必测/必测 | 1次/天 |
+| 坑外地下水位 | 30~50m | 必测/必测/选测 | 1次/2天 |
+| 支撑轴力 | 每道支撑 | 必测/必测/选测 | 1次/天 |
+| 立柱桩沉降 | 关键立柱 | 必测/选测/选测 | 1次/2天 |
+| 深部水平位移（测斜） | 20~30m | 必测/必测/选测 | 1次/天 |
+| 坑底隆起 | 基坑中部 | 选测/选测/选测 | 1次/2天 |
+## 控制指标设定原则
+控制指标须同时满足：
+1. **绝对值控制**：如围护桩顶累计位移不超过30mm
+2. **速率控制**：如日变化量不超过3mm/d（发展速率异常时须上调频率）
+3. **与设计计算值挂钩**：若设计计算值更严，以设计值为准
+**禁止直接套用**，须依据地质条件、周边环境敏感度和设计要求综合拟定，并由质检总工（qa_reviewer）审核确认。
+## 仪器精度要求速查
+| 监测项目 | 最低精度要求 | 推荐仪器型号示例 |
+|---------|-----------|--------------|
+| 精密水准 | ±0.3mm/km | 徕卡NA3003、天宝DiNi03 |
+| 平面位移 | ±1mm | 徕卡TS60、天宝S9 全站仪 |
+| 静力水准 | ±0.1mm | 基康BGK-4600 |
+| 测斜仪 | ±0.02mm/500mm | 基康BGK-6000 |
+| GNSS | 平面±2mm,高程±3mm | 华测i50、中海达V200 |
+| 振弦传感器（轴力） | ±0.1% F.S | 基康BGK-4911 |
+## 自动化监测系统架构要素
+1. **感知层**：传感器阵列（静力水准+测斜仪+振弦传感器）
+2. **传输层**：4G/NB-IoT 无线数传模块，支持断点续传
+3. **处理层**：边缘计算网关，实现数据预处理与去噪
+4. **平台层**：云端监测管理平台，支持多项目统一管理、超限自动报警推送（短信+APP）
+5. **展示层**：Web 端与移动端看板，支持实时曲线图与历史回溯