npm - astron-eval - Versions diffs - 0.0.1 - Mend

astron-eval 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96) hide show

package/skills/model-evaluation/scripts/files/streaming.py ADDED Viewed

@@ -0,0 +1,245 @@
+# -*- coding: utf-8 -*-
+"""
+流式文件读取模块
+提供大文件的流式读取能力，使用生成器模式逐行处理，
+避免一次性加载整个文件到内存。
+函数:
+    load_jsonl_stream: 流式读取 JSONL 文件
+    load_csv_stream: 流式读取 CSV 文件
+"""
+import json
+import csv
+from pathlib import Path
+from typing import Generator, Dict, Any
+from utils.constants import ERR_FILE_NOT_FOUND, ERR_FILE_ENCODING, ERR_FILE_PARSE
+# ============================================================================
+# 内部迭代器类 - 用于支持 skipped_lines 属性
+# ============================================================================
+class _ErrorGenerator:
+    """
+    错误生成器 - 用于文件不存在等场景
+    yield 一个错误对象后结束，支持 skipped_lines 属性
+    """
+    def __init__(self, code: int, message: str):
+        self._code = code
+        self._message = message
+        self._yielded = False
+        self.skipped_lines = 0
+    def __iter__(self):
+        return self
+    def __next__(self):
+        if self._yielded:
+            raise StopIteration
+        self._yielded = True
+        return {
+            "success": False,
+            "message": self._message,
+            "code": self._code
+        }
+class _JsonlStreamIterator:
+    """
+    JSONL 流式读取迭代器
+    逐行读取 JSONL 文件，支持 skipped_lines 属性
+    """
+    def __init__(self, path: Path, encoding: str):
+        self._path = path
+        self._encoding = encoding
+        self._file = None
+        self._line_num = 0
+        self.skipped_lines = 0
+        self._encoding_error = False
+    def __iter__(self):
+        return self
+    def __next__(self):
+        # 编码错误已在第一次读取时检测
+        if self._encoding_error:
+            raise StopIteration
+        # 延迟打开文件
+        if self._file is None:
+            try:
+                self._file = open(self._path, 'r', encoding=self._encoding)
+            except UnicodeDecodeError:
+                self._encoding_error = True
+                # 返回编码错误，下次迭代结束
+                return {
+                    "success": False,
+                    "message": f"无法使用 {self._encoding} 编码读取文件: {self._path}",
+                    "code": ERR_FILE_ENCODING
+                }
+        # 逐行读取
+        while True:
+            try:
+                line = self._file.readline()
+            except UnicodeDecodeError:
+                # 编码错误可能在读取时发生
+                self._encoding_error = True
+                return {
+                    "success": False,
+                    "message": f"无法使用 {self._encoding} 编码读取文件: {self._path}",
+                    "code": ERR_FILE_ENCODING
+                }
+            if not line:
+                self._file.close()
+                raise StopIteration
+            self._line_num += 1
+            stripped = line.strip()
+            # 跳过空行（D-31）
+            if not stripped:
+                self.skipped_lines += 1
+                continue
+            # 解析 JSON
+            try:
+                data = json.loads(stripped)
+                return {"data": data, "line": self._line_num}
+            except json.JSONDecodeError as e:
+                # D-29: yield 错误对象，不抛异常
+                # D-33: 使用 ERR_FILE_PARSE (1003)
+                return {
+                    "success": False,
+                    "line": self._line_num,
+                    "message": f"JSON 解析失败: {e}",
+                    "code": ERR_FILE_PARSE
+                }
+class _CsvStreamIterator:
+    """
+    CSV 流式读取迭代器
+    逐行读取 CSV 文件，支持 skipped_lines 属性
+    """
+    def __init__(self, path: Path, encoding: str):
+        self._path = path
+        self._encoding = encoding
+        self._file = None
+        self._reader = None
+        self.skipped_lines = 0
+        self._encoding_error = False
+    def __iter__(self):
+        return self
+    def __next__(self):
+        # 编码错误已在初始化时检测
+        if self._encoding_error:
+            raise StopIteration
+        # 延迟打开文件
+        if self._file is None:
+            try:
+                self._file = open(self._path, 'r', encoding=self._encoding, newline='')
+                self._reader = csv.DictReader(self._file)
+            except UnicodeDecodeError:
+                self._encoding_error = True
+                return {
+                    "success": False,
+                    "message": f"无法使用 {self._encoding} 编码读取文件: {self._path}",
+                    "code": ERR_FILE_ENCODING
+                }
+        # 逐行读取
+        while True:
+            try:
+                row = next(self._reader)
+            except StopIteration:
+                self._file.close()
+                raise
+            except UnicodeDecodeError:
+                # 编码错误可能在读取时发生
+                self._encoding_error = True
+                return {
+                    "success": False,
+                    "message": f"无法使用 {self._encoding} 编码读取文件: {self._path}",
+                    "code": ERR_FILE_ENCODING
+                }
+            # CSV 行号：header=1，数据从 2 开始
+            line_num = self._reader.line_num
+            # 检查是否为空行（所有值为空或 None）
+            if not row or all(v is None or v.strip() == '' for v in row.values() if v):
+                self.skipped_lines += 1
+                continue
+            return {"data": dict(row), "line": line_num}
+def load_jsonl_stream(path: str, encoding: str = "utf-8") -> Generator[Dict[str, Any], None, None]:
+    """
+    流式读取 JSONL 文件，逐行返回数据
+    Args:
+        path: 文件路径
+        encoding: 文件编码（默认 utf-8）
+    Yields:
+        成功: {"data": <解析后的数据>, "line": <行号>}
+        错误: {"success": False, "line": <行号>, "message": "<错误信息>", "code": <错误码>}
+    属性:
+        skipped_lines (int): 生成器耗尽后可访问，返回跳过的空行数
+    Example:
+        >>> gen = load_jsonl_stream("data.jsonl")
+        >>> for item in gen:
+        ...     if item.get("success") is False:
+        ...         print(f"Error at line {item['line']}: {item['message']}")
+        ...     else:
+        ...         process(item["data"])
+        >>> print(f"Skipped {gen.skipped_lines} empty lines")
+    """
+    p = Path(path)
+    # 文件不存在 - 返回单元素错误生成器
+    if not p.exists():
+        return _ErrorGenerator(ERR_FILE_NOT_FOUND, f"文件不存在: {path}")
+    # 使用迭代器包装器实现 skipped_lines 属性
+    return _JsonlStreamIterator(p, encoding)
+def load_csv_stream(path: str, encoding: str = "utf-8") -> Generator[Dict[str, Any], None, None]:
+    """
+    流式读取 CSV 文件，逐行返回数据
+    Args:
+        path: 文件路径
+        encoding: 文件编码（默认 utf-8）
+    Yields:
+        {"data": <字典形式行数据>, "line": <行号>}
+    属性:
+        skipped_lines (int): 生成器耗尽后可访问，返回跳过的空行数
+    Note:
+        CSV 文件第一行为 header，数据行从 line=2 开始
+    """
+    p = Path(path)
+    # 文件不存在 - 返回单元素错误生成器
+    if not p.exists():
+        return _ErrorGenerator(ERR_FILE_NOT_FOUND, f"文件不存在: {path}")
+    # 使用迭代器包装器实现 skipped_lines 属性
+    return _CsvStreamIterator(p, encoding)

package/skills/model-evaluation/scripts/utils/__init__.py ADDED Viewed

@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+"""
+基础工具模块
+包含通用基础设施：
+- 常量定义
+- 异常类和结果构建器
+- 时间处理工具
+"""
+from .constants import (
+    # 超时配置
+    DEFAULT_TIMEOUT,
+    DEFAULT_TOKEN_EXPIRY,
+    DEFAULT_POLL_INTERVAL,
+    DEFAULT_POLL_TIMEOUT,
+    # 重试配置
+    MAX_RETRIES,
+    RETRY_BACKOFF_FACTOR,
+    # 错误码
+    ERR_FILE_NOT_FOUND,
+    ERR_FILE_ENCODING,
+    ERR_FILE_PARSE,
+    ERR_CONFIG_INVALID,
+    ERR_NETWORK_TIMEOUT,
+    ERR_NETWORK_CONNECTION,
+    ERR_NETWORK_RETRY_EXHAUSTED,
+    ERR_REMOTE_AUTH_EXPIRED,
+    ERR_REMOTE_DEFAULT,
+    # 默认路径
+    DEFAULT_AUTH_CONFIG,
+    DEFAULT_SERVER_CONFIG,
+    DEFAULT_AUTH_CACHE,
+    # OAuth 配置
+    OOB_REDIRECT,
+    DEFAULT_CALLBACK_HOST,
+    DEFAULT_CALLBACK_PORT,
+    DEFAULT_CALLBACK_PATH,
+    DEFAULT_CALLBACK_TIMEOUT,
+    # 状态
+    TERMINAL_STATES,
+    # 维度配置
+    VALID_DIMENSION_TYPES,
+    BUILTIN_FUNCTIONS,
+    # 字段映射
+    FIELD_PATTERNS,
+    REQUIRED_FIELDS,
+    OPTIONAL_FIELDS,
+)
+from .errors import (
+    result,
+    ResultDict,
+    handle_cli_error,
+    EvalError,
+    FileEncodingError,
+    FileParseError,
+    FileNotFoundError,
+    ConfigError,
+    NetworkError,
+    NetworkTimeoutError,
+    NetworkConnectionError,
+    AuthExpiredError,
+    ApiError,
+)
+from .datetime_utils import (
+    parse_iso_datetime,
+    is_expired,
+)
+from .keypoint_prompts import (
+    SYSTEM_PROMPT,
+    build_user_prompt,
+)
+# Public names re-exported by this package, grouped in the same order as the
+# imports above.
+__all__ = [
+    # Constants
+    'DEFAULT_TIMEOUT',
+    'DEFAULT_TOKEN_EXPIRY',
+    'DEFAULT_POLL_INTERVAL',
+    'DEFAULT_POLL_TIMEOUT',
+    'MAX_RETRIES',
+    'RETRY_BACKOFF_FACTOR',
+    'ERR_FILE_NOT_FOUND',
+    'ERR_FILE_ENCODING',
+    'ERR_FILE_PARSE',
+    'ERR_CONFIG_INVALID',
+    'ERR_NETWORK_TIMEOUT',
+    'ERR_NETWORK_CONNECTION',
+    'ERR_NETWORK_RETRY_EXHAUSTED',
+    'ERR_REMOTE_AUTH_EXPIRED',
+    'ERR_REMOTE_DEFAULT',
+    'DEFAULT_AUTH_CONFIG',
+    'DEFAULT_SERVER_CONFIG',
+    'DEFAULT_AUTH_CACHE',
+    'OOB_REDIRECT',
+    'DEFAULT_CALLBACK_HOST',
+    'DEFAULT_CALLBACK_PORT',
+    'DEFAULT_CALLBACK_PATH',
+    'DEFAULT_CALLBACK_TIMEOUT',
+    'TERMINAL_STATES',
+    'VALID_DIMENSION_TYPES',
+    'BUILTIN_FUNCTIONS',
+    'FIELD_PATTERNS',
+    'REQUIRED_FIELDS',
+    'OPTIONAL_FIELDS',
+    # Error handling
+    'result',
+    'ResultDict',
+    'handle_cli_error',
+    'EvalError',
+    'FileEncodingError',
+    'FileParseError',
+    # NOTE(review): this is the class imported from .errors, not the Python
+    # builtin of the same name; a star-import of this package rebinds
+    # FileNotFoundError in the importing module.
+    'FileNotFoundError',
+    'ConfigError',
+    'NetworkError',
+    'NetworkTimeoutError',
+    'NetworkConnectionError',
+    'AuthExpiredError',
+    'ApiError',
+    # Date/time helpers
+    'parse_iso_datetime',
+    'is_expired',
+    # Keypoint (evaluation point) prompt generation
+    'SYSTEM_PROMPT',
+    'build_user_prompt',
+]

package/skills/model-evaluation/scripts/utils/constants.py ADDED Viewed

@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+"""
+常量定义模块
+集中管理超时、默认值、错误码等
+"""
+# ============================================================================
+# Timeout settings (seconds)
+# ============================================================================
+DEFAULT_TIMEOUT = 30           # Default timeout for HTTP requests
+DEFAULT_TOKEN_EXPIRY = 7200    # Default token lifetime (2 hours)
+DEFAULT_POLL_INTERVAL = 30     # Interval between task status polls
+DEFAULT_POLL_TIMEOUT = 3600    # Overall task polling timeout (1 hour)
+# ============================================================================
+# Retry settings
+# ============================================================================
+MAX_RETRIES = 3
+RETRY_BACKOFF_FACTOR = 1.0
+# ============================================================================
+# Error codes
+# ============================================================================
+# Error code ranges:
+# - Local (script-side) error codes: 1000-4999
+# - Remote service error codes: 10000-99999 (passed through unmodified)
+# File errors (1000-1999)
+ERR_FILE_NOT_FOUND = 1001
+ERR_FILE_ENCODING = 1002
+ERR_FILE_PARSE = 1003
+# Configuration errors (2000-2999)
+ERR_CONFIG_INVALID = 2001
+ERR_CONFIG_MISSING = 2002
+# Network errors (3000-3999)
+ERR_NETWORK_TIMEOUT = 3001
+ERR_NETWORK_CONNECTION = 3002
+ERR_NETWORK_RETRY_EXHAUSTED = 3003
+# Data errors (4000-4999)
+ERR_DATA_INVALID = 4001
+ERR_DATA_MISSING_FIELD = 4002
+# Remote service error codes (passed through; listed for reference only)
+# Authentication service error codes: 10000-19999
+ERR_REMOTE_AUTH_EXPIRED = 10002  # Token expired
+ERR_REMOTE_DEFAULT = 10001       # Unknown remote error
+# ============================================================================
+# Default paths
+# ============================================================================
+DEFAULT_AUTH_CONFIG = "scripts/cfg/eval-auth.cfg"
+DEFAULT_SERVER_CONFIG = "scripts/cfg/eval-server.cfg"
+DEFAULT_AUTH_CACHE = "./.eval/auth.json"
+# ============================================================================
+# OAuth settings
+# ============================================================================
+OOB_REDIRECT = "urn:ietf:wg:oauth:2.0:oob"
+# OAuth2 callback settings (loopback mode)
+DEFAULT_CALLBACK_HOST = "127.0.0.1"
+DEFAULT_CALLBACK_PORT = 51943
+DEFAULT_CALLBACK_PATH = "/callback"
+DEFAULT_CALLBACK_TIMEOUT = 120  # seconds
+# ============================================================================
+# Task states
+# ============================================================================
+TERMINAL_STATES = {"Succeeded", "Failed", "Cancelled"}
+# ============================================================================
+# Dimension settings
+# ============================================================================
+VALID_DIMENSION_TYPES = {"llm-score", "llm-judge", "builtin"}
+BUILTIN_FUNCTIONS = {"BLEU", "ROUGE", "BERTScore", "COMET", "TER", "Cosine"}
+# ============================================================================
+# Evaluation-set field mapping
+# ============================================================================
+# Maps each canonical field name to the column-name aliases (English and
+# Chinese) listed for it.
+FIELD_PATTERNS = {
+    # Required fields
+    'question': ['question', 'prompt', 'input', 'query', '问题', '提问', '用户问题'],
+    'answer': ['answer', 'response', 'output', 'reply', '回答', '回复', '模型回复'],
+    'model': ['model', 'model_name', 'model_id', 'llm', 'llm_name', '模型', '模型名称', '大模型', '大语言模型'],
+    'case_id': ['case_id', 'caseid', '用例id', '用例ID'],
+    # Optional fields
+    'system': ['system', 'system_prompt', '系统提示', '系统提示词'],
+    'context': ['context', '上下文'],
+    'category': ['category', 'type', '分类', '类别'],
+    'reference': ['reference', 'ref', 'gold', '参考答案', '标准答案'],
+    'keypoint': ['keypoint', 'keypoints', '关键点', '评测点', '评估点'],
+}
+# Special case: a column named 'id' maps to case_id only on an exact match,
+# so that names such as seq_id or user_id are not matched by mistake.
+CASE_ID_EXACT_MATCH = ['id']
+REQUIRED_FIELDS = ['question', 'answer', 'model', 'case_id']
+OPTIONAL_FIELDS = ['system', 'context', 'category', 'reference', 'keypoint']

package/skills/model-evaluation/scripts/utils/datetime_utils.py ADDED Viewed

@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+"""
+时间处理工具函数
+提供 ISO 格式时间解析和过期检查功能
+"""
+from datetime import datetime
+from typing import Optional
+def parse_iso_datetime(dt_str: str) -> Optional[datetime]:
+    """
+    解析 ISO 格式时间字符串，支持带时区和不带时区格式
+    Args:
+        dt_str: ISO 格式时间字符串
+    Returns:
+        datetime 对象，解析失败返回 None
+    Examples:
+        >>> parse_iso_datetime("2024-01-15T10:30:00")
+        datetime(2024, 1, 15, 10, 30, 0)
+        >>> parse_iso_datetime("2024-01-15T10:30:00Z")
+        datetime(2024, 1, 15, 10, 30, 0, tzinfo=timezone.utc)
+        >>> parse_iso_datetime("2024-01-15T10:30:00+08:00")
+        datetime(2024, 1, 15, 10, 30, 0, tzinfo=timezone(timedelta(hours=8)))
+    """
+    try:
+        # Python 3.7+ 支持 datetime.fromisoformat
+        if '+' in dt_str or dt_str.endswith('Z'):
+            return datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
+        return datetime.fromisoformat(dt_str)
+    except ValueError:
+        return None
+def is_expired(expires_at: str) -> bool:
+    """
+    检查过期时间是否已过期
+    Args:
+        expires_at: ISO 格式的过期时间字符串
+    Returns:
+        True 表示已过期或无法解析，False 表示未过期
+    Examples:
+        >>> is_expired("2020-01-01T00:00:00")  # 过去时间
+        True
+        >>> is_expired("2099-12-31T23:59:59")  # 未来时间
+        False
+    """
+    expire_time = parse_iso_datetime(expires_at)
+    if expire_time is None:
+        return True
+    now = datetime.now()
+    if expire_time.tzinfo:
+        now = now.astimezone()
+    return now >= expire_time