npm - paperfit-cli - Versions diffs - 1.0.0 - Mend

paperfit-cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

package/.claude/commands/adjust-length.md +21 -0
package/.claude/commands/check-visual.md +27 -0
package/.claude/commands/fix-layout.md +31 -0
package/.claude/commands/migrate-template.md +23 -0
package/.claude/commands/repair-table.md +21 -0
package/.claude/commands/show-status.md +32 -0
package/.claude-plugin/README.md +77 -0
package/.claude-plugin/marketplace.json +41 -0
package/.claude-plugin/plugin.json +39 -0
package/CLAUDE.md +266 -0
package/CONTRIBUTING.md +131 -0
package/LICENSE +21 -0
package/README.md +164 -0
package/agents/code-surgeon-agent.md +214 -0
package/agents/layout-detective-agent.md +229 -0
package/agents/orchestrator-agent.md +254 -0
package/agents/quality-gatekeeper-agent.md +270 -0
package/agents/rule-engine-agent.md +224 -0
package/agents/semantic-polish-agent.md +250 -0
package/bin/paperfit.js +176 -0
package/config/agent_roles.yaml +56 -0
package/config/layout_rules.yaml +54 -0
package/config/templates.yaml +241 -0
package/config/vto_taxonomy.yaml +489 -0
package/config/writing_rules.yaml +64 -0
package/install.sh +30 -0
package/package.json +52 -0
package/requirements.txt +5 -0
package/scripts/benchmark_runner.py +629 -0
package/scripts/compile.sh +244 -0
package/scripts/config_validator.py +339 -0
package/scripts/cv_detector.py +600 -0
package/scripts/evidence_collector.py +167 -0
package/scripts/float_fixers.py +861 -0
package/scripts/inject_defects.py +549 -0
package/scripts/install-claude-global.js +148 -0
package/scripts/install.js +66 -0
package/scripts/install.sh +106 -0
package/scripts/overflow_fixers.py +656 -0
package/scripts/package-for-opensource.sh +138 -0
package/scripts/parse_log.py +260 -0
package/scripts/postinstall.js +38 -0
package/scripts/pre_tool_use.py +265 -0
package/scripts/render_pages.py +244 -0
package/scripts/session_logger.py +329 -0
package/scripts/space_util_fixers.py +773 -0
package/scripts/state_manager.py +352 -0
package/scripts/test_commands.py +187 -0
package/scripts/test_cv_detector.py +214 -0
package/scripts/test_integration.py +290 -0
package/skills/consistency-polisher/SKILL.md +337 -0
package/skills/float-optimizer/SKILL.md +284 -0
package/skills/latex_fixers/__init__.py +82 -0
package/skills/latex_fixers/float_fixers.py +392 -0
package/skills/latex_fixers/fullwidth_fixers.py +375 -0
package/skills/latex_fixers/overflow_fixers.py +250 -0
package/skills/latex_fixers/semantic_micro_tuning.py +362 -0
package/skills/latex_fixers/space_util_fixers.py +389 -0
package/skills/latex_fixers/utils.py +55 -0
package/skills/overflow-repair/SKILL.md +304 -0
package/skills/space-util-fixer/SKILL.md +307 -0
package/skills/taxonomy-vto/SKILL.md +486 -0
package/skills/template-migrator/SKILL.md +251 -0
package/skills/visual-inspector/SKILL.md +217 -0
package/skills/writing-polish/SKILL.md +289 -0

package/scripts/compile.sh ADDED Viewed

@@ -0,0 +1,244 @@
+#!/bin/bash
+#
+# PaperFit LaTeX 编译脚本
+#
+# 封装 LaTeX 编译流程，支持 latexmk 和 pdflatex 两种方式，
+# 自动处理参考文献、交叉引用的多次编译，并输出结构化日志。
+#
+# 用法:
+#   ./compile.sh <main_tex> [--clean] [--engine pdflatex|xelatex|lualatex]
+#
+# 示例:
+#   ./compile.sh main.tex
+#   ./compile.sh main.tex --clean
+#   ./compile.sh main.tex --engine xelatex
+set -e  # 遇到错误立即退出
+# 颜色输出
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+# 默认参数
+MAIN_TEX=""
+CLEAN_MODE=false
+ENGINE="pdflatex"
+USE_LATEXMK=true
+BIBER=false
+# 帮助信息
+usage() {
+    cat << EOF
+用法: $0 <main_tex> [选项]
+选项:
+  --clean           清理临时文件后重新编译
+  --engine ENGINE   指定编译引擎 (pdflatex, xelatex, lualatex) [默认: pdflatex]
+  --no-latexmk      禁用 latexmk，使用手动多次编译
+  --biber           使用 biber 处理参考文献（默认使用 bibtex）
+  -h, --help        显示此帮助信息
+示例:
+  $0 main.tex
+  $0 main.tex --clean --engine xelatex
+  $0 main.tex --no-latexmk --biber
+EOF
+    exit 0
+}
+# 解析参数
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --clean)
+            CLEAN_MODE=true
+            shift
+            ;;
+        --engine)
+            ENGINE="$2"
+            shift 2
+            ;;
+        --no-latexmk)
+            USE_LATEXMK=false
+            shift
+            ;;
+        --biber)
+            BIBER=true
+            shift
+            ;;
+        -h|--help)
+            usage
+            ;;
+        *.tex)
+            MAIN_TEX="$1"
+            shift
+            ;;
+        *)
+            echo -e "${RED}未知参数: $1${NC}"
+            usage
+            ;;
+    esac
+done
+# A main .tex file is mandatory.
+if [[ -z "$MAIN_TEX" ]]; then
+    echo -e "${RED}错误: 未指定主 .tex 文件${NC}"
+    # `usage` terminates the shell that runs it, so show the help in a subshell
+    # and then fail: a missing argument is an error, not a success.
+    (usage)
+    exit 1
+fi
+# The main file must exist before anything else is attempted.
+if [[ ! -f "$MAIN_TEX" ]]; then
+    echo -e "${RED}错误: 文件不存在: $MAIN_TEX${NC}"
+    exit 1
+fi
+# Split the path into directory and extension-less base name. `--` keeps a
+# path that starts with "-" from being parsed as an option.
+BASENAME=$(basename -- "$MAIN_TEX" .tex)
+DIRNAME=$(dirname -- "$MAIN_TEX")
+if [[ "$DIRNAME" == "." ]]; then
+    DIRNAME=""
+fi
+# Path of the main file without its extension (directory prefix only when the
+# file is not in the current directory).
+MAIN_NAME="${DIRNAME:+$DIRNAME/}$BASENAME"
+# Work inside the main file's directory; from here on the file is addressed by
+# its bare name.
+cd -- "$(dirname -- "$MAIN_TEX")" || exit 1
+MAIN_TEX_FILE=$(basename -- "$MAIN_TEX")
+echo -e "${GREEN}========================================${NC}"
+echo -e "${GREEN}PaperFit LaTeX 编译${NC}"
+echo -e "${GREEN}========================================${NC}"
+echo "主文件: $MAIN_TEX"
+echo "编译引擎: $ENGINE"
+echo "使用 latexmk: $USE_LATEXMK"
+echo "参考文献工具: $([ "$BIBER" = true ] && echo "biber" || echo "bibtex")"
+echo ""
+# 清理模式
+if [[ "$CLEAN_MODE" == true ]]; then
+    echo -e "${YELLOW}清理临时文件...${NC}"
+    if [[ "$USE_LATEXMK" == true ]]; then
+        latexmk -C "$MAIN_TEX_FILE"
+    else
+        rm -f "$BASENAME".{aux,log,out,toc,lof,lot,blg,bbl,bcf,run.xml,fls,fdb_latexmk,synctex.gz,nav,snm,vrb}
+    fi
+    echo -e "${GREEN}清理完成${NC}\n"
+fi
+# 编译函数
+compile_with_latexmk() {
+    echo -e "${YELLOW}使用 latexmk 编译...${NC}"
+    local latexmk_opts="-pdf -interaction=nonstopmode"
+    if [[ "$ENGINE" != "pdflatex" ]]; then
+        latexmk_opts="$latexmk_opts -$ENGINE"
+    fi
+    if latexmk $latexmk_opts "$MAIN_TEX_FILE"; then
+        echo -e "${GREEN}latexmk 编译成功${NC}"
+        return 0
+    else
+        echo -e "${RED}latexmk 编译失败${NC}"
+        return 1
+    fi
+}
+compile_manual() {
+    echo -e "${YELLOW}使用手动多次编译...${NC}"
+    local tex_cmd="$ENGINE -interaction=nonstopmode"
+    # 第一遍编译
+    echo "第一遍 $ENGINE..."
+    if ! $tex_cmd "$MAIN_TEX_FILE" > /dev/null 2>&1; then
+        echo -e "${RED}第一遍编译失败，请查看 ${BASENAME}.log${NC}"
+        return 1
+    fi
+    # 处理参考文献
+    if grep -q "\\bibliography{" "$MAIN_TEX_FILE" || grep -q "\\bibdata{" "${BASENAME}.aux" 2>/dev/null; then
+        echo "处理参考文献..."
+        if [[ "$BIBER" == true ]]; then
+            if biber "$BASENAME" > /dev/null 2>&1; then
+                echo "biber 成功"
+            else
+                echo -e "${YELLOW}biber 失败，尝试 bibtex...${NC}"
+                bibtex "$BASENAME" > /dev/null 2>&1 || echo -e "${YELLOW}警告: 参考文献处理可能有问题${NC}"
+            fi
+        else
+            bibtex "$BASENAME" > /dev/null 2>&1 || echo -e "${YELLOW}警告: bibtex 失败或无需参考文献${NC}"
+        fi
+    fi
+    # 第二遍编译（更新交叉引用）
+    echo "第二遍 $ENGINE..."
+    if ! $tex_cmd "$MAIN_TEX_FILE" > /dev/null 2>&1; then
+        echo -e "${RED}第二遍编译失败${NC}"
+        return 1
+    fi
+    # 检查是否需要第三次编译（交叉引用未稳定）
+    if grep -q "Rerun to get" "${BASENAME}.log"; then
+        echo "第三次 $ENGINE (稳定交叉引用)..."
+        if ! $tex_cmd "$MAIN_TEX_FILE" > /dev/null 2>&1; then
+            echo -e "${YELLOW}第三次编译有警告，但 PDF 已生成${NC}"
+        fi
+    fi
+    echo -e "${GREEN}手动编译完成${NC}"
+    return 0
+}
+# Run the selected compile strategy and time it.
+START_TIME=$(date +%s)
+# Under `set -e` a bare failing call would abort the script before `$?` could
+# be read, making the failure summary below unreachable. `cmd || STATUS=$?`
+# records the failure and lets the script continue.
+COMPILE_STATUS=0
+if [[ "$USE_LATEXMK" == true ]]; then
+    compile_with_latexmk || COMPILE_STATUS=$?
+else
+    compile_manual || COMPILE_STATUS=$?
+fi
+END_TIME=$(date +%s)
+DURATION=$((END_TIME - START_TIME))
+# Report on the PDF, if one exists.
+PDF_FILE="${BASENAME}.pdf"
+if [[ -f "$PDF_FILE" ]]; then
+    PDF_SIZE=$(du -h "$PDF_FILE" | cut -f1)
+    # awk exits 0 even when it prints nothing (pdfinfo missing or failing), so
+    # the "?" placeholder is applied to an empty result afterwards.
+    PDF_PAGES=$(pdfinfo "$PDF_FILE" 2>/dev/null | awk '/Pages:/ {print $2}')
+    PDF_PAGES=${PDF_PAGES:-?}
+    echo ""
+    echo -e "${GREEN}========================================${NC}"
+    echo -e "${GREEN}编译结果${NC}"
+    echo -e "${GREEN}========================================${NC}"
+    echo "PDF 文件: $PDF_FILE"
+    echo "文件大小: $PDF_SIZE"
+    echo "页数: $PDF_PAGES"
+    echo "编译耗时: ${DURATION}s"
+    # Log summary.
+    LOG_FILE="${BASENAME}.log"
+    if [[ -f "$LOG_FILE" ]]; then
+        # `grep -c` prints "0" AND exits 1 when nothing matches, so an
+        # `|| echo 0` fallback would yield "0<newline>0". Ignore the status
+        # and default only a genuinely empty result.
+        ERROR_COUNT=$(grep -c "^!" "$LOG_FILE" 2>/dev/null || true)
+        WARNING_COUNT=$(grep -c "Warning:" "$LOG_FILE" 2>/dev/null || true)
+        OVERFULL_COUNT=$(grep -c "Overfull" "$LOG_FILE" 2>/dev/null || true)
+        echo "错误数: ${ERROR_COUNT:-0}"
+        echo "警告数: ${WARNING_COUNT:-0}"
+        echo "Overfull hbox: ${OVERFULL_COUNT:-0}"
+    fi
+    echo -e "${GREEN}========================================${NC}"
+else
+    echo -e "${RED}========================================${NC}"
+    echo -e "${RED}编译失败: PDF 未生成${NC}"
+    echo -e "${RED}========================================${NC}"
+    echo "请查看日志文件: ${BASENAME}.log"
+    exit 1
+fi
+# Exit status: success only when the compile step succeeded and a PDF exists
+# (a PDF left over from an earlier run does not turn a failed compile into
+# success).
+if [[ $COMPILE_STATUS -eq 0 ]] && [[ -f "$PDF_FILE" ]]; then
+    exit 0
+else
+    exit 1
+fi

package/scripts/config_validator.py ADDED Viewed

@@ -0,0 +1,339 @@
+#!/usr/bin/env python3
+"""
+PaperFit 配置验证器
+验证所有 YAML 配置文件的完整性和一致性。
+防止配置错误导致检测失效或 Agent 行为异常。
+用法:
+    python config_validator.py [--verbose]
+"""
+import os
+import sys
+import yaml
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+from dataclasses import dataclass
+from typing import TypedDict
+# ============================================================
+# 配置 Schema 定义
+# ============================================================
+CONFIG_DIR = Path(__file__).parent.parent / "config"
+CONFIG_FILES = {
+    "vto_taxonomy.yaml": "VTO 缺陷分类体系",
+    "layout_rules.yaml": "版式硬规则与阈值",
+    "writing_rules.yaml": "写作硬规则",
+    "templates.yaml": "模板参数",
+    "agent_roles.yaml": "Agent 职责描述",
+}
+@dataclass
+class ValidationError:
+    """A single finding produced while validating a configuration file."""
+    # Name of the configuration file the finding refers to.
+    file: str
+    # Path of the offending field; the validator passes "" for problems that
+    # concern the file as a whole (missing file, parse or read failure).
+    field: str
+    # Human-readable description of the problem.
+    message: str
+    # Either "error" or "warning".
+    severity: str  # "error" | "warning"
+class LayoutRulesSchema(TypedDict, total=False):
+    """Schema of layout_rules.yaml; ``total=False`` makes every key optional."""
+    # NOTE(review): this TypedDict is not referenced by the validation code
+    # visible in this file; it appears to serve as documentation of the
+    # expected top-level sections - confirm before relying on it.
+    whitespace: Dict[str, float]
+    table: Dict[str, Any]
+    float: Dict[str, Any]
+    equation: Dict[str, float]
+    paragraph: Dict[str, Any]
+    font: Dict[str, Any]
+    consistency: Dict[str, bool]
+class VtoTaxonomySchema(TypedDict, total=False):
+    """Schema of vto_taxonomy.yaml; ``total=False`` makes every key optional."""
+    # NOTE(review): this TypedDict is not referenced by the validation code
+    # visible in this file; it appears to serve as documentation of the
+    # expected top-level keys - confirm before relying on it.
+    version: str
+    description: str
+    categories: List[Dict[str, Any]]
+    defects: List[Dict[str, Any]]
+    severity_levels: Dict[str, Any]
+    skill_routing: Dict[str, List[str]]
+# ============================================================
+# 验证器类
+# ============================================================
+class ConfigValidator:
+    """配置验证器"""
+    def __init__(self, config_dir: Path = CONFIG_DIR):
+        self.config_dir = config_dir
+        self.errors: List[ValidationError] = []
+        self.warnings: List[ValidationError] = []
+        self.loaded_configs: Dict[str, Any] = {}
+    def validate_all(self) -> Tuple[bool, List[ValidationError], List[ValidationError]]:
+        """验证所有配置文件"""
+        print(f"验证配置目录：{self.config_dir}")
+        print("-" * 50)
+        for config_file, description in CONFIG_FILES.items():
+            config_path = self.config_dir / config_file
+            if not config_path.exists():
+                self.errors.append(ValidationError(
+                    file=config_file,
+                    field="",
+                    message=f"配置文件不存在：{config_file}",
+                    severity="error"
+                ))
+                print(f"[缺失] {config_file}: {description}")
+                continue
+            print(f"[检查] {config_file}: {description}")
+            self._load_and_validate(config_file)
+        print("-" * 50)
+        # 输出错误和警告
+        if self.errors:
+            print(f"\n发现 {len(self.errors)} 个错误:")
+            for err in self.errors:
+                print(f"  [ERROR] {err.file}: {err.field} - {err.message}")
+        if self.warnings:
+            print(f"\n发现 {len(self.warnings)} 个警告:")
+            for warn in self.warnings:
+                print(f"  [WARN] {warn.file}: {warn.field} - {warn.message}")
+        success = len(self.errors) == 0
+        print(f"\n验证结果：{'通过' if success else '失败'} ({len(self.warnings)} 个警告)")
+        return success, self.errors, self.warnings
+    def _load_and_validate(self, config_file: str) -> None:
+        """加载并验证单个配置文件"""
+        config_path = self.config_dir / config_file
+        try:
+            with open(config_path, 'r', encoding='utf-8') as f:
+                config = yaml.safe_load(f)
+            self.loaded_configs[config_file] = config
+        except yaml.YAMLError as e:
+            self.errors.append(ValidationError(
+                file=config_file,
+                field="",
+                message=f"YAML 解析错误：{e}",
+                severity="error"
+            ))
+            return
+        except Exception as e:
+            self.errors.append(ValidationError(
+                file=config_file,
+                field="",
+                message=f"读取失败：{e}",
+                severity="error"
+            ))
+            return
+        # 根据文件类型进行特定验证
+        if config_file == "layout_rules.yaml":
+            self._validate_layout_rules(config_file, config)
+        elif config_file == "vto_taxonomy.yaml":
+            self._validate_vto_taxonomy(config_file, config)
+        elif config_file == "templates.yaml":
+            self._validate_templates(config_file, config)
+        elif config_file == "writing_rules.yaml":
+            self._validate_writing_rules(config_file, config)
+        elif config_file == "agent_roles.yaml":
+            self._validate_agent_roles(config_file, config)
+    def _validate_layout_rules(self, file: str, config: Dict) -> None:
+        """验证 layout_rules.yaml"""
+        # 验证 whitespace 部分
+        whitespace = config.get('whitespace', {})
+        if 'trailing_whitespace_max_ratio' in whitespace:
+            val = whitespace['trailing_whitespace_max_ratio']
+            if not (0 < val < 1):
+                self.errors.append(ValidationError(
+                    file=file, field="whitespace.trailing_whitespace_max_ratio",
+                    message=f"阈值必须在 0-1 之间，当前值：{val}", severity="error"
+                ))
+        # 验证 table 部分
+        table = config.get('table', {})
+        if 'min_width_utilization' in table and 'max_width_utilization' in table:
+            if table['min_width_utilization'] >= table['max_width_utilization']:
+                self.errors.append(ValidationError(
+                    file=file, field="table.width_utilization",
+                    message="min_width_utilization 必须小于 max_width_utilization",
+                    severity="error"
+                ))
+        # 验证 float 部分
+        float_cfg = config.get('float', {})
+        if 'max_reference_distance_pages' in float_cfg:
+            val = float_cfg['max_reference_distance_pages']
+            if val < 0:
+                self.errors.append(ValidationError(
+                    file=file, field="float.max_reference_distance_pages",
+                    message=f"距离不能为负数，当前值：{val}", severity="error"
+                ))
+    def _validate_vto_taxonomy(self, file: str, config: Dict) -> None:
+        """验证 vto_taxonomy.yaml"""
+        # 验证 categories
+        categories = config.get('categories', [])
+        category_ids = set()
+        for cat in categories:
+            cat_id = cat.get('id')
+            if not cat_id:
+                self.errors.append(ValidationError(
+                    file=file, field="categories[].id",
+                    message="分类缺少 id 字段", severity="error"
+                ))
+                continue
+            if cat_id in category_ids:
+                self.errors.append(ValidationError(
+                    file=file, field="categories[].id",
+                    message=f"分类 ID 重复：{cat_id}", severity="error"
+                ))
+            category_ids.add(cat_id)
+        # 验证 defects
+        defects = config.get('defects', [])
+        defect_ids = set()
+        for defect in defects:
+            defect_id = defect.get('id')
+            if not defect_id:
+                self.errors.append(ValidationError(
+                    file=file, field="defects[].id",
+                    message="缺陷缺少 id 字段", severity="error"
+                ))
+                continue
+            if defect_id in defect_ids:
+                self.errors.append(ValidationError(
+                    file=file, field="defects[].id",
+                    message=f"缺陷 ID 重复：{defect_id}", severity="error"
+                ))
+            defect_ids.add(defect_id)
+            # 验证 category 引用
+            category = defect.get('category')
+            if category and category not in category_ids:
+                self.errors.append(ValidationError(
+                    file=file, field=f"defects[{defect_id}].category",
+                    message=f"引用不存在的分类：{category}", severity="error"
+                ))
+            # 验证 skill_routing
+            skill_routing = defect.get('fix_strategy', {}).get('primary_skill')
+            if skill_routing:
+                valid_skills = [
+                    'space-util-fixer', 'float-optimizer', 'consistency-polisher',
+                    'overflow-repair', 'template-migrator', 'visual-inspector', 'writing-polish'
+                ]
+                if skill_routing not in valid_skills:
+                    self.warnings.append(ValidationError(
+                        file=file, field=f"defects[{defect_id}].fix_strategy.primary_skill",
+                        message=f"未知的技能：{skill_routing}", severity="warning"
+                    ))
+        # 验证 skill_routing 一致性
+        skill_routing_cfg = config.get('skill_routing', {})
+        for skill, defect_list in skill_routing_cfg.items():
+            for defect_ref in defect_list:
+                if defect_ref not in defect_ids:
+                    self.errors.append(ValidationError(
+                        file=file, field=f"skill_routing.{skill}",
+                        message=f"引用不存在的缺陷 ID: {defect_ref}", severity="error"
+                    ))
+    def _validate_templates(self, file: str, config: Dict) -> None:
+        """验证 templates.yaml"""
+        templates = config.get('templates', {})
+        for name, template in templates.items():
+            if 'columns' in template:
+                cols = template['columns']
+                if cols not in [1, 2]:
+                    self.errors.append(ValidationError(
+                        file=file, field=f"templates.{name}.columns",
+                        message=f"栏数必须是 1 或 2，当前值：{cols}", severity="error"
+                    ))
+            if 'target_pages' in template:
+                pages = template['target_pages']
+                if pages < 1:
+                    self.errors.append(ValidationError(
+                        file=file, field=f"templates.{name}.target_pages",
+                        message=f"页数必须大于 0，当前值：{pages}", severity="error"
+                    ))
+    def _validate_writing_rules(self, file: str, config: Dict) -> None:
+        """验证 writing_rules.yaml"""
+        # 验证写作规则的基本结构
+        if 'forbidden_words' in config:
+            if not isinstance(config['forbidden_words'], list):
+                self.errors.append(ValidationError(
+                    file=file, field="forbidden_words",
+                    message="必须是列表类型", severity="error"
+                ))
+        if 'required_tense' in config:
+            tense = config['required_tense']
+            valid_tenses = ['present', 'past', 'present_perfect']
+            if tense not in valid_tenses:
+                self.errors.append(ValidationError(
+                    file=file, field="required_tense",
+                    message=f"无效的时态：{tense}，有效值：{valid_tenses}",
+                    severity="error"
+                ))
+    def _validate_agent_roles(self, file: str, config: Dict) -> None:
+        """验证 agent_roles.yaml"""
+        agents = config.get('agents', {})
+        for name, role in agents.items():
+            if 'description' not in role:
+                self.warnings.append(ValidationError(
+                    file=file, field=f"agents.{name}",
+                    message="缺少 description 字段", severity="warning"
+                ))
+            if 'tools' not in role:
+                self.warnings.append(ValidationError(
+                    file=file, field=f"agents.{name}",
+                    message="缺少 tools 字段", severity="warning"
+                ))
+# ============================================================
+# 主函数
+# ============================================================
+def main():
+    """主函数"""
+    import argparse
+    parser = argparse.ArgumentParser(description="PaperFit 配置验证器")
+    parser.add_argument('--verbose', '-v', action='store_true',
+                        help='显示详细输出')
+    parser.add_argument('--config-dir', type=str, default=None,
+                        help='配置文件目录')
+    args = parser.parse_args()
+    config_dir = Path(args.config_dir) if args.config_dir else CONFIG_DIR
+    validator = ConfigValidator(config_dir)
+    success, errors, warnings = validator.validate_all()
+    if args.verbose:
+        print("\n已加载的配置:")
+        for file, config in validator.loaded_configs.items():
+            print(f"  - {file}: {len(str(config))} 字符")
+    sys.exit(0 if success else 1)
+if __name__ == "__main__":
+    main()