RubyGems - llm_translate - Versions diffs - 0.1.0 - Mend

llm_translate 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

checksums.yaml +7 -0
data/.rspec_status +14 -0
data/README.md +301 -0
data/README.zh.md +209 -0
data/Rakefile +12 -0
data/content/changelog-1.md +12 -0
data/content/changelog-2.md +12 -0
data/content/llm_translate.yml +189 -0
data/content/prompt.md +8 -0
data/content/todo.md +115 -0
data/exe/llm_translate +6 -0
data/lib/llm_translate/ai_client.rb +95 -0
data/lib/llm_translate/cli.rb +205 -0
data/lib/llm_translate/config.rb +279 -0
data/lib/llm_translate/file_finder.rb +153 -0
data/lib/llm_translate/logger.rb +170 -0
data/lib/llm_translate/translator_engine.rb +233 -0
data/lib/llm_translate/version.rb +5 -0
data/lib/llm_translate.rb +16 -0
data/llm_translate.gemspec +41 -0
data/llm_translate.yml +189 -0
data/test_config.yml +52 -0
data/test_docs/sample.md +22 -0
data/test_docs_translated/sample.zh.md +22 -0
data/test_llm_translate.yml +180 -0
data/test_new_config.yml +189 -0
metadata +143 -0

data/content/llm_translate.yml ADDED Viewed

@@ -0,0 +1,189 @@
+# llm_translate.yml - 翻译工具配置文件
+# AI 模型配置
+ai:
+  # API 密钥
+  api_key: xxxx
+  # API 主机地址
+  host: https://aihubmix.com
+  # 模型提供商
+  provider: "claude"
+  # 模型名称
+  model: "claude-3-7-sonnet-20250219"
+  # 模型参数
+  temperature: 0.3
+  max_tokens: 4000
+  top_p: 1.0
+  # 请求重试配置
+  retry_attempts: 3
+  retry_delay: 2  # 秒
+  # 请求超时时间
+  timeout: 60  # 秒
+# 翻译配置
+translation:
+  # 默认翻译 prompt
+  default_prompt: |
+    请将以下 Markdown 内容翻译为中文，保持所有格式不变：
+    - 保留代码块、链接、图片等 Markdown 语法
+    - 保留英文的专业术语和产品名称
+    - 确保翻译自然流畅
+    内容：
+    {content}
+  # 目标语言
+  target_language: "zh-CN"
+  # 源语言（auto 为自动检测）
+  source_language: "auto"
+  # 是否保留原文格式
+  preserve_formatting: true
+  # 是否翻译代码注释
+  translate_code_comments: false
+  # 需要保留不翻译的内容模式
+  preserve_patterns:
+    - "```[\\s\\S]*?```"  # 代码块
+    - "`[^`]+`"            # 行内代码
+    - "\\[.*?\\]\\(.*?\\)" # 链接
+    - "!\\[.*?\\]\\(.*?\\)" # 图片
+# 文件处理配置
+files:
+  # 输入目录
+  input_directory: "./docs"
+  # 输出目录
+  output_directory: "./docs-translated"
+  # 输入文件
+  input_file: "./README.md"
+  # 输出文件
+  output_file: "./README.zh.md"
+  # 文件命名策略
+  filename_strategy: "suffix"  # suffix, replace, directory
+  filename_suffix: ".zh"       # 仅当 strategy 为 suffix 时使用
+  # 包含的文件模式
+  include_patterns:
+    - "**/*.md"
+    - "**/*.markdown"
+  # 排除的文件模式
+  exclude_patterns:
+    - "**/node_modules/**"
+    - "**/.*"
+    - "**/*.tmp"
+    - "**/README.md"  # 示例：排除 README 文件
+  # 是否保持目录结构
+  preserve_directory_structure: true
+  # 文件覆盖策略
+  overwrite_policy: "ask"  # ask, overwrite, skip, backup
+  # 备份目录（当 overwrite_policy 为 backup 时）
+  backup_directory: "./backups"
+# 日志配置
+logging:
+  # 日志级别
+  level: "info"  # debug, info, warn, error
+  # 日志输出位置
+  output: "console"  # console, file, both
+  # 日志文件路径（当 output 包含 file 时）
+  file_path: "./logs/llm_translate.log"
+  # 是否记录详细的翻译过程
+  verbose_translation: false
+  # 错误日志文件
+  error_log_path: "./logs/errors.log"
+# 错误处理配置
+error_handling:
+  # 遇到错误时的行为
+  on_error: "log_and_continue"  # stop, log_and_continue, skip_file
+  # 最大连续错误数（超过则停止）
+  max_consecutive_errors: 5
+  # 错误重试次数
+  retry_on_failure: 2
+  # 生成错误报告
+  generate_error_report: true
+  error_report_path: "./logs/error_report.md"
+# 性能配置
+performance:
+  # 并发处理文件数
+  concurrent_files: 3
+  # 批处理大小（同时翻译的文件数）
+  batch_size: 5
+  # 请求间隔（避免 API 限流）
+  request_interval: 1  # 秒
+  # 内存使用限制
+  max_memory_mb: 500
+# 输出配置
+output:
+  # 是否显示进度条
+  show_progress: true
+  # 是否显示翻译统计
+  show_statistics: true
+  # 是否生成翻译报告
+  generate_report: true
+  report_path: "./reports/translation_report.md"
+  # 输出格式
+  format: "markdown"  # markdown, json, yaml
+  # 是否保留元数据
+  include_metadata: true
+# 预设配置（可通过 --preset 参数使用）
+presets:
+  chinese:
+    translation:
+      target_language: "zh-CN"
+      default_prompt: "翻译为简体中文，保持技术术语的准确性"
+  japanese:
+    translation:
+      target_language: "ja"
+      default_prompt: "日本語に翻訳してください。技術用語は正確に保ってください"
+  english:
+    translation:
+      target_language: "en"
+      default_prompt: "Translate to English, maintaining technical accuracy"
+# 自定义 Hook（高级功能）
+hooks:
+  # 翻译前处理
+  pre_translation: null
+  # 翻译后处理
+  post_translation: null
+  # 文件处理完成后
+  post_file_processing: null

data/content/prompt.md ADDED Viewed

@@ -0,0 +1,8 @@
+## Translator
+Ruby Gem：读取传入的 Markdown 文件，使用 AI 进行翻译，再生成翻译后的文件
+- 指定传入的目录
+- 指定输出目录
+- 翻译的 prompt 可以自定义
+- LLM 调用使用 ruby_llm gem

data/content/todo.md ADDED Viewed

@@ -0,0 +1,115 @@
+# Translator Ruby Gem 开发计划
+## 项目概述
+实现一个 Ruby Gem，用于将 Markdown 文件通过 AI 进行翻译并生成新文件。
+## 详细任务列表
+### 1. 项目基础设施
+#### 1.1 设置 Ruby Gem 项目结构
+- [ ] 创建 gemspec 文件
+- [ ] 创建 Gemfile 和依赖管理
+- [ ] 设置 lib 目录结构
+- [ ] 创建 bin 目录和可执行文件
+- [ ] 配置 .gitignore 和基础文件
+#### 1.2 实现命令行界面
+- [ ] 使用 OptionParser 或 Thor 创建 CLI
+- [ ] 支持指定输入目录参数
+- [ ] 支持指定输出目录参数
+- [ ] 支持自定义翻译 prompt 参数
+- [ ] 添加帮助信息和版本显示
+### 2. 核心功能开发
+#### 2.1 实现 Markdown 文件发现功能
+- [ ] 递归扫描指定目录中的所有 .md 文件
+- [ ] 支持文件过滤和排除规则
+- [ ] 维护原始目录结构信息
+#### 2.2 集成 rubyllm gem
+- [ ] 添加 ruby_llm 到依赖列表 https://rubyllm.com/
+- [ ] 配置 AI 模型连接参数
+- [ ] 实现 API 调用封装
+- [ ] 处理 API 限流和重试机制
+#### 2.3 实现翻译核心逻辑
+- [ ] 读取 Markdown 文件内容
+- [ ] 解析 Markdown 格式结构
+- [ ] 调用 AI 进行翻译
+- [ ] 保持 Markdown 格式完整性（代码块、链接、图片等）
+- [ ] 处理多语言内容混合情况
+#### 2.4 实现可自定义翻译 prompt 功能
+- [ ] 支持从配置文件读取 prompt
+- [ ] 提供默认翻译 prompt 模板
+- [ ] 支持 prompt 变量替换（如目标语言）
+#### 2.5 实现输出文件管理
+- [ ] 在指定目录生成翻译后的文件
+- [ ] 保持原始目录结构
+- [ ] 支持文件名后缀配置（如 .zh.md）
+- [ ] 处理文件覆盖和备份策略
+### 3. 质量保证
+#### 3.1 添加错误处理和日志
+- [ ] 文件读写错误处理
+- [ ] AI 请求失败处理
+- [ ] 网络连接异常处理
+- [ ] 添加详细的日志记录
+- [ ] 实现优雅的错误恢复机制
+#### 3.2 实现配置系统
+- [ ] 支持配置文件（YAML）
+- [ ] API key 环境变量
+- [ ] 模型参数配置（温度、最大长度等）
+- [ ] 翻译选项配置
+- [ ] 环境变量支持
+#### 3.3 编写测试用例
+- [ ] 单元测试：核心模块测试
+- [ ] Mock AI 请求进行离线测试
+- [ ] 测试各种 Markdown 格式
+#### 3.4 执行结果记录
+- [ ] 中途出错时不中断执行，而是记录到错误日志中
+### 4. 文档和发布
+#### 4.1 完善文档
+- [ ] 更新 README.md 文件
+- [ ] 编写详细的使用说明
+- [ ] API 文档和代码注释
+- [ ] 配置文件示例
+- [ ] 常见问题解答
+#### 4.2 打包发布
+- [ ] 配置 gem 打包流程
+- [ ] 设置版本管理策略
+- [ ] 准备发布到 RubyGems
+- [ ] 创建 GitHub Release
+- [ ] 设置持续集成流程
+## 技术栈
+- **语言**: Ruby
+- **CLI 框架**: OptionParser 或 Thor
+- **AI 集成**: ruby_llm gem
+- **测试框架**: RSpec
+- **配置管理**: YAML
+- **日志**: Ruby Logger
+## 开发优先级
+1. **高优先级**: 项目结构设置、基础 CLI、文件发现、AI 集成
+2. **中优先级**: 翻译逻辑、输出管理、错误处理
+3. **低优先级**: 高级配置、测试完善、文档编写、发布准备
+## 预期功能
+```bash
+# 使用配置文件
+translator --config ./translator.yml
+```

data/exe/llm_translate ADDED Viewed

@@ -0,0 +1,6 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+# Executable entry point: loads the library and forwards the raw
+# command-line arguments to the CLI class.
+require_relative '../lib/llm_translate'
+cli_arguments = ARGV
+LlmTranslate::CLI.start(cli_arguments)

data/lib/llm_translate/ai_client.rb ADDED Viewed

@@ -0,0 +1,95 @@
+# frozen_string_literal: true
+require 'ruby_llm'
+module LlmTranslate
+  # Thin wrapper around the RubyLLM gem that turns a piece of content into a
+  # prompt, sends it to the configured model and returns the reply text.
+  #
+  # The +config+ object must respond to +api_key+, +ai_host+, +ai_model+,
+  # +temperature+, +retry_attempts+, +retry_delay+, +default_prompt+,
+  # +target_language+ and +source_language+. The +logger+ object must respond
+  # to +log_ai_request+, +log_ai_response+, +warn+ and +error+.
+  class AiClient
+    # Placeholders recognised inside a prompt template.
+    PLACEHOLDER_PATTERN = /\{(content|target_language|source_language)\}/.freeze
+    private_constant :PLACEHOLDER_PATTERN
+    attr_reader :config, :logger
+    # @param config [Object] configuration accessor (see class comment)
+    # @param logger [Object] logger (see class comment)
+    def initialize(config, logger)
+      @config = config
+      @logger = logger
+      @client = initialize_client
+    end
+    # Translates +content+ using either +custom_prompt+ or the configured
+    # default prompt template.
+    #
+    # A failed request (any StandardError, including an empty reply) is
+    # retried up to +config.retry_attempts+ times after the initial try.
+    #
+    # @param content [String] text to translate
+    # @param custom_prompt [String, nil] template overriding the default one
+    # @return [String] the reply with surrounding whitespace removed
+    # @raise [TranslationError] once all retries are exhausted
+    def translate(content, custom_prompt = nil)
+      prompt = build_prompt(content, custom_prompt)
+      logger.log_ai_request(prompt.length, config.ai_model)
+      retries = 0
+      begin
+        response = make_request(prompt)
+        translated = response.to_s.strip
+        # A whitespace-only reply is as useless as no reply at all, so it is
+        # rejected here instead of being returned as an empty translation.
+        raise TranslationError, 'Empty response from AI service' if translated.empty?
+        logger.log_ai_response(response.to_s.length)
+        translated
+      rescue StandardError => e
+        retries += 1
+        if retries > config.retry_attempts
+          raise TranslationError, "AI translation failed after #{config.retry_attempts} attempts: #{e.message}"
+        end
+        logger.warn "AI request failed (attempt #{retries}/#{config.retry_attempts}): #{e.message}"
+        # Linear backoff: the wait grows by retry_delay with every retry.
+        sleep(config.retry_delay * retries)
+        retry
+      end
+    end
+    # Sends a fixed greeting to the model to check that requests succeed.
+    #
+    # @return [Boolean] true when a non-empty reply came back; false when the
+    #   reply was empty or the request raised (the error is logged)
+    def test_connection
+      test_prompt = 'Hello, world!'
+      begin
+        response = make_request(test_prompt)
+        !response.nil? && !response.empty?
+      rescue StandardError => e
+        logger.error "AI connection test failed: #{e.message}"
+        false
+      end
+    end
+    private
+    # Applies the global RubyLLM configuration; returns whatever
+    # RubyLLM.configure returns.
+    def initialize_client
+      configure_ruby_llm
+    end
+    # Points RubyLLM's OpenAI-compatible settings at the configured host, key
+    # and model.
+    def configure_ruby_llm
+      RubyLLM.configure do |config_obj|
+        # For aihubmix.com or any custom host, use OpenAI-compatible API
+        config_obj.openai_api_key = config.api_key
+        config_obj.openai_api_base = config.ai_host
+        config_obj.default_model = config.ai_model
+      end
+    end
+    # Asks the model a single question and extracts the reply text.
+    #
+    # @param prompt [String] full prompt to send
+    # @return [String, nil] reply text; nil is possible when a Hash reply has
+    #   none of the expected content keys
+    def make_request(prompt)
+      chat = RubyLLM.chat
+                    .with_model(config.ai_model)
+                    .with_temperature(config.temperature)
+      response = chat.ask(prompt)
+      # Handle different response formats
+      case response
+      when RubyLLM::Message
+        response.content
+      when String
+        response
+      when Hash
+        response['content'] || response[:content] || response.dig('choices', 0, 'message', 'content')
+      else
+        response.to_s
+      end
+    end
+    # Fills the {content}, {target_language} and {source_language}
+    # placeholders of the prompt template.
+    #
+    # All placeholders are substituted in one pass with the block form of
+    # gsub. This keeps backslash sequences in the document (for example "\1"
+    # or "\\") from being interpreted as replacement escapes, and it means the
+    # inserted document is never re-scanned for placeholders of its own.
+    #
+    # @param content [String] text inserted for {content}
+    # @param custom_prompt [String, nil] template overriding the default one
+    # @return [String] the finished prompt
+    def build_prompt(content, custom_prompt = nil)
+      template = custom_prompt || config.default_prompt
+      template.gsub(PLACEHOLDER_PATTERN) do
+        case Regexp.last_match(1)
+        when 'content' then content.to_s
+        when 'target_language' then config.target_language.to_s
+        else config.source_language.to_s
+        end
+      end
+    end
+  end
+end

data/lib/llm_translate/cli.rb ADDED Viewed

@@ -0,0 +1,205 @@
+# frozen_string_literal: true
+require 'fileutils'
+require 'thor'
+require 'yaml'
+module LlmTranslate
+  class CLI < Thor
+    desc 'translate', 'Translate markdown files using AI'
+    option :config, aliases: '-c', type: :string, default: './llm_translate.yml',
+                    desc: 'Path to configuration file'
+    option :input, aliases: '-i', type: :string,
+                   desc: 'Input directory (overrides config)'
+    option :output, aliases: '-o', type: :string,
+                    desc: 'Output directory (overrides config)'
+    option :prompt, aliases: '-p', type: :string,
+                    desc: 'Custom translation prompt (overrides config)'
+    option :verbose, aliases: '-v', type: :boolean, default: false,
+                     desc: 'Enable verbose output'
+    option :dry_run, aliases: '-d', type: :boolean, default: false,
+                     desc: 'Perform a dry run without actual translation'
+    def translate
+      config_path = options[:config]
+      unless File.exist?(config_path)
+        say "Configuration file not found: #{config_path}", :red
+        say 'Please create a configuration file or specify a valid path with --config', :yellow
+        exit 1
+      end
+      begin
+        config = Config.load(config_path, options)
+        logger = Logger.new(config)
+        logger.info 'Starting translation process...'
+        if config.single_file_mode?
+          logger.info "Input file: #{config.input_file}"
+          logger.info "Output file: #{config.output_file}"
+        else
+          logger.info "Input directory: #{config.input_directory}"
+          logger.info "Output directory: #{config.output_directory}"
+        end
+        logger.info 'DRY RUN MODE - No files will be translated' if options[:dry_run]
+        # Initialize components
+        ai_client = AiClient.new(config, logger)
+        translator_engine = TranslatorEngine.new(config, logger, ai_client)
+        # Determine files to translate
+        if config.single_file_mode?
+          # Single file mode
+          unless File.exist?(config.input_file)
+            logger.error "Input file not found: #{config.input_file}"
+            return
+          end
+          files = [config.input_file]
+          logger.info "Single file mode: translating #{config.input_file}"
+        else
+          # Directory mode
+          file_finder = FileFinder.new(config, logger)
+          files = file_finder.find_markdown_files
+          if files.empty?
+            logger.warn "No markdown files found in #{config.input_directory}"
+            return
+          end
+          logger.info "Found #{files.length} markdown files to translate"
+        end
+        # Translate files
+        success_count = 0
+        error_count = 0
+        files.each_with_index do |file_path, index|
+          logger.info "[#{index + 1}/#{files.length}] Processing: #{file_path}"
+          translator_engine.translate_file(file_path) unless options[:dry_run]
+          success_count += 1
+          logger.info "✓ Successfully processed: #{file_path}"
+        rescue StandardError => e
+          error_count += 1
+          logger.error "✗ Failed to process #{file_path}: #{e.message}"
+          if config.should_stop_on_error?(error_count)
+            logger.error 'Stopping due to too many consecutive errors'
+            break
+          end
+        end
+        # Summary
+        logger.info 'Translation completed!'
+        logger.info "Success: #{success_count}, Errors: #{error_count}"
+        generate_report(config, success_count, error_count, files) if config.generate_report?
+      rescue ConfigurationError => e
+        say "Configuration error: #{e.message}", :red
+        exit 1
+      rescue StandardError => e
+        say "Unexpected error: #{e.message}", :red
+        say e.backtrace.join("\n") if options[:verbose]
+        exit 1
+      end
+    end
+    desc 'version', 'Show version'
+    def version
+      say "LlmTranslate #{LlmTranslate::VERSION}"
+    end
+    desc 'init', 'Initialize a new configuration file'
+    option :output, aliases: '-o', type: :string, default: './llm_translate.yml',
+                    desc: 'Output path for configuration file'
+    def init
+      config_path = options[:output]
+      return if File.exist?(config_path) && !yes?('Configuration file already exists. Overwrite? (y/N)')
+      # Copy the sample configuration
+      sample_config = File.join(__dir__, '../../content/llm_translate.yml')
+      if File.exist?(sample_config)
+        FileUtils.cp(sample_config, config_path)
+        say "Configuration file created: #{config_path}", :green
+        say 'Please edit the file to configure your API keys and preferences', :yellow
+      else
+        # Create a minimal config if sample doesn't exist
+        create_minimal_config(config_path)
+      end
+    end
+    private
+    def generate_report(config, success_count, error_count, files)
+      return unless config.generate_report?
+      report_path = config.report_path
+      FileUtils.mkdir_p(File.dirname(report_path))
+      File.write(report_path, <<~REPORT)
+        # Translation Report
+        **Date**: #{Time.now.strftime('%Y-%m-%d %H:%M:%S')}
+        ## Summary
+        - Total files: #{files.length}
+        - Successfully translated: #{success_count}
+        - Errors: #{error_count}
+        - Success rate: #{(success_count.to_f / files.length * 100).round(2)}%
+        ## Configuration
+        - Input directory: #{config.input_directory}
+        - Output directory: #{config.output_directory}
+        - Target language: #{config.target_language}
+        - AI Provider: #{config.ai_provider}
+        - Model: #{config.ai_model}
+      REPORT
+      say "Report generated: #{report_path}", :green
+    end
+    def create_minimal_config(config_path)
+      config_content = <<~YAML
+        # LlmTranslate Configuration
+        ai:
+          api_key: sk-
+          host: https://aihubmix.com/v1
+          provider: "openai"
+          model: "gpt-4o-mini"
+          temperature: 0.3
+        translation:
+          target_language: "zh-CN"
+          default_prompt: |
+            Please translate the following Markdown content to Chinese, keeping all formatting intact:
+            - Preserve code blocks, links, images, and other Markdown syntax
+            - Keep English technical terms and product names
+            - Ensure natural and fluent translation
+        #{'    '}
+            Content:
+            {content}
+        files:
+          # Directory mode (default)
+          input_directory: "./docs"
+          output_directory: "./docs-translated"
+          filename_suffix: ".zh"
+        #{'  '}
+          # Single file mode (uncomment to use)
+          # input_file: "./README.md"
+          # output_file: "./README.zh.md"
+        logging:
+          level: "info"
+          output: "console"
+      YAML
+      File.write(config_path, config_content)
+      say "Minimal configuration file created: #{config_path}", :green
+      say 'Please edit the file to configure your API keys and preferences', :yellow
+    end
+  end
+end