llm_translate 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONCURRENT_CHUNKS_UPDATE.md +149 -0
- data/DOCUMENT_SPLITTER_SUMMARY.md +123 -0
- data/README.md +79 -0
- data/large_document_config.yml +159 -0
- data/lib/llm_translate/config.rb +30 -19
- data/lib/llm_translate/document_splitter.rb +157 -0
- data/lib/llm_translate/translator_engine.rb +96 -2
- data/lib/llm_translate/version.rb +1 -1
- data/llm_translate.yml +14 -2
- metadata +6 -5
- data/test_config.yml +0 -52
- data/test_llm_translate.yml +0 -176
- data/test_new_config.yml +0 -184
@@ -0,0 +1,157 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'logger'

module LlmTranslate
  # Splits a large Markdown document into chunks that can be translated
  # independently, and stitches the translated chunks back together.
  #
  # The +config+ object must respond to:
  # * +max_chars_for_splitting+ - documents longer than this are split
  # * +split_every_chars+       - target upper bound for a single chunk
  #
  # Sections are re-joined with one blank line, so the document is only cut
  # where a blank line is already present (or harmless): at ATX headings and
  # at the first non-blank line after a blank line. Lists, tables, blockquotes
  # and fenced code blocks are therefore kept intact instead of being broken
  # apart line by line.
  class DocumentSplitter
    # Opening/closing marker of a fenced code block (``` or ~~~, indented by at
    # most three spaces). Capture group 1 is the marker itself.
    FENCE_PATTERN = /\A {0,3}(`{3,}|~{3,})/.freeze
    # ATX heading such as "## Title".
    HEADING_PATTERN = /\A#+\s+/.freeze
    # A blank (possibly whitespace-only) line at the very end of a buffer.
    TRAILING_BLANK_LINE = /\n[ \t]*\n\z/.freeze
    # One or more blank lines at the very start of a buffer.
    LEADING_BLANK_LINES = /\A(?:[ \t]*\n)+/.freeze
    private_constant :FENCE_PATTERN, :HEADING_PATTERN, :TRAILING_BLANK_LINE, :LEADING_BLANK_LINES

    attr_reader :config, :logger

    # @param config [#max_chars_for_splitting, #split_every_chars] splitting limits
    # @param logger [Logger, nil] defaults to an info-level logger on $stdout
    def initialize(config, logger = nil)
      @config = config
      @logger = logger || Logger.new($stdout, level: :info)
    end

    # Splits +content+ into chunks.
    #
    # @param content [String] the Markdown document
    # @return [Array<String>] +[content]+ untouched when it is within
    #   +config.max_chars_for_splitting+, otherwise the list of chunks
    def split_document(content)
      return [content] unless should_split?(content)

      logger.info "Document size (#{content.length} chars) exceeds limit, splitting..."

      sections = extract_markdown_sections(content)
      chunks = build_chunks(sections)

      logger.info "Document split into #{chunks.length} chunks"
      chunks
    end

    # Joins translated chunks back into one document.
    #
    # @param translated_chunks [Array<String>] chunks in document order
    # @return [String] the single chunk as-is when there is exactly one,
    #   otherwise the chunks joined by a blank line and normalised by
    #   +clean_merged_content+
    def merge_translated_chunks(translated_chunks)
      return translated_chunks.first if translated_chunks.length == 1

      logger.info "Merging #{translated_chunks.length} translated chunks..."

      # Simple merge: chunks are separated by one blank line.
      merged_content = translated_chunks.join("\n\n")

      # Collapse any surplus blank lines introduced by the join.
      clean_merged_content(merged_content)
    end

    private

    def should_split?(content)
      content.length > config.max_chars_for_splitting
    end

    # Cuts the document into sections (see the class comment for where cuts
    # are allowed). Lines inside a fenced code block are never inspected for
    # boundaries, so code such as "# comment" or "- item" stays inside its block.
    def extract_markdown_sections(content)
      sections = []
      current_section = +''
      open_fence = nil # marker of the fence we are inside, or nil

      content.split("\n").each do |line|
        if open_fence
          current_section << line << "\n"
          open_fence = nil if closes_fence?(line, open_fence)
          next
        end

        if is_section_boundary?(line, current_section)
          sections << trim_blank_lines(current_section)
          current_section = +''
        end

        current_section << line << "\n"
        open_fence = line[FENCE_PATTERN, 1]
      end

      # Flush the last section.
      sections << trim_blank_lines(current_section) unless current_section.strip.empty?

      sections
    end

    # True when +line+ should start a new section: it is an ATX heading, or it
    # is the first non-blank line after a blank line. Never true while the
    # section collected so far is still blank.
    def is_section_boundary?(line, current_section)
      return false if current_section.strip.empty?
      return true if line.match?(HEADING_PATTERN)

      !line.strip.empty? && current_section.match?(TRAILING_BLANK_LINE)
    end

    # True when +line+ closes the fence opened with +open_fence+: same fence
    # character, at least as long, and nothing else on the line.
    def closes_fence?(line, open_fence)
      marker = line[FENCE_PATTERN, 1]
      return false unless marker

      marker[0] == open_fence[0] && marker.length >= open_fence.length && line.strip == marker
    end

    # Removes leading blank lines and trailing whitespace while keeping the
    # indentation of the first content line (String#strip would flatten
    # nested list items and indented code).
    def trim_blank_lines(text)
      text.sub(LEADING_BLANK_LINES, '').rstrip
    end

    # Packs sections greedily into chunks of at most +config.split_every_chars+
    # characters, separated by a blank line. A section that is too large on its
    # own is split line by line via +force_split_section+.
    def build_chunks(sections)
      limit = config.split_every_chars
      chunks = []
      current_chunk = +''

      sections.each do |section|
        if section.length > limit
          # Flush what we have, then split the oversized section by lines.
          chunks << current_chunk.rstrip unless current_chunk.empty?
          chunks.concat(force_split_section(section))
          current_chunk = +''
          next
        end

        potential_length = current_chunk.length + section.length + 2 # +2 for "\n\n"
        if potential_length > limit && !current_chunk.empty?
          chunks << current_chunk.rstrip
          current_chunk = +''
        end

        current_chunk << section << "\n\n"
      end

      # Flush the last chunk.
      chunks << current_chunk.rstrip unless current_chunk.empty?

      chunks
    end

    # Splits an oversized section on line boundaries. A single line longer
    # than the limit is kept whole, so a chunk can still exceed the limit.
    def force_split_section(section)
      limit = config.split_every_chars
      chunks = []
      current_chunk = +''

      section.split("\n").each do |line|
        potential_length = current_chunk.length + line.length + 1 # +1 for "\n"
        if potential_length > limit && !current_chunk.strip.empty?
          chunks << trim_blank_lines(current_chunk)
          current_chunk = +''
        end

        current_chunk << line << "\n"
      end

      chunks << trim_blank_lines(current_chunk) unless current_chunk.strip.empty?
      chunks
    end

    # Collapses runs of three or more newlines into a single blank line and
    # makes the document end with exactly one newline.
    def clean_merged_content(content)
      cleaned = content.gsub(/\n{3,}/, "\n\n")

      "#{cleaned.strip}\n"
    end
  end
end
|
@@ -3,16 +3,18 @@
|
|
3
3
|
require 'pathname'
|
4
4
|
require 'fileutils'
|
5
5
|
require 'async'
|
6
|
+
require_relative 'document_splitter'
|
6
7
|
|
7
8
|
module LlmTranslate
|
8
9
|
class TranslatorEngine
|
9
|
-
attr_reader :config, :logger, :ai_client, :file_finder
|
10
|
+
attr_reader :config, :logger, :ai_client, :file_finder, :document_splitter
|
10
11
|
|
11
12
|
# Stores the given config, logger and AI client, and builds the FileFinder and
# DocumentSplitter collaborators from the same config and logger.
def initialize(config, logger, ai_client)
|
12
13
|
@config = config
|
13
14
|
@logger = logger
|
14
15
|
@ai_client = ai_client
|
15
16
|
@file_finder = FileFinder.new(config, logger)
|
17
|
+
@document_splitter = DocumentSplitter.new(config, logger)
|
16
18
|
end
|
17
19
|
|
18
20
|
def translate_file(input_path)
|
@@ -115,7 +117,10 @@ module LlmTranslate
|
|
115
117
|
end
|
116
118
|
|
117
119
|
def translate_content(content, file_path = nil)
|
118
|
-
|
120
|
+
# 检查是否需要启用文档拆分
|
121
|
+
if config.enable_document_splitting? && content.length > config.max_chars_for_splitting
|
122
|
+
translate_with_document_splitting(content, file_path)
|
123
|
+
elsif config.preserve_formatting?
|
119
124
|
translate_with_format_preservation(content)
|
120
125
|
else
|
121
126
|
ai_client.translate(content)
|
@@ -151,5 +156,94 @@ module LlmTranslate
|
|
151
156
|
# Translate the content with placeholders
|
152
157
|
ai_client.translate(content)
|
153
158
|
end
|
159
|
+
|
160
|
+
# Translates an oversized document in three steps: split it into chunks,
# translate the chunks, then merge the translations into one document.
#
# @param content [String] the document text
# @param file_path [String, nil] only used to enrich the log message
# @return whatever document_splitter.merge_translated_chunks returns
def translate_with_document_splitting(content, file_path = nil)
  origin = file_path ? " from #{file_path}" : ''
  logger.info "Document splitting enabled for large content#{origin}"

  # Step 1: split.
  pieces = document_splitter.split_document(content)
  logger.info "Translating #{pieces.length} chunks with #{config.concurrent_chunks} concurrent workers..."

  # Step 2: translate.
  translated = translate_chunks_concurrently(pieces)

  # Step 3: merge.
  logger.info 'Merging translated chunks...'
  document_splitter.merge_translated_chunks(translated)
end
|
175
|
+
|
176
|
+
# Translates +chunks+ in batches of +config.concurrent_chunks+, running the
# translations of one batch concurrently with Async, and returns the
# translations in the same order as +chunks+. Falls back to
# +translate_chunks_sequentially+ when +config.concurrent_chunks+ is 1 or less.
#
# Each chunk is paired with its position before batching, so two chunks with
# identical text still land in their own result slots (looking the position up
# with Array#index would map both to the first occurrence and leave a nil).
#
# @param chunks [Array<String>] chunks in document order
# @return [Array<String>] translated chunks, index-aligned with +chunks+
# @raise [StandardError] the error of a failed chunk translation, re-raised
#   after it has been logged
def translate_chunks_concurrently(chunks)
  return translate_chunks_sequentially(chunks) if config.concurrent_chunks <= 1

  total = chunks.length
  translated_chunks = Array.new(total)
  batches = chunks.each_with_index.each_slice(config.concurrent_chunks).to_a

  root_task = Async do |task|
    batches.each_with_index do |batch, batch_number|
      # Start one task per chunk of the current batch.
      batch_tasks = batch.map do |chunk, chunk_index|
        task.async do
          logger.info "Translating chunk #{chunk_index + 1}/#{total} (#{chunk.length} chars)..."

          begin
            translated_chunk = if config.preserve_formatting?
                                 translate_with_format_preservation(chunk)
                               else
                                 ai_client.translate(chunk)
                               end

            # Store the result in the slot that matches the chunk's position.
            translated_chunks[chunk_index] = translated_chunk

            logger.info "✓ Completed chunk #{chunk_index + 1}/#{total}"
            translated_chunk
          rescue StandardError => e
            logger.error "✗ Failed to translate chunk #{chunk_index + 1}: #{e.message}"
            raise
          end
        end
      end

      # Wait for the whole batch before starting the next one.
      batch_tasks.each(&:wait)

      # Pause between batches only; nothing follows the last batch.
      more_batches = batch_number < batches.length - 1
      sleep(config.request_interval) if config.request_interval.positive? && more_batches
    end
  end

  # Async hands back a task rather than the block's outcome. Waiting on it
  # makes sure all batches have finished and re-raises a chunk failure instead
  # of silently returning an array with nil holes.
  root_task.wait

  translated_chunks
end
|
222
|
+
|
223
|
+
# Translates the chunks one after another and returns the translations in
# input order. Between two chunks (never after the last one) it pauses for
# +config.request_interval+ seconds when that interval is positive. A failure
# is logged with the chunk number and then re-raised.
#
# @param chunks [Array<String>] chunks in document order
# @return [Array<String>] translated chunks
def translate_chunks_sequentially(chunks)
  total = chunks.length
  final_position = total - 1
  results = []

  chunks.each_with_index do |text, position|
    logger.info "Translating chunk #{position + 1}/#{total} (#{text.length} chars)..."

    begin
      results << if config.preserve_formatting?
                   translate_with_format_preservation(text)
                 else
                   ai_client.translate(text)
                 end

      # Throttle requests, but only when another chunk follows.
      pause = config.request_interval.positive? && position < final_position
      sleep(config.request_interval) if pause
    rescue StandardError => e
      logger.error "Failed to translate chunk #{position + 1}: #{e.message}"
      raise e
    end
  end

  results
end
|
154
248
|
end
|
155
249
|
end
|
data/llm_translate.yml
CHANGED
@@ -50,6 +50,18 @@ translation:
|
|
50
50
|
# 是否翻译代码注释
|
51
51
|
translate_code_comments: false
|
52
52
|
|
53
|
+
# 文档拆分配置
|
54
|
+
# 当文档字符数超过 max_chars 时,自动启用拆分功能
|
55
|
+
enable_splitting: true
|
56
|
+
|
57
|
+
# 触发拆分的最大字符数
|
58
|
+
max_chars: 20000
|
59
|
+
|
60
|
+
# 每个片段的目标字符数
|
61
|
+
every_chars: 18000
|
62
|
+
|
63
|
+
# 并发翻译的 chunk 数量
|
64
|
+
concurrent_chunks: 3
|
53
65
|
|
54
66
|
|
55
67
|
# 文件处理配置
|
@@ -125,13 +137,13 @@ error_handling:
|
|
125
137
|
|
126
138
|
# 性能配置
|
127
139
|
performance:
|
128
|
-
# 并发处理文件数
|
140
|
+
# 并发处理文件数(使用文档拆分时建议设为 1)
|
129
141
|
concurrent_files: 3
|
130
142
|
|
131
143
|
# 批处理大小(同时翻译的文件数)
|
132
144
|
batch_size: 5
|
133
145
|
|
134
|
-
# 请求间隔(避免 API 限流)
|
146
|
+
# 请求间隔(避免 API 限流,拆分文档时特别重要)
|
135
147
|
request_interval: 1 # 秒
|
136
148
|
|
137
149
|
# 内存使用限制
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: llm_translate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- LlmTranslate Team
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-09-
|
11
|
+
date: 2025-09-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: async
|
@@ -103,6 +103,8 @@ extensions: []
|
|
103
103
|
extra_rdoc_files: []
|
104
104
|
files:
|
105
105
|
- ".rspec_status"
|
106
|
+
- CONCURRENT_CHUNKS_UPDATE.md
|
107
|
+
- DOCUMENT_SPLITTER_SUMMARY.md
|
106
108
|
- README.md
|
107
109
|
- README.zh.md
|
108
110
|
- Rakefile
|
@@ -112,21 +114,20 @@ files:
|
|
112
114
|
- content/prompt.md
|
113
115
|
- content/todo.md
|
114
116
|
- exe/llm_translate
|
117
|
+
- large_document_config.yml
|
115
118
|
- lib/llm_translate.rb
|
116
119
|
- lib/llm_translate/ai_client.rb
|
117
120
|
- lib/llm_translate/cli.rb
|
118
121
|
- lib/llm_translate/config.rb
|
122
|
+
- lib/llm_translate/document_splitter.rb
|
119
123
|
- lib/llm_translate/file_finder.rb
|
120
124
|
- lib/llm_translate/logger.rb
|
121
125
|
- lib/llm_translate/translator_engine.rb
|
122
126
|
- lib/llm_translate/version.rb
|
123
127
|
- llm_translate.gemspec
|
124
128
|
- llm_translate.yml
|
125
|
-
- test_config.yml
|
126
129
|
- test_docs/sample.md
|
127
130
|
- test_docs_translated/sample.zh.md
|
128
|
-
- test_llm_translate.yml
|
129
|
-
- test_new_config.yml
|
130
131
|
homepage: https://github.com/tianlu1677/llm_translate
|
131
132
|
licenses:
|
132
133
|
- MIT
|
data/test_config.yml
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
# Test llm_translate configuration
|
2
|
-
ai:
|
3
|
-
api_key: ${LLM_TRANSLATE_API_KEY}
|
4
|
-
provider: "openai"
|
5
|
-
model: "gpt-4"
|
6
|
-
temperature: 0.3
|
7
|
-
max_tokens: 4000
|
8
|
-
retry_attempts: 3
|
9
|
-
retry_delay: 2
|
10
|
-
timeout: 60
|
11
|
-
|
12
|
-
translation:
|
13
|
-
target_language: "zh-CN"
|
14
|
-
default_prompt: |
|
15
|
-
Please translate the following Markdown content to Chinese, keeping all formatting intact:
|
16
|
-
- Preserve code blocks, links, images, and other Markdown syntax
|
17
|
-
- Keep English technical terms and product names
|
18
|
-
- Ensure natural and fluent translation
|
19
|
-
|
20
|
-
Content:
|
21
|
-
{content}
|
22
|
-
|
23
|
-
files:
|
24
|
-
input_directory: "./test_docs"
|
25
|
-
output_directory: "./test_docs_translated"
|
26
|
-
filename_suffix: ".zh"
|
27
|
-
include_patterns:
|
28
|
-
- "**/*.md"
|
29
|
-
- "**/*.markdown"
|
30
|
-
exclude_patterns: []
|
31
|
-
preserve_directory_structure: true
|
32
|
-
overwrite_policy: "overwrite"
|
33
|
-
|
34
|
-
logging:
|
35
|
-
level: "info"
|
36
|
-
output: "console"
|
37
|
-
verbose_translation: true
|
38
|
-
|
39
|
-
error_handling:
|
40
|
-
on_error: "log_and_continue"
|
41
|
-
max_consecutive_errors: 5
|
42
|
-
retry_on_failure: 2
|
43
|
-
generate_error_report: true
|
44
|
-
|
45
|
-
performance:
|
46
|
-
concurrent_files: 1
|
47
|
-
request_interval: 1
|
48
|
-
|
49
|
-
output:
|
50
|
-
show_progress: true
|
51
|
-
show_statistics: true
|
52
|
-
generate_report: true
|
data/test_llm_translate.yml
DELETED
@@ -1,176 +0,0 @@
|
|
1
|
-
# translator.yml - 翻译工具配置文件
|
2
|
-
|
3
|
-
# AI 模型配置
|
4
|
-
ai:
|
5
|
-
# API 密钥(建议使用环境变量 LLM_TRANSLATE_API_KEY)
|
6
|
-
api_key: ${LLM_TRANSLATE_API_KEY}
|
7
|
-
|
8
|
-
# 模型提供商(openai, anthropic, ollama 等)
|
9
|
-
provider: "openai"
|
10
|
-
|
11
|
-
# 模型名称
|
12
|
-
model: "gpt-4"
|
13
|
-
|
14
|
-
# 模型参数
|
15
|
-
temperature: 0.3
|
16
|
-
max_tokens: 4000
|
17
|
-
top_p: 1.0
|
18
|
-
|
19
|
-
# 请求重试配置
|
20
|
-
retry_attempts: 3
|
21
|
-
retry_delay: 2 # 秒
|
22
|
-
|
23
|
-
# 请求超时时间
|
24
|
-
timeout: 60 # 秒
|
25
|
-
|
26
|
-
# 翻译配置
|
27
|
-
translation:
|
28
|
-
# 默认翻译 prompt
|
29
|
-
default_prompt: |
|
30
|
-
请将以下 Markdown 内容翻译为中文,保持所有格式不变:
|
31
|
-
- 保留代码块、链接、图片等 Markdown 语法
|
32
|
-
- 保留英文的专业术语和产品名称
|
33
|
-
- 确保翻译自然流畅
|
34
|
-
|
35
|
-
内容:
|
36
|
-
{content}
|
37
|
-
|
38
|
-
# 目标语言
|
39
|
-
target_language: "zh-CN"
|
40
|
-
|
41
|
-
# 源语言(auto 为自动检测)
|
42
|
-
source_language: "auto"
|
43
|
-
|
44
|
-
# 是否保留原文格式
|
45
|
-
preserve_formatting: true
|
46
|
-
|
47
|
-
# 是否翻译代码注释
|
48
|
-
translate_code_comments: false
|
49
|
-
|
50
|
-
# 需要保留不翻译的内容模式
|
51
|
-
|
52
|
-
|
53
|
-
# 文件处理配置
|
54
|
-
files:
|
55
|
-
# 输入目录
|
56
|
-
input_directory: "./docs"
|
57
|
-
|
58
|
-
# 输出目录
|
59
|
-
output_directory: "./docs-translated"
|
60
|
-
|
61
|
-
# 文件名后缀策略
|
62
|
-
filename_strategy: "suffix" # suffix, replace, directory
|
63
|
-
filename_suffix: ".zh" # 仅当 strategy 为 suffix 时使用
|
64
|
-
|
65
|
-
# 包含的文件模式
|
66
|
-
include_patterns:
|
67
|
-
- "**/*.md"
|
68
|
-
- "**/*.markdown"
|
69
|
-
|
70
|
-
# 排除的文件模式
|
71
|
-
exclude_patterns:
|
72
|
-
- "**/node_modules/**"
|
73
|
-
- "**/.*"
|
74
|
-
- "**/*.tmp"
|
75
|
-
- "**/README.md" # 示例:排除 README 文件
|
76
|
-
|
77
|
-
# 是否保持目录结构
|
78
|
-
preserve_directory_structure: true
|
79
|
-
|
80
|
-
# 文件覆盖策略
|
81
|
-
overwrite_policy: "ask" # ask, overwrite, skip, backup
|
82
|
-
|
83
|
-
# 备份目录(当 overwrite_policy 为 backup 时)
|
84
|
-
backup_directory: "./backups"
|
85
|
-
|
86
|
-
# 日志配置
|
87
|
-
logging:
|
88
|
-
# 日志级别
|
89
|
-
level: "info" # debug, info, warn, error
|
90
|
-
|
91
|
-
# 日志输出位置
|
92
|
-
output: "console" # console, file, both
|
93
|
-
|
94
|
-
# 日志文件路径(当 output 包含 file 时)
|
95
|
-
file_path: "./logs/translator.log"
|
96
|
-
|
97
|
-
# 是否记录详细的翻译过程
|
98
|
-
verbose_translation: false
|
99
|
-
|
100
|
-
# 错误日志文件
|
101
|
-
error_log_path: "./logs/errors.log"
|
102
|
-
|
103
|
-
# 错误处理配置
|
104
|
-
error_handling:
|
105
|
-
# 遇到错误时的行为
|
106
|
-
on_error: "log_and_continue" # stop, log_and_continue, skip_file
|
107
|
-
|
108
|
-
# 最大连续错误数(超过则停止)
|
109
|
-
max_consecutive_errors: 5
|
110
|
-
|
111
|
-
# 错误重试次数
|
112
|
-
retry_on_failure: 2
|
113
|
-
|
114
|
-
# 生成错误报告
|
115
|
-
generate_error_report: true
|
116
|
-
error_report_path: "./logs/error_report.md"
|
117
|
-
|
118
|
-
# 性能配置
|
119
|
-
performance:
|
120
|
-
# 并发处理文件数
|
121
|
-
concurrent_files: 3
|
122
|
-
|
123
|
-
# 批处理大小(同时翻译的文件数)
|
124
|
-
batch_size: 5
|
125
|
-
|
126
|
-
# 请求间隔(避免 API 限流)
|
127
|
-
request_interval: 1 # 秒
|
128
|
-
|
129
|
-
# 内存使用限制
|
130
|
-
max_memory_mb: 500
|
131
|
-
|
132
|
-
# 输出配置
|
133
|
-
output:
|
134
|
-
# 是否显示进度条
|
135
|
-
show_progress: true
|
136
|
-
|
137
|
-
# 是否显示翻译统计
|
138
|
-
show_statistics: true
|
139
|
-
|
140
|
-
# 是否生成翻译报告
|
141
|
-
generate_report: true
|
142
|
-
report_path: "./reports/translation_report.md"
|
143
|
-
|
144
|
-
# 输出格式
|
145
|
-
format: "markdown" # markdown, json, yaml
|
146
|
-
|
147
|
-
# 是否保留元数据
|
148
|
-
include_metadata: true
|
149
|
-
|
150
|
-
# 预设配置(可通过 --preset 参数使用)
|
151
|
-
presets:
|
152
|
-
chinese:
|
153
|
-
translation:
|
154
|
-
target_language: "zh-CN"
|
155
|
-
default_prompt: "翻译为简体中文,保持技术术语的准确性"
|
156
|
-
|
157
|
-
japanese:
|
158
|
-
translation:
|
159
|
-
target_language: "ja"
|
160
|
-
default_prompt: "日本語に翻訳してください。技術用語は正確に保ってください"
|
161
|
-
|
162
|
-
english:
|
163
|
-
translation:
|
164
|
-
target_language: "en"
|
165
|
-
default_prompt: "Translate to English, maintaining technical accuracy"
|
166
|
-
|
167
|
-
# 自定义 Hook(高级功能)
|
168
|
-
hooks:
|
169
|
-
# 翻译前处理
|
170
|
-
pre_translation: null
|
171
|
-
|
172
|
-
# 翻译后处理
|
173
|
-
post_translation: null
|
174
|
-
|
175
|
-
# 文件处理完成后
|
176
|
-
post_file_processing: null
|