llm_translate 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -1
- data/README.zh.md +1 -2
- data/content/llm_translate.yml +0 -3
- data/lib/llm_translate/document_splitter.rb +1 -30
- data/lib/llm_translate/version.rb +1 -1
- data/llm_translate.yml +1 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3db97a216e9ac7c9962108100342b02a3fa9e645a2863e14250864cec6ad84be
|
4
|
+
data.tar.gz: 108c48308ccb2fcc14a4d5f81877106042b7f5461ac60ae54252b2446232f346
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f668860d02e1368dd991b6c59f72b78ec6da8a77cd0b6b12aa39164a2420ab5c421de87c413f81318a19dd5202936761913d0dd3040916782a27d5cf85c87cad
|
7
|
+
data.tar.gz: c7ebe532bec72bcdcaeaf0fb6f9acd527b7475cd854da4cbafaee5e1bb0352352855298f2e509056fa374ed505c161d38c0366d2bf743cc142d9d88366d6495f
|
data/README.md
CHANGED
data/README.zh.md
CHANGED
data/content/llm_translate.yml
CHANGED
data/lib/llm_translate/document_splitter.rb
CHANGED
@@ -29,10 +29,7 @@ module LlmTranslate
|
|
29
29
|
logger.info "Merging #{translated_chunks.length} translated chunks..."
|
30
30
|
|
31
31
|
# 简单合并,用双换行连接
|
32
|
-
|
33
|
-
|
34
|
-
# 清理多余的空行
|
35
|
-
clean_merged_content(merged_content)
|
32
|
+
translated_chunks.join("\n\n")
|
36
33
|
end
|
37
34
|
|
38
35
|
private
|
@@ -68,24 +65,6 @@ module LlmTranslate
|
|
68
65
|
# 标题行
|
69
66
|
return true if line.start_with?('#') && line.match?(/^#+\s+/)
|
70
67
|
|
71
|
-
# 代码块开始/结束
|
72
|
-
return true if line.match?(/^```/)
|
73
|
-
|
74
|
-
# 列表项
|
75
|
-
return true if line.match?(/^\s*[-*+]\s+/) || line.match?(/^\s*\d+\.\s+/)
|
76
|
-
|
77
|
-
# 引用块
|
78
|
-
return true if line.match?(/^>\s+/)
|
79
|
-
|
80
|
-
# 水平分割线
|
81
|
-
return true if line.match?(/^[-*_]{3,}$/)
|
82
|
-
|
83
|
-
# 表格行
|
84
|
-
return true if line.match?(/^\|.*\|$/)
|
85
|
-
|
86
|
-
# 空行后的非空行(新段落)
|
87
|
-
return true if current_section.end_with?("\n\n") && !line.strip.empty?
|
88
|
-
|
89
68
|
false
|
90
69
|
end
|
91
70
|
|
@@ -145,13 +124,5 @@ module LlmTranslate
|
|
145
124
|
chunks << current_chunk.strip unless current_chunk.strip.empty?
|
146
125
|
chunks
|
147
126
|
end
|
148
|
-
|
149
|
-
def clean_merged_content(content)
|
150
|
-
# 移除多余的空行(超过2个连续换行的情况)
|
151
|
-
cleaned = content.gsub(/\n{3,}/, "\n\n")
|
152
|
-
|
153
|
-
# 确保文档以单个换行结尾
|
154
|
-
"#{cleaned.strip}\n"
|
155
|
-
end
|
156
127
|
end
|
157
128
|
end
|
data/llm_translate.yml
CHANGED
@@ -61,7 +61,7 @@ translation:
|
|
61
61
|
every_chars: 18000
|
62
62
|
|
63
63
|
# 并发翻译的 chunk 数量
|
64
|
-
concurrent_chunks:
|
64
|
+
concurrent_chunks: 2
|
65
65
|
|
66
66
|
|
67
67
|
# 文件处理配置
|
@@ -140,9 +140,6 @@ performance:
|
|
140
140
|
# 并发处理文件数(使用文档拆分时建议设为 1)
|
141
141
|
concurrent_files: 3
|
142
142
|
|
143
|
-
# 批处理大小(同时翻译的文件数)
|
144
|
-
batch_size: 5
|
145
|
-
|
146
143
|
# 请求间隔(避免 API 限流,拆分文档时特别重要)
|
147
144
|
request_interval: 1 # 秒
|
148
145
|
|