scancscode 1.0.40 → 1.0.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.trae/documents/csv_auto_translator_plan.md +59 -0
- package/.trae/documents/optimize_batching_plan.md +60 -0
- package/.trae/specs/refactor-translate-all/checklist.md +11 -0
- package/.trae/specs/refactor-translate-all/spec.md +66 -0
- package/.trae/specs/refactor-translate-all/tasks.md +49 -0
- package/bin/scanliterals.js +3 -3
- package/bin/slimlangs.js +3 -3
- package/bin/translatecsvfile.js +3 -0
- package/dist/src/CSVUtils.js +3 -0
- package/dist/src/CSharpStringExtractor.js +7 -0
- package/dist/src/CmdExecutor.js +54 -5
- package/dist/src/CsvAutoTranslator.js +272 -0
- package/dist/src/RunTranslateCSV.js +4 -0
- package/dist/test/CSharpStringExtractor.test.js +57 -1
- package/dist/test/CsvAutoTranslator.test.js +160 -0
- package/dist/test/TestConvert.test.js +5 -2
- package/docs/CSharpStringExtractor代码生成提示词.txt +72 -72
- package/jest.config.js +9 -9
- package/package.json +1 -1
- package/src/CSVUtils.ts +5 -2
- package/src/CSharpStringExtractor.ts +9 -0
- package/src/CmdExecutor.ts +55 -5
- package/src/CsvAutoTranslator.ts +261 -0
- package/src/RunConvert.ts +3 -3
- package/src/RunSlimLangs.ts +3 -3
- package/src/RunTranslateCSV.ts +3 -0
- package/src/TableScanner.ts +92 -92
- package/test/Auto-Out.csv +12833 -0
- package/test/Auto.csv +12485 -0
- package/test/CSharpStringExtractor.test.ts +61 -1
- package/test/CsvAutoTranslator.test.ts +186 -0
- package/test/TestConvert.test.ts +6 -1
- package/test/TestSpecialString.cs +24 -24
- package/tsconfig.json +109 -109
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# CSV自动翻译器 - 实现计划
|
|
2
|
+
|
|
3
|
+
## [ ] Task 1: 实现小牛翻译API客户端
|
|
4
|
+
- **Priority**: P0
|
|
5
|
+
- **Depends On**: None
|
|
6
|
+
- **Description**:
|
|
7
|
+
- 实现权限字符串生成(按ASCII码排序参数字段 + MD5加密)
|
|
8
|
+
- 实现批量翻译API调用
|
|
9
|
+
- 支持API Key、AppId配置
|
|
10
|
+
- **Success Criteria**:
|
|
11
|
+
- 能正确生成符合小牛API规范的authStr
|
|
12
|
+
- 能成功调用小牛翻译批量接口并返回结果
|
|
13
|
+
- **Test Requirements**:
|
|
14
|
+
- `programmatic` TR-1.1: authStr生成算法符合文档规范
|
|
15
|
+
- `programmatic` TR-1.2: API调用能处理响应和错误
|
|
16
|
+
- **Notes**: 使用Node.js内置crypto模块实现MD5
|
|
17
|
+
|
|
18
|
+
## [ ] Task 2: 实现CSV读取和解析逻辑
|
|
19
|
+
- **Priority**: P0
|
|
20
|
+
- **Depends On**: None
|
|
21
|
+
- **Description**:
|
|
22
|
+
- 使用现有的CSVUtils类读取CSV文件
|
|
23
|
+
- 提取第一列内容作为待翻译文本
|
|
24
|
+
- 检查第三列是否已有内容,跳过已有内容的行
|
|
25
|
+
- **Success Criteria**:
|
|
26
|
+
- 能正确读取CSV文件并解析为二维数组
|
|
27
|
+
- 能正确识别需要翻译的行(第三列为空的行)
|
|
28
|
+
- **Test Requirements**:
|
|
29
|
+
- `programmatic` TR-2.1: 能正确解析CSV文件
|
|
30
|
+
- `programmatic` TR-2.2: 能正确跳过第三列已有内容的行
|
|
31
|
+
|
|
32
|
+
## [ ] Task 3: 实现批量翻译和CSV写入
|
|
33
|
+
- **Priority**: P0
|
|
34
|
+
- **Depends On**: Task 1, Task 2
|
|
35
|
+
- **Description**:
|
|
36
|
+
- 分批翻译待翻译文本(考虑API限制,每批最多50条)
|
|
37
|
+
- 将翻译结果写入第三列
|
|
38
|
+
- 保存更新后的CSV文件
|
|
39
|
+
- **Success Criteria**:
|
|
40
|
+
- 翻译结果正确写入CSV第三列
|
|
41
|
+
- 已有内容的行不被覆盖
|
|
42
|
+
- 分批处理符合API限制
|
|
43
|
+
- **Test Requirements**:
|
|
44
|
+
- `programmatic` TR-3.1: 翻译结果正确写入
|
|
45
|
+
- `programmatic` TR-3.2: 已有内容不被覆盖
|
|
46
|
+
- `programmatic` TR-3.3: 分批处理正常工作
|
|
47
|
+
|
|
48
|
+
## [ ] Task 4: 集成和主入口
|
|
49
|
+
- **Priority**: P1
|
|
50
|
+
- **Depends On**: Task 3
|
|
51
|
+
- **Description**:
|
|
52
|
+
- 整合所有功能
|
|
53
|
+
- 提供清晰的类接口
|
|
54
|
+
- **Success Criteria**:
|
|
55
|
+
- 完整的CsvAutoTranslator类
|
|
56
|
+
- 易于使用的API
|
|
57
|
+
- **Test Requirements**:
|
|
58
|
+
- `programmatic` TR-4.1: 代码可以正常编译
|
|
59
|
+
- `human-judgment` TR-4.2: 代码风格与项目保持一致
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# 智能分批算法优化 - 实现计划
|
|
2
|
+
|
|
3
|
+
## [x] Task 1: 实现简化的单文本 JSON 长度估算函数
|
|
4
|
+
- **Priority**: P0
|
|
5
|
+
- **Depends On**: None
|
|
6
|
+
- **Description**:
|
|
7
|
+
- 实现 `estimateSingleTextJsonSize` 函数
|
|
8
|
+
- 基于 string.length 粗略估算单个文本的 JSON 字节长度
|
|
9
|
+
- 考虑 JSON 转义开销
|
|
10
|
+
- **Success Criteria**:
|
|
11
|
+
- 能快速估算单个文本的 JSON 长度
|
|
12
|
+
- **Test Requirements**:
|
|
13
|
+
- `programmatic` TR-1.1: 简单文本估算正确
|
|
14
|
+
- `programmatic` TR-1.2: 含特殊字符文本估算合理
|
|
15
|
+
- **Status**: 已完成
|
|
16
|
+
|
|
17
|
+
## [x] Task 2: 实现优化的 estimateJsonSize 函数
|
|
18
|
+
- **Priority**: P0
|
|
19
|
+
- **Depends On**: Task 1
|
|
20
|
+
- **Description**:
|
|
21
|
+
- 基于取巧原理优化 estimateJsonSize
|
|
22
|
+
- texts 的 json byteLength = 所有单 text 的 json byteLength 加和 + [] 的长度(2)
|
|
23
|
+
- 考虑逗号分隔符(每个 text 之间 1 字节)
|
|
24
|
+
- **Success Criteria**:
|
|
25
|
+
- 估算结果接近真实值
|
|
26
|
+
- 性能比 JSON.stringify 更快
|
|
27
|
+
- **Test Requirements**:
|
|
28
|
+
- `programmatic` TR-2.1: 估算结果误差在可接受范围
|
|
29
|
+
- `programmatic` TR-2.2: 不使用真实 JSON.stringify
|
|
30
|
+
- **Status**: 已完成
|
|
31
|
+
|
|
32
|
+
## [x] Task 3: 重构 smartBatch 函数使用优化算法
|
|
33
|
+
- **Priority**: P0
|
|
34
|
+
- **Depends On**: Task 2
|
|
35
|
+
- **Description**:
|
|
36
|
+
- 先用 string.length 粗略预估批量大小
|
|
37
|
+
- 从后往前逐 text 预估 json 长度,逐步求减
|
|
38
|
+
- 保持 500 条和 4900 字节的限制
|
|
39
|
+
- **Success Criteria**:
|
|
40
|
+
- 分批算法更高效
|
|
41
|
+
- 保持原有功能不变
|
|
42
|
+
- **Test Requirements**:
|
|
43
|
+
- `programmatic` TR-3.1: 每批不超过 500 条
|
|
44
|
+
- `programmatic` TR-3.2: 每批不超过 4900 字节
|
|
45
|
+
- `programmatic` TR-3.3: 分批顺序正确
|
|
46
|
+
- **Status**: 已完成
|
|
47
|
+
|
|
48
|
+
## [x] Task 4: 验证和性能测试
|
|
49
|
+
- **Priority**: P1
|
|
50
|
+
- **Depends On**: Task 3
|
|
51
|
+
- **Description**:
|
|
52
|
+
- 验证优化后的功能正常
|
|
53
|
+
- 对比性能改进
|
|
54
|
+
- **Success Criteria**:
|
|
55
|
+
- 功能正常
|
|
56
|
+
- 性能有提升
|
|
57
|
+
- **Test Requirements**:
|
|
58
|
+
- `programmatic` TR-4.1: TypeScript 编译无错误
|
|
59
|
+
- `programmatic` TR-4.2: 完整功能测试通过
|
|
60
|
+
- **Status**: 已完成
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# translateAll 函数重构 - 产品需求文档
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
- **Summary**: 重构 `translateAll` 函数,实现智能分批和并发控制,优化小牛翻译API的调用效率
|
|
5
|
+
- **Purpose**: 根据小牛翻译API的限制条件,智能分批文本内容,确保请求不超过API容量限制,同时支持并发请求提高翻译效率
|
|
6
|
+
- **Target Users**: 使用 CsvAutoTranslator 类的开发者和使用者
|
|
7
|
+
|
|
8
|
+
## Goals
|
|
9
|
+
- 实现智能分批算法,预测JSON字节长度
|
|
10
|
+
- 确保每批不超过500条文本
|
|
11
|
+
- 确保每批JSON字节长度不超过4900字节
|
|
12
|
+
- 支持最多 `this.maxBatchSize` 个并发请求
|
|
13
|
+
- 保持原有的功能不变
|
|
14
|
+
|
|
15
|
+
## Non-Goals (Out of Scope)
|
|
16
|
+
- 不修改其他函数的功能
|
|
17
|
+
- 不改变API调用接口
|
|
18
|
+
- 不修改其他类的结构
|
|
19
|
+
|
|
20
|
+
## Background & Context
|
|
21
|
+
- 小牛翻译API有以下限制:
|
|
22
|
+
- 请求长度:5000字节
|
|
23
|
+
- 列表大小:50条(原有限制)
|
|
24
|
+
- 当前代码简单按50条分批,没有考虑文本长度
|
|
25
|
+
- 需要智能分批可以更高效利用API
|
|
26
|
+
|
|
27
|
+
## Functional Requirements
|
|
28
|
+
- **FR-1**: 实现JSON字节长度预测功能
|
|
29
|
+
- **FR-2**: 实现智能分批算法
|
|
30
|
+
- **FR-3**: 实现并发请求控制
|
|
31
|
+
|
|
32
|
+
## Non-Functional Requirements
|
|
33
|
+
- **NFR-1**: 分批算法高效,不显著增加处理时间
|
|
34
|
+
- **NFR-2**: 保持代码简洁易读
|
|
35
|
+
|
|
36
|
+
## Constraints
|
|
37
|
+
- **Technical**: TypeScript, Node.js
|
|
38
|
+
- **Dependencies**: 小牛翻译API
|
|
39
|
+
- **API限制**: 请求JSON长度≤5000字节,每批≤500条
|
|
40
|
+
|
|
41
|
+
## Assumptions
|
|
42
|
+
- JSON序列化后的字节长度预测合理
|
|
43
|
+
- 原有maxBatchSize为50(并发数)
|
|
44
|
+
|
|
45
|
+
## Acceptance Criteria
|
|
46
|
+
|
|
47
|
+
### AC-1: 智能分批功能
|
|
48
|
+
- **Given**: 有大量待翻译文本
|
|
49
|
+
- **When**: 调用 translateAll 函数
|
|
50
|
+
- **Then**: 文本被智能分批,每批不超过500条,且JSON字节长度不超过4900字节
|
|
51
|
+
- **Verification**: `programmatic`
|
|
52
|
+
|
|
53
|
+
### AC-2: 并发控制功能
|
|
54
|
+
- **Given**: 有多批待翻译文本
|
|
55
|
+
- **When**: 调用 translateAll 函数
|
|
56
|
+
- **Then**: 同时进行最多 this.maxBatchSize 个并发请求
|
|
57
|
+
- **Verification**: `programmatic`
|
|
58
|
+
|
|
59
|
+
### AC-3: 功能不变
|
|
60
|
+
- **Given**: 有文本需要翻译
|
|
61
|
+
- **When**: 调用 translateAll 函数
|
|
62
|
+
- **Then**: 返回正确的翻译结果,顺序与输入一致
|
|
63
|
+
- **Verification**: `programmatic`
|
|
64
|
+
|
|
65
|
+
## Open Questions
|
|
66
|
+
- 无
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# translateAll 函数重构 - 实现计划
|
|
2
|
+
|
|
3
|
+
## [x] Task 1: 实现JSON字节长度预测函数
|
|
4
|
+
- **Priority**: P0
|
|
5
|
+
- **Depends On**: None
|
|
6
|
+
- **Description**:
|
|
7
|
+
- 实现 estimateJsonSize 函数,预测一批文本转JSON后的字节长度
|
|
8
|
+
- 考虑 srcText 数组的JSON序列化开销
|
|
9
|
+
- **Acceptance Criteria Addressed**: [AC-1]
|
|
10
|
+
- **Test Requirements**:
|
|
11
|
+
- `programmatic` TR-1.1: 能正确预测简单文本的JSON大小
|
|
12
|
+
- `programmatic` TR-1.2: 预测结果接近实际序列化大小
|
|
13
|
+
- **Status**: 已完成
|
|
14
|
+
|
|
15
|
+
## [x] Task 2: 实现智能分批算法
|
|
16
|
+
- **Priority**: P0
|
|
17
|
+
- **Depends On**: Task 1
|
|
18
|
+
- **Description**:
|
|
19
|
+
- 实现 smartBatch 函数,根据文本列表智能分批
|
|
20
|
+
- 每批不超过500条,且JSON字节不超过4900
|
|
21
|
+
- 从 texts 依序截取
|
|
22
|
+
- **Acceptance Criteria Addressed**: [AC-1]
|
|
23
|
+
- **Test Requirements**:
|
|
24
|
+
- `programmatic` TR-2.1: 每批数量不超过500条
|
|
25
|
+
- `programmatic` TR-2.2: 每批JSON大小不超过4900字节
|
|
26
|
+
- **Status**: 已完成
|
|
27
|
+
|
|
28
|
+
## [x] Task 3: 实现并发请求控制
|
|
29
|
+
- **Priority**: P0
|
|
30
|
+
- **Depends On**: Task 2
|
|
31
|
+
- **Description**:
|
|
32
|
+
- 实现并发请求调度,最多同时发起 this.maxBatchSize 个请求
|
|
33
|
+
- 保持结果顺序与输入一致
|
|
34
|
+
- **Acceptance Criteria Addressed**: [AC-2, AC-3]
|
|
35
|
+
- **Test Requirements**:
|
|
36
|
+
- `programmatic` TR-3.1: 并发数不超过 this.maxBatchSize
|
|
37
|
+
- `programmatic` TR-3.2: 翻译结果顺序正确
|
|
38
|
+
- **Status**: 已完成
|
|
39
|
+
|
|
40
|
+
## [x] Task 4: 集成测试
|
|
41
|
+
- **Priority**: P1
|
|
42
|
+
- **Depends On**: Task 3
|
|
43
|
+
- **Description**:
|
|
44
|
+
- 完整测试 translateAll 函数
|
|
45
|
+
- 验证所有功能正常工作
|
|
46
|
+
- **Acceptance Criteria Addressed**: [AC-1, AC-2, AC-3]
|
|
47
|
+
- **Test Requirements**:
|
|
48
|
+
- `programmatic` TR-4.1: 完整功能测试通过
|
|
49
|
+
- **Status**: 已完成
|
package/bin/scanliterals.js
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
require("../dist/src/RunConvert.js")
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
require("../dist/src/RunConvert.js")
|
package/bin/slimlangs.js
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
require("../dist/src/RunSlimLangs.js")
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
require("../dist/src/RunSlimLangs.js")
|
package/dist/src/CSVUtils.js
CHANGED
|
@@ -175,6 +175,9 @@ class CSVUtils {
|
|
|
175
175
|
let csvUtils = new CSVUtils(filePath);
|
|
176
176
|
let rows0 = await csvUtils.parseCsv();
|
|
177
177
|
let header = rows0[0];
|
|
178
|
+
if (header.findIndex(title => title == null || title.match(/^Column\d+$/)) >= 0) {
|
|
179
|
+
console.warn(`文件 ${filePath} 包含 Column 列, 请检查title是否正确: ${header}`);
|
|
180
|
+
}
|
|
178
181
|
if (rows2.length == 0) {
|
|
179
182
|
rows2.push([header[0], ...langs]);
|
|
180
183
|
}
|
|
@@ -1788,6 +1788,13 @@ class CSharpStringExtractor {
|
|
|
1788
1788
|
const falseBranch = falseBranchMatch[1];
|
|
1789
1789
|
let processedTrueBranch = trueBranch;
|
|
1790
1790
|
let processedFalseBranch = falseBranch;
|
|
1791
|
+
const trueBranchTrimmed = trueBranch.trim();
|
|
1792
|
+
if (!trueBranchTrimmed.includes(`${trClass}.${trFormatMethod}(`) && !trueBranchTrimmed.endsWith(`.${trMethod}()`)) {
|
|
1793
|
+
const whitespaceBefore = trueBranch.substring(0, trueBranch.search(/\S/));
|
|
1794
|
+
const actualPart = trueBranch.substring(trueBranch.search(/\S/));
|
|
1795
|
+
const whitespaceAfter = actualPart.search(/\s*$/) === 0 ? actualPart : actualPart.substring(actualPart.search(/\S/) + trueBranchTrimmed.length);
|
|
1796
|
+
processedTrueBranch = whitespaceBefore + trueBranchTrimmed + `.${trMethod}()` + whitespaceAfter;
|
|
1797
|
+
}
|
|
1791
1798
|
const falseBranchTrimmed = falseBranch.trim();
|
|
1792
1799
|
if (!falseBranchTrimmed.includes(`${trClass}.${trFormatMethod}(`) && !falseBranchTrimmed.endsWith(`.${trMethod}()`)) {
|
|
1793
1800
|
const whitespaceBefore = falseBranch.substring(0, falseBranch.search(/\S/));
|
package/dist/src/CmdExecutor.js
CHANGED
|
@@ -4,6 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.CmdExecutor = void 0;
|
|
7
|
+
const CsvAutoTranslator_1 = require("./CsvAutoTranslator");
|
|
7
8
|
const CSVUtils_1 = require("./CSVUtils");
|
|
8
9
|
const LiteralCollector_1 = require("./LiteralCollector");
|
|
9
10
|
const command_line_args_1 = __importDefault(require("command-line-args"));
|
|
@@ -69,11 +70,14 @@ class CmdExecutor {
|
|
|
69
70
|
await literalCollector.convert(cscodedir, configdir, outcsv, langs, trmethod, scanonly, verbose);
|
|
70
71
|
console.log("convert done.");
|
|
71
72
|
}
|
|
72
|
-
static testSlimCsv() {
|
|
73
|
-
let inCsvFile = [
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
73
|
+
static async testSlimCsv() {
|
|
74
|
+
let inCsvFile = [
|
|
75
|
+
"E:/DATA/Projects/ZhiYou/ProjectFClient/GameClient/Assets/Editor/Translation/Auto.csv",
|
|
76
|
+
"E:/DATA/Projects/ZhiYou/ProjectFClient/GameClient/Assets/Editor/Translation/Manual.csv",
|
|
77
|
+
];
|
|
78
|
+
let outCsvFile = "E:/DATA/Projects/ZhiYou/ProjectFClient/GameClient/Assets/Bundles/GameConfigs/Translation/ScriptTrans.csv";
|
|
79
|
+
let langs = ["zh_hk"];
|
|
80
|
+
await CSVUtils_1.CSVUtils.slimCsvWithLangs(inCsvFile, outCsvFile, langs);
|
|
77
81
|
// node bin/slimlangs.js --incsv E:/DATA/Projects/e-gbl-client/client/Assets/Bundles/GameConfigs/Translation/hello.csv --outcsv E:/DATA/Projects/e-gbl-client/client/Assets/Bundles/GameConfigs/Translation/hello-out.csv --langs zh_cn
|
|
78
82
|
}
|
|
79
83
|
static async runSlimCsvWithLangs() {
|
|
@@ -103,5 +107,50 @@ class CmdExecutor {
|
|
|
103
107
|
await CSVUtils_1.CSVUtils.slimCsvWithLangs(incsv, outcsv, langs);
|
|
104
108
|
console.log("slim csv with langs done.");
|
|
105
109
|
}
|
|
110
|
+
static async runTranslateCsvWithCmdOptions() {
|
|
111
|
+
const optionDefinitions = [
|
|
112
|
+
{ name: 'incsv', type: String },
|
|
113
|
+
{ name: 'outcsv', type: String },
|
|
114
|
+
{ name: 'fromLang', type: String },
|
|
115
|
+
{ name: 'toLangs', type: String, multiple: true, defaultOption: true },
|
|
116
|
+
{ name: 'apiKey', type: String },
|
|
117
|
+
{ name: 'appId', type: String },
|
|
118
|
+
];
|
|
119
|
+
const options = (0, command_line_args_1.default)(optionDefinitions);
|
|
120
|
+
let incsv = options.incsv;
|
|
121
|
+
let outcsv = options.outcsv;
|
|
122
|
+
let fromLang = options.fromLang ?? undefined;
|
|
123
|
+
let toLangs = options.toLangs ?? undefined;
|
|
124
|
+
let apiKey = options.apiKey;
|
|
125
|
+
let appId = options.appId;
|
|
126
|
+
console.log(`slim csv cmd options: `, incsv, outcsv, fromLang, toLangs);
|
|
127
|
+
let argv = process.argv;
|
|
128
|
+
if (incsv == null) {
|
|
129
|
+
console.error(`incsv missing:`, argv);
|
|
130
|
+
return;
|
|
131
|
+
}
|
|
132
|
+
if (outcsv == null) {
|
|
133
|
+
console.error(`outcsv missing:`, argv);
|
|
134
|
+
return;
|
|
135
|
+
}
|
|
136
|
+
if (fromLang == null) {
|
|
137
|
+
console.error(`fromLang missing:`, argv);
|
|
138
|
+
return;
|
|
139
|
+
}
|
|
140
|
+
if (toLangs != null && toLangs.length == 0) {
|
|
141
|
+
console.error(`toLangs is empty:`, argv);
|
|
142
|
+
return;
|
|
143
|
+
}
|
|
144
|
+
if (apiKey == null) {
|
|
145
|
+
console.error(`apiKey missing:`, argv);
|
|
146
|
+
return;
|
|
147
|
+
}
|
|
148
|
+
if (appId == null) {
|
|
149
|
+
console.error(`appId missing:`, argv);
|
|
150
|
+
return;
|
|
151
|
+
}
|
|
152
|
+
await CsvAutoTranslator_1.CsvAutoTranslator.translateCsvWithLangs(incsv, outcsv, fromLang, toLangs, apiKey, appId);
|
|
153
|
+
console.log("translate csv with cmd options done.");
|
|
154
|
+
}
|
|
106
155
|
}
|
|
107
156
|
exports.CmdExecutor = CmdExecutor;
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.CsvAutoTranslator = void 0;
|
|
37
|
+
const crypto = __importStar(require("crypto"));
|
|
38
|
+
const https = __importStar(require("https"));
|
|
39
|
+
const CSVUtils_1 = require("./CSVUtils");
|
|
40
|
+
/**
 * Fills in missing translations of a CSV language table by calling the
 * NiuTrans array-translation HTTP endpoint (see `apiUrl`).
 *
 * Texts are packed into batches bounded by `MAX_BATCH_TEXT_COUNT` entries and
 * `MAX_BATCH_JSON_SIZE` bytes of serialized JSON, and at most `maxBatchSize`
 * batch requests are kept in flight at the same time.
 */
class CsvAutoTranslator {
    apiKey;
    appId;
    apiUrl = "https://api.niutrans.com/v2/text/translate/array";
    /** Maximum number of batch requests in flight at once (a concurrency limit, not a batch length). */
    maxBatchSize = 5;
    /** Maximum number of texts in one batch. */
    MAX_BATCH_TEXT_COUNT = 50;
    /** Maximum UTF-8 byte length of one batch once serialized as a JSON array. */
    MAX_BATCH_JSON_SIZE = 4900;
    /** Pause in milliseconds a worker makes after each request; 0 disables it. */
    requestIntervalMs = 1000;
    /** Maps CSV column language names to the language codes sent to the API. */
    langMap = new Map([
        ["zh_cn", "zh"],
        ["zh_hk", "cht"],
        ["en_us", "en"],
    ]);
    /**
     * @param {string} apiKey - Secret key mixed into the request signature.
     * @param {string} appId - Application id sent with every request.
     */
    constructor(apiKey, appId) {
        this.apiKey = apiKey;
        this.appId = appId;
    }
    /**
     * Builds the request signature: the non-empty parameters are sorted by
     * key, joined as `key=value` pairs with `&`, prefixed with
     * `apikey=<apiKey>&`, and hashed with MD5.
     *
     * @param {Object<string, *>} params - Request parameters to sign.
     * @returns {string} Lower-case hexadecimal MD5 digest.
     */
    generateAuthStr(params) {
        const paramStr = Object.keys(params)
            .sort()
            .filter((key) => params[key] !== "" && params[key] != null)
            .map((key) => `${key}=${params[key]}`)
            .join("&");
        return crypto.createHash("md5").update(`apikey=${this.apiKey}&${paramStr}`).digest("hex");
    }
    /**
     * POSTs one batch of texts to `apiUrl` and resolves with the parsed JSON
     * response body.
     *
     * @param {string[]} texts - Texts sent as `srcText`.
     * @param {string} from - Source language code.
     * @param {string} to - Target language code.
     * @returns {Promise<Object>} Parsed response; rejects on a network error
     *   or when the body is not valid JSON.
     */
    async requestTranslate(texts, from, to) {
        const params = {
            from: from,
            to: to,
            appId: this.appId,
            timestamp: Date.now().toString(),
        };
        const authStr = this.generateAuthStr(params);
        const postData = JSON.stringify({
            ...params,
            srcText: texts,
            authStr: authStr,
        });
        const url = new URL(this.apiUrl);
        const options = {
            hostname: url.hostname,
            // Honour an explicit port or query string if apiUrl is overridden.
            port: url.port || 443,
            path: `${url.pathname}${url.search}`,
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                "Content-Length": Buffer.byteLength(postData),
            },
        };
        return new Promise((resolve, reject) => {
            const req = https.request(options, (res) => {
                // Collect raw buffers and decode once at the end: decoding
                // chunk by chunk corrupts a multi-byte UTF-8 character that
                // happens to be split across two chunks.
                const chunks = [];
                res.on("data", (chunk) => {
                    chunks.push(chunk);
                });
                res.on("error", reject);
                res.on("end", () => {
                    try {
                        resolve(JSON.parse(Buffer.concat(chunks).toString("utf8")));
                    }
                    catch (e) {
                        reject(e);
                    }
                });
            });
            req.on("error", reject);
            req.write(postData);
            req.end();
        });
    }
    /**
     * Translates one batch and returns the translated strings.
     *
     * @param {string[]} texts - Texts of the batch.
     * @param {string} from - Source language code.
     * @param {string} to - Target language code.
     * @returns {Promise<string[]>} One string per `tgtList` entry; a missing
     *   `tgtText` becomes "".
     * @throws {Error} When `resultCode` is not 200 or `tgtList` is missing.
     */
    async translateBatch(texts, from, to) {
        const response = await this.requestTranslate(texts, from, to);
        // NOTE(review): the code is compared as a string so a numeric 200
        // is accepted as well — confirm the type the API actually returns.
        if (String(response.resultCode) !== "200") {
            throw new Error(`翻译失败: ${response.resultMsg}`);
        }
        if (!Array.isArray(response.tgtList)) {
            throw new Error(`翻译失败: 响应缺少 tgtList`);
        }
        return response.tgtList.map((item) => item?.tgtText || "");
    }
    /**
     * Byte length of one text serialized as JSON, plus one byte for the
     * separating comma unless `isSingle` is true.
     *
     * @param {string} text - Text to measure.
     * @param {boolean} isSingle - True when no comma must be accounted for.
     * @returns {number} Size in bytes.
     */
    estimateSingleTextJsonSize(text, isSingle) {
        return Buffer.byteLength(JSON.stringify(text)) + (isSingle ? 0 : 1);
    }
    /**
     * Byte length of a list of texts serialized as a JSON array.
     *
     * @param {string[]} texts - Texts to measure.
     * @returns {number} Size in bytes.
     */
    estimateJsonSize(texts) {
        return Buffer.byteLength(JSON.stringify(texts));
    }
    /**
     * Splits `texts`, in order, into batches of at most
     * `MAX_BATCH_TEXT_COUNT` entries whose JSON array serialization is at
     * most `MAX_BATCH_JSON_SIZE` bytes.
     *
     * A text that cannot fit in a batch even on its own is reported with
     * console.error and replaced by a batch holding a single empty string,
     * so batch positions stay aligned with the input.
     *
     * @param {string[]} texts - Texts to split.
     * @param {string[][]} [batches] - Optional list the batches are appended to.
     * @returns {string[][]} `batches` (or a new list) with the new batches appended.
     */
    smartBatch(texts, batches) {
        const resultBatches = batches || [];
        const emptyArraySize = 2; // the "[" and "]" of the serialized array
        let currentBatch = [];
        let currentSize = emptyArraySize;
        const flush = () => {
            if (currentBatch.length > 0) {
                resultBatches.push(currentBatch);
                currentBatch = [];
                currentSize = emptyArraySize;
            }
        };
        for (let i = 0; i < texts.length; i++) {
            const text = texts[i];
            const ownSize = this.estimateSingleTextJsonSize(text, true);
            if (emptyArraySize + ownSize > this.MAX_BATCH_JSON_SIZE) {
                // Too long even alone: keep its slot with a placeholder.
                flush();
                console.error(`无法将文本分成合适的批次, 存在过长的文本, 起始索引: ${i}, 结束索引: ${i}`);
                resultBatches.push(['']);
                continue;
            }
            // A comma is only needed when the batch already holds a text.
            let addedSize = ownSize + (currentBatch.length > 0 ? 1 : 0);
            if (currentBatch.length >= this.MAX_BATCH_TEXT_COUNT
                || currentSize + addedSize > this.MAX_BATCH_JSON_SIZE) {
                flush();
                addedSize = ownSize;
            }
            currentBatch.push(text);
            currentSize += addedSize;
        }
        flush();
        return resultBatches;
    }
    /**
     * Translates all texts, running at most `maxBatchSize` batch requests
     * concurrently, and returns the translations in input order.
     *
     * @param {string[]} texts - Texts to translate.
     * @param {string} from - Source language code.
     * @param {string} to - Target language code.
     * @returns {Promise<string[]>} One translation per input text.
     */
    async translateAll(texts, from, to) {
        const batches = this.smartBatch(texts);
        const results = new Array(batches.length);
        const concurrencyLimit = this.maxBatchSize;
        let index = 0;
        let batchCount = 0;
        // Set once a request fails so the other workers stop taking new batches.
        let failed = false;
        const processBatch = async () => {
            while (!failed && index < batches.length) {
                const batchIndex = index++;
                const batch = batches[batchIndex];
                batchCount++;
                console.log(`batchCount++: ${batchCount}`);
                let batchResults;
                try {
                    batchResults = await this.translateBatch(batch, from, to);
                }
                catch (e) {
                    failed = true;
                    throw e;
                }
                if (this.requestIntervalMs > 0) {
                    await new Promise((resolve) => setTimeout(resolve, this.requestIntervalMs));
                }
                batchCount--;
                console.log(`batchCount--: ${batchCount}`);
                if (batchResults.length !== batch.length) {
                    // Pad or truncate so a short or long answer cannot shift
                    // the translations of every following text.
                    console.warn(`翻译结果数量与请求数量不一致: 期望 ${batch.length}, 实际 ${batchResults.length}`);
                    const received = batchResults;
                    batchResults = Array.from(batch, (_, k) => received[k] ?? "");
                }
                results[batchIndex] = batchResults;
            }
        };
        const workers = [];
        for (let i = 0; i < concurrencyLimit && i < batches.length; i++) {
            workers.push(processBatch());
        }
        await Promise.all(workers);
        return results.flat();
    }
    /**
     * Translates, in place, the rows whose `fromLang` column is empty.
     *
     * The first row is the header. A data row is translated when its first
     * cell is non-blank and its `fromLang` cell is blank; the first cell is
     * the text sent for translation. Every header column that is not "key",
     * not `fromLang`, present in `langMap`, and (when `toLangs` is given)
     * listed in `toLangs` receives the translations.
     *
     * @param {string[][]} rows - Parsed CSV rows, header first; mutated.
     * @param {string} fromLang - Header name of the source language column.
     * @param {string[]} [toLangs] - Target column names; all when omitted.
     * @returns {Promise<number>} Number of rows selected for translation.
     */
    async translateCsvRows(rows, fromLang, toLangs) {
        if (rows.length === 0) {
            console.log("CSV文件为空");
            return 0;
        }
        const header = rows[0];
        const fromLangIndex = header.indexOf(fromLang);
        if (fromLangIndex === -1) {
            console.error(`未找到 ${fromLang} 列`);
            return 0;
        }
        const fromLang2 = this.langMap.get(fromLang);
        if (fromLang2 == null) {
            console.error(`未找到 ${fromLang} 的目标语言`);
            return 0;
        }
        const needTranslateIndices = [];
        const needTranslateTexts = [];
        for (let i = 1; i < rows.length; i++) {
            const row = rows[i];
            if (!row[0] || row[0].trim() === "") {
                continue;
            }
            const existing = row[fromLangIndex];
            if (existing && existing.trim() !== "") {
                continue;
            }
            // The source cell is blank here (possibly whitespace only), so
            // the key in the first column is the text to translate.
            needTranslateIndices.push(i);
            needTranslateTexts.push(row[0]);
        }
        if (needTranslateTexts.length === 0) {
            console.log("没有需要翻译的内容");
            return 0;
        }
        console.log(`开始翻译 ${needTranslateTexts.length} 条内容...`);
        for (let curLangIndex = 0; curLangIndex < header.length; curLangIndex++) {
            const title = header[curLangIndex];
            if (title == null) {
                // An untitled column cannot name a language.
                continue;
            }
            const lang = title.trim().toLowerCase();
            if (lang == "key") {
                continue;
            }
            if (toLangs != undefined && !toLangs.includes(lang)) {
                continue;
            }
            if (lang == fromLang || !this.langMap.has(lang)) {
                continue;
            }
            const toLang = this.langMap.get(lang);
            if (toLang == null) {
                console.error(`未找到 ${lang} 的目标语言`);
                continue;
            }
            const translations = await this.translateAll(needTranslateTexts, fromLang2, toLang);
            for (let j = 0; j < needTranslateIndices.length; j++) {
                const row = rows[needTranslateIndices[j]];
                // Short rows are padded so the target cell exists.
                while (row.length < header.length) {
                    row.push("");
                }
                row[curLangIndex] = translations[j] ?? "";
            }
        }
        return needTranslateTexts.length;
    }
    /**
     * Reads a CSV file, translates its untranslated rows and, when at least
     * one row was selected, writes the result to `outFilePath`.
     *
     * @param {string} filePath - CSV file to read.
     * @param {string} outFilePath - CSV file to write.
     * @param {string} [fromLang="auto"] - Header name of the source column.
     * @param {string[]} [toLangs] - Target column names; all when omitted.
     * @returns {Promise<void>}
     */
    async translateCsv(filePath, outFilePath, fromLang = "auto", toLangs) {
        const csvUtils = new CSVUtils_1.CSVUtils(filePath);
        const rows = await csvUtils.parseCsv();
        const translatedCount = await this.translateCsvRows(rows, fromLang, toLangs);
        if (translatedCount > 0) {
            await CSVUtils_1.CSVUtils.writeCsv(outFilePath, rows);
            console.log(`翻译完成,已更新 ${translatedCount} 条内容到 ${outFilePath}`);
        }
    }
    /**
     * Convenience wrapper: creates a translator with the given credentials
     * and runs `translateCsv`.
     *
     * @param {string} filePath - CSV file to read.
     * @param {string} outFilePath - CSV file to write.
     * @param {string} fromLang - Header name of the source column.
     * @param {string[]} [langs] - Target column names; all when omitted.
     * @param {string} apiKey - Secret key mixed into the request signature.
     * @param {string} appId - Application id sent with every request.
     * @returns {Promise<void>}
     */
    static async translateCsvWithLangs(filePath, outFilePath, fromLang, langs, apiKey, appId) {
        const translator = new CsvAutoTranslator(apiKey, appId);
        await translator.translateCsv(filePath, outFilePath, fromLang, langs);
    }
}
|
|
272
|
+
exports.CsvAutoTranslator = CsvAutoTranslator;
|