@thor123141245r/ai-translate 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agentdocs/code-changes/2026-01-22/AI/347/277/273/350/257/221/345/231/250TS/345/256/236/347/216/260-/345/256/236/347/216/260.md +22 -0
- package/.agentdocs/code-changes/2026-01-23/CLI/345/210/206/345/217/221-npx/345/256/236/347/216/260.md +18 -0
- package/.agentdocs/code-changes/2026-01-23/sora-watermask-remover-/345/233/275/351/231/205/345/214/226/347/277/273/350/257/221-/345/256/236/347/216/260.md +37 -0
- package/.agentdocs/code-changes/2026-01-23//351/205/215/347/275/256/350/257/273/345/217/226-/347/216/257/345/242/203/345/217/230/351/207/217/344/274/230/345/205/210-/345/256/236/347/216/260.md +22 -0
- package/.agentdocs/plans/2026-01-22/AI/347/277/273/350/257/221/345/231/250TS/345/256/236/347/216/260-/344/274/230/345/214/226/346/226/271/346/241/210.md +67 -0
- package/.agentdocs/plans/2026-01-23/CLI/345/210/206/345/217/221-npx/346/226/271/346/241/210.md +60 -0
- package/.agentdocs/plans/2026-01-23/sora-watermask-remover-/345/233/275/351/231/205/345/214/226/347/277/273/350/257/221-/344/274/230/345/214/226/346/226/271/346/241/210.md +51 -0
- package/.agentdocs/plans/2026-01-23//351/205/215/347/275/256/350/257/273/345/217/226-/347/216/257/345/242/203/345/217/230/351/207/217/344/274/230/345/205/210-/344/274/230/345/214/226/346/226/271/346/241/210.md +80 -0
- package/README.md +120 -0
- package/SKILL.md +103 -0
- package/dist/asyncTransform.d.ts +7 -0
- package/dist/asyncTransform.d.ts.map +1 -0
- package/dist/asyncTransform.js +23 -0
- package/dist/asyncTransform.js.map +1 -0
- package/dist/bin/ai-translate.d.ts +3 -0
- package/dist/bin/ai-translate.d.ts.map +1 -0
- package/dist/bin/ai-translate.js +4 -0
- package/dist/bin/ai-translate.js.map +1 -0
- package/dist/cli.d.ts +7 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +259 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +3 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +3 -0
- package/dist/logger.js.map +1 -0
- package/dist/model.d.ts +29 -0
- package/dist/model.d.ts.map +1 -0
- package/dist/model.js +103 -0
- package/dist/model.js.map +1 -0
- package/dist/prompt.d.ts +12 -0
- package/dist/prompt.d.ts.map +1 -0
- package/dist/prompt.js +51 -0
- package/dist/prompt.js.map +1 -0
- package/dist/split.d.ts +27 -0
- package/dist/split.d.ts.map +1 -0
- package/dist/split.js +87 -0
- package/dist/split.js.map +1 -0
- package/dist/utils.d.ts +7 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/utils.js +14 -0
- package/dist/utils.js.map +1 -0
- package/package.json +42 -0
- package/src/asyncTransform.ts +31 -0
- package/src/bin/ai-translate.ts +5 -0
- package/src/cli.ts +313 -0
- package/src/index.ts +9 -0
- package/src/logger.ts +3 -0
- package/src/model.ts +139 -0
- package/src/prompt.ts +71 -0
- package/src/split.ts +111 -0
- package/src/utils.ts +15 -0
- package/tsconfig.json +19 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"split.js","sourceRoot":"","sources":["../src/split.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,8BAA8B,EAAE,MAAM,0BAA0B,CAAA;AAKzE,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAA;AAQpD,MAAM,kBAAkB,GAA6B;IACnD,GAAG,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,CAAC;IACzB,EAAE,EAAE,CAAC,KAAK,CAAC;IACX,IAAI,EAAE,CAAC,OAAO,CAAC;IACf,EAAE,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,CAAC;IAClD,GAAG,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,CAAC,QAAQ,CAAC;IACjB,MAAM,EAAE,CAAC,KAAK,CAAC;IACf,GAAG,EAAE,CAAC,MAAM,CAAC;IACb,IAAI,EAAE,CAAC,KAAK,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,CAAC,KAAK,CAAC;IACb,KAAK,EAAE,CAAC,QAAQ,EAAE,KAAK,CAAC;IACxB,KAAK,EAAE,CAAC,QAAQ,CAAC;IACjB,QAAQ,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,OAAO,CAAC;IACvC,KAAK,EAAE,CAAC,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC;IACjC,IAAI,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC;CACrE,CAAA;AAED,IAAI,iBAA4E,CAAA;AAEhF,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAClC,OAAe,EAC4B,EAAE;IAC7C,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACvB,iBAAiB,GAAG,EAAE,CAAA;QACtB,KAAK,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,EAAE,CAAC;YAC/D,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;gBACzB,iBAAiB,CAAC,GAAG,CAAC,GAAG,GAAoC,CAAA;YAC/D,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAA;AACnC,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,0BAA0B,GAAG,CAAC,OAA4B,EAAE,EAAE;IACzE,MAAM,EAAE,MAAM,GAAG,UAAU,EAAE,GAAG,IAAI,EAAE,GAAG,OAAO,IAAI,EAAE,CAAA;IACtD,MAAM,UAAU,GAAG,kBAAkB,CAAC,MAAM,CAAC;QAC3C,CAAC,CAAC,8BAA8B,CAAC,wBAAwB,CAAC,MAAM,CAAC;QACjE,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,CAAA;IAC3B,OAAO,IAAI,0BAA0B,CAAC;QACpC,SAAS,EAAE,IAAI;QACf,YAAY,EAAE,CAAC;QACf,UAAU;QACV,GAAG,IAAI;KACR,CAAC,CAAA;AACJ,CAAC,CAAA;AAED,uFAAuF;AACvF,MAAM,OAAO,0BAA2B,SAAQ,8BAA8B;IACzD,QAAQ,CAAC,IAAc,EAAE,SAAiB;QAC3D,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;QACjC,OAAO,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAA;IAClC,CAAC;CACF;AAED,MAAM,OAAO,kBAAmB,SAAQ,cAAc;IACnC,aAAa,CAAkB;IACxC,UAAU,CAAU;IACpB,OAAO,CAAQ;IAEvB,Y
AAY,OAA6C;QACvD,MAAM,EAAE,YAAY,EAAE,GAAG,OAAO,IAAI,EAAE,CAAA;QACtC,KAAK,EAAE,CAAA;QACP,IAAI,CAAC,aAAa,GAAG,YAAY,IAAI,0BAA0B,EAAE,CAAA;QACjE,IAAI,CAAC,UAAU,GAAG,EAAE,CAAA;QACpB,IAAI,CAAC,OAAO,GAAG,EAAE,CAAA;IACnB,CAAC;IAES,KAAK,CAAC,eAAe,CAC7B,KAAsB,EACtB,QAAmC;QAEnC,MAAM,IAAI,GAAG,cAAc,CAAC,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAA;QACrD,IAAI,CAAC,OAAO,IAAI,IAAI,CAAA;QAEpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAClE,IAAI,CAAC,OAAO,GAAG,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAA;QACpC,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;QAEnD,OAAO,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;YAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAA;YACxC,IAAI,QAAQ,KAAK,SAAS;gBAAE,MAAK;YACjC,MAAM,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;YACjC,IAAI,IAAI,EAAE,CAAC;gBACT,OAAO,IAAI,CAAA;YACb,CAAC;QACH,CAAC;QACD,OAAO,KAAK,CAAA;IACd,CAAC;IAED,MAAM,CAAC,IAAoC;QACzC,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACvC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QACrB,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACvB,IAAI,EAAE,CAAA;IACR,CAAC;CACF"}
|
package/dist/utils.d.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/** True when `text` is empty, undefined, or whitespace-only. */
export declare const isWhiteSpace: (text?: string) => boolean;
/**
 * Splits `text` into its leading (`head`) and trailing (`tail`) whitespace
 * runs (each capped at 200 characters; see dist/utils.js).
 */
export declare const whiteSpace: (text?: string) => {
    head: string;
    tail: string;
};
/** Re-applies the leading/trailing whitespace of `inp` around trimmed `out`. */
export declare const preserveWhiteSpace: (inp: string, out: string) => string;
//# sourceMappingURL=utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,YAAY,GAAI,aAAS,YAAuB,CAAA;AAE7D,eAAO,MAAM,UAAU,GAAI,aAAS;;;CAOnC,CAAA;AAED,eAAO,MAAM,kBAAkB,GAAI,KAAK,MAAM,EAAE,KAAK,MAAM,WAG1D,CAAA"}
|
package/dist/utils.js
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** True when the given text is empty or contains only whitespace. */
export const isWhiteSpace = (text = '') => text.trim().length === 0;
|
|
2
|
+
/**
 * Extracts the leading (`head`) and trailing (`tail`) whitespace of `text`.
 * Bounded quantifiers ({0,200}) keep the regex scans linear on any input.
 */
export const whiteSpace = (text = '') => {
    const leading = /^\s{0,200}/.exec(text);
    const head = leading ? leading[0] : '';
    // An all-whitespace string is reported as head only, never head + tail.
    if (head.length === text.length) {
        return { head, tail: '' };
    }
    const trailing = /\s{0,200}$/.exec(text);
    return { head, tail: trailing ? trailing[0] : '' };
};
|
|
10
|
+
/**
 * Returns `out` trimmed, wrapped in the leading/trailing whitespace of `inp`
 * (each side capped at 200 characters; whitespace-only input yields head only).
 */
export const preserveWhiteSpace = (inp, out) => {
    const head = /^\s{0,200}/.exec(inp)?.[0] || '';
    const tail = head.length === inp.length ? '' : /\s{0,200}$/.exec(inp)?.[0] || '';
    return head + out.trim() + tail;
};
|
|
14
|
+
//# sourceMappingURL=utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"utils.js","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,IAAI,GAAG,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAA;AAE7D,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,IAAI,GAAG,EAAE,EAAE,EAAE;IACtC,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;IAC/C,IAAI,IAAI,GAAG,EAAE,CAAA;IACb,IAAI,IAAI,CAAC,MAAM,KAAK,IAAI,CAAC,MAAM,EAAE,CAAC;QAChC,IAAI,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;IAC3C,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;AACvB,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,GAAW,EAAE,GAAW,EAAE,EAAE;IAC7D,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,UAAU,CAAC,GAAG,CAAC,CAAA;IACtC,OAAO,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,GAAG,IAAI,CAAA;AACjC,CAAC,CAAA"}
|
package/package.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@thor123141245r/ai-translate",
|
|
3
|
+
"version": "0.0.0",
|
|
4
|
+
"publishConfig": {
|
|
5
|
+
"access": "public"
|
|
6
|
+
},
|
|
7
|
+
"type": "module",
|
|
8
|
+
"engines": {
|
|
9
|
+
"node": ">=22.0.0"
|
|
10
|
+
},
|
|
11
|
+
"bin": {
|
|
12
|
+
"ai-translate": "dist/bin/ai-translate.js"
|
|
13
|
+
},
|
|
14
|
+
"exports": {
|
|
15
|
+
".": {
|
|
16
|
+
"types": "./dist/index.d.ts",
|
|
17
|
+
"default": "./dist/index.js"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"scripts": {
|
|
21
|
+
"build": "tsc -p tsconfig.json",
|
|
22
|
+
"dev:cli": "tsx src/bin/ai-translate.ts",
|
|
23
|
+
"start": "node dist/bin/ai-translate.js",
|
|
24
|
+
"validate": "tsx scripts/validate-ai-translate.ts"
|
|
25
|
+
},
|
|
26
|
+
"dependencies": {
|
|
27
|
+
"@commenthol/app-config": "^0.1.1",
|
|
28
|
+
"@langchain/anthropic": "^0.3.15",
|
|
29
|
+
"@langchain/core": "^0.3.42",
|
|
30
|
+
"@langchain/deepseek": "^0.0.1",
|
|
31
|
+
"@langchain/mistralai": "^0.2.0",
|
|
32
|
+
"@langchain/ollama": "^0.2.0",
|
|
33
|
+
"@langchain/openai": "^0.4.4",
|
|
34
|
+
"@langchain/textsplitters": "^0.1.0",
|
|
35
|
+
"debug-level": "^4.1.1"
|
|
36
|
+
},
|
|
37
|
+
"devDependencies": {
|
|
38
|
+
"@types/node": "^22.10.2",
|
|
39
|
+
"tsx": "^4.19.2",
|
|
40
|
+
"typescript": "^5.5.3"
|
|
41
|
+
}
|
|
42
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { Transform } from 'node:stream'
|
|
2
|
+
|
|
3
|
+
export class AsyncTransform extends Transform {
|
|
4
|
+
static toString(chunk: Buffer | object, encoding: BufferEncoding | 'buffer') {
|
|
5
|
+
const enc = encoding !== 'buffer' ? encoding : undefined
|
|
6
|
+
return Buffer.isBuffer(chunk) ? chunk.toString(enc) : String(chunk)
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
_transform(
|
|
10
|
+
chunk: Buffer | object,
|
|
11
|
+
enc: BufferEncoding | 'buffer',
|
|
12
|
+
cb: (error?: Error | null) => void
|
|
13
|
+
) {
|
|
14
|
+
this._asyncTransform(chunk, enc)
|
|
15
|
+
.then((wait) => {
|
|
16
|
+
if (wait) {
|
|
17
|
+
this.once('drain', () => cb())
|
|
18
|
+
} else {
|
|
19
|
+
cb()
|
|
20
|
+
}
|
|
21
|
+
})
|
|
22
|
+
.catch((err) => this.emit('error', err))
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
protected async _asyncTransform(
|
|
26
|
+
_chunk: Buffer | object,
|
|
27
|
+
_enc: BufferEncoding | 'buffer'
|
|
28
|
+
): Promise<boolean | undefined> {
|
|
29
|
+
throw new Error('needs implementation')
|
|
30
|
+
}
|
|
31
|
+
}
|
package/src/cli.ts
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
import fs from 'node:fs'
|
|
2
|
+
import path from 'node:path'
|
|
3
|
+
import { pipeline } from 'node:stream/promises'
|
|
4
|
+
import { initProcLog } from 'debug-level'
|
|
5
|
+
import { AppConfig, StringSchema, v } from '@commenthol/app-config'
|
|
6
|
+
import type { SupportedTextSplitterLanguage } from '@langchain/textsplitters'
|
|
7
|
+
import { logger } from './logger.js'
|
|
8
|
+
import { modelFactory, AiTranslateTransform } from './model.js'
|
|
9
|
+
import {
|
|
10
|
+
TextSplitterStream,
|
|
11
|
+
recursiveChunkTextSplitter,
|
|
12
|
+
getFormatByExtension
|
|
13
|
+
} from './split.js'
|
|
14
|
+
|
|
15
|
+
const log = logger('cli')

// CLI help texts keyed by command name; `_` holds the top-level help.
const help: Record<string, string> = {}

help._ = `
AI Translator

Usage:
  ai-translate [flags]
  ai-translate [command] [flags]

Commands:
  set                 set config value

Flags:
  -h, --help          Help for ai-translate
  -v, --version       Show version information
  -c, --config DIR    Use config file .ai-translate.json in DIR
  -f, --from LANG     Source language
  -t, --to LANG       Target language; LANG is English language name or
                      supported BCP47 codes (ar, de, en, es, fr, ja, pt, ru,
                      vi, zh-CN, zh-TW)
  -i, --input FILE    input file
  -o, --output FILE   output file
  --format FORMAT     specify input format (cpp, go, java, js, php, proto,
                      python, rst, ruby, rust, scala, swift, markdown, latex,
                      html, sol)
Examples:
  Translate input.md from Spanish to output.md in English
    ai-translate -f Spanish -t English -i input.md -o output.md

  Pipe from stdin to stdout using the config in the local folder
    echo "translate" | ai-translate -f en -t en -c .

Use "ai-translate [command] --help" for more information about a command.
`

help.set = `
Set ai-translate configuration

Writes config to \`.ai-translate.json\`
If --config flag is omitted then global config is used.

Usage:
  ai-translate [flags] set KEY VALUE

Flags:
  -c, --config DIR    Use config file .ai-translate.json in DIR

Available KEYs:
  provider      set provider (ollama, mistral, anthropic, openai, deepseek);
                default="ollama"
  model         set model from provider; default="qwen2.5:7b"
  apiKey        set api key
  baseUrl       baseUrl for model
  temperature   model temperature; default=0.1
  maxRetries    max. number of retries; default=10
  chunkSize     number of chunks used in text-splitter; default=1000
`

// Location of package.json relative to the compiled module (dist/cli.js).
const PACKAGE_JSON = '../package.json'
const APP = 'ai-translate'
// Config file name: `.ai-translate.json`.
const CONF_FILE = `.${APP}.json`
const DEFAULT_LANG = 'en'
|
|
79
|
+
|
|
80
|
+
const commands: Record<string, (cmd: Record<string, unknown>, argv: string[]) => void> = {
|
|
81
|
+
set: (c, argv) => {
|
|
82
|
+
const key = nextArg(argv)
|
|
83
|
+
const value = nextArg(argv)
|
|
84
|
+
c.set = [key, value]
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// Flag handlers keyed by long option; value-taking flags consume the next
// argument via nextArg (which refuses tokens that look like another flag).
const flags: Record<string, (opts: Record<string, unknown>, argv: string[]) => void> = {
  '--help': (f) => (f.help = true),
  '--version': (f) => (f.version = true),
  '--config': (f, argv) => {
    const dir = nextArg(argv)
    // Paths are resolved against the current working directory.
    if (dir) f.config = path.resolve(process.cwd(), dir)
  },
  '--from': (f, argv) => {
    const lang = nextArg(argv)
    f.sourceLanguage = lang || DEFAULT_LANG
  },
  '--to': (f, argv) => {
    const lang = nextArg(argv)
    f.targetLanguage = lang || DEFAULT_LANG
  },
  '--input': (f, argv) => {
    const filename = nextArg(argv)
    if (filename) f.input = path.resolve(process.cwd(), filename)
  },
  '--output': (f, argv) => {
    const filename = nextArg(argv)
    if (filename) f.output = path.resolve(process.cwd(), filename)
  },
  '--format': (f, argv) => {
    const format = nextArg(argv)
    if (format) f.format = format
  }
}
// Short-option aliases map onto the same handlers.
flags['-h'] = flags['--help']
flags['-v'] = flags['--version']
flags['-c'] = flags['--config']
flags['-f'] = flags['--from']
flags['-t'] = flags['--to']
flags['-i'] = flags['--input']
flags['-o'] = flags['--output']
|
|
123
|
+
|
|
124
|
+
export const argvParse = (args?: string[]) => {
|
|
125
|
+
initProcLog()
|
|
126
|
+
const argv = args || process.argv.slice(2)
|
|
127
|
+
log.debug(argv)
|
|
128
|
+
const opts: Record<string, unknown> = {
|
|
129
|
+
sourceLanguage: 'en',
|
|
130
|
+
targetLanguage: 'en'
|
|
131
|
+
}
|
|
132
|
+
const cmd: Record<string, unknown> = {}
|
|
133
|
+
|
|
134
|
+
while (argv.length) {
|
|
135
|
+
const arg = argv.shift()
|
|
136
|
+
if (!arg) continue
|
|
137
|
+
if (commands[arg]) {
|
|
138
|
+
commands[arg](cmd, argv)
|
|
139
|
+
} else if (flags[arg]) {
|
|
140
|
+
flags[arg](opts, argv)
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
return { cmd, opts }
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
const nextArg = (argv: string[]) => {
|
|
147
|
+
const next = argv[0] || ''
|
|
148
|
+
if (next.indexOf('-') === 0) {
|
|
149
|
+
return
|
|
150
|
+
}
|
|
151
|
+
return argv.shift()
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
// Console indirection so tests can capture CLI output.
let _console = console
export const _injectConsole = (obj: typeof console) => {
  _console = obj
}

// Writes an error message prefixed with "ERROR: " to the (injected) console.
const displayError = (msg: string) => {
  _console.error(`ERROR: ${msg}`)
}

// Writes a plain message to the (injected) console.
const display = (msg: string) => _console.log(msg)
|
|
164
|
+
|
|
165
|
+
// Prints the package version, read from package.json resolved relative to
// the compiled module (dist/cli.js -> ../package.json).
const version = () => {
  const packageJson = new URL(PACKAGE_JSON, import.meta.url)
  const { version: pkgVersion } = JSON.parse(
    fs.readFileSync(packageJson, 'utf-8')
  ) as { version: string }
  display(pkgVersion)
}
|
|
172
|
+
|
|
173
|
+
// Config-file schema. All values are stored as strings; the numeric keys are
// transformed and range-checked here before use.
const schema = {
  provider: StringSchema,
  model: StringSchema,
  apiKey: StringSchema,
  baseUrl: StringSchema,
  // Model temperature, 0..2.
  temperature: v.pipe(
    v.string(),
    v.transform(Number),
    v.minValue(0),
    v.maxValue(2)
  ),
  // Retry count for model calls, integer 0..99.
  maxRetries: v.pipe(
    v.string(),
    v.transform(Number),
    v.integer(),
    v.minValue(0),
    v.maxValue(99)
  ),
  // Text-splitter chunk size, integer 100..200000.
  chunkSize: v.pipe(
    v.string(),
    v.transform(Number),
    v.integer(),
    v.minValue(100),
    v.maxValue(200e3)
  )
}
|
|
199
|
+
|
|
200
|
+
// Provider-specific environment variables holding the API key; the generic
// AI_TRANSLATE_API_KEY takes precedence (see resolveRuntimeConfig below).
const PROVIDER_API_KEY_ENV: Record<string, string> = {
  openai: 'OPENAI_API_KEY',
  anthropic: 'ANTHROPIC_API_KEY',
  mistral: 'MISTRAL_API_KEY',
  deepseek: 'DEEPSEEK_API_KEY'
}

// Provider-specific environment variables for a custom API endpoint.
const PROVIDER_BASE_URL_ENV: Record<string, string> = {
  openai: 'OPENAI_BASE_URL',
  anthropic: 'ANTHROPIC_BASE_URL',
  mistral: 'MISTRAL_BASE_URL',
  deepseek: 'DEEPSEEK_BASE_URL',
  ollama: 'OLLAMA_BASE_URL'
}
|
|
214
|
+
|
|
215
|
+
const readEnv = (key?: string) => {
|
|
216
|
+
if (!key) return
|
|
217
|
+
const value = process.env[key]
|
|
218
|
+
if (!value) return
|
|
219
|
+
const trimmed = value.trim()
|
|
220
|
+
return trimmed ? trimmed : undefined
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
const pickFirst = (...values: Array<string | undefined>) =>
|
|
224
|
+
values.find((value) => value && value.length > 0)
|
|
225
|
+
|
|
226
|
+
const resolveRuntimeConfig = (config: Record<string, unknown>) => {
|
|
227
|
+
const provider = String(config.provider || 'ollama')
|
|
228
|
+
const envApiKey = pickFirst(
|
|
229
|
+
readEnv('AI_TRANSLATE_API_KEY'),
|
|
230
|
+
readEnv(PROVIDER_API_KEY_ENV[provider])
|
|
231
|
+
)
|
|
232
|
+
const envBaseUrl = pickFirst(
|
|
233
|
+
readEnv('AI_TRANSLATE_BASE_URL'),
|
|
234
|
+
readEnv(PROVIDER_BASE_URL_ENV[provider])
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
return {
|
|
238
|
+
...config,
|
|
239
|
+
...(envApiKey ? { apiKey: envApiKey } : {}),
|
|
240
|
+
...(envBaseUrl ? { baseUrl: envBaseUrl } : {})
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
export const cli = async (args?: string[]) => {
|
|
245
|
+
const { cmd, opts } = argvParse(args)
|
|
246
|
+
const command = Object.keys(cmd)[0]
|
|
247
|
+
log.debug({ cmd, opts })
|
|
248
|
+
|
|
249
|
+
const filename = !opts.config
|
|
250
|
+
? CONF_FILE
|
|
251
|
+
: fs.lstatSync(String(opts.config)).isDirectory()
|
|
252
|
+
? path.resolve(String(opts.config), CONF_FILE)
|
|
253
|
+
: String(opts.config)
|
|
254
|
+
|
|
255
|
+
const appConf = new AppConfig({ appName: APP, schema, filename })
|
|
256
|
+
await appConf.read().catch(() => null)
|
|
257
|
+
|
|
258
|
+
try {
|
|
259
|
+
if (opts.version) {
|
|
260
|
+
version()
|
|
261
|
+
return
|
|
262
|
+
}
|
|
263
|
+
if (opts.help) {
|
|
264
|
+
const helpText = help[command || ''] || help._
|
|
265
|
+
display(helpText)
|
|
266
|
+
return
|
|
267
|
+
}
|
|
268
|
+
if (cmd.set) {
|
|
269
|
+
const [key, value] = cmd.set as [string | undefined, string | undefined]
|
|
270
|
+
if (key) {
|
|
271
|
+
appConf.set(key, value)
|
|
272
|
+
await appConf.write()
|
|
273
|
+
} else {
|
|
274
|
+
display(JSON.stringify(appConf.config, null, 2))
|
|
275
|
+
}
|
|
276
|
+
return
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
const sourceLanguage = String(opts.sourceLanguage || DEFAULT_LANG)
|
|
280
|
+
const targetLanguage = String(opts.targetLanguage || DEFAULT_LANG)
|
|
281
|
+
|
|
282
|
+
const reader = opts.input ? fs.createReadStream(String(opts.input)) : process.stdin
|
|
283
|
+
const writer = opts.output ? fs.createWriteStream(String(opts.output)) : process.stdout
|
|
284
|
+
|
|
285
|
+
const runtimeConfig = resolveRuntimeConfig(
|
|
286
|
+
appConf.config as Record<string, unknown>
|
|
287
|
+
)
|
|
288
|
+
const model = modelFactory(runtimeConfig)
|
|
289
|
+
const lcNamespace =
|
|
290
|
+
(model as unknown as { lc_namespace?: Record<string, unknown> }).lc_namespace || {}
|
|
291
|
+
const lcKwargs = (model as unknown as { lc_kwargs?: Record<string, unknown> }).lc_kwargs || {}
|
|
292
|
+
const { apiKey: _apiKey, ...modelParams } = { ...lcNamespace, ...lcKwargs } as Record<string, unknown>
|
|
293
|
+
log.debug(modelParams)
|
|
294
|
+
|
|
295
|
+
const format =
|
|
296
|
+
(opts.format as SupportedTextSplitterLanguage | undefined) ||
|
|
297
|
+
getFormatByExtension(path.extname(String(opts.input || '_.md')))
|
|
298
|
+
const chunkSize = (runtimeConfig as { chunkSize?: number }).chunkSize || 1000
|
|
299
|
+
const textSplitter = recursiveChunkTextSplitter({ chunkSize, format })
|
|
300
|
+
const splitter = new TextSplitterStream({ textSplitter })
|
|
301
|
+
const translator = new AiTranslateTransform({
|
|
302
|
+
...(opts as Record<string, unknown>),
|
|
303
|
+
sourceLanguage,
|
|
304
|
+
targetLanguage,
|
|
305
|
+
model
|
|
306
|
+
})
|
|
307
|
+
|
|
308
|
+
await pipeline(reader, splitter, translator, writer)
|
|
309
|
+
} catch (err) {
|
|
310
|
+
log.debug(err)
|
|
311
|
+
displayError((err as Error).message)
|
|
312
|
+
}
|
|
313
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export { AiTranslateTransform, modelFactory } from './model.js'
|
|
2
|
+
export { promptInvoke, languages } from './prompt.js'
|
|
3
|
+
export {
|
|
4
|
+
TextSplitterStream,
|
|
5
|
+
recursiveChunkTextSplitter,
|
|
6
|
+
getFormatByExtension
|
|
7
|
+
} from './split.js'
|
|
8
|
+
export type { Metadata, ModelFactoryOptions } from './model.js'
|
|
9
|
+
export type { TextSplitterParams, Separator } from './split.js'
|
package/src/logger.ts
ADDED
package/src/model.ts
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import { ChatOllama } from '@langchain/ollama'
|
|
2
|
+
import { ChatMistralAI } from '@langchain/mistralai'
|
|
3
|
+
import { ChatAnthropic } from '@langchain/anthropic'
|
|
4
|
+
import { ChatOpenAI } from '@langchain/openai'
|
|
5
|
+
import { ChatDeepSeek } from '@langchain/deepseek'
|
|
6
|
+
import type { BaseChatModel } from '@langchain/core/language_models/chat_models'
|
|
7
|
+
import { AsyncTransform } from './asyncTransform.js'
|
|
8
|
+
import { promptInvoke, replaceMarkerSymbol } from './prompt.js'
|
|
9
|
+
import { logger } from './logger.js'
|
|
10
|
+
import { isWhiteSpace, preserveWhiteSpace } from './utils.js'
|
|
11
|
+
|
|
12
|
+
const log = logger('model')

// Defaults applied to every provider. maxConcurrency: 1 presumably keeps
// model requests sequential so translated chunks stay ordered — confirm
// against each provider's client behavior.
const DEFAULT = {
  temperature: 0.1,
  maxRetries: 10,
  maxConcurrency: 1
}

/** Token usage totals, emitted via 'metadata' events and getMetadata(). */
export type Metadata = {
  inputTokens: number
  outputTokens: number
}

/** Options for modelFactory; extra keys pass through to the provider. */
export type ModelFactoryOptions = {
  provider?: 'ollama' | 'mistral' | 'anthropic' | 'openai' | 'deepseek'
  [key: string]: unknown
}
|
|
29
|
+
|
|
30
|
+
/**
 * Creates a LangChain chat model for the configured provider.
 * Blank apiKey/baseUrl strings are treated as "not configured"; remaining
 * options are forwarded to the provider constructor and can override the
 * defaults (including the per-provider default model below).
 * @throws Error for an unsupported provider
 */
export const modelFactory = (modelOpts?: ModelFactoryOptions): BaseChatModel => {
  const { provider = 'ollama', ...other } = modelOpts || {}
  // Normalize: empty/whitespace-only strings become undefined.
  const apiKeyValue = typeof other.apiKey === 'string' ? other.apiKey.trim() : ''
  const baseUrlValue = typeof other.baseUrl === 'string' ? other.baseUrl.trim() : ''
  const apiKey = apiKeyValue ? apiKeyValue : undefined
  const baseUrl = baseUrlValue ? baseUrlValue : undefined
  // Remove apiKey/baseUrl from the pass-through options; they are re-added
  // below in the shape each provider constructor expects.
  const filtered = { ...other }
  if ('apiKey' in filtered) {
    delete filtered.apiKey
  }
  if ('baseUrl' in filtered) {
    delete filtered.baseUrl
  }

  switch (provider) {
    case 'ollama':
      // NOTE(review): takes baseUrl but no apiKey — local server assumed.
      return new ChatOllama({
        ...DEFAULT,
        model: 'qwen2.5:7b',
        ...(baseUrl ? { baseUrl } : {}),
        ...filtered
      })
    case 'mistral':
      // NOTE(review): a configured baseUrl is not forwarded here — confirm
      // whether ChatMistralAI supports a custom endpoint option.
      return new ChatMistralAI({
        ...DEFAULT,
        model: 'ministral-8b',
        ...(apiKey ? { apiKey } : {}),
        ...filtered
      })
    case 'anthropic':
      return new ChatAnthropic({
        ...DEFAULT,
        model: 'claude-3-5-haiku-20241022',
        ...(apiKey ? { apiKey } : {}),
        ...filtered
      })
    case 'openai':
      // OpenAI-compatible endpoints take the base URL via `configuration`.
      return new ChatOpenAI({
        ...DEFAULT,
        model: 'gpt-4o-mini',
        ...(apiKey ? { apiKey } : {}),
        ...(baseUrl ? { configuration: { baseURL: baseUrl } } : {}),
        ...filtered
      })
    case 'deepseek':
      return new ChatDeepSeek({
        ...DEFAULT,
        model: 'deepseek-reasoner',
        ...(apiKey ? { apiKey } : {}),
        ...(baseUrl ? { configuration: { baseURL: baseUrl } } : {}),
        ...filtered
      })
    default:
      throw new Error(`unsupported provider=${provider}`)
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Transform stream that translates each incoming text chunk through the
 * configured chat model, restoring each chunk's original leading/trailing
 * whitespace. Emits a 'metadata' event with token usage per chunk.
 */
export class AiTranslateTransform extends AsyncTransform {
  private readonly _model: BaseChatModel
  private readonly _promptOpts: {
    format?: string
    sourceLanguage: string
    targetLanguage: string
  }
  // Running token-usage totals across all chunks.
  private _metadata: Metadata = { inputTokens: 0, outputTokens: 0 }

  constructor(options: {
    model: BaseChatModel
    format?: string
    sourceLanguage: string
    targetLanguage: string
    [key: string]: unknown
  }) {
    // Remaining options are forwarded to the underlying stream constructor.
    const { model, format, sourceLanguage, targetLanguage, ...rest } = options
    super(rest)
    this._model = model
    this._promptOpts = { format, sourceLanguage, targetLanguage }
  }

  /** Returns a copy of the accumulated token usage. */
  getMetadata() {
    return { ...this._metadata }
  }

  protected async _asyncTransform(
    chunk: Buffer | object,
    encoding: BufferEncoding | 'buffer'
  ) {
    const text = AsyncTransform.toString(chunk, encoding)
    // Whitespace-only chunks pass through untranslated.
    if (isWhiteSpace(text)) {
      return !this.push(text)
    }
    log.debug('inp=%j', text)
    const messages = await promptInvoke({ ...this._promptOpts, text })
    const result = await this._model.invoke(messages)
    log.debug('out=%j', result.content)

    // Token counts default to 0 when the provider reports no usage metadata.
    const usage = (result as { usage_metadata?: { input_tokens?: number; output_tokens?: number } })
      .usage_metadata
    const inputTokens = usage?.input_tokens ?? 0
    const outputTokens = usage?.output_tokens ?? 0

    this.emit('metadata', { inputTokens, outputTokens })
    this._metadata.inputTokens += inputTokens
    this._metadata.outputTokens += outputTokens

    // Strip the 🔤 marker symbols and restore the input's whitespace.
    // Returning true (push buffer full) makes AsyncTransform wait for 'drain'.
    return !this.push(
      preserveWhiteSpace(text, replaceMarkerSymbol(result.content))
    )
  }
}
|
package/src/prompt.ts
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { ChatPromptTemplate } from '@langchain/core/prompts'
|
|
2
|
+
import type { ChatPromptValueInterface } from '@langchain/core/prompt_values'
|
|
3
|
+
import type { MessageContent } from '@langchain/core/messages'
|
|
4
|
+
import { logger } from './logger.js'
|
|
5
|
+
|
|
6
|
+
const log = logger('prompt')
|
|
7
|
+
|
|
8
|
+
export const languages: Record<string, string> = {
|
|
9
|
+
ar: 'Arabic',
|
|
10
|
+
de: 'German',
|
|
11
|
+
en: 'English',
|
|
12
|
+
es: 'Spanish',
|
|
13
|
+
fr: 'French',
|
|
14
|
+
ja: 'Japanese',
|
|
15
|
+
pt: 'Portuguese',
|
|
16
|
+
ru: 'Russian',
|
|
17
|
+
vi: 'Vietnamese',
|
|
18
|
+
'zh-CN': 'Chinese-simplified',
|
|
19
|
+
'zh-TW': 'Chinese-traditional'
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export const getLanguageName = (lang: string) => languages[lang] || lang
|
|
23
|
+
|
|
24
|
+
// System prompt. {format}, {sourceLanguage} and {targetLanguage} are template
// variables filled in by promptInvoke; the 🔤 symbols delimit the text to
// translate and the model is instructed to drop them from its output.
const systemPrompt =
  'You are an AI-driven advanced translation system, specifically designed to ' +
  'translate structured and technical documents. ' +
  'You will receive a text snippet from a file formatted as "{format}"\n\n' +
  'Your task is to **accurately translate** the text enclosed between the 🔤 ' +
  'symbols from "{sourceLanguage}" to "{targetLanguage}". ' +
  'Preserve the original formatting, sentence structure, and terminology. ' +
  'Ensure that every word and sentence is translated as closely as possible to ' +
  'the original meaning, without summarizing or omitting any part of the content. ' +
  'The translation must be faithful, detailed, and maintain the original length ' +
  'and complexity. ' +
  'Deliver the translation exactly as required, without any additional ' +
  'commentary or explanation, and ensure the 🔤 symbols are removed in the final output.\n\n' +
  'Remember: your job is to **translate** the text exactly as it is, without ' +
  'adding summaries or changing the content in any way. ' +
  'Do not skip or modify any part of the text. Ensure that the output is a ' +
  'direct translation, and that the original structure and meaning are preserved.'

const promptTemplate = ChatPromptTemplate.fromMessages([
  ['system', systemPrompt],
  // The user message wraps the snippet in the 🔤 marker symbols.
  ['user', '🔤{text}🔤']
])
|
|
46
|
+
|
|
47
|
+
/**
 * Builds the chat prompt for translating `text`.
 * Language arguments may be BCP47 codes (expanded via getLanguageName) or
 * plain English language names, which pass through unchanged.
 */
export const promptInvoke = ({
  format = 'markdown',
  sourceLanguage,
  targetLanguage,
  text = ''
}: {
  format?: string
  sourceLanguage: string
  targetLanguage: string
  text: string
}): Promise<ChatPromptValueInterface> =>
  promptTemplate.invoke({
    format,
    sourceLanguage: getLanguageName(sourceLanguage),
    targetLanguage: getLanguageName(targetLanguage),
    text
  })
|
|
64
|
+
|
|
65
|
+
export const replaceMarkerSymbol = (text: MessageContent) => {
|
|
66
|
+
if (typeof text === 'string') {
|
|
67
|
+
return text.replace(/^\s*🔤/, '').replace(/🔤\s*$/, '')
|
|
68
|
+
}
|
|
69
|
+
log.error(text)
|
|
70
|
+
return ''
|
|
71
|
+
}
|