@thor123141245r/ai-translate 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/.agentdocs/code-changes/2026-01-22/AI/347/277/273/350/257/221/345/231/250TS/345/256/236/347/216/260-/345/256/236/347/216/260.md +22 -0
  2. package/.agentdocs/code-changes/2026-01-23/CLI/345/210/206/345/217/221-npx/345/256/236/347/216/260.md +18 -0
  3. package/.agentdocs/code-changes/2026-01-23/sora-watermask-remover-/345/233/275/351/231/205/345/214/226/347/277/273/350/257/221-/345/256/236/347/216/260.md +37 -0
  4. package/.agentdocs/code-changes/2026-01-23//351/205/215/347/275/256/350/257/273/345/217/226-/347/216/257/345/242/203/345/217/230/351/207/217/344/274/230/345/205/210-/345/256/236/347/216/260.md +22 -0
  5. package/.agentdocs/plans/2026-01-22/AI/347/277/273/350/257/221/345/231/250TS/345/256/236/347/216/260-/344/274/230/345/214/226/346/226/271/346/241/210.md +67 -0
  6. package/.agentdocs/plans/2026-01-23/CLI/345/210/206/345/217/221-npx/346/226/271/346/241/210.md +60 -0
  7. package/.agentdocs/plans/2026-01-23/sora-watermask-remover-/345/233/275/351/231/205/345/214/226/347/277/273/350/257/221-/344/274/230/345/214/226/346/226/271/346/241/210.md +51 -0
  8. package/.agentdocs/plans/2026-01-23//351/205/215/347/275/256/350/257/273/345/217/226-/347/216/257/345/242/203/345/217/230/351/207/217/344/274/230/345/205/210-/344/274/230/345/214/226/346/226/271/346/241/210.md +80 -0
  9. package/README.md +120 -0
  10. package/SKILL.md +103 -0
  11. package/dist/asyncTransform.d.ts +7 -0
  12. package/dist/asyncTransform.d.ts.map +1 -0
  13. package/dist/asyncTransform.js +23 -0
  14. package/dist/asyncTransform.js.map +1 -0
  15. package/dist/bin/ai-translate.d.ts +3 -0
  16. package/dist/bin/ai-translate.d.ts.map +1 -0
  17. package/dist/bin/ai-translate.js +4 -0
  18. package/dist/bin/ai-translate.js.map +1 -0
  19. package/dist/cli.d.ts +7 -0
  20. package/dist/cli.d.ts.map +1 -0
  21. package/dist/cli.js +259 -0
  22. package/dist/cli.js.map +1 -0
  23. package/dist/index.d.ts +6 -0
  24. package/dist/index.d.ts.map +1 -0
  25. package/dist/index.js +4 -0
  26. package/dist/index.js.map +1 -0
  27. package/dist/logger.d.ts +3 -0
  28. package/dist/logger.d.ts.map +1 -0
  29. package/dist/logger.js +3 -0
  30. package/dist/logger.js.map +1 -0
  31. package/dist/model.d.ts +29 -0
  32. package/dist/model.d.ts.map +1 -0
  33. package/dist/model.js +103 -0
  34. package/dist/model.js.map +1 -0
  35. package/dist/prompt.d.ts +12 -0
  36. package/dist/prompt.d.ts.map +1 -0
  37. package/dist/prompt.js +51 -0
  38. package/dist/prompt.js.map +1 -0
  39. package/dist/split.d.ts +27 -0
  40. package/dist/split.d.ts.map +1 -0
  41. package/dist/split.js +87 -0
  42. package/dist/split.js.map +1 -0
  43. package/dist/utils.d.ts +7 -0
  44. package/dist/utils.d.ts.map +1 -0
  45. package/dist/utils.js +14 -0
  46. package/dist/utils.js.map +1 -0
  47. package/package.json +42 -0
  48. package/src/asyncTransform.ts +31 -0
  49. package/src/bin/ai-translate.ts +5 -0
  50. package/src/cli.ts +313 -0
  51. package/src/index.ts +9 -0
  52. package/src/logger.ts +3 -0
  53. package/src/model.ts +139 -0
  54. package/src/prompt.ts +71 -0
  55. package/src/split.ts +111 -0
  56. package/src/utils.ts +15 -0
  57. package/tsconfig.json +19 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"split.js","sourceRoot":"","sources":["../src/split.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,8BAA8B,EAAE,MAAM,0BAA0B,CAAA;AAKzE,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAA;AAQpD,MAAM,kBAAkB,GAA6B;IACnD,GAAG,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,CAAC;IACzB,EAAE,EAAE,CAAC,KAAK,CAAC;IACX,IAAI,EAAE,CAAC,OAAO,CAAC;IACf,EAAE,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,CAAC;IAClD,GAAG,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,CAAC,QAAQ,CAAC;IACjB,MAAM,EAAE,CAAC,KAAK,CAAC;IACf,GAAG,EAAE,CAAC,MAAM,CAAC;IACb,IAAI,EAAE,CAAC,KAAK,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,CAAC,KAAK,CAAC;IACb,KAAK,EAAE,CAAC,QAAQ,EAAE,KAAK,CAAC;IACxB,KAAK,EAAE,CAAC,QAAQ,CAAC;IACjB,QAAQ,EAAE,CAAC,KAAK,EAAE,WAAW,EAAE,OAAO,CAAC;IACvC,KAAK,EAAE,CAAC,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC;IACjC,IAAI,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC;CACrE,CAAA;AAED,IAAI,iBAA4E,CAAA;AAEhF,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAClC,OAAe,EAC4B,EAAE;IAC7C,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACvB,iBAAiB,GAAG,EAAE,CAAA;QACtB,KAAK,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,EAAE,CAAC;YAC/D,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;gBACzB,iBAAiB,CAAC,GAAG,CAAC,GAAG,GAAoC,CAAA;YAC/D,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAA;AACnC,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,0BAA0B,GAAG,CAAC,OAA4B,EAAE,EAAE;IACzE,MAAM,EAAE,MAAM,GAAG,UAAU,EAAE,GAAG,IAAI,EAAE,GAAG,OAAO,IAAI,EAAE,CAAA;IACtD,MAAM,UAAU,GAAG,kBAAkB,CAAC,MAAM,CAAC;QAC3C,CAAC,CAAC,8BAA8B,CAAC,wBAAwB,CAAC,MAAM,CAAC;QACjE,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,CAAA;IAC3B,OAAO,IAAI,0BAA0B,CAAC;QACpC,SAAS,EAAE,IAAI;QACf,YAAY,EAAE,CAAC;QACf,UAAU;QACV,GAAG,IAAI;KACR,CAAC,CAAA;AACJ,CAAC,CAAA;AAED,uFAAuF;AACvF,MAAM,OAAO,0BAA2B,SAAQ,8BAA8B;IACzD,QAAQ,CAAC,IAAc,EAAE,SAAiB;QAC3D,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;QACjC,OAAO,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAA;IAClC,CAAC;CACF;AAED,MAAM,OAAO,kBAAmB,SAAQ,cAAc;IACnC,aAAa,CAAkB;IACxC,UAAU,CAAU;IACpB,OAAO,CAAQ;IAEvB
,YAAY,OAA6C;QACvD,MAAM,EAAE,YAAY,EAAE,GAAG,OAAO,IAAI,EAAE,CAAA;QACtC,KAAK,EAAE,CAAA;QACP,IAAI,CAAC,aAAa,GAAG,YAAY,IAAI,0BAA0B,EAAE,CAAA;QACjE,IAAI,CAAC,UAAU,GAAG,EAAE,CAAA;QACpB,IAAI,CAAC,OAAO,GAAG,EAAE,CAAA;IACnB,CAAC;IAES,KAAK,CAAC,eAAe,CAC7B,KAAsB,EACtB,QAAmC;QAEnC,MAAM,IAAI,GAAG,cAAc,CAAC,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAA;QACrD,IAAI,CAAC,OAAO,IAAI,IAAI,CAAA;QAEpB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAClE,IAAI,CAAC,OAAO,GAAG,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAA;QACpC,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;QAEnD,OAAO,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC;YAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAA;YACxC,IAAI,QAAQ,KAAK,SAAS;gBAAE,MAAK;YACjC,MAAM,IAAI,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;YACjC,IAAI,IAAI,EAAE,CAAC;gBACT,OAAO,IAAI,CAAA;YACb,CAAC;QACH,CAAC;QACD,OAAO,KAAK,CAAA;IACd,CAAC;IAED,MAAM,CAAC,IAAoC;QACzC,KAAK,MAAM,QAAQ,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACvC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;QACrB,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QACvB,IAAI,EAAE,CAAA;IACR,CAAC;CACF"}
@@ -0,0 +1,7 @@
1
+ export declare const isWhiteSpace: (text?: string) => boolean;
2
+ export declare const whiteSpace: (text?: string) => {
3
+ head: string;
4
+ tail: string;
5
+ };
6
+ export declare const preserveWhiteSpace: (inp: string, out: string) => string;
7
+ //# sourceMappingURL=utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,YAAY,GAAI,aAAS,YAAuB,CAAA;AAE7D,eAAO,MAAM,UAAU,GAAI,aAAS;;;CAOnC,CAAA;AAED,eAAO,MAAM,kBAAkB,GAAI,KAAK,MAAM,EAAE,KAAK,MAAM,WAG1D,CAAA"}
package/dist/utils.js ADDED
@@ -0,0 +1,14 @@
1
/**
 * Tells whether `text` is empty or made up of whitespace only.
 * A missing argument is treated as the empty string.
 */
export const isWhiteSpace = (text = '') => {
    const stripped = text.trim();
    return stripped.length === 0;
};
2
/**
 * Extracts the leading and trailing whitespace of `text`.
 * Each side is capped at 200 characters by the regular expressions.
 * When the leading run already covers the whole text, `tail` stays empty.
 */
export const whiteSpace = (text = '') => {
    const leading = text.match(/^\s{0,200}/);
    const head = leading ? leading[0] : '';
    if (head.length === text.length) {
        return { head, tail: '' };
    }
    const trailing = text.match(/\s{0,200}$/);
    return { head, tail: trailing ? trailing[0] : '' };
};
10
/**
 * Wraps the trimmed `out` in the leading and trailing whitespace found on `inp`.
 */
export const preserveWhiteSpace = (inp, out) => {
    const padding = whiteSpace(inp);
    return `${padding.head}${out.trim()}${padding.tail}`;
};
14
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,IAAI,GAAG,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAA;AAE7D,MAAM,CAAC,MAAM,UAAU,GAAG,CAAC,IAAI,GAAG,EAAE,EAAE,EAAE;IACtC,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;IAC/C,IAAI,IAAI,GAAG,EAAE,CAAA;IACb,IAAI,IAAI,CAAC,MAAM,KAAK,IAAI,CAAC,MAAM,EAAE,CAAC;QAChC,IAAI,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;IAC3C,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;AACvB,CAAC,CAAA;AAED,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,GAAW,EAAE,GAAW,EAAE,EAAE;IAC7D,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,UAAU,CAAC,GAAG,CAAC,CAAA;IACtC,OAAO,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,GAAG,IAAI,CAAA;AACjC,CAAC,CAAA"}
package/package.json ADDED
@@ -0,0 +1,42 @@
1
+ {
2
+ "name": "@thor123141245r/ai-translate",
3
+ "version": "0.0.0",
4
+ "publishConfig": {
5
+ "access": "public"
6
+ },
7
+ "type": "module",
8
+ "engines": {
9
+ "node": ">=22.0.0"
10
+ },
11
+ "bin": {
12
+ "ai-translate": "dist/bin/ai-translate.js"
13
+ },
14
+ "exports": {
15
+ ".": {
16
+ "types": "./dist/index.d.ts",
17
+ "default": "./dist/index.js"
18
+ }
19
+ },
20
+ "scripts": {
21
+ "build": "tsc -p tsconfig.json",
22
+ "dev:cli": "tsx src/bin/ai-translate.ts",
23
+ "start": "node dist/bin/ai-translate.js",
24
+ "validate": "tsx scripts/validate-ai-translate.ts"
25
+ },
26
+ "dependencies": {
27
+ "@commenthol/app-config": "^0.1.1",
28
+ "@langchain/anthropic": "^0.3.15",
29
+ "@langchain/core": "^0.3.42",
30
+ "@langchain/deepseek": "^0.0.1",
31
+ "@langchain/mistralai": "^0.2.0",
32
+ "@langchain/ollama": "^0.2.0",
33
+ "@langchain/openai": "^0.4.4",
34
+ "@langchain/textsplitters": "^0.1.0",
35
+ "debug-level": "^4.1.1"
36
+ },
37
+ "devDependencies": {
38
+ "@types/node": "^22.10.2",
39
+ "tsx": "^4.19.2",
40
+ "typescript": "^5.5.3"
41
+ }
42
+ }
@@ -0,0 +1,31 @@
1
+ import { Transform } from 'node:stream'
2
+
3
/**
 * Transform stream whose per-chunk work is asynchronous.
 *
 * Subclasses implement `_asyncTransform`, push their output with `this.push()`
 * and return a promise. The write callback is invoked once that promise
 * settles.
 */
export class AsyncTransform extends Transform {
  /**
   * Converts a stream chunk to a string.
   *
   * @param chunk buffer or arbitrary object received by the stream
   * @param encoding encoding reported by the stream; `'buffer'` means "no
   *   specific encoding" and falls back to the Buffer default
   * @returns decoded buffer content, or `String(chunk)` for non-buffers
   */
  static toString(chunk: Buffer | object, encoding: BufferEncoding | 'buffer') {
    const enc = encoding !== 'buffer' ? encoding : undefined
    return Buffer.isBuffer(chunk) ? chunk.toString(enc) : String(chunk)
  }

  /**
   * Bridges the callback based stream API to `_asyncTransform`.
   *
   * The callback is always invoked when the promise settles. The resolved
   * "wait" flag is intentionally not used to delay it: 'drain' is emitted by
   * the writable side only after a pending write has completed, i.e. after
   * this very callback ran, so waiting for 'drain' here would stall the stream
   * forever. Backpressure from a full readable buffer is handled by Transform
   * itself once the callback has been called.
   *
   * Failures are handed to the callback so the stream is destroyed with the
   * error (which still surfaces as an 'error' event).
   */
  _transform(
    chunk: Buffer | object,
    enc: BufferEncoding | 'buffer',
    cb: (error?: Error | null) => void
  ) {
    this._asyncTransform(chunk, enc).then(
      () => cb(),
      (err: unknown) => cb(err instanceof Error ? err : new Error(String(err)))
    )
  }

  /**
   * Asynchronous transform step; must be overridden.
   *
   * @param _chunk chunk to process
   * @param _enc encoding of the chunk
   * @returns `true` when the last `push()` reported a full buffer; the value
   *   is informational only
   * @throws Error always, in this base implementation
   */
  protected async _asyncTransform(
    _chunk: Buffer | object,
    _enc: BufferEncoding | 'buffer'
  ): Promise<boolean | undefined> {
    throw new Error('needs implementation')
  }
}
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { cli } from '../cli.js'
4
+
5
+ await cli()
package/src/cli.ts ADDED
@@ -0,0 +1,313 @@
1
+ import fs from 'node:fs'
2
+ import path from 'node:path'
3
+ import { pipeline } from 'node:stream/promises'
4
+ import { initProcLog } from 'debug-level'
5
+ import { AppConfig, StringSchema, v } from '@commenthol/app-config'
6
+ import type { SupportedTextSplitterLanguage } from '@langchain/textsplitters'
7
+ import { logger } from './logger.js'
8
+ import { modelFactory, AiTranslateTransform } from './model.js'
9
+ import {
10
+ TextSplitterStream,
11
+ recursiveChunkTextSplitter,
12
+ getFormatByExtension
13
+ } from './split.js'
14
+
15
+ const log = logger('cli')
16
+
17
+ const help: Record<string, string> = {}
18
+
19
+ help._ = `
20
+ AI Translator
21
+
22
+ Usage:
23
+ ai-translate [flags]
24
+ ai-translate [command] [flags]
25
+
26
+ Commands:
27
+ set set config value
28
+
29
+ Flags:
30
+ -h, --help Help for ai-translate
31
+ -v, --version Show version information
32
+ -c, --config DIR Use config file .ai-translate.json in DIR
33
+ -f, --from LANG Source language
34
+ -t, --to LANG Target language; LANG is English language name or
35
+ supported BCP47 codes (ar, de, en, es, fr, ja, pt, ru,
36
+ vi, zh-CN, zh-TW)
37
+ -i, --input FILE input file
38
+ -o, --output FILE output file
39
+ --format FORMAT specify input format (cpp, go, java, js, php, proto,
40
+ python, rst, ruby, rust, scala, swift, markdown, latex,
41
+ html, sol)
42
+ Examples:
43
+ Translate input.md from Spanish to output.md in English
44
+ ai-translate -f Spanish -t English -i input.md -o output.md
45
+
46
+ Pipe from stdin to stdout using the config in the local folder
47
+ echo "translate" | ai-translate -f en -t en -c .
48
+
49
+ Use "ai-translate [command] --help" for more information about a command.
50
+ `
51
+
52
+ help.set = `
53
+ Set ai-translate configuration
54
+
55
+ Writes config to \`.ai-translate.json\`
56
+ If --config flag is omitted then global config is used.
57
+
58
+ Usage:
59
+ ai-translate [flags] set KEY VALUE
60
+
61
+ Flags:
62
+ -c, --config DIR Use config file .ai-translate.json in DIR
63
+
64
+ Available KEYs:
65
+ provider set provider (ollama, mistral, anthropic, openai, deepseek);
66
+ default="ollama"
67
+ model set model from provider; default="qwen2.5:7b"
68
+ apiKey set api key
69
+ baseUrl baseUrl for model
70
+ temperature model temperature; default=0.1
71
+ maxRetries max. number of retries; default=10
72
+ chunkSize number of chunks used in text-splitter; default=1000
73
+ `
74
+
75
+ const PACKAGE_JSON = '../package.json'
76
+ const APP = 'ai-translate'
77
+ const CONF_FILE = `.${APP}.json`
78
+ const DEFAULT_LANG = 'en'
79
+
80
/**
 * Sub-command handlers keyed by the command word.
 * Each handler records its arguments on the `cmd` object passed in.
 */
const commands: Record<string, (cmd: Record<string, unknown>, argv: string[]) => void> = {
  set(target, rest) {
    // Array elements are evaluated left to right: KEY is consumed before VALUE.
    const pair = [nextArg(rest), nextArg(rest)]
    target.set = pair
  }
}
87
+
88
/**
 * Flag handlers keyed by the flag as typed on the command line.
 * A handler stores its result on the options object and may consume the
 * following argument through `nextArg`.
 */
const flags: Record<string, (opts: Record<string, unknown>, argv: string[]) => void> = {
  '--help': (target) => {
    target.help = true
  },
  '--version': (target) => {
    target.version = true
  },
  '--config': (target, rest) => {
    const dir = nextArg(rest)
    if (!dir) return
    target.config = path.resolve(process.cwd(), dir)
  },
  '--from': (target, rest) => {
    target.sourceLanguage = nextArg(rest) || DEFAULT_LANG
  },
  '--to': (target, rest) => {
    target.targetLanguage = nextArg(rest) || DEFAULT_LANG
  },
  '--input': (target, rest) => {
    const filename = nextArg(rest)
    if (!filename) return
    target.input = path.resolve(process.cwd(), filename)
  },
  '--output': (target, rest) => {
    const filename = nextArg(rest)
    if (!filename) return
    target.output = path.resolve(process.cwd(), filename)
  },
  '--format': (target, rest) => {
    const format = nextArg(rest)
    if (!format) return
    target.format = format
  }
}

// Short aliases reuse the handler of their long form.
for (const [alias, long] of [
  ['-h', '--help'],
  ['-v', '--version'],
  ['-c', '--config'],
  ['-f', '--from'],
  ['-t', '--to'],
  ['-i', '--input'],
  ['-o', '--output']
] as const) {
  flags[alias] = flags[long]
}
123
+
124
/**
 * Parses command line arguments into a command object and an options object.
 *
 * @param args arguments to parse; defaults to `process.argv.slice(2)`.
 *   The array is not modified.
 * @returns `cmd` filled by the matching command handler and `opts` filled by
 *   the flag handlers; both languages default to `DEFAULT_LANG`.
 *   Arguments that are neither a known command nor a known flag are ignored.
 */
export const argvParse = (args?: string[]) => {
  initProcLog()
  // Parsing consumes tokens with shift(); work on a copy so the caller's
  // array is left intact.
  const argv = [...(args ?? process.argv.slice(2))]
  log.debug(argv)
  const opts: Record<string, unknown> = {
    sourceLanguage: DEFAULT_LANG,
    targetLanguage: DEFAULT_LANG
  }
  const cmd: Record<string, unknown> = {}

  // Only own keys count as handlers; otherwise an argument such as
  // "toString" would resolve to an Object.prototype method.
  const own = (table: object, key: string) =>
    Object.prototype.hasOwnProperty.call(table, key)

  while (argv.length) {
    const arg = argv.shift()
    if (!arg) continue
    if (own(commands, arg)) {
      commands[arg](cmd, argv)
    } else if (own(flags, arg)) {
      flags[arg](opts, argv)
    }
  }
  return { cmd, opts }
}
145
+
146
/**
 * Consumes and returns the next argument unless it looks like a flag.
 * When the next argument starts with "-", nothing is consumed and
 * `undefined` is returned.
 */
const nextArg = (argv: string[]) => {
  const candidate = argv[0] || ''
  return candidate.startsWith('-') ? undefined : argv.shift()
}
153
+
154
// Console used for all user-facing output; replaceable through _injectConsole.
let _console = console

/**
 * Replaces the console object that `display` and `displayError` write to.
 */
export const _injectConsole = (obj: typeof console) => {
  _console = obj
}

/** Writes `msg` with an "ERROR: " prefix to the error channel. */
const displayError = (msg: string) => {
  const line = `ERROR: ${msg}`
  _console.error(line)
}

/** Writes `msg` to the regular output channel. */
const display = (msg: string) => {
  return _console.log(msg)
}
164
+
165
/**
 * Prints the `version` field of the package.json located relative to this
 * module.
 */
const version = () => {
  const location = new URL(PACKAGE_JSON, import.meta.url)
  const manifest = JSON.parse(fs.readFileSync(location, 'utf-8')) as {
    version: string
  }
  display(manifest.version)
}
172
+
173
+ const schema = {
174
+ provider: StringSchema,
175
+ model: StringSchema,
176
+ apiKey: StringSchema,
177
+ baseUrl: StringSchema,
178
+ temperature: v.pipe(
179
+ v.string(),
180
+ v.transform(Number),
181
+ v.minValue(0),
182
+ v.maxValue(2)
183
+ ),
184
+ maxRetries: v.pipe(
185
+ v.string(),
186
+ v.transform(Number),
187
+ v.integer(),
188
+ v.minValue(0),
189
+ v.maxValue(99)
190
+ ),
191
+ chunkSize: v.pipe(
192
+ v.string(),
193
+ v.transform(Number),
194
+ v.integer(),
195
+ v.minValue(100),
196
+ v.maxValue(200e3)
197
+ )
198
+ }
199
+
200
+ const PROVIDER_API_KEY_ENV: Record<string, string> = {
201
+ openai: 'OPENAI_API_KEY',
202
+ anthropic: 'ANTHROPIC_API_KEY',
203
+ mistral: 'MISTRAL_API_KEY',
204
+ deepseek: 'DEEPSEEK_API_KEY'
205
+ }
206
+
207
+ const PROVIDER_BASE_URL_ENV: Record<string, string> = {
208
+ openai: 'OPENAI_BASE_URL',
209
+ anthropic: 'ANTHROPIC_BASE_URL',
210
+ mistral: 'MISTRAL_BASE_URL',
211
+ deepseek: 'DEEPSEEK_BASE_URL',
212
+ ollama: 'OLLAMA_BASE_URL'
213
+ }
214
+
215
/**
 * Reads an environment variable and trims it.
 *
 * @param key variable name; may be omitted
 * @returns the trimmed value, or `undefined` when the key is missing or the
 *   variable is unset, empty or whitespace only
 */
const readEnv = (key?: string) => {
  const raw = key ? process.env[key] : undefined
  const cleaned = raw?.trim()
  return cleaned || undefined
}
222
+
223
/**
 * Returns the first non-empty string among `values`, or `undefined` when
 * there is none.
 */
const pickFirst = (...values: Array<string | undefined>) => {
  for (const candidate of values) {
    if (candidate) return candidate
  }
  return undefined
}
225
+
226
/**
 * Overlays environment variables on the stored configuration.
 *
 * The generic AI_TRANSLATE_* variables are consulted first, then the variable
 * mapped to the configured provider ("ollama" when none is configured).
 * A value found in the environment replaces `apiKey` / `baseUrl` from `config`.
 */
const resolveRuntimeConfig = (config: Record<string, unknown>) => {
  const provider = String(config.provider || 'ollama')

  const apiKeyFromEnv = pickFirst(
    readEnv('AI_TRANSLATE_API_KEY'),
    readEnv(PROVIDER_API_KEY_ENV[provider])
  )
  const baseUrlFromEnv = pickFirst(
    readEnv('AI_TRANSLATE_BASE_URL'),
    readEnv(PROVIDER_BASE_URL_ENV[provider])
  )

  const overrides = {
    ...(apiKeyFromEnv ? { apiKey: apiKeyFromEnv } : {}),
    ...(baseUrlFromEnv ? { baseUrl: baseUrlFromEnv } : {})
  }
  return { ...config, ...overrides }
}
243
+
244
/**
 * Command line entry point.
 *
 * Handles `--version`, `--help` and the `set` command; otherwise streams the
 * input (file or stdin) through the text splitter and the translator into the
 * output (file or stdout).
 *
 * Failures are reported through `displayError` and flagged with a non-zero
 * `process.exitCode`; the returned promise does not reject for them.
 *
 * @param args arguments to parse; defaults to the process arguments
 */
export const cli = async (args?: string[]) => {
  const { cmd, opts } = argvParse(args)
  const command = Object.keys(cmd)[0]
  log.debug({ cmd, opts })

  try {
    // Answered before the config is touched, so --help and --version work
    // even when --config points to a path that does not exist.
    if (opts.version) {
      version()
      return
    }
    if (opts.help) {
      const helpText = help[command || ''] || help._
      display(helpText)
      return
    }

    // lstatSync throws for a missing path; inside the try block this becomes
    // a regular error message instead of an unhandled rejection.
    const filename = !opts.config
      ? CONF_FILE
      : fs.lstatSync(String(opts.config)).isDirectory()
        ? path.resolve(String(opts.config), CONF_FILE)
        : String(opts.config)

    const appConf = new AppConfig({ appName: APP, schema, filename })
    // Best effort: a failed read is ignored and the run continues.
    await appConf.read().catch(() => null)

    if (cmd.set) {
      const [key, value] = cmd.set as [string | undefined, string | undefined]
      if (key) {
        appConf.set(key, value)
        await appConf.write()
      } else {
        // `set` without a KEY prints the current configuration.
        display(JSON.stringify(appConf.config, null, 2))
      }
      return
    }

    const sourceLanguage = String(opts.sourceLanguage || DEFAULT_LANG)
    const targetLanguage = String(opts.targetLanguage || DEFAULT_LANG)

    const reader = opts.input ? fs.createReadStream(String(opts.input)) : process.stdin
    const writer = opts.output ? fs.createWriteStream(String(opts.output)) : process.stdout

    const runtimeConfig = resolveRuntimeConfig(
      appConf.config as Record<string, unknown>
    )
    const model = modelFactory(runtimeConfig)
    const lcNamespace =
      (model as unknown as { lc_namespace?: Record<string, unknown> }).lc_namespace || {}
    const lcKwargs = (model as unknown as { lc_kwargs?: Record<string, unknown> }).lc_kwargs || {}
    // The api key is stripped before the parameters are logged.
    const { apiKey: _apiKey, ...modelParams } = { ...lcNamespace, ...lcKwargs } as Record<string, unknown>
    log.debug(modelParams)

    // Explicit --format wins; otherwise derive it from the input file
    // extension (stdin is treated like a markdown file).
    const format =
      (opts.format as SupportedTextSplitterLanguage | undefined) ||
      getFormatByExtension(path.extname(String(opts.input || '_.md')))
    const chunkSize = (runtimeConfig as { chunkSize?: number }).chunkSize || 1000
    const textSplitter = recursiveChunkTextSplitter({ chunkSize, format })
    const splitter = new TextSplitterStream({ textSplitter })
    const translator = new AiTranslateTransform({
      ...(opts as Record<string, unknown>),
      // Pass the resolved format so the prompt names the same format the
      // splitter uses, also when it was detected from the file extension.
      format,
      sourceLanguage,
      targetLanguage,
      model
    })

    await pipeline(reader, splitter, translator, writer)
  } catch (err) {
    log.debug(err)
    displayError(err instanceof Error ? err.message : String(err))
    // Signal the failure to the calling shell.
    process.exitCode = 1
  }
}
package/src/index.ts ADDED
@@ -0,0 +1,9 @@
1
+ export { AiTranslateTransform, modelFactory } from './model.js'
2
+ export { promptInvoke, languages } from './prompt.js'
3
+ export {
4
+ TextSplitterStream,
5
+ recursiveChunkTextSplitter,
6
+ getFormatByExtension
7
+ } from './split.js'
8
+ export type { Metadata, ModelFactoryOptions } from './model.js'
9
+ export type { TextSplitterParams, Separator } from './split.js'
package/src/logger.ts ADDED
@@ -0,0 +1,3 @@
1
+ import { ProcLog } from 'debug-level'
2
+
3
+ export const logger = (namespace: string) => new ProcLog(`ai-translate:${namespace}`)
package/src/model.ts ADDED
@@ -0,0 +1,139 @@
1
+ import { ChatOllama } from '@langchain/ollama'
2
+ import { ChatMistralAI } from '@langchain/mistralai'
3
+ import { ChatAnthropic } from '@langchain/anthropic'
4
+ import { ChatOpenAI } from '@langchain/openai'
5
+ import { ChatDeepSeek } from '@langchain/deepseek'
6
+ import type { BaseChatModel } from '@langchain/core/language_models/chat_models'
7
+ import { AsyncTransform } from './asyncTransform.js'
8
+ import { promptInvoke, replaceMarkerSymbol } from './prompt.js'
9
+ import { logger } from './logger.js'
10
+ import { isWhiteSpace, preserveWhiteSpace } from './utils.js'
11
+
12
+ const log = logger('model')
13
+
14
+ const DEFAULT = {
15
+ temperature: 0.1,
16
+ maxRetries: 10,
17
+ maxConcurrency: 1
18
+ }
19
+
20
+ export type Metadata = {
21
+ inputTokens: number
22
+ outputTokens: number
23
+ }
24
+
25
+ export type ModelFactoryOptions = {
26
+ provider?: 'ollama' | 'mistral' | 'anthropic' | 'openai' | 'deepseek'
27
+ [key: string]: unknown
28
+ }
29
+
30
/**
 * Creates the chat model for the requested provider.
 *
 * `apiKey` and `baseUrl` are trimmed; blank values are treated as absent.
 * They are mapped to the option name each provider class expects. All other
 * options are passed through unchanged and override the defaults.
 *
 * @param modelOpts provider name plus provider specific options;
 *   provider defaults to "ollama"
 * @returns the configured chat model
 * @throws Error when the provider is not supported
 */
export const modelFactory = (modelOpts?: ModelFactoryOptions): BaseChatModel => {
  const {
    provider = 'ollama',
    apiKey: rawApiKey,
    baseUrl: rawBaseUrl,
    ...filtered
  } = modelOpts || {}

  // Non-string and blank values count as "not set".
  const clean = (value: unknown): string | undefined => {
    const trimmed = typeof value === 'string' ? value.trim() : ''
    return trimmed ? trimmed : undefined
  }
  const apiKey = clean(rawApiKey)
  const baseUrl = clean(rawBaseUrl)

  switch (provider) {
    case 'ollama':
      return new ChatOllama({
        ...DEFAULT,
        model: 'qwen2.5:7b',
        ...(baseUrl ? { baseUrl } : {}),
        ...filtered
      })
    case 'mistral':
      return new ChatMistralAI({
        ...DEFAULT,
        model: 'ministral-8b',
        ...(apiKey ? { apiKey } : {}),
        // Previously dropped: forward a custom endpoint to the Mistral client.
        ...(baseUrl ? { serverURL: baseUrl } : {}),
        ...filtered
      })
    case 'anthropic':
      return new ChatAnthropic({
        ...DEFAULT,
        model: 'claude-3-5-haiku-20241022',
        ...(apiKey ? { apiKey } : {}),
        // Previously dropped: forward a custom endpoint to the Anthropic client.
        ...(baseUrl ? { anthropicApiUrl: baseUrl } : {}),
        ...filtered
      })
    case 'openai':
      return new ChatOpenAI({
        ...DEFAULT,
        model: 'gpt-4o-mini',
        ...(apiKey ? { apiKey } : {}),
        ...(baseUrl ? { configuration: { baseURL: baseUrl } } : {}),
        ...filtered
      })
    case 'deepseek':
      return new ChatDeepSeek({
        ...DEFAULT,
        model: 'deepseek-reasoner',
        ...(apiKey ? { apiKey } : {}),
        ...(baseUrl ? { configuration: { baseURL: baseUrl } } : {}),
        ...filtered
      })
    default:
      throw new Error(`unsupported provider=${provider}`)
  }
}
86
+
87
/**
 * Stream that translates every incoming chunk with the given chat model.
 *
 * Whitespace-only chunks are passed through untouched. For all others the
 * model output replaces the chunk, with the chunk's leading and trailing
 * whitespace restored. Token usage is emitted per chunk as a 'metadata' event
 * and accumulated for `getMetadata()`.
 */
export class AiTranslateTransform extends AsyncTransform {
  private readonly _model: BaseChatModel
  private readonly _promptOpts: {
    format?: string
    sourceLanguage: string
    targetLanguage: string
  }
  private _metadata: Metadata = { inputTokens: 0, outputTokens: 0 }

  /**
   * @param options model and prompt settings; every remaining property is
   *   handed to the stream constructor
   */
  constructor(options: {
    model: BaseChatModel
    format?: string
    sourceLanguage: string
    targetLanguage: string
    [key: string]: unknown
  }) {
    const { model, format, sourceLanguage, targetLanguage, ...streamOptions } = options
    super(streamOptions)
    this._promptOpts = { format, sourceLanguage, targetLanguage }
    this._model = model
  }

  /** Returns a copy of the token counts accumulated so far. */
  getMetadata() {
    return { ...this._metadata }
  }

  /**
   * Translates one chunk and pushes the result.
   *
   * @returns `true` when `push()` reported that the buffer is full
   */
  protected async _asyncTransform(
    chunk: Buffer | object,
    encoding: BufferEncoding | 'buffer'
  ) {
    const text = AsyncTransform.toString(chunk, encoding)

    // Nothing to translate: forward the whitespace as is.
    if (isWhiteSpace(text)) {
      const accepted = this.push(text)
      return !accepted
    }

    log.debug('inp=%j', text)
    const messages = await promptInvoke({ ...this._promptOpts, text })
    const result = await this._model.invoke(messages)
    log.debug('out=%j', result.content)

    // Usage data is optional; missing counts are reported as 0.
    const usage = (result as { usage_metadata?: { input_tokens?: number; output_tokens?: number } })
      .usage_metadata
    const inputTokens = usage?.input_tokens ?? 0
    const outputTokens = usage?.output_tokens ?? 0

    this.emit('metadata', { inputTokens, outputTokens })
    this._metadata.inputTokens += inputTokens
    this._metadata.outputTokens += outputTokens

    const translated = preserveWhiteSpace(text, replaceMarkerSymbol(result.content))
    const accepted = this.push(translated)
    return !accepted
  }
}
package/src/prompt.ts ADDED
@@ -0,0 +1,71 @@
1
+ import { ChatPromptTemplate } from '@langchain/core/prompts'
2
+ import type { ChatPromptValueInterface } from '@langchain/core/prompt_values'
3
+ import type { MessageContent } from '@langchain/core/messages'
4
+ import { logger } from './logger.js'
5
+
6
+ const log = logger('prompt')
7
+
8
/** Supported BCP47 codes mapped to the language name used in the prompt. */
export const languages: Record<string, string> = {
  ar: 'Arabic',
  de: 'German',
  en: 'English',
  es: 'Spanish',
  fr: 'French',
  ja: 'Japanese',
  pt: 'Portuguese',
  ru: 'Russian',
  vi: 'Vietnamese',
  'zh-CN': 'Chinese-simplified',
  'zh-TW': 'Chinese-traditional'
}

/**
 * Resolves a language code to its language name.
 *
 * Codes are matched case-insensitively ("zh-cn" equals "zh-CN"). Only own
 * entries of `languages` are considered, so inputs such as "constructor" are
 * never resolved to inherited object members.
 *
 * @param lang language code or free-form language name
 * @returns the mapped name, or `lang` unchanged when it is not a known code
 */
export const getLanguageName = (lang: string) => {
  const own = (key: string) => Object.prototype.hasOwnProperty.call(languages, key)
  if (own(lang)) {
    return languages[lang] || lang
  }
  // The table is read live so entries added by callers are honoured too.
  const wanted = lang.toLowerCase()
  const code = Object.keys(languages).find((key) => key.toLowerCase() === wanted)
  return (code !== undefined && languages[code]) || lang
}
23
+
24
+ const systemPrompt =
25
+ 'You are an AI-driven advanced translation system, specifically designed to ' +
26
+ 'translate structured and technical documents. ' +
27
+ 'You will receive a text snippet from a file formatted as "{format}"\n\n' +
28
+ 'Your task is to **accurately translate** the text enclosed between the 🔤 ' +
29
+ 'symbols from "{sourceLanguage}" to "{targetLanguage}". ' +
30
+ 'Preserve the original formatting, sentence structure, and terminology. ' +
31
+ 'Ensure that every word and sentence is translated as closely as possible to ' +
32
+ 'the original meaning, without summarizing or omitting any part of the content. ' +
33
+ 'The translation must be faithful, detailed, and maintain the original length ' +
34
+ 'and complexity. ' +
35
+ 'Deliver the translation exactly as required, without any additional ' +
36
+ 'commentary or explanation, and ensure the 🔤 symbols are removed in the final output.\n\n' +
37
+ 'Remember: your job is to **translate** the text exactly as it is, without ' +
38
+ 'adding summaries or changing the content in any way. ' +
39
+ 'Do not skip or modify any part of the text. Ensure that the output is a ' +
40
+ 'direct translation, and that the original structure and meaning are preserved.'
41
+
42
+ const promptTemplate = ChatPromptTemplate.fromMessages([
43
+ ['system', systemPrompt],
44
+ ['user', '🔤{text}🔤']
45
+ ])
46
+
47
/**
 * Fills the translation prompt template.
 *
 * `format` defaults to "markdown" and `text` to the empty string; both
 * languages are run through `getLanguageName` before being inserted.
 *
 * @returns promise of the prompt value produced by the template
 */
export const promptInvoke = (params: {
  format?: string
  sourceLanguage: string
  targetLanguage: string
  text: string
}): Promise<ChatPromptValueInterface> => {
  const { format = 'markdown', sourceLanguage, targetLanguage, text = '' } = params
  const source = getLanguageName(sourceLanguage)
  const target = getLanguageName(targetLanguage)
  return promptTemplate.invoke({
    format,
    sourceLanguage: source,
    targetLanguage: target,
    text
  })
}
64
+
65
/**
 * Removes the 🔤 marker (and the whitespace outside of it) from the start and
 * end of a model response.
 *
 * String content is used directly. For block-style content the `text` of all
 * blocks with `type === 'text'` is concatenated, so such responses are no
 * longer discarded.
 *
 * @param text content of the model response
 * @returns the cleaned text; `''` (after logging the content) when no text
 *   can be extracted
 */
export const replaceMarkerSymbol = (text: MessageContent) => {
  let plain: string | undefined

  if (typeof text === 'string') {
    plain = text
  } else if (Array.isArray(text)) {
    const parts = text.flatMap((part) => {
      const block = part as { type?: unknown; text?: unknown }
      return block.type === 'text' && typeof block.text === 'string'
        ? [block.text]
        : []
    })
    if (parts.length > 0) {
      plain = parts.join('')
    }
  }

  if (plain === undefined) {
    log.error(text)
    return ''
  }
  return plain.replace(/^\s*🔤/, '').replace(/🔤\s*$/, '')
}