@thor123141245r/ai-translate 0.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +161 -71
  2. package/README.zh-CN.md +194 -0
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +77 -0
  5. package/dist/cli.js.map +1 -1
  6. package/dist/i18n/align.d.ts +3 -0
  7. package/dist/i18n/align.d.ts.map +1 -0
  8. package/dist/i18n/align.js +46 -0
  9. package/dist/i18n/align.js.map +1 -0
  10. package/dist/i18n/cache.d.ts +14 -0
  11. package/dist/i18n/cache.d.ts.map +1 -0
  12. package/dist/i18n/cache.js +36 -0
  13. package/dist/i18n/cache.js.map +1 -0
  14. package/dist/i18n/command.d.ts +7 -0
  15. package/dist/i18n/command.d.ts.map +1 -0
  16. package/dist/i18n/command.js +84 -0
  17. package/dist/i18n/command.js.map +1 -0
  18. package/dist/i18n/extract.d.ts +7 -0
  19. package/dist/i18n/extract.d.ts.map +1 -0
  20. package/dist/i18n/extract.js +24 -0
  21. package/dist/i18n/extract.js.map +1 -0
  22. package/dist/i18n/json-path.d.ts +7 -0
  23. package/dist/i18n/json-path.d.ts.map +1 -0
  24. package/dist/i18n/json-path.js +61 -0
  25. package/dist/i18n/json-path.js.map +1 -0
  26. package/dist/i18n/parse.d.ts +6 -0
  27. package/dist/i18n/parse.d.ts.map +1 -0
  28. package/dist/i18n/parse.js +44 -0
  29. package/dist/i18n/parse.js.map +1 -0
  30. package/dist/i18n/placeholders.d.ts +7 -0
  31. package/dist/i18n/placeholders.d.ts.map +1 -0
  32. package/dist/i18n/placeholders.js +44 -0
  33. package/dist/i18n/placeholders.js.map +1 -0
  34. package/dist/i18n/prompt.d.ts +13 -0
  35. package/dist/i18n/prompt.d.ts.map +1 -0
  36. package/dist/i18n/prompt.js +37 -0
  37. package/dist/i18n/prompt.js.map +1 -0
  38. package/dist/i18n/translate.d.ts +29 -0
  39. package/dist/i18n/translate.d.ts.map +1 -0
  40. package/dist/i18n/translate.js +199 -0
  41. package/dist/i18n/translate.js.map +1 -0
  42. package/dist/index.d.ts +1 -0
  43. package/dist/index.d.ts.map +1 -1
  44. package/dist/index.js +1 -0
  45. package/dist/index.js.map +1 -1
  46. package/package.json +6 -1
  47. package/.agentdocs/code-changes/2026-01-22/AI/347/277/273/350/257/221/345/231/250TS/345/256/236/347/216/260-/345/256/236/347/216/260.md +0 -22
  48. package/.agentdocs/code-changes/2026-01-23/CLI/345/210/206/345/217/221-npx/345/256/236/347/216/260.md +0 -18
  49. package/.agentdocs/code-changes/2026-01-23/sora-watermask-remover-/345/233/275/351/231/205/345/214/226/347/277/273/350/257/221-/345/256/236/347/216/260.md +0 -37
  50. package/.agentdocs/code-changes/2026-01-23//351/205/215/347/275/256/350/257/273/345/217/226-/347/216/257/345/242/203/345/217/230/351/207/217/344/274/230/345/205/210-/345/256/236/347/216/260.md +0 -22
  51. package/.agentdocs/plans/2026-01-22/AI/347/277/273/350/257/221/345/231/250TS/345/256/236/347/216/260-/344/274/230/345/214/226/346/226/271/346/241/210.md +0 -67
  52. package/.agentdocs/plans/2026-01-23/CLI/345/210/206/345/217/221-npx/346/226/271/346/241/210.md +0 -60
  53. package/.agentdocs/plans/2026-01-23/sora-watermask-remover-/345/233/275/351/231/205/345/214/226/347/277/273/350/257/221-/344/274/230/345/214/226/346/226/271/346/241/210.md +0 -51
  54. package/.agentdocs/plans/2026-01-23//351/205/215/347/275/256/350/257/273/345/217/226-/347/216/257/345/242/203/345/217/230/351/207/217/344/274/230/345/205/210-/344/274/230/345/214/226/346/226/271/346/241/210.md +0 -80
  55. package/SKILL.md +0 -103
  56. package/src/asyncTransform.ts +0 -31
  57. package/src/bin/ai-translate.ts +0 -5
  58. package/src/cli.ts +0 -313
  59. package/src/index.ts +0 -9
  60. package/src/logger.ts +0 -3
  61. package/src/model.ts +0 -139
  62. package/src/prompt.ts +0 -71
  63. package/src/split.ts +0 -111
  64. package/src/utils.ts +0 -15
  65. package/tsconfig.json +0 -19
package/SKILL.md DELETED
@@ -1,103 +0,0 @@
1
- ---
2
- name: i18n-copywriter
3
- description: 为 i18n JSON(以 en.json 为准)生成与优化高转化率营销文案,并同步到多语言文件(如 de.json、fr.json)。仅在用户明确要求“优化/生成国际化文案、翻译 i18n JSON、为 landing page 写多语言文案”时使用;其余代码与通用翻译请求不要使用。
4
- ---
5
-
6
- # I18n Copywriter
7
-
8
- ## Overview
9
-
10
- 你是一名 landing page 资深国际化文案写手:先基于目标关键词做轻量网络调研,提炼用户需求与痛点,再以 `en.json` 为“单一事实来源”写出高转化英文文案,并将相同 key 结构同步到其他语言 JSON(如 `de.json`),保证占位符与“受保护关键词”不被硬翻译。
11
-
12
- 使用这个 skill 的时候,不需要进入“先写文档再写代码”的流程,直接开始脚本翻译流程。
13
-
14
- ## 使用边界(非常重要)
15
-
16
- - 只处理“国际化文案”本身:`*.json` 文案内容(尤其是 `en.json` 及其同结构的多语言文件)。
17
- - 不做:业务逻辑改动、组件重构、通用逐字翻译、与 i18n 无关的改文案。
18
- - 不确定是否属于 i18n 文案任务时:先反问用户要改的 `en.json` 路径、目标页面与目标语言列表。
19
-
20
- ## 你需要向用户确认的信息(缺一会显著降质量)
21
-
22
- - `en.json` 的真实路径(以及其他语言 JSON 所在目录)。
23
- - 本次要优化的页面或模块(例如 pricing、homepage hero、generator 区域)。
24
- - 目标关键词(可多个):哪些词必须保留为英文不翻译(例如 `sora 2 video`)。
25
- - 目标语言:例如 `de, fr, ja, zh`。
26
- - 语气与品牌约束:更偏“极简理性”还是“强促销转化”。
27
-
28
- ## 工作流(按顺序执行)
29
-
30
- ### 1)锁定范围与“受保护关键词”
31
-
32
- - 从 `en.json` 中提取:品牌名、产品名、核心功能名、套餐名、以及用户指定的 SEO 关键词。
33
- - 建立“受保护关键词清单”:这些词在任何语言文件中都必须保持原样(不硬翻译、不改大小写)。
34
- - 典型例子:`Sora 2`、`Sora2 Cloud`、`sora 2 video`、`HD`、`Starter`、`Pro`、`Checkout`(以项目实际为准)。
35
-
36
- 需要参考时:打开 `references/protected-terms.md`,按模板补齐。
37
-
38
- ### 2)轻量网络调研(为“高转化文案”服务)
39
-
40
- - 用目标关键词做 3~6 次搜索,重点收集:用户常见需求、痛点、反对理由、对比方案(竞品/替代品)。
41
- - 输出为“洞察清单”:每条洞察对应 1 个文案策略(例如降低学习成本、强调速度、强调质量、强调可控性)。
42
- - 目标不是写报告,而是为文案提供“可用弹药”。
43
-
44
- ### 3)先写好 `en.json`(单一事实来源)
45
-
46
- - 只改 value,不改 key,不引入新结构(除非用户明确新增文案点并同意加 key)。
47
- - 保留所有变量与占位符:例如 `{count}`、`{credits}`、`{{name}}`、`%s`(以文件实际为准)。
48
- - 保留受保护关键词:例如不要把 `sora 2 video` 翻译成任何语言。
49
- - Landing page 文案优先:短句、动词开头、清晰利益点、避免“空泛 AI 口号”。
50
-
51
- ### 4)复制 `en.json` 到其他语言文件并做本地化改写
52
-
53
- - 每个语言文件应与 `en.json` 保持完全相同的 key 集合。
54
- - 先用 `en.json` 结构覆盖(缺失 key 补齐),再逐条替换为目标语言文案。
55
- - 禁止“硬翻译品牌/产品/关键词”:受保护关键词保持英文原样。
56
-
57
- ### 5)一致性与质量检查(必做)
58
-
59
- - 运行 `scripts/check_i18n_keys.py` 确保各语言文件 key 集合一致。
60
- - 检查占位符是否被破坏(特别是 `{}`、`{{}}`、`%`、换行符)。
61
- - 检查“受保护关键词”是否被误翻译或改写。
62
- - 统一标点与引号风格(同一语言内一致)。
63
-
64
- ## 常见硬性规则(避免踩坑)
65
-
66
- - 任何语言都不要翻译项目指定关键词。
67
- - 不要把 “产品名 + 功能名” 翻成“产品名 + 本地语言词”这种半截混搭(除非品牌策略要求)。
68
- - 如果英文里刻意使用了 SEO 关键词(例如 `sora 2 video`),其他语言也优先保留该关键词,并围绕它调整句子结构。
69
-
70
- ## 资源
71
-
72
- - `scripts/check_i18n_keys.py`:检查各语言 JSON 是否与 `en.json` key 集合一致。
73
- - `scripts/sentence_json_translator.py`:逐句翻译 JSON 文案并回填结果,适合大段文案与输出可控场景。
74
- - 常用参数:
75
- - `--concurrency`:并发线程数,默认 6,可用 `CONTENT_LOCALIZE_CONCURRENCY` 覆盖。
76
- - `--overwrite`:全量覆盖翻译,默认关闭(仅翻译缺失或等同英文的条目)。
77
- - `--limit`:限制翻译条数,便于小范围验证。
78
- - `--cache-file`:指定缓存文件路径,复用重复句子的翻译结果。
79
- - `--protected-terms`:逗号分隔的受保护词列表,会写入系统提示。
80
- - `--log-every`:进度输出步长,默认 20。
81
- - `references/protected-terms.md`:受保护关键词清单模板与维护规则。
82
-
83
- ## 最小执行示例(给未来的你)
84
-
85
- 用户请求:
86
- - “帮我优化 landing page 的 i18n 文案,`src/i18n/pages/pricing/en.json` 为准,同步到 `de/fr/ja`,关键词 `sora 2 video` 不要翻译。”
87
-
88
- 你应输出:
89
- - 先给出 6~12 条“调研洞察 → 文案策略”的要点。
90
- - 完整更新 `en.json` 文案。
91
- - 复制并更新 `de.json / fr.json / ja.json`(同 key 结构)。
92
- - 跑一次 key 检查脚本并修复缺失 key。
93
-
94
- 脚本示例(小范围验证):
95
- ```bash
96
- python3 /Users/thor/.codex/skills/i18n-copywriter/scripts/sentence_json_translator.py \
97
- src/i18n/pages/landing \
98
- --locales de,fr,ja \
99
- --concurrency 6 \
100
- --limit 20 \
101
- --cache-file .i18n-translation-cache.json \
102
- --protected-terms "Sora 2,Sora2 Cloud,sora 2 video"
103
- ```
@@ -1,31 +0,0 @@
1
- import { Transform } from 'node:stream'
2
-
3
/**
 * Transform stream whose per-chunk work is implemented as an async method.
 *
 * Subclasses override `_asyncTransform`, push their output with `this.push()`
 * and may return `!this.push(...)` as before; the returned flag is accepted
 * for backward compatibility but no longer needed (see `_transform`).
 */
export class AsyncTransform extends Transform {
  /**
   * Convert an incoming chunk to a string.
   *
   * @param chunk - Buffer (decoded with `encoding`) or any other value
   *   (converted with `String()`).
   * @param encoding - Encoding reported by the stream; the pseudo encoding
   *   `'buffer'` means "use the Buffer default".
   */
  static toString(chunk: Buffer | object, encoding: BufferEncoding | 'buffer') {
    const enc = encoding !== 'buffer' ? encoding : undefined
    return Buffer.isBuffer(chunk) ? chunk.toString(enc) : String(chunk)
  }

  /**
   * Bridges the callback-based stream contract to `_asyncTransform`.
   *
   * - On success the callback is always invoked. Transform already applies
   *   readable-side back-pressure itself; waiting for `'drain'` here would
   *   stall, because the writable side can only drain after this callback
   *   has been called.
   * - On failure the error is handed to the callback, which is the documented
   *   way to fail a transform: the stream is destroyed and `'error'` is
   *   emitted, instead of leaving the callback pending forever.
   */
  _transform(
    chunk: Buffer | object,
    enc: BufferEncoding | 'buffer',
    cb: (error?: Error | null) => void
  ) {
    this._asyncTransform(chunk, enc).then(
      () => cb(),
      (err: unknown) => cb(err instanceof Error ? err : new Error(String(err)))
    )
  }

  /**
   * Per-chunk hook to be overridden by subclasses.
   *
   * @returns Optionally `true` when `push()` signalled back-pressure.
   * @throws Always, in this base implementation.
   */
  protected async _asyncTransform(
    _chunk: Buffer | object,
    _enc: BufferEncoding | 'buffer'
  ): Promise<boolean | undefined> {
    throw new Error('needs implementation')
  }
}
@@ -1,5 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- import { cli } from '../cli.js'
4
-
5
- await cli()
package/src/cli.ts DELETED
@@ -1,313 +0,0 @@
1
- import fs from 'node:fs'
2
- import path from 'node:path'
3
- import { pipeline } from 'node:stream/promises'
4
- import { initProcLog } from 'debug-level'
5
- import { AppConfig, StringSchema, v } from '@commenthol/app-config'
6
- import type { SupportedTextSplitterLanguage } from '@langchain/textsplitters'
7
- import { logger } from './logger.js'
8
- import { modelFactory, AiTranslateTransform } from './model.js'
9
- import {
10
- TextSplitterStream,
11
- recursiveChunkTextSplitter,
12
- getFormatByExtension
13
- } from './split.js'
14
-
15
- const log = logger('cli')
16
-
17
- const help: Record<string, string> = {}
18
-
19
- help._ = `
20
- AI Translator
21
-
22
- Usage:
23
- ai-translate [flags]
24
- ai-translate [command] [flags]
25
-
26
- Commands:
27
- set set config value
28
-
29
- Flags:
30
- -h, --help Help for ai-translate
31
- -v, --version Show version information
32
- -c, --config DIR Use config file .ai-translate.json in DIR
33
- -f, --from LANG Source language
34
- -t, --to LANG Target language; LANG is English language name or
35
- supported BCP47 codes (ar, de, en, es, fr, ja, pt, ru,
36
- vi, zh-CN, zh-TW)
37
- -i, --input FILE input file
38
- -o, --output FILE output file
39
- --format FORMAT specify input format (cpp, go, java, js, php, proto,
40
- python, rst, ruby, rust, scala, swift, markdown, latex,
41
- html, sol)
42
- Examples:
43
- Translate input.md from Spanish to output.md in English
44
- ai-translate -f Spanish -t English -i input.md -o output.md
45
-
46
- Pipe from stdin to stdout using the config in the local folder
47
- echo "translate" | ai-translate -f en -t en -c .
48
-
49
- Use "ai-translate [command] --help" for more information about a command.
50
- `
51
-
52
- help.set = `
53
- Set ai-translate configuration
54
-
55
- Writes config to \`.ai-translate.json\`
56
- If --config flag is omitted then global config is used.
57
-
58
- Usage:
59
- ai-translate [flags] set KEY VALUE
60
-
61
- Flags:
62
- -c, --config DIR Use config file .ai-translate.json in DIR
63
-
64
- Available KEYs:
65
- provider set provider (ollama, mistral, anthropic, openai, deepseek);
66
- default="ollama"
67
- model set model from provider; default="qwen2.5:7b"
68
- apiKey set api key
69
- baseUrl baseUrl for model
70
- temperature model temperature; default=0.1
71
- maxRetries max. number of retries; default=10
72
- chunkSize number of chunks used in text-splitter; default=1000
73
- `
74
-
75
- const PACKAGE_JSON = '../package.json'
76
- const APP = 'ai-translate'
77
- const CONF_FILE = `.${APP}.json`
78
- const DEFAULT_LANG = 'en'
79
-
80
- const commands: Record<string, (cmd: Record<string, unknown>, argv: string[]) => void> = {
81
- set: (c, argv) => {
82
- const key = nextArg(argv)
83
- const value = nextArg(argv)
84
- c.set = [key, value]
85
- }
86
- }
87
-
88
- const flags: Record<string, (opts: Record<string, unknown>, argv: string[]) => void> = {
89
- '--help': (f) => (f.help = true),
90
- '--version': (f) => (f.version = true),
91
- '--config': (f, argv) => {
92
- const dir = nextArg(argv)
93
- if (dir) f.config = path.resolve(process.cwd(), dir)
94
- },
95
- '--from': (f, argv) => {
96
- const lang = nextArg(argv)
97
- f.sourceLanguage = lang || DEFAULT_LANG
98
- },
99
- '--to': (f, argv) => {
100
- const lang = nextArg(argv)
101
- f.targetLanguage = lang || DEFAULT_LANG
102
- },
103
- '--input': (f, argv) => {
104
- const filename = nextArg(argv)
105
- if (filename) f.input = path.resolve(process.cwd(), filename)
106
- },
107
- '--output': (f, argv) => {
108
- const filename = nextArg(argv)
109
- if (filename) f.output = path.resolve(process.cwd(), filename)
110
- },
111
- '--format': (f, argv) => {
112
- const format = nextArg(argv)
113
- if (format) f.format = format
114
- }
115
- }
116
- flags['-h'] = flags['--help']
117
- flags['-v'] = flags['--version']
118
- flags['-c'] = flags['--config']
119
- flags['-f'] = flags['--from']
120
- flags['-t'] = flags['--to']
121
- flags['-i'] = flags['--input']
122
- flags['-o'] = flags['--output']
123
-
124
/**
 * Parse command-line arguments into commands and flag options.
 *
 * Tokens that are neither a known command nor a known flag are ignored.
 *
 * @param args - Arguments to parse; defaults to `process.argv.slice(2)`.
 *   The array is not modified.
 * @returns `cmd` (values collected by commands such as `set`) and `opts`
 *   (values collected by flags, with both languages defaulting to
 *   `DEFAULT_LANG`).
 */
export const argvParse = (args?: string[]) => {
  initProcLog()
  // Parsing consumes tokens with shift(); work on a copy so the caller's
  // array is left untouched.
  const argv = [...(args ?? process.argv.slice(2))]
  log.debug(argv)
  const opts: Record<string, unknown> = {
    sourceLanguage: DEFAULT_LANG,
    targetLanguage: DEFAULT_LANG
  }
  const cmd: Record<string, unknown> = {}

  // Only own keys count: inherited names such as `constructor` or `__proto__`
  // must not be treated as commands or flags.
  const isOwn = (table: object, key: string) =>
    Object.prototype.hasOwnProperty.call(table, key)

  while (argv.length) {
    const arg = argv.shift()
    if (!arg) continue
    if (isOwn(commands, arg)) {
      commands[arg](cmd, argv)
    } else if (isOwn(flags, arg)) {
      flags[arg](opts, argv)
    }
  }
  return { cmd, opts }
}
145
-
146
/**
 * Consume the value that follows a flag or command.
 *
 * @param argv - Remaining arguments; the first element is removed when it is
 *   returned.
 * @returns The next argument, or `undefined` when there is none or when the
 *   next token starts with `-` (it is then left in `argv` to be parsed as a
 *   flag).
 */
const nextArg = (argv: string[]): string | undefined => {
  const candidate = argv[0]
  if (candidate?.startsWith('-')) {
    return undefined
  }
  return argv.shift()
}
153
-
154
// Console used for all CLI output; replaceable so callers can capture it.
let _console = console

/** Replace the console object that `display` and `displayError` write to. */
export const _injectConsole = (obj: typeof console) => {
  _console = obj
}

/** Write a message to the console's standard output channel. */
const display = (msg: string) => _console.log(msg)

/** Write a message, prefixed with `ERROR: `, to the console's error channel. */
const displayError = (msg: string) => {
  _console.error(`ERROR: ${msg}`)
}
164
-
165
/**
 * Print the `version` field of the package manifest located at
 * `PACKAGE_JSON`, resolved relative to this module.
 */
const version = () => {
  const manifestUrl = new URL(PACKAGE_JSON, import.meta.url)
  const manifestText = fs.readFileSync(manifestUrl, 'utf-8')
  const manifest = JSON.parse(manifestText) as { version: string }
  display(manifest.version)
}
172
-
173
- const schema = {
174
- provider: StringSchema,
175
- model: StringSchema,
176
- apiKey: StringSchema,
177
- baseUrl: StringSchema,
178
- temperature: v.pipe(
179
- v.string(),
180
- v.transform(Number),
181
- v.minValue(0),
182
- v.maxValue(2)
183
- ),
184
- maxRetries: v.pipe(
185
- v.string(),
186
- v.transform(Number),
187
- v.integer(),
188
- v.minValue(0),
189
- v.maxValue(99)
190
- ),
191
- chunkSize: v.pipe(
192
- v.string(),
193
- v.transform(Number),
194
- v.integer(),
195
- v.minValue(100),
196
- v.maxValue(200e3)
197
- )
198
- }
199
-
200
- const PROVIDER_API_KEY_ENV: Record<string, string> = {
201
- openai: 'OPENAI_API_KEY',
202
- anthropic: 'ANTHROPIC_API_KEY',
203
- mistral: 'MISTRAL_API_KEY',
204
- deepseek: 'DEEPSEEK_API_KEY'
205
- }
206
-
207
- const PROVIDER_BASE_URL_ENV: Record<string, string> = {
208
- openai: 'OPENAI_BASE_URL',
209
- anthropic: 'ANTHROPIC_BASE_URL',
210
- mistral: 'MISTRAL_BASE_URL',
211
- deepseek: 'DEEPSEEK_BASE_URL',
212
- ollama: 'OLLAMA_BASE_URL'
213
- }
214
-
215
/**
 * Read an environment variable and trim surrounding whitespace.
 *
 * @param key - Variable name; may be omitted.
 * @returns The trimmed value, or `undefined` when the name is missing or the
 *   variable is unset, empty or whitespace-only.
 */
const readEnv = (key?: string) => {
  const raw = key ? process.env[key] : undefined
  const cleaned = raw?.trim()
  return cleaned ? cleaned : undefined
}
222
-
223
/** Return the first non-empty string among `values`, or `undefined` if none. */
const pickFirst = (...values: Array<string | undefined>) => {
  for (const candidate of values) {
    if (candidate) return candidate
  }
  return undefined
}
225
-
226
/**
 * Overlay environment-provided credentials on top of the stored config.
 *
 * For both the API key and the base URL, the generic `AI_TRANSLATE_*`
 * variable wins over the provider-specific variable; a value found in the
 * environment replaces the one from `config`.
 *
 * @param config - Stored configuration; `provider` defaults to `'ollama'`.
 * @returns A new object; `config` itself is not modified.
 */
const resolveRuntimeConfig = (config: Record<string, unknown>) => {
  const provider = String(config.provider || 'ollama')
  const fromEnv = (genericVar: string, perProvider: Record<string, string>) =>
    pickFirst(readEnv(genericVar), readEnv(perProvider[provider]))

  const apiKey = fromEnv('AI_TRANSLATE_API_KEY', PROVIDER_API_KEY_ENV)
  const baseUrl = fromEnv('AI_TRANSLATE_BASE_URL', PROVIDER_BASE_URL_ENV)

  return {
    ...config,
    ...(apiKey ? { apiKey } : {}),
    ...(baseUrl ? { baseUrl } : {})
  }
}
243
-
244
/**
 * Command-line entry point.
 *
 * Handles, in order: `--version`, `--help`, the `set` command (writes a
 * config value, or prints the config when no key is given) and finally the
 * translation pipeline `input -> splitter -> translator -> output`.
 *
 * Failures while resolving the config, building the model or running the
 * pipeline are reported through `displayError` instead of being thrown.
 *
 * @param args - Arguments to parse; defaults to the process arguments.
 */
export const cli = async (args?: string[]) => {
  const { cmd, opts } = argvParse(args)
  const command = Object.keys(cmd)[0]
  log.debug({ cmd, opts })

  try {
    // Version and help need no configuration, so answer them before touching
    // the file system.
    if (opts.version) {
      version()
      return
    }
    if (opts.help) {
      const helpText = help[command || ''] || help._
      display(helpText)
      return
    }

    // lstatSync throws when the --config path does not exist; resolving the
    // file name inside the try block turns that into a reported error rather
    // than an unhandled rejection.
    const filename = !opts.config
      ? CONF_FILE
      : fs.lstatSync(String(opts.config)).isDirectory()
        ? path.resolve(String(opts.config), CONF_FILE)
        : String(opts.config)

    const appConf = new AppConfig({ appName: APP, schema, filename })
    // A missing or unreadable config file is not fatal: continue with defaults.
    await appConf.read().catch(() => null)

    if (cmd.set) {
      const [key, value] = cmd.set as [string | undefined, string | undefined]
      if (key) {
        appConf.set(key, value)
        await appConf.write()
      } else {
        display(JSON.stringify(appConf.config, null, 2))
      }
      return
    }

    const sourceLanguage = String(opts.sourceLanguage || DEFAULT_LANG)
    const targetLanguage = String(opts.targetLanguage || DEFAULT_LANG)

    const reader = opts.input ? fs.createReadStream(String(opts.input)) : process.stdin
    const writer = opts.output ? fs.createWriteStream(String(opts.output)) : process.stdout

    const runtimeConfig = resolveRuntimeConfig(
      appConf.config as Record<string, unknown>
    )
    const model = modelFactory(runtimeConfig)
    const lcNamespace =
      (model as unknown as { lc_namespace?: Record<string, unknown> }).lc_namespace || {}
    const lcKwargs = (model as unknown as { lc_kwargs?: Record<string, unknown> }).lc_kwargs || {}
    // Strip the API key so it never reaches the debug log.
    const { apiKey: _apiKey, ...modelParams } = { ...lcNamespace, ...lcKwargs } as Record<string, unknown>
    log.debug(modelParams)

    // Without --format, derive the format from the input file extension;
    // input from stdin is treated like a `.md` file.
    const format =
      (opts.format as SupportedTextSplitterLanguage | undefined) ||
      getFormatByExtension(path.extname(String(opts.input || '_.md')))
    const chunkSize = (runtimeConfig as { chunkSize?: number }).chunkSize || 1000
    const textSplitter = recursiveChunkTextSplitter({ chunkSize, format })
    const splitter = new TextSplitterStream({ textSplitter })
    const translator = new AiTranslateTransform({
      ...(opts as Record<string, unknown>),
      sourceLanguage,
      targetLanguage,
      model
    })

    await pipeline(reader, splitter, translator, writer)
  } catch (err) {
    log.debug(err)
    displayError(err instanceof Error ? err.message : String(err))
  }
}
package/src/index.ts DELETED
@@ -1,9 +0,0 @@
1
- export { AiTranslateTransform, modelFactory } from './model.js'
2
- export { promptInvoke, languages } from './prompt.js'
3
- export {
4
- TextSplitterStream,
5
- recursiveChunkTextSplitter,
6
- getFormatByExtension
7
- } from './split.js'
8
- export type { Metadata, ModelFactoryOptions } from './model.js'
9
- export type { TextSplitterParams, Separator } from './split.js'
package/src/logger.ts DELETED
@@ -1,3 +0,0 @@
1
- import { ProcLog } from 'debug-level'
2
-
3
/**
 * Create a `ProcLog` logger whose namespace is prefixed with `ai-translate:`.
 *
 * @param namespace - Suffix identifying the calling module, e.g. `cli`.
 */
export const logger = (namespace: string) => {
  const scopedName = `ai-translate:${namespace}`
  return new ProcLog(scopedName)
}
package/src/model.ts DELETED
@@ -1,139 +0,0 @@
1
- import { ChatOllama } from '@langchain/ollama'
2
- import { ChatMistralAI } from '@langchain/mistralai'
3
- import { ChatAnthropic } from '@langchain/anthropic'
4
- import { ChatOpenAI } from '@langchain/openai'
5
- import { ChatDeepSeek } from '@langchain/deepseek'
6
- import type { BaseChatModel } from '@langchain/core/language_models/chat_models'
7
- import { AsyncTransform } from './asyncTransform.js'
8
- import { promptInvoke, replaceMarkerSymbol } from './prompt.js'
9
- import { logger } from './logger.js'
10
- import { isWhiteSpace, preserveWhiteSpace } from './utils.js'
11
-
12
- const log = logger('model')
13
-
14
- const DEFAULT = {
15
- temperature: 0.1,
16
- maxRetries: 10,
17
- maxConcurrency: 1
18
- }
19
-
20
- export type Metadata = {
21
- inputTokens: number
22
- outputTokens: number
23
- }
24
-
25
- export type ModelFactoryOptions = {
26
- provider?: 'ollama' | 'mistral' | 'anthropic' | 'openai' | 'deepseek'
27
- [key: string]: unknown
28
- }
29
-
30
/**
 * Build the chat model for the configured provider.
 *
 * `apiKey` and `baseUrl` are trimmed and only applied when non-empty; every
 * other option is passed through to the provider constructor and overrides
 * the defaults (`DEFAULT` plus the per-provider default model).
 *
 * NOTE(review): `baseUrl` is only forwarded for ollama, openai and deepseek;
 * for mistral and anthropic it is dropped — confirm whether that is intended.
 *
 * @param modelOpts - Provider selection and model options; the provider
 *   defaults to `'ollama'`.
 * @throws Error when `provider` is not one of the supported values.
 */
export const modelFactory = (modelOpts?: ModelFactoryOptions): BaseChatModel => {
  const {
    provider = 'ollama',
    apiKey: rawApiKey,
    baseUrl: rawBaseUrl,
    ...passthrough
  } = modelOpts || {}

  // Non-string and blank values count as "not provided".
  const nonBlank = (value: unknown) => {
    const trimmed = typeof value === 'string' ? value.trim() : ''
    return trimmed ? trimmed : undefined
  }
  const apiKey = nonBlank(rawApiKey)
  const baseUrl = nonBlank(rawBaseUrl)

  const apiKeyOption = apiKey ? { apiKey } : {}
  const clientBaseUrlOption = baseUrl ? { configuration: { baseURL: baseUrl } } : {}

  switch (provider) {
    case 'ollama':
      return new ChatOllama({
        ...DEFAULT,
        model: 'qwen2.5:7b',
        ...(baseUrl ? { baseUrl } : {}),
        ...passthrough
      })
    case 'mistral':
      return new ChatMistralAI({
        ...DEFAULT,
        model: 'ministral-8b',
        ...apiKeyOption,
        ...passthrough
      })
    case 'anthropic':
      return new ChatAnthropic({
        ...DEFAULT,
        model: 'claude-3-5-haiku-20241022',
        ...apiKeyOption,
        ...passthrough
      })
    case 'openai':
      return new ChatOpenAI({
        ...DEFAULT,
        model: 'gpt-4o-mini',
        ...apiKeyOption,
        ...clientBaseUrlOption,
        ...passthrough
      })
    case 'deepseek':
      return new ChatDeepSeek({
        ...DEFAULT,
        model: 'deepseek-reasoner',
        ...apiKeyOption,
        ...clientBaseUrlOption,
        ...passthrough
      })
    default:
      throw new Error(`unsupported provider=${provider}`)
  }
}
86
-
87
/**
 * Stream that translates each incoming chunk with a chat model.
 *
 * Whitespace-only chunks are passed through untouched. For every translated
 * chunk a `'metadata'` event with that chunk's token counts is emitted, and
 * the counts are added to the running totals returned by `getMetadata()`.
 */
export class AiTranslateTransform extends AsyncTransform {
  private readonly _model: BaseChatModel
  private readonly _promptOpts: {
    format?: string
    sourceLanguage: string
    targetLanguage: string
  }
  private _metadata: Metadata = { inputTokens: 0, outputTokens: 0 }

  /**
   * @param options - `model` and the prompt settings (`format`,
   *   `sourceLanguage`, `targetLanguage`); all remaining keys are handed to
   *   the underlying stream constructor.
   */
  constructor(options: {
    model: BaseChatModel
    format?: string
    sourceLanguage: string
    targetLanguage: string
    [key: string]: unknown
  }) {
    const { model, format, sourceLanguage, targetLanguage, ...streamOptions } = options
    super(streamOptions)
    this._promptOpts = { format, sourceLanguage, targetLanguage }
    this._model = model
  }

  /** Return a copy of the accumulated token counts. */
  getMetadata() {
    return { ...this._metadata }
  }

  /**
   * Translate one chunk and push the result downstream.
   *
   * @returns `true` when `push()` reported back-pressure.
   */
  protected async _asyncTransform(
    chunk: Buffer | object,
    encoding: BufferEncoding | 'buffer'
  ) {
    const source = AsyncTransform.toString(chunk, encoding)
    // Nothing to translate: forward whitespace as-is without calling the model.
    if (isWhiteSpace(source)) {
      return !this.push(source)
    }

    log.debug('inp=%j', source)
    const prompt = await promptInvoke({ ...this._promptOpts, text: source })
    const reply = await this._model.invoke(prompt)
    log.debug('out=%j', reply.content)

    // Token usage is optional on the reply; missing counts are treated as 0.
    const { usage_metadata: usage } = reply as {
      usage_metadata?: { input_tokens?: number; output_tokens?: number }
    }
    const inputTokens = usage?.input_tokens ?? 0
    const outputTokens = usage?.output_tokens ?? 0

    this.emit('metadata', { inputTokens, outputTokens })
    this._metadata.inputTokens += inputTokens
    this._metadata.outputTokens += outputTokens

    const translated = replaceMarkerSymbol(reply.content)
    return !this.push(preserveWhiteSpace(source, translated))
  }
}
package/src/prompt.ts DELETED
@@ -1,71 +0,0 @@
1
- import { ChatPromptTemplate } from '@langchain/core/prompts'
2
- import type { ChatPromptValueInterface } from '@langchain/core/prompt_values'
3
- import type { MessageContent } from '@langchain/core/messages'
4
- import { logger } from './logger.js'
5
-
6
- const log = logger('prompt')
7
-
8
/** Supported language codes mapped to the English names used in the prompt. */
export const languages: Record<string, string> = {
  ar: 'Arabic',
  de: 'German',
  en: 'English',
  es: 'Spanish',
  fr: 'French',
  ja: 'Japanese',
  pt: 'Portuguese',
  ru: 'Russian',
  vi: 'Vietnamese',
  'zh-CN': 'Chinese-simplified',
  'zh-TW': 'Chinese-traditional'
}

/**
 * Resolve a language code to its English name.
 *
 * Codes are matched case-insensitively (`zh-cn` and `ZH-CN` both resolve),
 * and only the table's own keys are considered, so names inherited from
 * `Object.prototype` such as `constructor` are never matched.
 *
 * @param lang - Language code or free-form language name.
 * @returns The mapped name, or `lang` unchanged when it is not a known code.
 */
export const getLanguageName = (lang: string) => {
  const wanted = lang.toLowerCase()
  const code = Object.keys(languages).find((key) => key.toLowerCase() === wanted)
  return (code !== undefined && languages[code]) || lang
}
23
-
24
- const systemPrompt =
25
- 'You are an AI-driven advanced translation system, specifically designed to ' +
26
- 'translate structured and technical documents. ' +
27
- 'You will receive a text snippet from a file formatted as "{format}"\n\n' +
28
- 'Your task is to **accurately translate** the text enclosed between the 🔤 ' +
29
- 'symbols from "{sourceLanguage}" to "{targetLanguage}". ' +
30
- 'Preserve the original formatting, sentence structure, and terminology. ' +
31
- 'Ensure that every word and sentence is translated as closely as possible to ' +
32
- 'the original meaning, without summarizing or omitting any part of the content. ' +
33
- 'The translation must be faithful, detailed, and maintain the original length ' +
34
- 'and complexity. ' +
35
- 'Deliver the translation exactly as required, without any additional ' +
36
- 'commentary or explanation, and ensure the 🔤 symbols are removed in the final output.\n\n' +
37
- 'Remember: your job is to **translate** the text exactly as it is, without ' +
38
- 'adding summaries or changing the content in any way. ' +
39
- 'Do not skip or modify any part of the text. Ensure that the output is a ' +
40
- 'direct translation, and that the original structure and meaning are preserved.'
41
-
42
- const promptTemplate = ChatPromptTemplate.fromMessages([
43
- ['system', systemPrompt],
44
- ['user', '🔤{text}🔤']
45
- ])
46
-
47
/**
 * Render the translation prompt for one text snippet.
 *
 * Language arguments are passed through `getLanguageName` before being
 * inserted into the template.
 *
 * @param format - Format of the file the snippet comes from; defaults to
 *   `'markdown'`.
 * @param sourceLanguage - Language of `text`.
 * @param targetLanguage - Language to translate into.
 * @param text - Snippet to translate; defaults to the empty string.
 * @returns The prompt value produced by the chat prompt template.
 */
export const promptInvoke = ({
  format = 'markdown',
  sourceLanguage,
  targetLanguage,
  text = ''
}: {
  format?: string
  sourceLanguage: string
  targetLanguage: string
  text: string
}): Promise<ChatPromptValueInterface> => {
  const variables = {
    format,
    sourceLanguage: getLanguageName(sourceLanguage),
    targetLanguage: getLanguageName(targetLanguage),
    text
  }
  return promptTemplate.invoke(variables)
}
64
-
65
/**
 * Strip the leading and trailing 🔤 markers (and the whitespace outside them)
 * from a model reply.
 *
 * String content is handled directly. Array content is reduced to the
 * concatenation of its `text` parts, so replies delivered as content parts
 * are not discarded. Content without any text is logged and yields `''`.
 *
 * @param text - Content of the model reply.
 * @returns The reply text without the marker symbols.
 */
export const replaceMarkerSymbol = (text: MessageContent) => {
  let plain: string | undefined
  if (typeof text === 'string') {
    plain = text
  } else if (Array.isArray(text)) {
    // Keep only parts that carry text; other part kinds cannot be translated output.
    const pieces = text.flatMap((part) => {
      const candidate = part as { type?: unknown; text?: unknown }
      return candidate.type === 'text' && typeof candidate.text === 'string'
        ? [candidate.text]
        : []
    })
    if (pieces.length > 0) {
      plain = pieces.join('')
    }
  }

  if (plain === undefined) {
    log.error(text)
    return ''
  }
  return plain.replace(/^\s*🔤/, '').replace(/🔤\s*$/, '')
}