cctrans 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.hi.md +94 -36
- package/README.ja.md +94 -36
- package/README.ko.md +94 -36
- package/README.md +95 -37
- package/README.ru.md +95 -37
- package/README.zh-Hans.md +94 -36
- package/README.zh-Hant.md +94 -36
- package/ROADMAP.md +4 -1
- package/bin/cctrans.js +56 -13
- package/hook/message-display.js +76 -23
- package/hook/user-prompt-submit.js +8 -4
- package/package.json +2 -2
- package/src/config.js +23 -2
- package/src/interleave.js +160 -7
- package/src/langs.js +5 -5
- package/src/setup.js +40 -13
package/src/interleave.js
CHANGED
|
@@ -16,6 +16,22 @@ function looksLikeCodeish(s) {
|
|
|
16
16
|
return false;
|
|
17
17
|
}
|
|
18
18
|
|
|
19
|
+
// Leading BLOCK markdown (heading / list marker / blockquote) must be split
|
|
20
|
+
// off before translation: fed to an MT backend it gets kept or mangled, and
|
|
21
|
+
// re-rendered mid-line after the ↳ marker it shows up as literal "##" / "-" /
|
|
22
|
+
// ">" (the translated line no longer starts with the prefix, so the renderer
|
|
23
|
+
// treats it as text). Translate the content only; re-apply structure when
|
|
24
|
+
// building the translated line.
|
|
25
|
+
// Split a leading markdown block marker off a line so that only the text
// after it is sent for translation.
//   line — one line of text (no embedded newline expected)
// Returns { prefix, content, block }:
//   prefix  — the matched marker including its surrounding whitespace, or ''
//   content — the rest of the line (the whole line when nothing matched)
//   block   — 'heading' | 'list' | 'quote' | null
// The matchers are tried in order and the first hit wins, so a line such as
// "# - x" is a heading, not a list item.
function splitBlockPrefix(line) {
  const matchers = [
    // up to 3 leading whitespace chars, 1-6 '#', then at least one whitespace
    ['heading', /^(\s{0,3}#{1,6}\s+)(.*)$/],
    // bullet (- * +) or 1-3 digits followed by '.' or ')', then whitespace
    ['list', /^(\s*(?:[-*+]|\d{1,3}[.)])\s+)(.*)$/],
    // one or more '>' markers, possibly nested ("> > text")
    ['quote', /^(\s*(?:>\s*)+)(.*)$/],
  ];
  for (const [block, pattern] of matchers) {
    const hit = line.match(pattern);
    if (hit) return { prefix: hit[1], content: hit[2], block };
  }
  return { prefix: '', content: line, block: null };
}
|
|
34
|
+
|
|
19
35
|
// A code fence (```), and therefore "are we inside a code block?", can span
|
|
20
36
|
// multiple MessageDisplay deltas. The caller threads the ending fence state of
|
|
21
37
|
// one delta into the next (keyed by message_id), so classify takes an initial
|
|
@@ -30,17 +46,34 @@ function classify(lines, inFenceInit, target) {
|
|
|
30
46
|
if (line.trim() === '') { plan.push({ line, kind: 'blank' }); continue; }
|
|
31
47
|
if (isProbablyTarget(line, target)) { plan.push({ line, kind: 'target' }); continue; }
|
|
32
48
|
if (looksLikeCodeish(line)) { plan.push({ line, kind: 'code' }); continue; }
|
|
33
|
-
|
|
49
|
+
const { prefix, content, block } = splitBlockPrefix(line);
|
|
50
|
+
if (looksLikeCodeish(content)) { plan.push({ line, kind: 'code' }); continue; } // e.g. "- /path/to/file"
|
|
51
|
+
plan.push({ line, kind: 'prose', prefix, content, block });
|
|
34
52
|
}
|
|
35
53
|
return { plan, inFence };
|
|
36
54
|
}
|
|
37
55
|
|
|
38
|
-
//
|
|
56
|
+
// The translated line mirrors the English line's block structure:
|
|
57
|
+
// heading "## T" -> "## ↳ 译" (same heading style)
|
|
58
|
+
// quote "> T" -> "> ↳ 译" (stays inside the quote)
|
|
59
|
+
// list "- T" -> " ↳ 译" (same-width indent — a re-applied "- " would
|
|
60
|
+
// render a second bullet)
|
|
61
|
+
// plain "T" -> "↳ 译"
|
|
62
|
+
// demoteStructure drops the heading/quote prefix (plain "↳ 译"): in a grouped
|
|
63
|
+
// section block displaced from its English line, a re-applied "## "/"> " would
|
|
64
|
+
// render a REAL heading / fresh blockquote detached from what it translates.
|
|
65
|
+
// Build the translated line that goes with one classified prose entry.
//   p               — entry carrying { prefix, block } as produced by
//                     splitBlockPrefix
//   zh              — translated text for the entry's content
//   marker          — string placed in front of the translation (e.g. "↳ ")
//   demoteStructure — when truthy, heading/quote prefixes are dropped and the
//                     result is plain marker + zh
// Returns the translated line as a string.
function zhLineFor(p, zh, marker, demoteStructure) {
  if (p.block === 'list') {
    // Re-applying "- " would render a second bullet, so the marker characters
    // are blanked out instead. Only non-whitespace is replaced: the prefix's
    // own whitespace (including tabs) is kept, so the indent has the same
    // width as the English line's prefix. Counting a tab as one space would
    // make it narrower.
    return p.prefix.replace(/\S/g, ' ') + marker + zh;
  }
  const isStructural = p.block === 'heading' || p.block === 'quote';
  if (demoteStructure && isStructural) return marker + zh;
  return p.prefix + marker + zh;
}
|
|
70
|
+
|
|
71
|
+
// How to place the Chinese line under the English line (line mode).
|
|
39
72
|
// hardBreak=true uses a CommonMark hard line break (two trailing spaces) so the
|
|
40
73
|
// two lines stay separate even if displayContent is markdown-rendered.
|
|
41
|
-
function pair(
|
|
74
|
+
// Line mode: place the translated line directly under its English line.
//   p         — classified prose entry; p.line is the untouched English line
//   zh        — translated text for the entry's content
//   marker    — string placed in front of the translation (e.g. "↳ ")
//   hardBreak — when truthy, end the English line with a CommonMark hard line
//               break so the two lines stay separate under markdown rendering
// Returns the English line, the break, and the translated line as one string.
function pair(p, zh, marker, hardBreak) {
  // A CommonMark hard line break needs TWO (or more) trailing spaces before
  // the newline; a single trailing space is just a soft break.
  const br = hardBreak ? '  \n' : '\n';
  // Line mode never demotes structure: the translation sits right under the
  // line it belongs to.
  return p.line + br + zhLineFor(p, zh, marker, false);
}
|
|
45
78
|
|
|
46
79
|
// Returns { displayContent, inFence }:
|
|
@@ -64,7 +97,7 @@ async function buildDisplayContent(rawDelta, opts) {
|
|
|
64
97
|
const proseIdx = [];
|
|
65
98
|
const proseLines = [];
|
|
66
99
|
for (let i = 0; i < plan.length; i++) {
|
|
67
|
-
if (plan[i].kind === 'prose') { proseIdx.push(i); proseLines.push(plan[i].
|
|
100
|
+
if (plan[i].kind === 'prose') { proseIdx.push(i); proseLines.push(plan[i].content); }
|
|
68
101
|
}
|
|
69
102
|
if (proseLines.length === 0) return { displayContent: null, inFence }; // nothing to translate
|
|
70
103
|
|
|
@@ -79,7 +112,7 @@ async function buildDisplayContent(rawDelta, opts) {
|
|
|
79
112
|
const p = plan[i];
|
|
80
113
|
if (p.kind === 'prose') {
|
|
81
114
|
const t = zhFor[i];
|
|
82
|
-
if (t && t.trim() && t.trim() !== p.
|
|
115
|
+
if (t && t.trim() && t.trim() !== p.content.trim()) out.push(pair(p, t, marker, hardBreak));
|
|
83
116
|
else out.push(p.line);
|
|
84
117
|
} else {
|
|
85
118
|
out.push(p.line);
|
|
@@ -90,4 +123,124 @@ async function buildDisplayContent(rawDelta, opts) {
|
|
|
90
123
|
return { displayContent: dc, inFence };
|
|
91
124
|
}
|
|
92
125
|
|
|
93
|
-
|
|
126
|
+
// ---------------------------------------------------------------------------
|
|
127
|
+
// Section mode: English streams untouched; a section's translation is spliced
|
|
128
|
+
// in as one grouped "↳" block when the section closes.
|
|
129
|
+
//
|
|
130
|
+
// A section = a maximal run of consecutive prose lines. Boundaries are
|
|
131
|
+
// properties of the TEXT, never of delta chunking (deltas batch arbitrarily —
|
|
132
|
+
// the same reply can arrive as 3 deltas in one run and 5 in another), which is
|
|
133
|
+
// what makes a full repaint (replay from index 0) reproduce identical output:
|
|
134
|
+
// - a real blank line (the last split element '' merely encodes the delta's
|
|
135
|
+
// trailing "\n" — a continuing block, not a blank);
|
|
136
|
+
// - any code/fence/target-language line;
|
|
137
|
+
// - a heading, which closes the run before it AND itself: a displaced
|
|
138
|
+
// "## ↳ 译" would render as a real heading below the block it titles;
|
|
139
|
+
// - the soft buffer cap (deferred past list items so a forced splice never
|
|
140
|
+
// lands mid-list, with a hard ceiling as backstop);
|
|
141
|
+
// - final:true.
|
|
142
|
+
|
|
143
|
+
const SECTION_CAP = 6000; // soft cap on buffered EN chars before a forced flush
|
|
144
|
+
const SECTION_HARD_CAP = 9000; // flush even mid-list past this
|
|
145
|
+
|
|
146
|
+
// Pure, synchronous segmentation — no I/O, no await, so the hook can persist
|
|
147
|
+
// the resulting buffer BEFORE translation starts (at-most-once flush: a crash
|
|
148
|
+
// or timeout after the save can only drop a section's translation, never
|
|
149
|
+
// replay it at a wrong position).
|
|
150
|
+
// opts: {inFence, buf (pending entries from prior deltas), target, final}
|
|
151
|
+
// Returns:
|
|
152
|
+
// out — the delta's own lines, verbatim (splice skeleton)
|
|
153
|
+
// flushes — [{pos, entries}]: closed sections and where in `out` their ZH
|
|
154
|
+
// block goes (right after the section's last English line)
|
|
155
|
+
// buf — entries still pending (the open section), to persist
|
|
156
|
+
// inFence — fence state at end of delta, to persist
|
|
157
|
+
// Segment one delta into sections without doing any I/O.
//   rawDelta — the delta text; coerced with String() and split on "\n"
//   opts     — { inFence, buf, target, final }
//                inFence: fence state carried in, handed to classify
//                buf:     pending entries left open by earlier deltas
//                target:  target language code (defaults to 'zh-Hans')
//                final:   when truthy, whatever is still open is closed
// Returns { out, flushes, buf, inFence }:
//   out     — every line of this delta, unchanged
//   flushes — [{ pos, entries }], one per closed section; pos is the length of
//             `out` at the moment the section closed
//   buf     — entries of the section that is still open
//   inFence — fence state reported by classify for the end of the delta
// NOTE(review): when `final` is set and the delta ends with "\n", the closing
// flush runs after the trailing '' element has been pushed, so its pos points
// past that element — confirm this is the placement the caller wants.
function planSections(rawDelta, opts) {
  const options = opts || {};
  const deltaLines = String(rawDelta).split('\n');
  const { plan, inFence } = classify(deltaLines, options.inFence, options.target || 'zh-Hans');

  const out = [];
  const flushes = [];
  let open = (options.buf || []).slice();
  let openChars = 0;
  for (const entry of open) openChars += entry.content.length;

  // Close the currently open section (if any) at the current end of `out`.
  function closeSection() {
    if (open.length === 0) return;
    flushes.push({ pos: out.length, entries: open });
    open = [];
    openChars = 0;
  }

  const lastIndex = plan.length - 1;
  for (const [i, p] of plan.entries()) {
    if (p.kind === 'prose') {
      const isHeading = p.block === 'heading';
      // A heading ends whatever run precedes it...
      if (isHeading) closeSection();
      out.push(p.line);
      open.push({ line: p.line, prefix: p.prefix, content: p.content, block: p.block });
      openChars += p.content.length;
      // ...and is a section of its own. Otherwise close only once the soft cap
      // is exceeded, holding off on list items until the hard cap is exceeded.
      const overSoftCap = openChars > SECTION_CAP;
      const mayCutHere = p.block !== 'list' || openChars > SECTION_HARD_CAP;
      if (isHeading || (overSoftCap && mayCutHere)) closeSection();
    } else {
      // The '' produced by a trailing "\n" is not a real blank line and must
      // not close the section; every other non-prose line does.
      const isTerminalArtifact = p.kind === 'blank' && i === lastIndex && p.line === '';
      if (!isTerminalArtifact) closeSection();
      out.push(p.line);
    }
  }

  if (options.final) closeSection();
  return { out, flushes, buf: open, inFence };
}
|
|
191
|
+
|
|
192
|
+
// Translate all flushed sections (one batch call) and splice each grouped ZH
|
|
193
|
+
// block into the out-skeleton. Returns the displayContent string, or null for
|
|
194
|
+
// "leave this delta as the original English" (nothing survived translation, or
|
|
195
|
+
// over the cap). Translation is per-LINE (prefix-stripped), so the sha1 cache
|
|
196
|
+
// is shared with line mode and the backends' line contracts hold unchanged.
|
|
197
|
+
// Translate every flushed section in one batch call and splice each section's
// translated block into a copy of planned.out.
//   planned — result of planSections; reads planned.flushes ([{pos, entries}])
//             and planned.out (array of lines)
//   opts    — { marker, cap, target, backend, model, timeoutMs }
//               marker: prefix for translated lines (default "↳ ")
//               cap:    maximum length of the returned string (default 9000)
//               target/backend/model/timeoutMs: forwarded to translateLines
// Returns the joined display string, or null meaning "leave this delta as the
// original text": nothing was flushed, no translation differed from its
// source, or the text is still over the cap after every translated block has
// been shed.
async function renderSections(planned, opts) {
  opts = opts || {};
  const marker = opts.marker || '↳ ';
  const cap = opts.cap || 9000;
  if (!planned.flushes.length) return null;

  // One flat list of prefix-stripped lines, in flush order then entry order;
  // the results are consumed below in the same order.
  const contents = [];
  for (const f of planned.flushes) for (const e of f.entries) contents.push(e.content);
  const zh = await translateLines(contents, {
    target: opts.target, backend: opts.backend, model: opts.model, timeoutMs: opts.timeoutMs,
  });

  let k = 0;
  const blocks = [];
  for (const f of planned.flushes) {
    // Single-line sections keep their structure prefix; so does a run made up
    // entirely of quote lines. Only mixed multi-line blocks demote prefixes.
    const grouped = f.entries.length > 1;
    const allQuote = grouped && f.entries.every((e) => e.block === 'quote');
    const blockLines = [];
    for (const e of f.entries) {
      const t = zh[k++];
      // Skip empty results and "translations" identical to the source text.
      if (t && t.trim() && t.trim() !== e.content.trim()) blockLines.push(zhLineFor(e, t, marker, grouped && !allQuote));
    }
    if (blockLines.length) blocks.push({ pos: f.pos, lines: blockLines });
  }
  if (!blocks.length) return null;

  // Insert from the last block to the first so an insertion never shifts the
  // position of a block that is still to be inserted.
  const splice = () => {
    const merged = planned.out.slice();
    for (let i = blocks.length - 1; i >= 0; i--) merged.splice(blocks[i].pos, 0, ...blocks[i].lines);
    return merged.join('\n');
  };

  let dc = splice();
  // Over the cap: shed whole translated blocks, largest first, rather than
  // dropping the delta's entire translation.
  while (dc.length > cap) {
    let big = 0;
    for (let i = 1; i < blocks.length; i++) {
      if (blocks[i].lines.join('\n').length > blocks[big].lines.join('\n').length) big = i;
    }
    blocks.splice(big, 1);
    // Nothing left to shed: return null here. Re-testing the loop condition
    // with dc set to null would read .length of null and throw.
    if (!blocks.length) return null;
    dc = splice();
  }
  return dc;
}
|
|
245
|
+
|
|
246
|
+
module.exports = { buildDisplayContent, classify, looksLikeCodeish, planSections, renderSections };
|
package/src/langs.js
CHANGED
|
@@ -52,10 +52,10 @@ const LANGS = {
|
|
|
52
52
|
// supported non-English languages?" Used by the input-translation hook.
|
|
53
53
|
const NON_LATIN = /[一-鿿㐀-䶿-ゟ゠-ヿ가-ᄀ-ᇿЀ-ӿऀ-ॿ]/g;
|
|
54
54
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
return
|
|
55
|
+
// Absolute count, not a ratio: coding prompts are full of Latin paths and
|
|
56
|
+
// identifiers that would dilute any ratio below a usable threshold.
|
|
57
|
+
// Count the characters in `text` matched by the NON_LATIN pattern.
//   text — string to scan
// Returns the number of matches as an integer; 0 when there are none.
function nonLatinCount(text) {
  // match() with a global pattern yields every hit, or null when none exist.
  const hits = text.match(NON_LATIN);
  if (!hits) return 0;
  return hits.length;
}
|
|
60
60
|
|
|
61
61
|
// Region-code (and bare-zh) aliases -> canonical script codes.
|
|
@@ -91,4 +91,4 @@ function isProbablyTarget(line, code) {
|
|
|
91
91
|
return nonspace > 0 && hits / nonspace >= 0.3;
|
|
92
92
|
}
|
|
93
93
|
|
|
94
|
-
module.exports = { LANGS, getLang, listLangs, isProbablyTarget, normalizeLang,
|
|
94
|
+
module.exports = { LANGS, getLang, listLangs, isProbablyTarget, normalizeLang, nonLatinCount };
|
package/src/setup.js
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
'use strict';
|
|
2
|
-
// Interactive setup wizard: language -> backend -> API-key
|
|
3
|
-
// verification -> save. Re-runnable
|
|
4
|
-
// flags (--lang, --
|
|
5
|
-
// shell environment is
|
|
2
|
+
// Interactive setup wizard: language -> display mode -> backend -> API-key
|
|
3
|
+
// entry -> input translation (beta) -> live verification -> save. Re-runnable
|
|
4
|
+
// via `cctrans setup`; non-interactive with flags (--lang, --mode, --backend,
|
|
5
|
+
// --key, --input, --yes). Keys go to keys.json only — the shell environment is
|
|
6
|
+
// never read.
|
|
6
7
|
|
|
7
8
|
const readline = require('node:readline/promises');
|
|
8
|
-
const { getState, setState } = require('./config');
|
|
9
|
+
const { getState, setState, MODES } = require('./config');
|
|
9
10
|
const { listLangs, getLang, normalizeLang } = require('./langs');
|
|
10
11
|
const { listBackends, getBackend } = require('./backends');
|
|
11
12
|
const keys = require('./keys');
|
|
@@ -38,7 +39,7 @@ async function runSetup(opts) {
|
|
|
38
39
|
let lang = opts.lang;
|
|
39
40
|
if (!lang) {
|
|
40
41
|
const codes = listLangs();
|
|
41
|
-
console.log('\n' + C.bold('Target language') + ' —
|
|
42
|
+
console.log('\n' + C.bold('Target language') + ' — replies show English + your language inline:');
|
|
42
43
|
codes.forEach((c, i) => console.log(' ' + (i + 1) + '. ' + c.padEnd(8) + C.dim(getLang(c).name)));
|
|
43
44
|
const cur = getState().target;
|
|
44
45
|
const a = await ask('Pick a number or code', cur);
|
|
@@ -47,7 +48,18 @@ async function runSetup(opts) {
|
|
|
47
48
|
if (!getLang(lang)) { console.error(C.red('unsupported language: ' + lang)); return false; }
|
|
48
49
|
lang = normalizeLang(lang);
|
|
49
50
|
|
|
50
|
-
// 2.
|
|
51
|
+
// 2. Display mode
|
|
52
|
+
let mode = opts.mode;
|
|
53
|
+
if (!mode) {
|
|
54
|
+
console.log('\n' + C.bold('Display mode') + ':');
|
|
55
|
+
console.log(' 1. line ' + C.dim('translation under each English line, as it streams'));
|
|
56
|
+
console.log(' 2. section ' + C.dim('English block first, then its translation — appears when the block completes'));
|
|
57
|
+
const a = await ask('Pick a number or name', getState().mode);
|
|
58
|
+
mode = a === '1' ? 'line' : a === '2' ? 'section' : a;
|
|
59
|
+
}
|
|
60
|
+
if (!MODES.includes(mode)) { console.error(C.red('unknown mode: ' + mode + ' (available: ' + MODES.join(', ') + ')')); return false; }
|
|
61
|
+
|
|
62
|
+
// 3. Backend
|
|
51
63
|
let backend = opts.backend;
|
|
52
64
|
if (!backend) {
|
|
53
65
|
console.log('\n' + C.bold('Translation backend') + ':');
|
|
@@ -62,7 +74,7 @@ async function runSetup(opts) {
|
|
|
62
74
|
const b = getBackend(backend);
|
|
63
75
|
if (!b) { console.error(C.red('unknown backend: ' + backend)); return false; }
|
|
64
76
|
|
|
65
|
-
//
|
|
77
|
+
// 4. Key entry for the chosen backend, if missing (keys live ONLY in
|
|
66
78
|
// keys.json — shell env vars are never read)
|
|
67
79
|
if (!b.available() && keys.KEY_IDS.includes(b.id)) {
|
|
68
80
|
const v = opts.key || (await ask('Paste your ' + b.id + ' API key (enter to skip)', ''));
|
|
@@ -73,12 +85,24 @@ async function runSetup(opts) {
|
|
|
73
85
|
}
|
|
74
86
|
}
|
|
75
87
|
|
|
76
|
-
//
|
|
77
|
-
|
|
78
|
-
|
|
88
|
+
// 5. Input translation (beta, opt-in): prompt -> English as context
|
|
89
|
+
let inputEn = typeof opts.input === 'string' ? opts.input === 'on' : getState().inputEn;
|
|
90
|
+
if (opts.input === undefined && rl) {
|
|
91
|
+
console.log('\n' + C.bold('Input translation') + ' ' + C.dim('(beta)') +
|
|
92
|
+
' — type prompts in your language; an English translation is\n' +
|
|
93
|
+
'attached as context and the model is asked to reply in English (the original\n' +
|
|
94
|
+
'prompt stays in your history; adds ~0.5–1.5s per non-English prompt).');
|
|
95
|
+
const a = await ask('Enable input translation? (y/N)', inputEn ? 'y' : 'n');
|
|
96
|
+
inputEn = /^y(es)?$/i.test(a);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// 6. Save config
|
|
100
|
+
setState({ target: lang, mode, backend, inputEn });
|
|
101
|
+
console.log('\n' + C.green('✓') + ' saved: lang=' + lang + ' (' + getLang(lang).name + '), mode=' + mode +
|
|
102
|
+
', backend=' + backend + ', input=' + (inputEn ? 'on' : 'off') +
|
|
79
103
|
(b.available() ? '' : C.red(' (no key yet — will fall back to google)')));
|
|
80
104
|
|
|
81
|
-
//
|
|
105
|
+
// 7. Live verification
|
|
82
106
|
process.stdout.write(C.dim('verifying… '));
|
|
83
107
|
try {
|
|
84
108
|
const { displayContent } = await buildDisplayContent('Setup verification: translation works.\n', {
|
|
@@ -89,7 +113,10 @@ async function runSetup(opts) {
|
|
|
89
113
|
console.log(C.red('verification failed: ' + e.message));
|
|
90
114
|
}
|
|
91
115
|
|
|
92
|
-
console.log(C.dim('\nNext: restart Claude Code (new session). Toggle with `!cctrans off` / `!cctrans on
|
|
116
|
+
console.log(C.dim('\nNext: restart Claude Code (new session). Toggle with `!cctrans off` / `!cctrans on`. ' +
|
|
117
|
+
(inputEn
|
|
118
|
+
? 'Input translation (beta) is ON — disable: `cctrans input off`.'
|
|
119
|
+
: 'Input translation (beta): `cctrans input on`.')));
|
|
93
120
|
return true;
|
|
94
121
|
} finally {
|
|
95
122
|
if (rl) rl.close();
|