cctrans 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/interleave.js CHANGED
@@ -16,6 +16,22 @@ function looksLikeCodeish(s) {
16
16
  return false;
17
17
  }
18
18
 
19
+ // Leading BLOCK markdown (heading / list marker / blockquote) must be split
20
+ // off before translation: fed to an MT backend it gets kept or mangled, and
21
+ // re-rendered mid-line after the ↳ marker it shows up as literal "##" / "-" /
22
+ // ">" (the translated line no longer starts with the prefix, so the renderer
23
+ // treats it as text). Translate the content only; re-apply structure when
24
+ // building the translated line.
25
/**
 * Split a leading markdown block marker off a line so that only the text
 * after it is handed to translation.
 *
 * Patterns are tried in order (heading, list item, blockquote); the first
 * match wins.
 *
 * @param {string} line - a single line of text.
 * @returns {{prefix: string, content: string, block: ('heading'|'list'|'quote'|null)}}
 *   `prefix` is the matched marker together with its surrounding whitespace
 *   ('' when nothing matched), `content` is the remainder of the line, and
 *   `block` names the structure that matched (null for a plain line).
 */
function splitBlockPrefix(line) {
  // Heading: up to three whitespace chars, 1-6 '#', then required whitespace.
  let m = line.match(/^(\s{0,3}#{1,6}\s+)(.*)$/);
  if (m) return { prefix: m[1], content: m[2], block: 'heading' };

  // List item: a bullet (-, *, +) or an ordered marker. CommonMark permits
  // 1-9 digits before the '.' / ')' delimiter, so accept the full range;
  // otherwise "1000. item" would be translated with its marker attached.
  m = line.match(/^(\s*(?:[-*+]|\d{1,9}[.)])\s+)(.*)$/);
  if (m) return { prefix: m[1], content: m[2], block: 'list' };

  // Blockquote, possibly nested ("> > text").
  m = line.match(/^(\s*(?:>\s*)+)(.*)$/);
  if (m) return { prefix: m[1], content: m[2], block: 'quote' };

  return { prefix: '', content: line, block: null };
}
34
+
19
35
  // A code fence (```), and therefore "are we inside a code block?", can span
20
36
  // multiple MessageDisplay deltas. The caller threads the ending fence state of
21
37
  // one delta into the next (keyed by message_id), so classify takes an initial
@@ -30,17 +46,34 @@ function classify(lines, inFenceInit, target) {
30
46
  if (line.trim() === '') { plan.push({ line, kind: 'blank' }); continue; }
31
47
  if (isProbablyTarget(line, target)) { plan.push({ line, kind: 'target' }); continue; }
32
48
  if (looksLikeCodeish(line)) { plan.push({ line, kind: 'code' }); continue; }
33
- plan.push({ line, kind: 'prose' });
49
+ const { prefix, content, block } = splitBlockPrefix(line);
50
+ if (looksLikeCodeish(content)) { plan.push({ line, kind: 'code' }); continue; } // e.g. "- /path/to/file"
51
+ plan.push({ line, kind: 'prose', prefix, content, block });
34
52
  }
35
53
  return { plan, inFence };
36
54
  }
37
55
 
38
- // How to place the Chinese line under the English line.
56
+ // The translated line mirrors the English line's block structure:
57
+ // heading "## T" -> "## ↳ 译" (same heading style)
58
+ // quote "> T" -> "> ↳ 译" (stays inside the quote)
59
+ // list "- T" -> " ↳ 译" (same-width indent — a re-applied "- " would
60
+ // render a second bullet)
61
+ // plain "T" -> "↳ 译"
62
+ // demoteStructure drops the heading/quote prefix (plain "↳ 译"): in a grouped
63
+ // section block displaced from its English line, a re-applied "## "/"> " would
64
+ // render a REAL heading / fresh blockquote detached from what it translates.
65
/**
 * Build the translated line for one prose entry, mirroring its block
 * structure.
 *
 * @param {{prefix: string, block: (string|null)}} p - prose entry carrying
 *   the split-off block prefix and the block type.
 * @param {string} zh - translated content (without any block prefix).
 * @param {string} marker - marker placed before the translation (e.g. "↳ ").
 * @param {boolean} demoteStructure - when true, heading and quote prefixes
 *   are dropped and the result is just `marker + zh`.
 * @returns {string} the line to display for the translation.
 */
function zhLineFor(p, zh, marker, demoteStructure) {
  if (p.block === 'list') {
    // Indent to the list content instead of repeating the marker. Only the
    // visible marker characters are blanked out; existing whitespace (tabs
    // included) is kept so the indent stays the same width as the prefix.
    return p.prefix.replace(/\S/g, ' ') + marker + zh;
  }
  if (demoteStructure && (p.block === 'heading' || p.block === 'quote')) {
    return marker + zh;
  }
  return p.prefix + marker + zh;
}
70
+
71
+ // How to place the Chinese line under the English line (line mode).
39
72
  // hardBreak=true uses a CommonMark hard line break (two trailing spaces) so the
40
73
  // two lines stay separate even if displayContent is markdown-rendered.
41
/**
 * Line mode: join an English line with its translated line.
 *
 * @param {{line: string, prefix: string, block: (string|null)}} p - prose
 *   entry; `line` is the original English line.
 * @param {string} zh - translated content for the entry.
 * @param {string} marker - marker placed before the translation.
 * @param {boolean} hardBreak - when true the English line is terminated with
 *   a CommonMark hard line break instead of a bare newline.
 * @returns {string} the English line, a line break, then the translated line.
 */
function pair(p, zh, marker, hardBreak) {
  // A CommonMark hard line break needs at least TWO trailing spaces before
  // the newline; a single space is stripped and the lines would be merged.
  const br = hardBreak ? '  \n' : '\n';
  return p.line + br + zhLineFor(p, zh, marker, false);
}
45
78
 
46
79
  // Returns { displayContent, inFence }:
@@ -64,7 +97,7 @@ async function buildDisplayContent(rawDelta, opts) {
64
97
  const proseIdx = [];
65
98
  const proseLines = [];
66
99
  for (let i = 0; i < plan.length; i++) {
67
- if (plan[i].kind === 'prose') { proseIdx.push(i); proseLines.push(plan[i].line); }
100
+ if (plan[i].kind === 'prose') { proseIdx.push(i); proseLines.push(plan[i].content); }
68
101
  }
69
102
  if (proseLines.length === 0) return { displayContent: null, inFence }; // nothing to translate
70
103
 
@@ -79,7 +112,7 @@ async function buildDisplayContent(rawDelta, opts) {
79
112
  const p = plan[i];
80
113
  if (p.kind === 'prose') {
81
114
  const t = zhFor[i];
82
- if (t && t.trim() && t.trim() !== p.line.trim()) out.push(pair(p.line, t, marker, hardBreak));
115
+ if (t && t.trim() && t.trim() !== p.content.trim()) out.push(pair(p, t, marker, hardBreak));
83
116
  else out.push(p.line);
84
117
  } else {
85
118
  out.push(p.line);
@@ -90,4 +123,124 @@ async function buildDisplayContent(rawDelta, opts) {
90
123
  return { displayContent: dc, inFence };
91
124
  }
92
125
 
93
- module.exports = { buildDisplayContent, classify, looksLikeCodeish };
126
+ // ---------------------------------------------------------------------------
127
+ // Section mode: English streams untouched; a section's translation is spliced
128
+ // in as one grouped "↳" block when the section closes.
129
+ //
130
+ // A section = a maximal run of consecutive prose lines. Boundaries are
131
+ // properties of the TEXT, never of delta chunking (deltas batch arbitrarily —
132
+ // the same reply can arrive as 3 deltas in one run and 5 in another), which is
133
+ // what makes a full repaint (replay from index 0) reproduce identical output:
134
+ // - a real blank line (the last split element '' merely encodes the delta's
135
+ // trailing "\n" — a continuing block, not a blank);
136
+ // - any code/fence/target-language line;
137
+ // - a heading, which closes the run before it AND itself: a displaced
138
+ // "## ↳ 译" would render as a real heading below the block it titles;
139
+ // - the soft buffer cap (deferred past list items so a forced splice never
140
+ // lands mid-list, with a hard ceiling as backstop);
141
+ // - final:true.
142
+
143
+ const SECTION_CAP = 6000; // soft cap on buffered EN chars before a forced flush
144
+ const SECTION_HARD_CAP = 9000; // flush even mid-list past this
145
+
146
+ // Pure, synchronous segmentation — no I/O, no await, so the hook can persist
147
+ // the resulting buffer BEFORE translation starts (at-most-once flush: a crash
148
+ // or timeout after the save can only drop a section's translation, never
149
+ // replay it at a wrong position).
150
+ // opts: {inFence, buf (pending entries from prior deltas), target, final}
151
+ // Returns:
152
+ // out — the delta's own lines, verbatim (splice skeleton)
153
+ // flushes — [{pos, entries}]: closed sections and where in `out` their ZH
154
+ // block goes (right after the section's last English line)
155
+ // buf — entries still pending (the open section), to persist
156
+ // inFence — fence state at end of delta, to persist
157
/**
 * Segment one delta into sections without doing any I/O.
 *
 * @param {*} rawDelta - delta text; coerced with String() and split on "\n".
 * @param {{inFence?: *, buf?: Array, target?: string, final?: boolean}} [opts]
 *   `buf` holds entries still pending from earlier deltas; `final` forces the
 *   open section to be flushed at the end of this delta.
 * @returns {{out: string[], flushes: Array<{pos: number, entries: Array}>, buf: Array, inFence: *}}
 *   `out` is the delta's own lines verbatim, `flushes` lists each closed
 *   section with the index in `out` where its translated block belongs,
 *   `buf` is the still-open section, and `inFence` is the fence state
 *   reported by classify().
 */
function planSections(rawDelta, opts) {
  opts = opts || {};
  const lines = String(rawDelta).split('\n');
  const { plan, inFence } = classify(lines, opts.inFence, opts.target || 'zh-Hans');
  const out = [];
  const flushes = [];
  let pending = (opts.buf || []).slice(); // copy: never mutate the caller's buffer
  let pendingChars = pending.reduce((n, e) => n + e.content.length, 0);
  // True once the delta's trailing '' (the encoding of a final "\n") has been
  // pushed onto `out`; it can only ever be the last element.
  let endsWithArtifact = false;

  // Close the open section, recording where its translated block is spliced.
  const flush = (pos) => {
    if (!pending.length) return;
    flushes.push({ pos: pos === undefined ? out.length : pos, entries: pending });
    pending = [];
    pendingChars = 0;
  };

  for (let i = 0; i < plan.length; i++) {
    const p = plan[i];
    if (p.kind === 'prose') {
      const isHeading = p.block === 'heading';
      if (isHeading) flush(); // close the run before the heading
      out.push(p.line);
      pending.push({ line: p.line, prefix: p.prefix, content: p.content, block: p.block });
      pendingChars += p.content.length;
      if (isHeading) {
        flush(); // ...and the heading itself
      } else if (pendingChars > SECTION_CAP && (p.block !== 'list' || pendingChars > SECTION_HARD_CAP)) {
        // Soft cap reached: flush, but not mid-list unless past the hard cap.
        flush();
      }
      continue;
    }
    if (p.kind === 'blank') {
      // The last split element '' only encodes the delta's trailing "\n";
      // it is not a real blank line and must not close the section.
      const terminalArtifact = i === plan.length - 1 && p.line === '';
      if (terminalArtifact) endsWithArtifact = true;
      else flush();
      out.push(p.line);
      continue;
    }
    flush(); // any other kind of line interrupts the section, then passes through
    out.push(p.line);
  }

  // On the final delta the open section is closed too. Its block must sit
  // directly after the last English line, i.e. BEFORE the trailing-newline
  // artifact; splicing after it would insert a blank line between the text
  // and its translation and drop the trailing newline.
  if (opts.final) flush(endsWithArtifact ? out.length - 1 : out.length);
  return { out, flushes, buf: pending, inFence };
}
191
+
192
+ // Translate all flushed sections (one batch call) and splice each grouped ZH
193
+ // block into the out-skeleton. Returns the displayContent string, or null for
194
+ // "leave this delta as the original English" (nothing survived translation, or
195
+ // over the cap). Translation is per-LINE (prefix-stripped), so the sha1 cache
196
+ // is shared with line mode and the backends' line contracts hold unchanged.
197
/**
 * Translate every flushed section in one batch call and splice each grouped
 * translated block into the `out` skeleton.
 *
 * @param {{out: string[], flushes: Array<{pos: number, entries: Array}>}} planned
 *   result of planSections().
 * @param {{marker?: string, cap?: number, target?: string, backend?: string,
 *   model?: string, timeoutMs?: number}} [opts]
 *   `marker` defaults to "↳ " and `cap` (max length of the result) to 9000.
 * @returns {Promise<(string|null)>} the display content, or null when there is
 *   nothing to splice (no flushes, no usable translation, or the content is
 *   still over the cap after every translated block has been shed).
 */
async function renderSections(planned, opts) {
  opts = opts || {};
  const marker = opts.marker || '↳ ';
  const cap = opts.cap || 9000;
  if (!planned.flushes.length) return null;

  // One flat list of prefix-stripped lines, in flush order.
  const contents = [];
  for (const f of planned.flushes) for (const e of f.entries) contents.push(e.content);
  const zh = await translateLines(contents, {
    target: opts.target, backend: opts.backend, model: opts.model, timeoutMs: opts.timeoutMs,
  });

  let k = 0; // cursor into zh, advancing in the same order contents was built
  const blocks = [];
  for (const f of planned.flushes) {
    // Single-line sections keep line-mode structure; a uniform quote run keeps
    // its "> " (the block continues the same blockquote). Only mixed grouped
    // blocks demote structure prefixes.
    const grouped = f.entries.length > 1;
    const allQuote = grouped && f.entries.every((e) => e.block === 'quote');
    const blockLines = [];
    for (const e of f.entries) {
      const t = zh[k++];
      // Skip empty translations and ones identical to the source text.
      if (t && t.trim() && t.trim() !== e.content.trim()) {
        blockLines.push(zhLineFor(e, t, marker, grouped && !allQuote));
      }
    }
    if (blockLines.length) blocks.push({ pos: f.pos, lines: blockLines });
  }
  if (!blocks.length) return null;

  // Insert back-to-front so earlier positions are not shifted by later inserts.
  const splice = () => {
    const merged = planned.out.slice();
    for (let i = blocks.length - 1; i >= 0; i--) merged.splice(blocks[i].pos, 0, ...blocks[i].lines);
    return merged.join('\n');
  };

  let dc = splice();
  // Over the cap: shed whole translated blocks (largest first) instead of
  // dropping the delta's entire translation.
  while (dc.length > cap) {
    let big = 0;
    for (let i = 1; i < blocks.length; i++) {
      if (blocks[i].lines.join('\n').length > blocks[big].lines.join('\n').length) big = i;
    }
    blocks.splice(big, 1);
    // Nothing left to shed: fall back to "leave the original English". Return
    // here rather than looping again, since there is no string left to measure.
    if (!blocks.length) return null;
    dc = splice();
  }
  return dc;
}
245
+
246
+ module.exports = { buildDisplayContent, classify, looksLikeCodeish, planSections, renderSections };
package/src/langs.js CHANGED
@@ -52,10 +52,10 @@ const LANGS = {
52
52
  // supported non-English languages?" Used by the input-translation hook.
53
53
  const NON_LATIN = /[一-鿿㐀-䶿぀-ゟ゠-ヿ가-힯ᄀ-ᇿЀ-ӿऀ-ॿ]/g;
54
54
 
55
- function nonLatinRatio(text) {
56
- const hits = (text.match(NON_LATIN) || []).length;
57
- const nonspace = text.replace(/\s/g, '').length;
58
- return nonspace === 0 ? 0 : hits / nonspace;
55
+ // Absolute count, not a ratio: coding prompts are full of Latin paths and
56
+ // identifiers that would dilute any ratio below a usable threshold.
57
/**
 * Count the characters in `text` that match NON_LATIN.
 *
 * @param {*} text - text to inspect; null/undefined count as empty, any
 *   other value is coerced with String().
 * @returns {number} number of matching characters (0 when there are none).
 */
function nonLatinCount(text) {
  // A missing prompt has no non-Latin characters; do not throw on it.
  if (text == null) return 0;
  return (String(text).match(NON_LATIN) || []).length;
}
60
60
 
61
61
  // Region-code (and bare-zh) aliases -> canonical script codes.
@@ -91,4 +91,4 @@ function isProbablyTarget(line, code) {
91
91
  return nonspace > 0 && hits / nonspace >= 0.3;
92
92
  }
93
93
 
94
- module.exports = { LANGS, getLang, listLangs, isProbablyTarget, normalizeLang, nonLatinRatio };
94
+ module.exports = { LANGS, getLang, listLangs, isProbablyTarget, normalizeLang, nonLatinCount };
package/src/setup.js CHANGED
@@ -1,11 +1,12 @@
1
1
  'use strict';
2
- // Interactive setup wizard: language -> backend -> API-key entry -> live
3
- // verification -> save. Re-runnable via `cctrans setup`; non-interactive with
4
- // flags (--lang, --backend, --key, --yes). Keys go to keys.json only — the
5
- // shell environment is never read.
2
+ // Interactive setup wizard: language -> display mode -> backend -> API-key
3
+ // entry -> input translation (beta) -> live verification -> save. Re-runnable
4
+ // via `cctrans setup`; non-interactive with flags (--lang, --mode, --backend,
5
+ // --key, --input, --yes). Keys go to keys.json only — the shell environment is
6
+ // never read.
6
7
 
7
8
  const readline = require('node:readline/promises');
8
- const { getState, setState } = require('./config');
9
+ const { getState, setState, MODES } = require('./config');
9
10
  const { listLangs, getLang, normalizeLang } = require('./langs');
10
11
  const { listBackends, getBackend } = require('./backends');
11
12
  const keys = require('./keys');
@@ -38,7 +39,7 @@ async function runSetup(opts) {
38
39
  let lang = opts.lang;
39
40
  if (!lang) {
40
41
  const codes = listLangs();
41
- console.log('\n' + C.bold('Target language') + ' — translations appear under each English line:');
42
+ console.log('\n' + C.bold('Target language') + ' — replies show English + your language inline:');
42
43
  codes.forEach((c, i) => console.log(' ' + (i + 1) + '. ' + c.padEnd(8) + C.dim(getLang(c).name)));
43
44
  const cur = getState().target;
44
45
  const a = await ask('Pick a number or code', cur);
@@ -47,7 +48,18 @@ async function runSetup(opts) {
47
48
  if (!getLang(lang)) { console.error(C.red('unsupported language: ' + lang)); return false; }
48
49
  lang = normalizeLang(lang);
49
50
 
50
- // 2. Backend
51
+ // 2. Display mode
52
+ let mode = opts.mode;
53
+ if (!mode) {
54
+ console.log('\n' + C.bold('Display mode') + ':');
55
+ console.log(' 1. line ' + C.dim('translation under each English line, as it streams'));
56
+ console.log(' 2. section ' + C.dim('English block first, then its translation — appears when the block completes'));
57
+ const a = await ask('Pick a number or name', getState().mode);
58
+ mode = a === '1' ? 'line' : a === '2' ? 'section' : a;
59
+ }
60
+ if (!MODES.includes(mode)) { console.error(C.red('unknown mode: ' + mode + ' (available: ' + MODES.join(', ') + ')')); return false; }
61
+
62
+ // 3. Backend
51
63
  let backend = opts.backend;
52
64
  if (!backend) {
53
65
  console.log('\n' + C.bold('Translation backend') + ':');
@@ -62,7 +74,7 @@ async function runSetup(opts) {
62
74
  const b = getBackend(backend);
63
75
  if (!b) { console.error(C.red('unknown backend: ' + backend)); return false; }
64
76
 
65
- // 3. Key entry for the chosen backend, if missing (keys live ONLY in
77
+ // 4. Key entry for the chosen backend, if missing (keys live ONLY in
66
78
  // keys.json — shell env vars are never read)
67
79
  if (!b.available() && keys.KEY_IDS.includes(b.id)) {
68
80
  const v = opts.key || (await ask('Paste your ' + b.id + ' API key (enter to skip)', ''));
@@ -73,12 +85,24 @@ async function runSetup(opts) {
73
85
  }
74
86
  }
75
87
 
76
- // 4. Save config
77
- setState({ target: lang, backend });
78
- console.log('\n' + C.green('✓') + ' saved: lang=' + lang + ' (' + getLang(lang).name + '), backend=' + backend +
88
+ // 5. Input translation (beta, opt-in): prompt -> English as context
89
+ let inputEn = typeof opts.input === 'string' ? opts.input === 'on' : getState().inputEn;
90
+ if (opts.input === undefined && rl) {
91
+ console.log('\n' + C.bold('Input translation') + ' ' + C.dim('(beta)') +
92
+ ' — type prompts in your language; an English translation is\n' +
93
+ 'attached as context and the model is asked to reply in English (the original\n' +
94
+ 'prompt stays in your history; adds ~0.5–1.5s per non-English prompt).');
95
+ const a = await ask('Enable input translation? (y/N)', inputEn ? 'y' : 'n');
96
+ inputEn = /^y(es)?$/i.test(a);
97
+ }
98
+
99
+ // 6. Save config
100
+ setState({ target: lang, mode, backend, inputEn });
101
+ console.log('\n' + C.green('✓') + ' saved: lang=' + lang + ' (' + getLang(lang).name + '), mode=' + mode +
102
+ ', backend=' + backend + ', input=' + (inputEn ? 'on' : 'off') +
79
103
  (b.available() ? '' : C.red(' (no key yet — will fall back to google)')));
80
104
 
81
- // 5. Live verification
105
+ // 7. Live verification
82
106
  process.stdout.write(C.dim('verifying… '));
83
107
  try {
84
108
  const { displayContent } = await buildDisplayContent('Setup verification: translation works.\n', {
@@ -89,7 +113,10 @@ async function runSetup(opts) {
89
113
  console.log(C.red('verification failed: ' + e.message));
90
114
  }
91
115
 
92
- console.log(C.dim('\nNext: restart Claude Code (new session). Toggle with `!cctrans off` / `!cctrans on`; input translation: `cctrans input on`.'));
116
+ console.log(C.dim('\nNext: restart Claude Code (new session). Toggle with `!cctrans off` / `!cctrans on`. ' +
117
+ (inputEn
118
+ ? 'Input translation (beta) is ON — disable: `cctrans input off`.'
119
+ : 'Input translation (beta): `cctrans input on`.')));
93
120
  return true;
94
121
  } finally {
95
122
  if (rl) rl.close();