@llm-translate/cli 1.0.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. package/.dockerignore +51 -0
  2. package/.env.example +33 -0
  3. package/.github/workflows/docs-pages.yml +57 -0
  4. package/.github/workflows/release.yml +49 -0
  5. package/.translaterc.json +44 -0
  6. package/CLAUDE.md +243 -0
  7. package/Dockerfile +55 -0
  8. package/README.md +371 -0
  9. package/RFC.md +1595 -0
  10. package/dist/cli/index.d.ts +2 -0
  11. package/dist/cli/index.js +4494 -0
  12. package/dist/cli/index.js.map +1 -0
  13. package/dist/index.d.ts +1152 -0
  14. package/dist/index.js +3841 -0
  15. package/dist/index.js.map +1 -0
  16. package/docker-compose.yml +56 -0
  17. package/docs/.vitepress/config.ts +161 -0
  18. package/docs/api/agent.md +262 -0
  19. package/docs/api/engine.md +274 -0
  20. package/docs/api/index.md +171 -0
  21. package/docs/api/providers.md +304 -0
  22. package/docs/changelog.md +64 -0
  23. package/docs/cli/dir.md +243 -0
  24. package/docs/cli/file.md +213 -0
  25. package/docs/cli/glossary.md +273 -0
  26. package/docs/cli/index.md +129 -0
  27. package/docs/cli/init.md +158 -0
  28. package/docs/cli/serve.md +211 -0
  29. package/docs/glossary.json +235 -0
  30. package/docs/guide/chunking.md +272 -0
  31. package/docs/guide/configuration.md +139 -0
  32. package/docs/guide/cost-optimization.md +237 -0
  33. package/docs/guide/docker.md +371 -0
  34. package/docs/guide/getting-started.md +150 -0
  35. package/docs/guide/glossary.md +241 -0
  36. package/docs/guide/index.md +86 -0
  37. package/docs/guide/ollama.md +515 -0
  38. package/docs/guide/prompt-caching.md +221 -0
  39. package/docs/guide/providers.md +232 -0
  40. package/docs/guide/quality-control.md +206 -0
  41. package/docs/guide/vitepress-integration.md +265 -0
  42. package/docs/index.md +63 -0
  43. package/docs/ja/api/agent.md +262 -0
  44. package/docs/ja/api/engine.md +274 -0
  45. package/docs/ja/api/index.md +171 -0
  46. package/docs/ja/api/providers.md +304 -0
  47. package/docs/ja/changelog.md +64 -0
  48. package/docs/ja/cli/dir.md +243 -0
  49. package/docs/ja/cli/file.md +213 -0
  50. package/docs/ja/cli/glossary.md +273 -0
  51. package/docs/ja/cli/index.md +111 -0
  52. package/docs/ja/cli/init.md +158 -0
  53. package/docs/ja/guide/chunking.md +271 -0
  54. package/docs/ja/guide/configuration.md +139 -0
  55. package/docs/ja/guide/cost-optimization.md +30 -0
  56. package/docs/ja/guide/getting-started.md +150 -0
  57. package/docs/ja/guide/glossary.md +214 -0
  58. package/docs/ja/guide/index.md +32 -0
  59. package/docs/ja/guide/ollama.md +410 -0
  60. package/docs/ja/guide/prompt-caching.md +221 -0
  61. package/docs/ja/guide/providers.md +232 -0
  62. package/docs/ja/guide/quality-control.md +137 -0
  63. package/docs/ja/guide/vitepress-integration.md +265 -0
  64. package/docs/ja/index.md +58 -0
  65. package/docs/ko/api/agent.md +262 -0
  66. package/docs/ko/api/engine.md +274 -0
  67. package/docs/ko/api/index.md +171 -0
  68. package/docs/ko/api/providers.md +304 -0
  69. package/docs/ko/changelog.md +64 -0
  70. package/docs/ko/cli/dir.md +243 -0
  71. package/docs/ko/cli/file.md +213 -0
  72. package/docs/ko/cli/glossary.md +273 -0
  73. package/docs/ko/cli/index.md +111 -0
  74. package/docs/ko/cli/init.md +158 -0
  75. package/docs/ko/guide/chunking.md +271 -0
  76. package/docs/ko/guide/configuration.md +139 -0
  77. package/docs/ko/guide/cost-optimization.md +30 -0
  78. package/docs/ko/guide/getting-started.md +150 -0
  79. package/docs/ko/guide/glossary.md +214 -0
  80. package/docs/ko/guide/index.md +32 -0
  81. package/docs/ko/guide/ollama.md +410 -0
  82. package/docs/ko/guide/prompt-caching.md +221 -0
  83. package/docs/ko/guide/providers.md +232 -0
  84. package/docs/ko/guide/quality-control.md +137 -0
  85. package/docs/ko/guide/vitepress-integration.md +265 -0
  86. package/docs/ko/index.md +58 -0
  87. package/docs/zh/api/agent.md +262 -0
  88. package/docs/zh/api/engine.md +274 -0
  89. package/docs/zh/api/index.md +171 -0
  90. package/docs/zh/api/providers.md +304 -0
  91. package/docs/zh/changelog.md +64 -0
  92. package/docs/zh/cli/dir.md +243 -0
  93. package/docs/zh/cli/file.md +213 -0
  94. package/docs/zh/cli/glossary.md +273 -0
  95. package/docs/zh/cli/index.md +111 -0
  96. package/docs/zh/cli/init.md +158 -0
  97. package/docs/zh/guide/chunking.md +271 -0
  98. package/docs/zh/guide/configuration.md +139 -0
  99. package/docs/zh/guide/cost-optimization.md +30 -0
  100. package/docs/zh/guide/getting-started.md +150 -0
  101. package/docs/zh/guide/glossary.md +214 -0
  102. package/docs/zh/guide/index.md +32 -0
  103. package/docs/zh/guide/ollama.md +410 -0
  104. package/docs/zh/guide/prompt-caching.md +221 -0
  105. package/docs/zh/guide/providers.md +232 -0
  106. package/docs/zh/guide/quality-control.md +137 -0
  107. package/docs/zh/guide/vitepress-integration.md +265 -0
  108. package/docs/zh/index.md +58 -0
  109. package/package.json +91 -0
  110. package/release.config.mjs +15 -0
  111. package/schemas/glossary.schema.json +110 -0
  112. package/src/cli/commands/dir.ts +469 -0
  113. package/src/cli/commands/file.ts +291 -0
  114. package/src/cli/commands/glossary.ts +221 -0
  115. package/src/cli/commands/init.ts +68 -0
  116. package/src/cli/commands/serve.ts +60 -0
  117. package/src/cli/index.ts +64 -0
  118. package/src/cli/options.ts +59 -0
  119. package/src/core/agent.ts +1119 -0
  120. package/src/core/chunker.ts +391 -0
  121. package/src/core/engine.ts +634 -0
  122. package/src/errors.ts +188 -0
  123. package/src/index.ts +147 -0
  124. package/src/integrations/vitepress.ts +549 -0
  125. package/src/parsers/markdown.ts +383 -0
  126. package/src/providers/claude.ts +259 -0
  127. package/src/providers/interface.ts +109 -0
  128. package/src/providers/ollama.ts +379 -0
  129. package/src/providers/openai.ts +308 -0
  130. package/src/providers/registry.ts +153 -0
  131. package/src/server/index.ts +152 -0
  132. package/src/server/middleware/auth.ts +93 -0
  133. package/src/server/middleware/logger.ts +90 -0
  134. package/src/server/routes/health.ts +84 -0
  135. package/src/server/routes/translate.ts +210 -0
  136. package/src/server/types.ts +138 -0
  137. package/src/services/cache.ts +899 -0
  138. package/src/services/config.ts +217 -0
  139. package/src/services/glossary.ts +247 -0
  140. package/src/types/analysis.ts +164 -0
  141. package/src/types/index.ts +265 -0
  142. package/src/types/modes.ts +121 -0
  143. package/src/types/mqm.ts +157 -0
  144. package/src/utils/logger.ts +141 -0
  145. package/src/utils/tokens.ts +116 -0
  146. package/tests/fixtures/glossaries/ml-glossary.json +53 -0
  147. package/tests/fixtures/input/lynq-installation.ko.md +350 -0
  148. package/tests/fixtures/input/lynq-installation.md +350 -0
  149. package/tests/fixtures/input/simple.ko.md +27 -0
  150. package/tests/fixtures/input/simple.md +27 -0
  151. package/tests/unit/chunker.test.ts +229 -0
  152. package/tests/unit/glossary.test.ts +146 -0
  153. package/tests/unit/markdown.test.ts +205 -0
  154. package/tests/unit/tokens.test.ts +81 -0
  155. package/tsconfig.json +28 -0
  156. package/tsup.config.ts +34 -0
  157. package/vitest.config.ts +16 -0
@@ -0,0 +1,634 @@
1
+ import type {
2
+ TranslateConfig,
3
+ TranslationRequest,
4
+ DocumentResult,
5
+ ChunkResult,
6
+ Chunk,
7
+ DocumentFormat,
8
+ ResolvedGlossary,
9
+ } from '../types/index.js';
10
+ import type { LLMProvider } from '../providers/interface.js';
11
+ import { TranslationAgent, createTranslationAgent } from './agent.js';
12
+ import { chunkContent, getChunkStats } from './chunker.js';
13
+ import {
14
+ extractTextForTranslation,
15
+ restorePreservedSections,
16
+ } from '../parsers/markdown.js';
17
+ import { loadGlossary, resolveGlossary } from '../services/glossary.js';
18
+ import { getProvider, getProviderConfigFromEnv } from '../providers/registry.js';
19
+ import { logger, createTimer } from '../utils/logger.js';
20
+ import { TranslationError, ErrorCode } from '../errors.js';
21
+ import {
22
+ CacheManager,
23
+ createCacheManager,
24
+ createNullCacheManager,
25
+ type CacheKey,
26
+ } from '../services/cache.js';
27
+
28
+ // ============================================================================
29
+ // Engine Options
30
+ // ============================================================================
31
+
32
/**
 * Construction options for {@link TranslationEngine}.
 */
export interface TranslationEngineOptions {
  /**
   * Translation configuration. The engine reads its provider, quality,
   * chunking, glossary, language-style and cache-path settings from here.
   */
  config: TranslateConfig;

  /**
   * Ready-made provider to use. When omitted, the engine builds one from
   * `config.provider.default` and environment-derived provider settings.
   */
  provider?: LLMProvider;

  /** Emit progress and diagnostic log lines. Treated as `false` when omitted. */
  verbose?: boolean;

  /** Disable caching (--no-cache mode) */
  noCache?: boolean;
}
39
+
40
/**
 * Per-call options accepted by `TranslationEngine.translateContent`.
 */
export interface TranslateFileOptions {
  /** The document text to translate. */
  content: string;

  /** Language code of `content`. */
  sourceLang: string;

  /** Language code to translate into. */
  targetLang: string;

  /** Document format. When omitted, the engine guesses it from `content`. */
  format?: DocumentFormat;

  /** Path of a glossary file. Falls back to `config.glossary.path` when not set. */
  glossaryPath?: string;

  /** Overrides `config.quality.threshold` for this call. */
  qualityThreshold?: number;

  /** Overrides `config.quality.maxIterations` for this call. */
  maxIterations?: number;

  /** Free-form description of the document, forwarded to the agent as the document purpose. */
  context?: string;

  /** Per-language style instruction (e.g., "경어체", "です・ます調"). Falls back to config.languages.styles[targetLang] if not specified. */
  styleInstruction?: string;

  /** If true, throw error when quality threshold is not met */
  strictQuality?: boolean;

  /** If true, throw error when glossary terms are missed */
  strictGlossary?: boolean;
}
56
+
57
+ // ============================================================================
58
+ // Translation Engine
59
+ // ============================================================================
60
+
61
/**
 * Coordinates translation of a whole document.
 *
 * For each call the engine resolves the document format, loads an optional
 * glossary, splits the content into chunks, translates every translatable
 * chunk through a translation agent (consulting the cache first), stitches
 * the results back together and reports aggregate metadata.
 */
export class TranslationEngine {
  private config: TranslateConfig;
  private provider: LLMProvider;
  private verbose: boolean;
  private cache: CacheManager;
  /** Cache hits accumulated over the lifetime of this engine instance. */
  private cacheHits = 0;
  /** Cache misses accumulated over the lifetime of this engine instance. */
  private cacheMisses = 0;

  /**
   * @param options - Engine configuration; see {@link TranslationEngineOptions}.
   */
  constructor(options: TranslationEngineOptions) {
    this.config = options.config;
    this.verbose = options.verbose ?? false;

    // Provider: an injected instance wins; otherwise build one from config/env.
    if (options.provider) {
      this.provider = options.provider;
    } else {
      const providerName = this.config.provider.default;
      const providerConfig = getProviderConfigFromEnv(providerName);
      // Use model from config if specified (CLI --model option)
      if (this.config.provider.model) {
        providerConfig.defaultModel = this.config.provider.model;
      }
      this.provider = getProvider(providerName, providerConfig);
    }

    // Cache: disabled when explicitly requested or when no cache directory is
    // configured. Narrowing into a local avoids a non-null assertion below.
    const cacheDir = options.noCache ? undefined : this.config.paths?.cache;
    if (!cacheDir) {
      this.cache = createNullCacheManager();
      if (this.verbose && options.noCache) {
        logger.info('Cache disabled (--no-cache)');
      }
    } else {
      this.cache = createCacheManager({ cacheDir, verbose: this.verbose });
      if (this.verbose) {
        const stats = this.cache.getStats();
        logger.info(`Cache initialized: ${stats.entries} entries`);
      }
    }
  }

  /**
   * Translate a single file/content
   *
   * @param options - Content, languages and per-call overrides.
   * @returns The translated document with per-chunk results and metadata.
   * @throws TranslationError with `GLOSSARY_COMPLIANCE_FAILED` when
   *   `strictGlossary` is set and at least one glossary term was missed.
   */
  async translateContent(options: TranslateFileOptions): Promise<DocumentResult> {
    const timer = createTimer();
    const format = options.format ?? this.detectFormat(options.content);
    // Carry the resolved format along so the per-chunk request sees the
    // detected format instead of defaulting to 'text'.
    const resolvedOptions: TranslateFileOptions = { ...options, format };

    if (this.verbose) {
      logger.info(`Translating content (${format} format)`);
      logger.info(`Source: ${options.sourceLang} → Target: ${options.targetLang}`);
    }

    const glossary = await this.loadGlossaryFor(options);

    // Only markdown has a dedicated pipeline. HTML is treated as plain text
    // for now (Phase 2 will add proper HTML support).
    const result =
      format === 'markdown'
        ? await this.translateMarkdown(resolvedOptions, glossary)
        : await this.translatePlainText(resolvedOptions, glossary);

    result.metadata.totalDuration = timer.elapsed();

    // Check glossary compliance if glossary is provided
    if (glossary && glossary.terms.length > 0) {
      const compliance = this.checkDocumentGlossaryCompliance(
        options.content,
        result.content,
        glossary
      );
      result.glossaryCompliance = compliance;

      if (this.verbose) {
        logger.info(`Glossary compliance: ${compliance.applied.length}/${compliance.applied.length + compliance.missed.length} terms applied`);
        if (compliance.missed.length > 0) {
          logger.warn(`Missed glossary terms: ${compliance.missed.join(', ')}`);
        }
      }

      // Strict glossary mode - fail if any terms are missed
      if (options.strictGlossary && !compliance.compliant) {
        throw new TranslationError(ErrorCode.GLOSSARY_COMPLIANCE_FAILED, {
          missed: compliance.missed.join(', '),
          applied: compliance.applied,
          total: glossary.terms.length,
        });
      }
    }

    if (this.verbose) {
      logger.success(`Translation complete in ${timer.format()}`);
      logger.info(`Average quality: ${result.metadata.averageQuality.toFixed(1)}/100`);
    }

    return result;
  }

  /**
   * Load and resolve the glossary for one call.
   *
   * The per-call `glossaryPath` takes precedence over `config.glossary.path`.
   * The glossary is optional: a load failure never aborts the translation, it
   * is reported as a warning in verbose mode and `undefined` is returned.
   */
  private async loadGlossaryFor(
    options: TranslateFileOptions
  ): Promise<ResolvedGlossary | undefined> {
    const fromConfig = !options.glossaryPath;
    const glossaryPath = options.glossaryPath || this.config.glossary?.path;
    if (!glossaryPath) {
      return undefined;
    }

    const origin = fromConfig ? ' from config' : '';
    try {
      const rawGlossary = await loadGlossary(glossaryPath);
      const glossary = resolveGlossary(rawGlossary, options.targetLang);
      if (this.verbose) {
        logger.info(`Loaded glossary${origin}: ${glossary.terms.length} terms`);
      }
      return glossary;
    } catch (error: unknown) {
      if (this.verbose) {
        logger.warn(`Failed to load glossary${origin}: ${String(error)}`);
      }
      return undefined;
    }
  }

  /**
   * Check glossary compliance for the entire document
   *
   * A term is considered only when its source form occurs in the original
   * content. It counts as applied when its target form occurs in the
   * translation, otherwise as missed. Matching is a plain substring test,
   * case-insensitive unless the term is marked `caseSensitive`.
   *
   * @returns Source forms of applied and missed terms, and whether none were missed.
   */
  private checkDocumentGlossaryCompliance(
    sourceContent: string,
    translatedContent: string,
    glossary: ResolvedGlossary
  ): { applied: string[]; missed: string[]; compliant: boolean } {
    const applied: string[] = [];
    const missed: string[] = [];
    const sourceLower = sourceContent.toLowerCase();
    const translatedLower = translatedContent.toLowerCase();

    for (const term of glossary.terms) {
      const sourceInContent = term.caseSensitive
        ? sourceContent.includes(term.source)
        : sourceLower.includes(term.source.toLowerCase());

      // Terms that never occur in the source cannot be applied or missed.
      if (!sourceInContent) {
        continue;
      }

      const targetInTranslation = term.caseSensitive
        ? translatedContent.includes(term.target)
        : translatedLower.includes(term.target.toLowerCase());

      (targetInTranslation ? applied : missed).push(term.source);
    }

    return { applied, missed, compliant: missed.length === 0 };
  }

  // ============================================================================
  // Format-Specific Translation
  // ============================================================================

  /**
   * Translate markdown: sections that must not be translated (code blocks
   * etc.) are extracted first and restored into the translated text afterwards.
   */
  private async translateMarkdown(
    options: TranslateFileOptions,
    glossary?: ResolvedGlossary
  ): Promise<DocumentResult> {
    const { text, preservedSections } = extractTextForTranslation(options.content);
    const result = await this.translateChunks(text, options, glossary, true);

    return {
      ...result,
      content: restorePreservedSections(result.content, preservedSections),
    };
  }

  /** Translate content that needs no structural pre- or post-processing. */
  private async translatePlainText(
    options: TranslateFileOptions,
    glossary?: ResolvedGlossary
  ): Promise<DocumentResult> {
    return this.translateChunks(options.content, options, glossary, false);
  }

  /**
   * Shared pipeline: chunk `text`, translate each chunk in order and assemble
   * the document result.
   *
   * @param text - Text to chunk and translate.
   * @param options - Per-call options (languages, overrides, strict flags).
   * @param glossary - Resolved glossary, if any.
   * @param logChunkStats - Log the number of translatable chunks in verbose mode.
   * @returns Result whose `metadata.totalDuration` is 0; the caller sets it.
   */
  private async translateChunks(
    text: string,
    options: TranslateFileOptions,
    glossary: ResolvedGlossary | undefined,
    logChunkStats: boolean
  ): Promise<DocumentResult> {
    const chunks = chunkContent(text, {
      maxTokens: this.config.chunking.maxTokens,
      overlapTokens: this.config.chunking.overlapTokens,
    });

    if (logChunkStats && this.verbose) {
      const stats = getChunkStats(chunks);
      logger.info(`Chunked into ${stats.translatableChunks} translatable sections`);
    }

    const agent = createTranslationAgent({
      provider: this.provider,
      qualityThreshold: options.qualityThreshold ?? this.config.quality.threshold,
      maxIterations: options.maxIterations ?? this.config.quality.maxIterations,
      verbose: this.verbose,
      strictQuality: options.strictQuality,
    });

    const chunkResults: ChunkResult[] = [];
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    let totalIterations = 0;
    let cacheHits = 0;
    let cacheMisses = 0;

    for (let i = 0; i < chunks.length; i++) {
      const chunk = chunks[i];
      if (!chunk) continue;

      if (chunk.type === 'preserve') {
        // Keep preserved content as-is. No cache lookup happens for these
        // chunks, so they count neither as a hit nor as a miss.
        chunkResults.push({
          original: chunk.content,
          translated: chunk.content,
          startOffset: chunk.startOffset,
          endOffset: chunk.endOffset,
          qualityScore: 100,
        });
        continue;
      }

      if (this.verbose) {
        logger.info(`Translating chunk ${i + 1}/${chunks.length}...`);
      }

      const result = await this.translateChunk(chunk, options, glossary, agent);
      chunkResults.push(result);

      // Every translated chunk went through exactly one cache lookup.
      if (result.cached) {
        cacheHits++;
      } else {
        cacheMisses++;
      }

      totalInputTokens += result.tokensUsed?.input ?? 0;
      totalOutputTokens += result.tokensUsed?.output ?? 0;
      totalIterations += result.iterations ?? 0;
    }

    // Average over chunks with a positive score: failed chunks (score 0) are
    // excluded, preserved chunks take part with their fixed score of 100.
    const qualityScores = chunkResults
      .map((r) => r.qualityScore)
      .filter((score) => score > 0);
    const averageQuality =
      qualityScores.length > 0
        ? qualityScores.reduce((a, b) => a + b, 0) / qualityScores.length
        : 0;

    return {
      content: chunkResults.map((r) => r.translated).join(''),
      chunks: chunkResults,
      metadata: {
        totalTokensUsed: totalInputTokens + totalOutputTokens,
        totalDuration: 0, // Will be set by caller
        averageQuality,
        provider: this.provider.name,
        model: this.resolveModelName(),
        totalIterations,
        tokensUsed: {
          input: totalInputTokens,
          output: totalOutputTokens,
        },
        cache: {
          hits: cacheHits,
          misses: cacheMisses,
        },
      },
    };
  }

  /**
   * Translate one chunk, serving it from the cache when possible.
   *
   * On a translation failure the error is logged and the original text is
   * returned with a quality score of 0, so one bad chunk does not abort the
   * whole document. The exception is strict-quality mode, where a
   * `TranslationError` is propagated to the caller.
   */
  private async translateChunk(
    chunk: Chunk,
    options: TranslateFileOptions,
    glossary: ResolvedGlossary | undefined,
    agent: TranslationAgent
  ): Promise<ChunkResult> {
    // The glossary takes part in the cache key so that changing a term's
    // source or target invalidates earlier translations.
    const glossaryString = glossary
      ? JSON.stringify(glossary.terms.map((t) => ({ s: t.source, t: t.target })))
      : undefined;

    const cacheKey: CacheKey = {
      content: chunk.content,
      sourceLang: options.sourceLang,
      targetLang: options.targetLang,
      glossary: glossaryString,
      provider: this.provider.name,
      model: this.resolveModelName(),
    };

    // Check cache first
    const cacheResult = this.cache.get(cacheKey);
    if (cacheResult.hit && cacheResult.entry) {
      this.cacheHits++;
      if (this.verbose) {
        logger.info(`  ↳ Cache hit (quality: ${cacheResult.entry.qualityScore})`);
      }
      return {
        original: chunk.content,
        translated: cacheResult.entry.translation,
        startOffset: chunk.startOffset,
        endOffset: chunk.endOffset,
        qualityScore: cacheResult.entry.qualityScore,
        iterations: 0,
        tokensUsed: { input: 0, output: 0, cacheRead: 1 },
        cached: true,
      };
    }

    this.cacheMisses++;

    // Resolve style instruction: CLI option > config.languages.styles[targetLang]
    const resolvedStyleInstruction =
      options.styleInstruction ?? this.config.languages.styles?.[options.targetLang];

    const context: TranslationRequest['context'] = {
      documentPurpose: options.context,
      styleInstruction: resolvedStyleInstruction,
    };

    // Add header hierarchy context if available
    const headerHierarchy = chunk.metadata?.headerHierarchy;
    if (headerHierarchy && headerHierarchy.length > 0) {
      context.documentSummary = `Current section: ${headerHierarchy.join(' > ')}`;
    }

    // Add previous context if available
    if (chunk.metadata?.previousContext) {
      context.previousChunks = [chunk.metadata.previousContext];
    }

    const request: TranslationRequest = {
      content: chunk.content,
      sourceLang: options.sourceLang,
      targetLang: options.targetLang,
      format: options.format ?? 'text',
      glossary,
      context,
    };

    try {
      const result = await agent.translate(request);

      // Store in cache
      this.cache.set(cacheKey, result.content, result.metadata.qualityScore);

      return {
        original: chunk.content,
        translated: result.content,
        startOffset: chunk.startOffset,
        endOffset: chunk.endOffset,
        qualityScore: result.metadata.qualityScore,
        iterations: result.metadata.iterations,
        tokensUsed: result.metadata.tokensUsed,
      };
    } catch (error: unknown) {
      // strictQuality is documented as "throw error when quality threshold is
      // not met"; swallowing the error here would make that unobservable.
      // NOTE(review): assumes the agent reports this via TranslationError —
      // confirm against agent.ts.
      if (options.strictQuality && error instanceof TranslationError) {
        throw error;
      }

      // Log error but continue with original content
      logger.error(`Failed to translate chunk: ${String(error)}`);

      return {
        original: chunk.content,
        translated: chunk.content, // Fallback to original
        startOffset: chunk.startOffset,
        endOffset: chunk.endOffset,
        qualityScore: 0,
        iterations: 0,
        tokensUsed: { input: 0, output: 0 },
      };
    }
  }

  // ============================================================================
  // Utility Methods
  // ============================================================================

  /** Model name used for reporting and cache keys: configured model, else the provider default. */
  private resolveModelName(): string {
    return this.config.provider.model ?? this.provider.defaultModel;
  }

  /**
   * Guess the document format from simple textual indicators.
   *
   * Markdown indicators are checked before HTML ones, so content containing
   * both is reported as markdown. Anything else is plain text.
   */
  private detectFormat(content: string): DocumentFormat {
    // '# ' also covers deeper headings such as '## ', since they contain it.
    const looksLikeMarkdown =
      content.includes('# ') ||
      content.includes('```') ||
      content.includes('- ') ||
      /\[.+\]\(.+\)/.test(content);
    if (looksLikeMarkdown) {
      return 'markdown';
    }

    const htmlMarkers = ['<html', '<body', '<div', '<p>'];
    if (htmlMarkers.some((marker) => content.includes(marker))) {
      return 'html';
    }

    return 'text';
  }
}
574
+
575
+ // ============================================================================
576
+ // Factory Function
577
+ // ============================================================================
578
+
579
/**
 * Factory wrapper around the {@link TranslationEngine} constructor.
 *
 * @param options - Passed to the constructor unchanged.
 * @returns A newly constructed engine.
 */
export function createTranslationEngine(
  options: TranslationEngineOptions
): TranslationEngine {
  const engine = new TranslationEngine(options);
  return engine;
}
584
+
585
+ // ============================================================================
586
+ // Simple Translation Function (for direct use)
587
+ // ============================================================================
588
+
589
/**
 * One-shot convenience helper: builds a throwaway engine with a built-in
 * default configuration and returns only the translated text.
 *
 * The default configuration names 'claude' as the provider (ignored when
 * `options.provider` is supplied), uses a quality threshold of 85 and at most
 * 4 iterations unless overridden, and configures no cache path.
 *
 * @param content - Text to translate.
 * @param sourceLang - Language code of `content`.
 * @param targetLang - Language code to translate into.
 * @param options - Optional provider, glossary path, quality overrides and verbosity.
 * @returns The translated content.
 */
export async function translateText(
  content: string,
  sourceLang: string,
  targetLang: string,
  options?: {
    provider?: LLMProvider;
    glossaryPath?: string;
    qualityThreshold?: number;
    maxIterations?: number;
    verbose?: boolean;
  }
): Promise<string> {
  const threshold = options?.qualityThreshold ?? 85;
  const iterationLimit = options?.maxIterations ?? 4;

  const config: TranslateConfig = {
    version: '1.0',
    languages: { source: sourceLang, targets: [targetLang] },
    provider: { default: 'claude' },
    quality: {
      threshold,
      maxIterations: iterationLimit,
      evaluationMethod: 'llm',
    },
    chunking: {
      maxTokens: 1024,
      overlapTokens: 150,
      preserveStructure: true,
    },
    paths: { output: './' },
  };

  const engine = createTranslationEngine({
    config,
    provider: options?.provider,
    verbose: options?.verbose,
  });

  // The raw (possibly undefined) overrides are forwarded as well; the engine
  // falls back to the config values above when they are not set.
  const { content: translated } = await engine.translateContent({
    content,
    sourceLang,
    targetLang,
    glossaryPath: options?.glossaryPath,
    qualityThreshold: options?.qualityThreshold,
    maxIterations: options?.maxIterations,
  });

  return translated;
}