@llm-translate/cli 1.0.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +51 -0
- package/.env.example +33 -0
- package/.github/workflows/docs-pages.yml +57 -0
- package/.github/workflows/release.yml +49 -0
- package/.translaterc.json +44 -0
- package/CLAUDE.md +243 -0
- package/Dockerfile +55 -0
- package/README.md +371 -0
- package/RFC.md +1595 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +4494 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/index.d.ts +1152 -0
- package/dist/index.js +3841 -0
- package/dist/index.js.map +1 -0
- package/docker-compose.yml +56 -0
- package/docs/.vitepress/config.ts +161 -0
- package/docs/api/agent.md +262 -0
- package/docs/api/engine.md +274 -0
- package/docs/api/index.md +171 -0
- package/docs/api/providers.md +304 -0
- package/docs/changelog.md +64 -0
- package/docs/cli/dir.md +243 -0
- package/docs/cli/file.md +213 -0
- package/docs/cli/glossary.md +273 -0
- package/docs/cli/index.md +129 -0
- package/docs/cli/init.md +158 -0
- package/docs/cli/serve.md +211 -0
- package/docs/glossary.json +235 -0
- package/docs/guide/chunking.md +272 -0
- package/docs/guide/configuration.md +139 -0
- package/docs/guide/cost-optimization.md +237 -0
- package/docs/guide/docker.md +371 -0
- package/docs/guide/getting-started.md +150 -0
- package/docs/guide/glossary.md +241 -0
- package/docs/guide/index.md +86 -0
- package/docs/guide/ollama.md +515 -0
- package/docs/guide/prompt-caching.md +221 -0
- package/docs/guide/providers.md +232 -0
- package/docs/guide/quality-control.md +206 -0
- package/docs/guide/vitepress-integration.md +265 -0
- package/docs/index.md +63 -0
- package/docs/ja/api/agent.md +262 -0
- package/docs/ja/api/engine.md +274 -0
- package/docs/ja/api/index.md +171 -0
- package/docs/ja/api/providers.md +304 -0
- package/docs/ja/changelog.md +64 -0
- package/docs/ja/cli/dir.md +243 -0
- package/docs/ja/cli/file.md +213 -0
- package/docs/ja/cli/glossary.md +273 -0
- package/docs/ja/cli/index.md +111 -0
- package/docs/ja/cli/init.md +158 -0
- package/docs/ja/guide/chunking.md +271 -0
- package/docs/ja/guide/configuration.md +139 -0
- package/docs/ja/guide/cost-optimization.md +30 -0
- package/docs/ja/guide/getting-started.md +150 -0
- package/docs/ja/guide/glossary.md +214 -0
- package/docs/ja/guide/index.md +32 -0
- package/docs/ja/guide/ollama.md +410 -0
- package/docs/ja/guide/prompt-caching.md +221 -0
- package/docs/ja/guide/providers.md +232 -0
- package/docs/ja/guide/quality-control.md +137 -0
- package/docs/ja/guide/vitepress-integration.md +265 -0
- package/docs/ja/index.md +58 -0
- package/docs/ko/api/agent.md +262 -0
- package/docs/ko/api/engine.md +274 -0
- package/docs/ko/api/index.md +171 -0
- package/docs/ko/api/providers.md +304 -0
- package/docs/ko/changelog.md +64 -0
- package/docs/ko/cli/dir.md +243 -0
- package/docs/ko/cli/file.md +213 -0
- package/docs/ko/cli/glossary.md +273 -0
- package/docs/ko/cli/index.md +111 -0
- package/docs/ko/cli/init.md +158 -0
- package/docs/ko/guide/chunking.md +271 -0
- package/docs/ko/guide/configuration.md +139 -0
- package/docs/ko/guide/cost-optimization.md +30 -0
- package/docs/ko/guide/getting-started.md +150 -0
- package/docs/ko/guide/glossary.md +214 -0
- package/docs/ko/guide/index.md +32 -0
- package/docs/ko/guide/ollama.md +410 -0
- package/docs/ko/guide/prompt-caching.md +221 -0
- package/docs/ko/guide/providers.md +232 -0
- package/docs/ko/guide/quality-control.md +137 -0
- package/docs/ko/guide/vitepress-integration.md +265 -0
- package/docs/ko/index.md +58 -0
- package/docs/zh/api/agent.md +262 -0
- package/docs/zh/api/engine.md +274 -0
- package/docs/zh/api/index.md +171 -0
- package/docs/zh/api/providers.md +304 -0
- package/docs/zh/changelog.md +64 -0
- package/docs/zh/cli/dir.md +243 -0
- package/docs/zh/cli/file.md +213 -0
- package/docs/zh/cli/glossary.md +273 -0
- package/docs/zh/cli/index.md +111 -0
- package/docs/zh/cli/init.md +158 -0
- package/docs/zh/guide/chunking.md +271 -0
- package/docs/zh/guide/configuration.md +139 -0
- package/docs/zh/guide/cost-optimization.md +30 -0
- package/docs/zh/guide/getting-started.md +150 -0
- package/docs/zh/guide/glossary.md +214 -0
- package/docs/zh/guide/index.md +32 -0
- package/docs/zh/guide/ollama.md +410 -0
- package/docs/zh/guide/prompt-caching.md +221 -0
- package/docs/zh/guide/providers.md +232 -0
- package/docs/zh/guide/quality-control.md +137 -0
- package/docs/zh/guide/vitepress-integration.md +265 -0
- package/docs/zh/index.md +58 -0
- package/package.json +91 -0
- package/release.config.mjs +15 -0
- package/schemas/glossary.schema.json +110 -0
- package/src/cli/commands/dir.ts +469 -0
- package/src/cli/commands/file.ts +291 -0
- package/src/cli/commands/glossary.ts +221 -0
- package/src/cli/commands/init.ts +68 -0
- package/src/cli/commands/serve.ts +60 -0
- package/src/cli/index.ts +64 -0
- package/src/cli/options.ts +59 -0
- package/src/core/agent.ts +1119 -0
- package/src/core/chunker.ts +391 -0
- package/src/core/engine.ts +634 -0
- package/src/errors.ts +188 -0
- package/src/index.ts +147 -0
- package/src/integrations/vitepress.ts +549 -0
- package/src/parsers/markdown.ts +383 -0
- package/src/providers/claude.ts +259 -0
- package/src/providers/interface.ts +109 -0
- package/src/providers/ollama.ts +379 -0
- package/src/providers/openai.ts +308 -0
- package/src/providers/registry.ts +153 -0
- package/src/server/index.ts +152 -0
- package/src/server/middleware/auth.ts +93 -0
- package/src/server/middleware/logger.ts +90 -0
- package/src/server/routes/health.ts +84 -0
- package/src/server/routes/translate.ts +210 -0
- package/src/server/types.ts +138 -0
- package/src/services/cache.ts +899 -0
- package/src/services/config.ts +217 -0
- package/src/services/glossary.ts +247 -0
- package/src/types/analysis.ts +164 -0
- package/src/types/index.ts +265 -0
- package/src/types/modes.ts +121 -0
- package/src/types/mqm.ts +157 -0
- package/src/utils/logger.ts +141 -0
- package/src/utils/tokens.ts +116 -0
- package/tests/fixtures/glossaries/ml-glossary.json +53 -0
- package/tests/fixtures/input/lynq-installation.ko.md +350 -0
- package/tests/fixtures/input/lynq-installation.md +350 -0
- package/tests/fixtures/input/simple.ko.md +27 -0
- package/tests/fixtures/input/simple.md +27 -0
- package/tests/unit/chunker.test.ts +229 -0
- package/tests/unit/glossary.test.ts +146 -0
- package/tests/unit/markdown.test.ts +205 -0
- package/tests/unit/tokens.test.ts +81 -0
- package/tsconfig.json +28 -0
- package/tsup.config.ts +34 -0
- package/vitest.config.ts +16 -0
|
@@ -0,0 +1,634 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
TranslateConfig,
|
|
3
|
+
TranslationRequest,
|
|
4
|
+
DocumentResult,
|
|
5
|
+
ChunkResult,
|
|
6
|
+
Chunk,
|
|
7
|
+
DocumentFormat,
|
|
8
|
+
ResolvedGlossary,
|
|
9
|
+
} from '../types/index.js';
|
|
10
|
+
import type { LLMProvider } from '../providers/interface.js';
|
|
11
|
+
import { TranslationAgent, createTranslationAgent } from './agent.js';
|
|
12
|
+
import { chunkContent, getChunkStats } from './chunker.js';
|
|
13
|
+
import {
|
|
14
|
+
extractTextForTranslation,
|
|
15
|
+
restorePreservedSections,
|
|
16
|
+
} from '../parsers/markdown.js';
|
|
17
|
+
import { loadGlossary, resolveGlossary } from '../services/glossary.js';
|
|
18
|
+
import { getProvider, getProviderConfigFromEnv } from '../providers/registry.js';
|
|
19
|
+
import { logger, createTimer } from '../utils/logger.js';
|
|
20
|
+
import { TranslationError, ErrorCode } from '../errors.js';
|
|
21
|
+
import {
|
|
22
|
+
CacheManager,
|
|
23
|
+
createCacheManager,
|
|
24
|
+
createNullCacheManager,
|
|
25
|
+
type CacheKey,
|
|
26
|
+
} from '../services/cache.js';
|
|
27
|
+
|
|
28
|
+
// ============================================================================
|
|
29
|
+
// Engine Options
|
|
30
|
+
// ============================================================================
|
|
31
|
+
|
|
32
|
+
/**
 * Construction-time settings for {@link TranslationEngine}.
 */
export interface TranslationEngineOptions {
  /** Full translation configuration (provider, quality, chunking, paths, ...). */
  config: TranslateConfig;

  /**
   * Pre-built provider instance. When omitted, the engine creates one from
   * `config.provider.default` and environment-derived provider settings.
   */
  provider?: LLMProvider;

  /** Emit progress and diagnostic log lines. Treated as `false` when omitted. */
  verbose?: boolean;

  /** Disable caching (--no-cache mode), even if `config.paths.cache` is set. */
  noCache?: boolean;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Per-call inputs for translating one document's content.
 */
export interface TranslateFileOptions {
  /** Raw text of the document to translate. */
  content: string;

  /** Language code of the input text. */
  sourceLang: string;

  /** Language code to translate into. */
  targetLang: string;

  /** Document format. When omitted, the engine detects it from `content`. */
  format?: DocumentFormat;

  /** Path to a glossary file; takes precedence over `config.glossary.path`. */
  glossaryPath?: string;

  /** Overrides `config.quality.threshold` for this call. */
  qualityThreshold?: number;

  /** Overrides `config.quality.maxIterations` for this call. */
  maxIterations?: number;

  /** Free-form description of the document's purpose, forwarded to the translation request. */
  context?: string;

  /**
   * Per-language style instruction, e.g. "경어체" (Korean honorific register)
   * or "です・ます調" (Japanese polite desu/masu register).
   * Falls back to `config.languages.styles[targetLang]` if not specified.
   */
  styleInstruction?: string;

  /** If true, throw an error when the quality threshold is not met. */
  strictQuality?: boolean;

  /** If true, throw an error when glossary terms are missed. */
  strictGlossary?: boolean;
}
|
|
56
|
+
|
|
57
|
+
// ============================================================================
|
|
58
|
+
// Translation Engine
|
|
59
|
+
// ============================================================================
|
|
60
|
+
|
|
61
|
+
/**
 * Coordinates translation of a whole document: resolves the provider and
 * cache, loads an optional glossary, splits the content into chunks,
 * translates each chunk through a {@link TranslationAgent} (consulting the
 * cache first), reassembles the result and reports glossary compliance.
 */
export class TranslationEngine {
  /** ATX heading marker ("# " .. "###### ") at the start of a line. */
  private static readonly HEADING_PATTERN = /^ {0,3}#{1,6}[ \t]/m;
  /** Bullet list marker ("- ", "* ", "+ ") at the start of a line. */
  private static readonly LIST_ITEM_PATTERN = /^[ \t]*[-*+][ \t]/m;
  /** Inline markdown link: [text](target). */
  private static readonly LINK_PATTERN = /\[.+\]\(.+\)/;

  private config: TranslateConfig;
  private provider: LLMProvider;
  private verbose: boolean;
  private cache: CacheManager;
  /** Engine-lifetime count of chunk lookups served from the cache. */
  private cacheHits = 0;
  /** Engine-lifetime count of chunk lookups that were not in the cache. */
  private cacheMisses = 0;

  /**
   * @param options - Engine configuration; see {@link TranslationEngineOptions}.
   */
  constructor(options: TranslationEngineOptions) {
    this.config = options.config;
    this.verbose = options.verbose ?? false;
    this.provider = options.provider ?? this.createProviderFromConfig();

    // Caching is off when explicitly disabled or when no cache directory is configured.
    const cacheDir = options.noCache ? undefined : this.config.paths?.cache;
    if (!cacheDir) {
      this.cache = createNullCacheManager();
      if (this.verbose && options.noCache) {
        logger.info('Cache disabled (--no-cache)');
      }
    } else {
      this.cache = createCacheManager({ cacheDir, verbose: this.verbose });
      if (this.verbose) {
        const stats = this.cache.getStats();
        logger.info(`Cache initialized: ${stats.entries} entries`);
      }
    }
  }

  /**
   * Translate a single file/content.
   *
   * @param options - Content, language pair and per-call overrides.
   * @returns The translated document with per-chunk results and metadata;
   *   `glossaryCompliance` is set when a non-empty glossary was loaded.
   * @throws TranslationError with `ErrorCode.GLOSSARY_COMPLIANCE_FAILED` when
   *   `options.strictGlossary` is set and a glossary term present in the
   *   source is missing from the translation.
   */
  async translateContent(options: TranslateFileOptions): Promise<DocumentResult> {
    const timer = createTimer();
    const format = options.format ?? this.detectFormat(options.content);
    // Pass the resolved format downstream so chunk requests are labelled with
    // the detected format instead of falling back to 'text'.
    const resolvedOptions: TranslateFileOptions = { ...options, format };

    if (this.verbose) {
      logger.info(`Translating content (${format} format)`);
      logger.info(`Source: ${options.sourceLang} → Target: ${options.targetLang}`);
    }

    const glossary = await this.loadGlossaryFor(options);

    let result: DocumentResult;
    switch (format) {
      case 'markdown':
        result = await this.translateMarkdown(resolvedOptions, glossary);
        break;
      case 'html': // For now, treat HTML as plain text (Phase 2 will add proper HTML support)
      case 'text':
      default:
        result = await this.translatePlainText(resolvedOptions, glossary);
        break;
    }

    result.metadata.totalDuration = timer.elapsed();

    // Check glossary compliance if a non-empty glossary is available
    if (glossary && glossary.terms.length > 0) {
      const compliance = this.checkDocumentGlossaryCompliance(
        options.content,
        result.content,
        glossary
      );
      result.glossaryCompliance = compliance;

      if (this.verbose) {
        const checked = compliance.applied.length + compliance.missed.length;
        logger.info(`Glossary compliance: ${compliance.applied.length}/${checked} terms applied`);
        if (compliance.missed.length > 0) {
          logger.warn(`Missed glossary terms: ${compliance.missed.join(', ')}`);
        }
      }

      // Strict glossary mode - fail if any terms are missed
      if (options.strictGlossary && !compliance.compliant) {
        throw new TranslationError(ErrorCode.GLOSSARY_COMPLIANCE_FAILED, {
          missed: compliance.missed.join(', '),
          applied: compliance.applied,
          total: glossary.terms.length,
        });
      }
    }

    if (this.verbose) {
      logger.success(`Translation complete in ${timer.format()}`);
      logger.info(`Average quality: ${result.metadata.averageQuality.toFixed(1)}/100`);
    }

    return result;
  }

  /**
   * Build a provider from `config.provider.default`, applying the configured
   * model (CLI --model option) when one is set.
   */
  private createProviderFromConfig(): LLMProvider {
    const providerName = this.config.provider.default;
    const providerConfig = getProviderConfigFromEnv(providerName);
    if (this.config.provider.model) {
      providerConfig.defaultModel = this.config.provider.model;
    }
    return getProvider(providerName, providerConfig);
  }

  /** Model name reported in metadata and used in cache keys. */
  private resolveModel() {
    return this.config.provider.model ?? this.provider.defaultModel;
  }

  /**
   * Load and resolve the glossary for this call. The per-call path wins over
   * `config.glossary.path`. The glossary is optional: a load failure yields
   * `undefined` (with a warning in verbose mode) rather than an error.
   */
  private async loadGlossaryFor(
    options: TranslateFileOptions
  ): Promise<ResolvedGlossary | undefined> {
    const explicitPath = options.glossaryPath;
    // `||` on purpose: an empty per-call path falls through to the config path.
    const glossaryPath = explicitPath || this.config.glossary?.path;
    if (!glossaryPath) {
      return undefined;
    }

    try {
      const rawGlossary = await loadGlossary(glossaryPath);
      const glossary = resolveGlossary(rawGlossary, options.targetLang);
      if (this.verbose) {
        const label = explicitPath ? 'Loaded glossary' : 'Loaded glossary from config';
        logger.info(`${label}: ${glossary.terms.length} terms`);
      }
      return glossary;
    } catch (error) {
      if (this.verbose) {
        logger.warn(`Failed to load glossary: ${String(error)}`);
      }
      return undefined;
    }
  }

  /**
   * Check glossary compliance for the entire document.
   *
   * A term is only considered when its source form occurs in the original
   * content; it is "applied" when its target form occurs in the translation
   * and "missed" otherwise. Matching is a substring test, case-insensitive
   * unless the term is marked `caseSensitive`.
   *
   * @returns Source terms that were applied / missed; `compliant` is true
   *   when nothing was missed.
   */
  private checkDocumentGlossaryCompliance(
    sourceContent: string,
    translatedContent: string,
    glossary: ResolvedGlossary
  ): { applied: string[]; missed: string[]; compliant: boolean } {
    const applied: string[] = [];
    const missed: string[] = [];
    const sourceLower = sourceContent.toLowerCase();
    const translatedLower = translatedContent.toLowerCase();

    for (const term of glossary.terms) {
      const inSource = term.caseSensitive
        ? sourceContent.includes(term.source)
        : sourceLower.includes(term.source.toLowerCase());
      if (!inSource) {
        // Term not in source, nothing to verify
        continue;
      }

      const inTranslation = term.caseSensitive
        ? translatedContent.includes(term.target)
        : translatedLower.includes(term.target.toLowerCase());
      (inTranslation ? applied : missed).push(term.source);
    }

    return { applied, missed, compliant: missed.length === 0 };
  }

  // ============================================================================
  // Format-Specific Translation
  // ============================================================================

  /**
   * Translate markdown: non-translatable sections are extracted first and
   * restored into the reassembled translation afterwards.
   */
  private async translateMarkdown(
    options: TranslateFileOptions,
    glossary?: ResolvedGlossary
  ): Promise<DocumentResult> {
    // Extract text for translation, preserving code blocks etc.
    const { text, preservedSections } = extractTextForTranslation(options.content);
    const chunks = this.splitIntoChunks(text);

    if (this.verbose) {
      const stats = getChunkStats(chunks);
      logger.info(`Chunked into ${stats.translatableChunks} translatable sections`);
    }

    const result = await this.translateChunks(chunks, options, glossary);
    return {
      ...result,
      content: restorePreservedSections(result.content, preservedSections),
    };
  }

  /** Translate content as plain text: chunk, translate, reassemble. */
  private async translatePlainText(
    options: TranslateFileOptions,
    glossary?: ResolvedGlossary
  ): Promise<DocumentResult> {
    return this.translateChunks(this.splitIntoChunks(options.content), options, glossary);
  }

  /** Chunk text using the configured token limits. */
  private splitIntoChunks(text: string) {
    return chunkContent(text, {
      maxTokens: this.config.chunking.maxTokens,
      overlapTokens: this.config.chunking.overlapTokens,
    });
  }

  /**
   * Shared pipeline for all formats: translate every chunk in order, join the
   * translations and aggregate token, iteration, quality and cache statistics.
   * `metadata.totalDuration` is left at 0 and filled in by the caller.
   */
  private async translateChunks(
    chunks: readonly Chunk[],
    options: TranslateFileOptions,
    glossary: ResolvedGlossary | undefined
  ): Promise<DocumentResult> {
    const agent = createTranslationAgent({
      provider: this.provider,
      qualityThreshold: options.qualityThreshold ?? this.config.quality.threshold,
      maxIterations: options.maxIterations ?? this.config.quality.maxIterations,
      verbose: this.verbose,
      strictQuality: options.strictQuality,
    });

    const chunkResults: ChunkResult[] = [];
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    let totalIterations = 0;
    let cacheHits = 0;
    let cacheMisses = 0;

    for (let i = 0; i < chunks.length; i++) {
      const chunk = chunks[i];
      if (!chunk) continue;

      if (chunk.type === 'preserve') {
        // Keep preserved content as-is; it never goes through the agent or the cache.
        chunkResults.push({
          original: chunk.content,
          translated: chunk.content,
          startOffset: chunk.startOffset,
          endOffset: chunk.endOffset,
          qualityScore: 100,
        });
        continue;
      }

      if (this.verbose) {
        logger.info(`Translating chunk ${i + 1}/${chunks.length}...`);
      }

      const result = await this.translateChunk(chunk, options, glossary, agent);
      chunkResults.push(result);

      if (result.tokensUsed) {
        totalInputTokens += result.tokensUsed.input;
        totalOutputTokens += result.tokensUsed.output;
      }
      if (result.iterations) {
        totalIterations += result.iterations;
      }

      // Only chunks that actually performed a cache lookup are counted.
      // Failed translations (score 0) are not counted as misses.
      if (result.cached) {
        cacheHits++;
      } else if (result.qualityScore > 0) {
        cacheMisses++;
      }
    }

    // Failed chunks (score 0) are excluded from the average; preserved chunks
    // contribute their fixed score of 100.
    const qualityScores = chunkResults
      .map((r) => r.qualityScore)
      .filter((score) => score > 0);
    const averageQuality =
      qualityScores.length > 0
        ? qualityScores.reduce((sum, score) => sum + score, 0) / qualityScores.length
        : 0;

    return {
      content: chunkResults.map((r) => r.translated).join(''),
      chunks: chunkResults,
      metadata: {
        totalTokensUsed: totalInputTokens + totalOutputTokens,
        totalDuration: 0, // Will be set by caller
        averageQuality,
        provider: this.provider.name,
        model: this.resolveModel(),
        totalIterations,
        tokensUsed: {
          input: totalInputTokens,
          output: totalOutputTokens,
        },
        cache: {
          hits: cacheHits,
          misses: cacheMisses,
        },
      },
    };
  }

  /**
   * Translate one chunk, serving it from the cache when possible.
   *
   * On a translation failure the error is logged and the original text is
   * returned with `qualityScore: 0`, so one bad chunk does not abort the
   * whole document. Failed chunks are not written to the cache.
   */
  private async translateChunk(
    chunk: Chunk,
    options: TranslateFileOptions,
    glossary: ResolvedGlossary | undefined,
    agent: TranslationAgent
  ): Promise<ChunkResult> {
    // NOTE(review): the key below does not include the style instruction or
    // document context, so changing only those may still hit an older cached
    // translation - confirm this is intended.
    const cacheKey: CacheKey = {
      content: chunk.content,
      sourceLang: options.sourceLang,
      targetLang: options.targetLang,
      glossary: glossary
        ? JSON.stringify(glossary.terms.map((t) => ({ s: t.source, t: t.target })))
        : undefined,
      provider: this.provider.name,
      model: this.resolveModel(),
    };

    // Check cache first
    const cacheResult = this.cache.get(cacheKey);
    if (cacheResult.hit && cacheResult.entry) {
      this.cacheHits++;
      if (this.verbose) {
        logger.info(` ↳ Cache hit (quality: ${cacheResult.entry.qualityScore})`);
      }
      return {
        original: chunk.content,
        translated: cacheResult.entry.translation,
        startOffset: chunk.startOffset,
        endOffset: chunk.endOffset,
        qualityScore: cacheResult.entry.qualityScore,
        iterations: 0,
        tokensUsed: { input: 0, output: 0, cacheRead: 1 },
        cached: true,
      };
    }

    this.cacheMisses++;

    // Style instruction precedence: per-call option > config.languages.styles[targetLang]
    const context: TranslationRequest['context'] = {
      documentPurpose: options.context,
      styleInstruction:
        options.styleInstruction ?? this.config.languages.styles?.[options.targetLang],
    };

    // Add header hierarchy context if available
    const headerHierarchy = chunk.metadata?.headerHierarchy;
    if (headerHierarchy && headerHierarchy.length > 0) {
      context.documentSummary = `Current section: ${headerHierarchy.join(' > ')}`;
    }

    // Add previous context if available
    const previousContext = chunk.metadata?.previousContext;
    if (previousContext) {
      context.previousChunks = [previousContext];
    }

    const request: TranslationRequest = {
      content: chunk.content,
      sourceLang: options.sourceLang,
      targetLang: options.targetLang,
      // translateContent always supplies the resolved format; 'text' is only a
      // safety net.
      format: options.format ?? 'text',
      glossary,
      context,
    };

    try {
      const result = await agent.translate(request);

      // Store in cache
      this.cache.set(cacheKey, result.content, result.metadata.qualityScore);

      return {
        original: chunk.content,
        translated: result.content,
        startOffset: chunk.startOffset,
        endOffset: chunk.endOffset,
        qualityScore: result.metadata.qualityScore,
        iterations: result.metadata.iterations,
        tokensUsed: result.metadata.tokensUsed,
      };
    } catch (error) {
      // Log error but continue with original content
      logger.error(`Failed to translate chunk: ${String(error)}`);

      return {
        original: chunk.content,
        translated: chunk.content, // Fallback to original
        startOffset: chunk.startOffset,
        endOffset: chunk.endOffset,
        qualityScore: 0,
        iterations: 0,
        tokensUsed: { input: 0, output: 0 },
      };
    }
  }

  // ============================================================================
  // Utility Methods
  // ============================================================================

  /**
   * Guess the document format from its content.
   *
   * Markdown is checked before HTML. Heading and list markers must start a
   * line, so ordinary prose containing "- " or "# " (e.g. "a - b", "C# is")
   * is not mistaken for markdown.
   */
  private detectFormat(content: string): DocumentFormat {
    const looksLikeMarkdown =
      TranslationEngine.HEADING_PATTERN.test(content) ||
      content.includes('```') ||
      TranslationEngine.LIST_ITEM_PATTERN.test(content) ||
      TranslationEngine.LINK_PATTERN.test(content);
    if (looksLikeMarkdown) {
      return 'markdown';
    }

    const htmlMarkers = ['<html', '<body', '<div', '<p>'];
    if (htmlMarkers.some((marker) => content.includes(marker))) {
      return 'html';
    }

    return 'text';
  }
}
|
|
574
|
+
|
|
575
|
+
// ============================================================================
|
|
576
|
+
// Factory Function
|
|
577
|
+
// ============================================================================
|
|
578
|
+
|
|
579
|
+
/**
 * Factory for {@link TranslationEngine}; equivalent to calling the
 * constructor with the same options.
 *
 * @param options - Engine configuration.
 * @returns A newly constructed engine.
 */
export function createTranslationEngine(
  options: TranslationEngineOptions
): TranslationEngine {
  const engine = new TranslationEngine(options);
  return engine;
}
|
|
584
|
+
|
|
585
|
+
// ============================================================================
|
|
586
|
+
// Simple Translation Function (for direct use)
|
|
587
|
+
// ============================================================================
|
|
588
|
+
|
|
589
|
+
/**
 * One-shot convenience helper: builds an engine with a built-in default
 * configuration and returns only the translated text.
 *
 * Defaults: provider `claude`, quality threshold 85, up to 4 iterations,
 * chunks of 1024 tokens with a 150-token overlap. No cache path is
 * configured here.
 *
 * @param content - Text to translate.
 * @param sourceLang - Language code of `content`.
 * @param targetLang - Language code to translate into.
 * @param options - Optional provider instance, glossary path, quality
 *   overrides and verbosity.
 * @returns The translated content.
 */
export async function translateText(
  content: string,
  sourceLang: string,
  targetLang: string,
  options?: {
    provider?: LLMProvider;
    glossaryPath?: string;
    qualityThreshold?: number;
    maxIterations?: number;
    verbose?: boolean;
  }
): Promise<string> {
  const { provider, glossaryPath, qualityThreshold, maxIterations, verbose } = options ?? {};

  const config: TranslateConfig = {
    version: '1.0',
    languages: { source: sourceLang, targets: [targetLang] },
    provider: { default: 'claude' },
    quality: {
      threshold: qualityThreshold ?? 85,
      maxIterations: maxIterations ?? 4,
      evaluationMethod: 'llm',
    },
    chunking: {
      maxTokens: 1024,
      overlapTokens: 150,
      preserveStructure: true,
    },
    paths: { output: './' },
  };

  const engine = createTranslationEngine({ config, provider, verbose });

  const { content: translated } = await engine.translateContent({
    content,
    sourceLang,
    targetLang,
    glossaryPath,
    qualityThreshold,
    maxIterations,
  });

  return translated;
}
|