@llm-translate/cli 1.0.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +51 -0
- package/.env.example +33 -0
- package/.github/workflows/docs-pages.yml +57 -0
- package/.github/workflows/release.yml +49 -0
- package/.translaterc.json +44 -0
- package/CLAUDE.md +243 -0
- package/Dockerfile +55 -0
- package/README.md +371 -0
- package/RFC.md +1595 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +4494 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/index.d.ts +1152 -0
- package/dist/index.js +3841 -0
- package/dist/index.js.map +1 -0
- package/docker-compose.yml +56 -0
- package/docs/.vitepress/config.ts +161 -0
- package/docs/api/agent.md +262 -0
- package/docs/api/engine.md +274 -0
- package/docs/api/index.md +171 -0
- package/docs/api/providers.md +304 -0
- package/docs/changelog.md +64 -0
- package/docs/cli/dir.md +243 -0
- package/docs/cli/file.md +213 -0
- package/docs/cli/glossary.md +273 -0
- package/docs/cli/index.md +129 -0
- package/docs/cli/init.md +158 -0
- package/docs/cli/serve.md +211 -0
- package/docs/glossary.json +235 -0
- package/docs/guide/chunking.md +272 -0
- package/docs/guide/configuration.md +139 -0
- package/docs/guide/cost-optimization.md +237 -0
- package/docs/guide/docker.md +371 -0
- package/docs/guide/getting-started.md +150 -0
- package/docs/guide/glossary.md +241 -0
- package/docs/guide/index.md +86 -0
- package/docs/guide/ollama.md +515 -0
- package/docs/guide/prompt-caching.md +221 -0
- package/docs/guide/providers.md +232 -0
- package/docs/guide/quality-control.md +206 -0
- package/docs/guide/vitepress-integration.md +265 -0
- package/docs/index.md +63 -0
- package/docs/ja/api/agent.md +262 -0
- package/docs/ja/api/engine.md +274 -0
- package/docs/ja/api/index.md +171 -0
- package/docs/ja/api/providers.md +304 -0
- package/docs/ja/changelog.md +64 -0
- package/docs/ja/cli/dir.md +243 -0
- package/docs/ja/cli/file.md +213 -0
- package/docs/ja/cli/glossary.md +273 -0
- package/docs/ja/cli/index.md +111 -0
- package/docs/ja/cli/init.md +158 -0
- package/docs/ja/guide/chunking.md +271 -0
- package/docs/ja/guide/configuration.md +139 -0
- package/docs/ja/guide/cost-optimization.md +30 -0
- package/docs/ja/guide/getting-started.md +150 -0
- package/docs/ja/guide/glossary.md +214 -0
- package/docs/ja/guide/index.md +32 -0
- package/docs/ja/guide/ollama.md +410 -0
- package/docs/ja/guide/prompt-caching.md +221 -0
- package/docs/ja/guide/providers.md +232 -0
- package/docs/ja/guide/quality-control.md +137 -0
- package/docs/ja/guide/vitepress-integration.md +265 -0
- package/docs/ja/index.md +58 -0
- package/docs/ko/api/agent.md +262 -0
- package/docs/ko/api/engine.md +274 -0
- package/docs/ko/api/index.md +171 -0
- package/docs/ko/api/providers.md +304 -0
- package/docs/ko/changelog.md +64 -0
- package/docs/ko/cli/dir.md +243 -0
- package/docs/ko/cli/file.md +213 -0
- package/docs/ko/cli/glossary.md +273 -0
- package/docs/ko/cli/index.md +111 -0
- package/docs/ko/cli/init.md +158 -0
- package/docs/ko/guide/chunking.md +271 -0
- package/docs/ko/guide/configuration.md +139 -0
- package/docs/ko/guide/cost-optimization.md +30 -0
- package/docs/ko/guide/getting-started.md +150 -0
- package/docs/ko/guide/glossary.md +214 -0
- package/docs/ko/guide/index.md +32 -0
- package/docs/ko/guide/ollama.md +410 -0
- package/docs/ko/guide/prompt-caching.md +221 -0
- package/docs/ko/guide/providers.md +232 -0
- package/docs/ko/guide/quality-control.md +137 -0
- package/docs/ko/guide/vitepress-integration.md +265 -0
- package/docs/ko/index.md +58 -0
- package/docs/zh/api/agent.md +262 -0
- package/docs/zh/api/engine.md +274 -0
- package/docs/zh/api/index.md +171 -0
- package/docs/zh/api/providers.md +304 -0
- package/docs/zh/changelog.md +64 -0
- package/docs/zh/cli/dir.md +243 -0
- package/docs/zh/cli/file.md +213 -0
- package/docs/zh/cli/glossary.md +273 -0
- package/docs/zh/cli/index.md +111 -0
- package/docs/zh/cli/init.md +158 -0
- package/docs/zh/guide/chunking.md +271 -0
- package/docs/zh/guide/configuration.md +139 -0
- package/docs/zh/guide/cost-optimization.md +30 -0
- package/docs/zh/guide/getting-started.md +150 -0
- package/docs/zh/guide/glossary.md +214 -0
- package/docs/zh/guide/index.md +32 -0
- package/docs/zh/guide/ollama.md +410 -0
- package/docs/zh/guide/prompt-caching.md +221 -0
- package/docs/zh/guide/providers.md +232 -0
- package/docs/zh/guide/quality-control.md +137 -0
- package/docs/zh/guide/vitepress-integration.md +265 -0
- package/docs/zh/index.md +58 -0
- package/package.json +91 -0
- package/release.config.mjs +15 -0
- package/schemas/glossary.schema.json +110 -0
- package/src/cli/commands/dir.ts +469 -0
- package/src/cli/commands/file.ts +291 -0
- package/src/cli/commands/glossary.ts +221 -0
- package/src/cli/commands/init.ts +68 -0
- package/src/cli/commands/serve.ts +60 -0
- package/src/cli/index.ts +64 -0
- package/src/cli/options.ts +59 -0
- package/src/core/agent.ts +1119 -0
- package/src/core/chunker.ts +391 -0
- package/src/core/engine.ts +634 -0
- package/src/errors.ts +188 -0
- package/src/index.ts +147 -0
- package/src/integrations/vitepress.ts +549 -0
- package/src/parsers/markdown.ts +383 -0
- package/src/providers/claude.ts +259 -0
- package/src/providers/interface.ts +109 -0
- package/src/providers/ollama.ts +379 -0
- package/src/providers/openai.ts +308 -0
- package/src/providers/registry.ts +153 -0
- package/src/server/index.ts +152 -0
- package/src/server/middleware/auth.ts +93 -0
- package/src/server/middleware/logger.ts +90 -0
- package/src/server/routes/health.ts +84 -0
- package/src/server/routes/translate.ts +210 -0
- package/src/server/types.ts +138 -0
- package/src/services/cache.ts +899 -0
- package/src/services/config.ts +217 -0
- package/src/services/glossary.ts +247 -0
- package/src/types/analysis.ts +164 -0
- package/src/types/index.ts +265 -0
- package/src/types/modes.ts +121 -0
- package/src/types/mqm.ts +157 -0
- package/src/utils/logger.ts +141 -0
- package/src/utils/tokens.ts +116 -0
- package/tests/fixtures/glossaries/ml-glossary.json +53 -0
- package/tests/fixtures/input/lynq-installation.ko.md +350 -0
- package/tests/fixtures/input/lynq-installation.md +350 -0
- package/tests/fixtures/input/simple.ko.md +27 -0
- package/tests/fixtures/input/simple.md +27 -0
- package/tests/unit/chunker.test.ts +229 -0
- package/tests/unit/glossary.test.ts +146 -0
- package/tests/unit/markdown.test.ts +205 -0
- package/tests/unit/tokens.test.ts +81 -0
- package/tsconfig.json +28 -0
- package/tsup.config.ts +34 -0
- package/vitest.config.ts +16 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,3841 @@
|
|
|
1
|
+
import { cosmiconfig } from 'cosmiconfig';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import { readFile } from 'fs/promises';
|
|
4
|
+
import { createHash } from 'crypto';
|
|
5
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync, rmSync, readdirSync, statSync } from 'fs';
|
|
6
|
+
import { join, dirname, relative } from 'path';
|
|
7
|
+
import chalk from 'chalk';
|
|
8
|
+
import { createAnthropic } from '@ai-sdk/anthropic';
|
|
9
|
+
import { generateText, streamText } from 'ai';
|
|
10
|
+
import { createOpenAI } from '@ai-sdk/openai';
|
|
11
|
+
import { unified } from 'unified';
|
|
12
|
+
import remarkParse from 'remark-parse';
|
|
13
|
+
import remarkStringify from 'remark-stringify';
|
|
14
|
+
import remarkGfm from 'remark-gfm';
|
|
15
|
+
import { visit } from 'unist-util-visit';
|
|
16
|
+
|
|
17
|
+
// src/errors.ts
|
|
18
|
+
/**
 * Identifiers for every failure category the library reports.
 *
 * Each key maps to the identical string, so a code can be compared against
 * either the property (`ErrorCode.FILE_NOT_FOUND`) or the raw literal.
 */
var ErrorCode = {
  // Configuration
  CONFIG_NOT_FOUND: "CONFIG_NOT_FOUND",
  CONFIG_INVALID: "CONFIG_INVALID",
  // Glossary files
  GLOSSARY_NOT_FOUND: "GLOSSARY_NOT_FOUND",
  GLOSSARY_INVALID: "GLOSSARY_INVALID",
  // LLM providers
  PROVIDER_NOT_FOUND: "PROVIDER_NOT_FOUND",
  PROVIDER_AUTH_FAILED: "PROVIDER_AUTH_FAILED",
  PROVIDER_RATE_LIMITED: "PROVIDER_RATE_LIMITED",
  PROVIDER_ERROR: "PROVIDER_ERROR",
  // Translation quality
  QUALITY_THRESHOLD_NOT_MET: "QUALITY_THRESHOLD_NOT_MET",
  GLOSSARY_COMPLIANCE_FAILED: "GLOSSARY_COMPLIANCE_FAILED",
  // File handling
  FILE_NOT_FOUND: "FILE_NOT_FOUND",
  FILE_READ_ERROR: "FILE_READ_ERROR",
  FILE_WRITE_ERROR: "FILE_WRITE_ERROR",
  UNSUPPORTED_FORMAT: "UNSUPPORTED_FORMAT",
  // Chunking
  CHUNK_TOO_LARGE: "CHUNK_TOO_LARGE",
  // Fallback
  UNKNOWN_ERROR: "UNKNOWN_ERROR"
};
|
|
37
|
+
/**
 * Default human-readable message template for each error code.
 *
 * Templates may contain `{name}` placeholders (e.g. `{score}`, `{threshold}`,
 * `{missed}`) which `formatErrorMessage` fills from the error's details.
 */
var errorMessages = {
  CONFIG_NOT_FOUND: "Configuration file not found. Run `llm-translate init` to create one.",
  CONFIG_INVALID: "Configuration file is invalid. Please check the format and required fields.",
  GLOSSARY_NOT_FOUND: "Glossary file not found at the specified path.",
  GLOSSARY_INVALID: "Glossary file is invalid. Please check the JSON format and structure.",
  PROVIDER_NOT_FOUND: "The specified LLM provider is not available. Supported providers: claude, openai, ollama.",
  PROVIDER_AUTH_FAILED: "Authentication failed. Check your API key in environment variables.",
  PROVIDER_RATE_LIMITED: "Rate limited by the LLM provider. Please wait and try again.",
  PROVIDER_ERROR: "An error occurred while communicating with the LLM provider.",
  QUALITY_THRESHOLD_NOT_MET: "Translation quality ({score}) did not meet threshold ({threshold}). Use --quality to adjust or --max-iterations to allow more refinement.",
  GLOSSARY_COMPLIANCE_FAILED: "Glossary compliance failed. Missing terms: {missed}. Use --no-strict-glossary to allow partial compliance.",
  FILE_NOT_FOUND: "The specified file was not found.",
  FILE_READ_ERROR: "Failed to read the file.",
  FILE_WRITE_ERROR: "Failed to write to the output file.",
  UNSUPPORTED_FORMAT: "The file format is not supported. Supported formats: markdown, html, text.",
  CHUNK_TOO_LARGE: "A chunk exceeds the maximum token limit and cannot be processed.",
  UNKNOWN_ERROR: "An unexpected error occurred."
};
|
|
55
|
+
/**
 * Error type carrying a machine-readable `code` and optional `details`.
 *
 * The message is `customMessage` when one is supplied (not null/undefined);
 * otherwise it is produced by `formatErrorMessage(code, details)`.
 */
var TranslationError = class _TranslationError extends Error {
  code;
  details;
  /**
   * @param code - Error code identifying the failure category
   * @param details - Optional extra data attached to the error
   * @param customMessage - Optional message overriding the default template
   */
  constructor(code, details, customMessage) {
    super(customMessage ?? formatErrorMessage(code, details));
    this.name = "TranslationError";
    this.code = code;
    this.details = details;
    // captureStackTrace is not available on every engine; when present,
    // passing the class keeps the constructor frame out of the stack.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, _TranslationError);
    }
  }
  /**
   * Plain-object form of the error (name, code, message, details).
   */
  toJSON() {
    const { name, code, message, details } = this;
    return { name, code, message, details };
  }
};
|
|
80
|
+
/**
 * Build the message for an error code, filling `{key}` placeholders from
 * `details`.
 *
 * Falls back to the UNKNOWN_ERROR template when `code` has no template.
 * Every occurrence of a placeholder is substituted, and the detail value is
 * inserted literally.
 *
 * @param code - Error code used to select the message template
 * @param details - Optional map of placeholder name to value
 * @returns The formatted message
 */
function formatErrorMessage(code, details) {
  let message = errorMessages[code] ?? errorMessages["UNKNOWN_ERROR" /* UNKNOWN_ERROR */];
  if (details) {
    for (const [key, value] of Object.entries(details)) {
      // split/join instead of String.replace: replace() with a string pattern
      // only touches the first match and treats "$&", "$1", "$$" in the
      // replacement as special sequences, which corrupts arbitrary values.
      message = message.split(`{${key}}`).join(String(value));
    }
  }
  return message;
}
|
|
89
|
+
/**
 * Report whether a caught value is a `TranslationError` instance.
 *
 * @param error - Any value, typically from a `catch` clause
 * @returns `true` when `error` is an instance of `TranslationError`
 */
function isTranslationError(error) {
  const isInstance = error instanceof TranslationError;
  return isInstance;
}
|
|
92
|
+
/**
 * Report whether a value is a `TranslationError` carrying a specific code.
 *
 * @param error - Any value, typically from a `catch` clause
 * @param code - Error code to compare against (strict equality)
 * @returns `true` only for a `TranslationError` whose `code` equals `code`
 */
function isErrorCode(error, code) {
  if (!isTranslationError(error)) {
    return false;
  }
  return error.code === code;
}
|
|
95
|
+
/**
 * Numeric process exit codes, keyed by outcome.
 * `getExitCode` maps error codes onto these values.
 */
var ExitCode = {
  SUCCESS: 0,
  GENERAL_ERROR: 1,
  INVALID_ARGUMENTS: 2,
  FILE_NOT_FOUND: 3,
  QUALITY_THRESHOLD_NOT_MET: 4,
  PROVIDER_ERROR: 5,
  GLOSSARY_VALIDATION_FAILED: 6
};
|
|
104
|
+
/**
 * Map an error's `code` to a process exit code.
 *
 * Missing files/config/glossary map to FILE_NOT_FOUND, invalid config and
 * unsupported formats to INVALID_ARGUMENTS, provider failures to
 * PROVIDER_ERROR, an invalid glossary to GLOSSARY_VALIDATION_FAILED, and any
 * other code to GENERAL_ERROR.
 *
 * @param error - Object exposing a `code` property
 * @returns One of the `ExitCode` values
 */
function getExitCode(error) {
  const code = error.code;
  const isOneOf = (...candidates) => candidates.includes(code);

  if (isOneOf("FILE_NOT_FOUND", "CONFIG_NOT_FOUND", "GLOSSARY_NOT_FOUND")) {
    return ExitCode.FILE_NOT_FOUND;
  }
  if (isOneOf("CONFIG_INVALID", "UNSUPPORTED_FORMAT")) {
    return ExitCode.INVALID_ARGUMENTS;
  }
  if (code === "QUALITY_THRESHOLD_NOT_MET") {
    return ExitCode.QUALITY_THRESHOLD_NOT_MET;
  }
  if (isOneOf("PROVIDER_NOT_FOUND", "PROVIDER_AUTH_FAILED", "PROVIDER_RATE_LIMITED", "PROVIDER_ERROR")) {
    return ExitCode.PROVIDER_ERROR;
  }
  if (code === "GLOSSARY_INVALID") {
    return ExitCode.GLOSSARY_VALIDATION_FAILED;
  }
  return ExitCode.GENERAL_ERROR;
}
|
|
126
|
+
/** Provider identifiers accepted in the configuration. */
var providerNameSchema = z.enum(["claude", "openai", "ollama", "custom"]);

/**
 * Validation schema for a loaded configuration file.
 * `project`, `glossary`, `ignore` and a few nested fields are optional;
 * everything else is required.
 */
var configSchema = z.object({
  version: z.string(),

  // Optional free-form project description.
  project: z
    .object({
      name: z.string(),
      description: z.string(),
      purpose: z.string()
    })
    .optional(),

  // Source language, target languages and optional per-language styles.
  languages: z.object({
    source: z.string(),
    targets: z.array(z.string()),
    styles: z.record(z.string(), z.string()).optional()
  }),

  // Default provider plus optional model, fallbacks and API keys.
  provider: z.object({
    default: providerNameSchema,
    model: z.string().optional(),
    fallback: z.array(providerNameSchema).optional(),
    apiKeys: z.record(providerNameSchema, z.string()).optional()
  }),

  // Quality gate: threshold within 0..100, iterations within 1..10.
  quality: z.object({
    threshold: z.number().min(0).max(100),
    maxIterations: z.number().min(1).max(10),
    evaluationMethod: z.enum(["llm", "embedding", "hybrid"])
  }),

  // Chunk sizing: maxTokens within 100..8000, non-negative overlap.
  chunking: z.object({
    maxTokens: z.number().min(100).max(8000),
    overlapTokens: z.number().min(0),
    preserveStructure: z.boolean()
  }),

  // Optional glossary file and strictness flag.
  glossary: z
    .object({
      path: z.string(),
      strict: z.boolean()
    })
    .optional(),

  // Output location and optional cache location.
  paths: z.object({
    output: z.string(),
    cache: z.string().optional()
  }),

  ignore: z.array(z.string()).optional()
});
|
|
165
|
+
/**
 * Configuration returned by `loadConfig` when no configuration file is found
 * (or the file found is empty).
 */
var defaultConfig = {
  version: "1.0",
  languages: { source: "en", targets: [] },
  provider: { default: "claude" },
  quality: { threshold: 85, maxIterations: 4, evaluationMethod: "llm" },
  chunking: { maxTokens: 1024, overlapTokens: 150, preserveStructure: true },
  paths: { output: "./{lang}" }
};
|
|
188
|
+
/**
 * cosmiconfig explorer for the "translate" module name, restricted to the
 * rc-file and config-module names listed in `searchPlaces`.
 */
var explorer = cosmiconfig("translate", {
  searchPlaces: [
    // rc files
    ".translaterc",
    ".translaterc.json",
    ".translaterc.yaml",
    ".translaterc.yml",
    // JavaScript config modules
    "translate.config.js",
    "translate.config.mjs"
  ]
});
|
|
198
|
+
/**
 * Load and validate the translation configuration.
 *
 * When `configPath` is given that file is loaded directly; otherwise the
 * explorer searches starting from `cwd`. If nothing is found (or the result is
 * empty) `defaultConfig` is returned as-is.
 *
 * @param options - `{ configPath?, cwd? }`; `cwd` defaults to `process.cwd()`
 * @returns The validated configuration, or `defaultConfig`
 * @throws TranslationError CONFIG_NOT_FOUND when loading/searching throws
 * @throws TranslationError CONFIG_INVALID when schema validation fails
 */
async function loadConfig(options = {}) {
  const { configPath, cwd = process.cwd() } = options;

  let found;
  try {
    found = configPath ? await explorer.load(configPath) : await explorer.search(cwd);
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    throw new TranslationError("CONFIG_NOT_FOUND" /* CONFIG_NOT_FOUND */, {
      path: configPath ?? cwd,
      error: reason
    });
  }

  if (!found || found.isEmpty) {
    return defaultConfig;
  }

  const validation = configSchema.safeParse(found.config);
  if (validation.success) {
    return validation.data;
  }

  // Flatten each validation issue to a dotted path plus its message.
  const errors = validation.error.errors.map((issue) => ({
    path: issue.path.join("."),
    message: issue.message
  }));
  throw new TranslationError("CONFIG_INVALID" /* CONFIG_INVALID */, {
    path: found.filepath,
    errors
  });
}
|
|
228
|
+
/**
 * Apply override values on top of a configuration.
 *
 * Returns a shallow copy of `config2`; only the sections touched by an
 * override are replaced with new objects, and `config2` itself is not
 * modified. String-like overrides (`sourceLang`, `targetLang`, `provider`,
 * `model`, `glossary`, `output`) apply when truthy; numeric overrides
 * (`quality`, `maxIterations`, `chunkSize`) apply whenever they are not
 * `undefined`; `noCache` clears `paths.cache`.
 *
 * @param config2 - Base configuration
 * @param overrides - Override values
 * @returns The merged configuration
 */
function mergeConfig(config2, overrides) {
  const merged = { ...config2 };
  const hasEntries = (patch) => Object.keys(patch).length > 0;

  const languages = {};
  if (overrides.sourceLang) languages.source = overrides.sourceLang;
  if (overrides.targetLang) languages.targets = [overrides.targetLang];
  if (hasEntries(languages)) {
    merged.languages = { ...merged.languages, ...languages };
  }

  const provider = {};
  if (overrides.provider) provider.default = overrides.provider;
  if (overrides.model) provider.model = overrides.model;
  if (hasEntries(provider)) {
    merged.provider = { ...merged.provider, ...provider };
  }

  const quality = {};
  if (overrides.quality !== void 0) quality.threshold = overrides.quality;
  if (overrides.maxIterations !== void 0) quality.maxIterations = overrides.maxIterations;
  if (hasEntries(quality)) {
    merged.quality = { ...merged.quality, ...quality };
  }

  if (overrides.chunkSize !== void 0) {
    merged.chunking = { ...merged.chunking, maxTokens: overrides.chunkSize };
  }

  if (overrides.glossary) {
    // Keep an existing strict flag; default to non-strict otherwise.
    const strict = merged.glossary?.strict ?? false;
    merged.glossary = { path: overrides.glossary, strict };
  }

  const paths = {};
  if (overrides.output) paths.output = overrides.output;
  if (overrides.noCache) paths.cache = void 0;
  if (hasEntries(paths)) {
    merged.paths = { ...merged.paths, ...paths };
  }

  return merged;
}
|
|
271
|
+
/**
 * Read a glossary file and parse it as JSON.
 *
 * The parsed value is returned without structural validation.
 *
 * @param path - Path of the glossary file
 * @returns The parsed JSON content
 * @throws TranslationError GLOSSARY_NOT_FOUND when the file cannot be read
 * @throws TranslationError GLOSSARY_INVALID when the content is not valid JSON
 */
async function loadGlossary(path) {
  const describe = (failure) => (failure instanceof Error ? failure.message : String(failure));

  let raw;
  try {
    raw = await readFile(path, "utf-8");
  } catch (readFailure) {
    throw new TranslationError("GLOSSARY_NOT_FOUND" /* GLOSSARY_NOT_FOUND */, {
      path,
      error: describe(readFailure)
    });
  }

  try {
    return JSON.parse(raw);
  } catch (parseFailure) {
    throw new TranslationError("GLOSSARY_INVALID" /* GLOSSARY_INVALID */, {
      path,
      error: describe(parseFailure)
    });
  }
}
|
|
290
|
+
/**
 * Narrow a multi-language glossary to a single target language.
 *
 * Metadata is copied with `targetLang` filled in; each term is resolved via
 * `resolveGlossaryTerm`, and terms that resolve to `null` are dropped.
 *
 * @param glossary - Glossary with `metadata` and `terms`
 * @param targetLang - Target language code
 * @returns `{ metadata, terms }` for that language
 */
function resolveGlossary(glossary, targetLang) {
  const { name, sourceLang, version, domain } = glossary.metadata;
  const terms = glossary.terms.flatMap((entry) => {
    const resolved = resolveGlossaryTerm(entry, targetLang);
    return resolved === null ? [] : [resolved];
  });
  return {
    metadata: { name, sourceLang, targetLang, version, domain },
    terms
  };
}
|
|
302
|
+
/**
 * Resolve one glossary term for a target language.
 *
 * @param term - Glossary term entry
 * @param targetLang - Target language code
 * @returns The resolved term, or `null` when `resolveTarget` yields no target
 */
function resolveGlossaryTerm(term, targetLang) {
  const target = resolveTarget(term, targetLang);
  return target === void 0
    ? null
    : {
        source: term.source,
        target,
        context: term.context,
        // Terms are case-insensitive unless they say otherwise.
        caseSensitive: term.caseSensitive ?? false,
        doNotTranslate: resolveDoNotTranslate(term, targetLang)
      };
}
|
|
315
|
+
/**
 * Determine the text a term should become in the target language.
 *
 * Terms flagged `doNotTranslate`, or listing `targetLang` in
 * `doNotTranslateFor`, resolve to their own source text. Otherwise the
 * translation is looked up in `term.targets`.
 *
 * @param term - Glossary term entry
 * @param targetLang - Target language code
 * @returns The target text, or `undefined` when the term has no (non-empty)
 *   translation for `targetLang`
 */
function resolveTarget(term, targetLang) {
  if (term.doNotTranslate) {
    return term.source;
  }
  if (term.doNotTranslateFor?.includes(targetLang)) {
    return term.source;
  }
  // `targets` may be absent (e.g. a term that only declares
  // `doNotTranslateFor` for other languages); treat that as "no translation"
  // instead of throwing on the property access.
  const translation = term.targets?.[targetLang];
  if (translation) {
    return translation;
  }
  return void 0;
}
|
|
328
|
+
/**
 * Decide whether a term must be kept untranslated for a target language.
 *
 * @param term - Glossary term entry
 * @param targetLang - Target language code
 * @returns `true` when `term.doNotTranslate` is exactly `true` or
 *   `term.doNotTranslateFor` includes `targetLang`; otherwise `false`
 */
function resolveDoNotTranslate(term, targetLang) {
  if (term.doNotTranslate === true) {
    return true;
  }
  const excludedForLang = term.doNotTranslateFor?.includes(targetLang);
  return excludedForLang === true;
}
|
|
331
|
+
/**
 * Build lookup helpers over a resolved glossary.
 *
 * Case-sensitive and case-insensitive terms are indexed separately so that a
 * case-sensitive term is only ever matched by its exact spelling, and so that
 * two terms whose sources differ only by case do not overwrite each other.
 *
 * @param glossary - Resolved glossary exposing a `terms` array
 * @returns An object with:
 *   - `find(text)`: the term whose source equals `text` (exact match for
 *     case-sensitive terms, case-folded match otherwise), or `undefined`
 *   - `findAll(text)`: every term whose source occurs in `text`,
 *     case-sensitive terms first
 *   - `getTerms()`: the glossary's `terms` array
 *   - `formatForPrompt()`: one bullet line per term, joined with newlines
 */
function createGlossaryLookup(glossary) {
  const exactTerms = /* @__PURE__ */ new Map();
  const foldedTerms = /* @__PURE__ */ new Map();
  const caseSensitiveTerms = [];
  const caseInsensitiveTerms = [];
  for (const term of glossary.terms) {
    if (term.caseSensitive) {
      exactTerms.set(term.source, term);
      caseSensitiveTerms.push(term);
    } else {
      foldedTerms.set(term.source.toLowerCase(), term);
      caseInsensitiveTerms.push(term);
    }
  }
  return {
    find(text) {
      // An exact case-sensitive hit wins; only case-insensitive terms may be
      // reached through the lowercased fallback.
      return exactTerms.get(text) ?? foldedTerms.get(text.toLowerCase());
    },
    findAll(text) {
      const matches = [];
      for (const term of caseSensitiveTerms) {
        if (text.includes(term.source)) {
          matches.push(term);
        }
      }
      const lowerText = text.toLowerCase();
      for (const term of caseInsensitiveTerms) {
        if (lowerText.includes(term.source.toLowerCase())) {
          matches.push(term);
        }
      }
      return matches;
    },
    getTerms() {
      return glossary.terms;
    },
    formatForPrompt() {
      const lines = [];
      for (const term of glossary.terms) {
        // The case flag is always present, so the suffix is never empty.
        const flags = [term.caseSensitive ? "case-sensitive" : "case-insensitive"];
        if (term.context) {
          flags.push(`context: ${term.context}`);
        }
        const flagStr = ` (${flags.join(", ")})`;
        if (term.doNotTranslate) {
          lines.push(`- "${term.source}" \u2192 [DO NOT TRANSLATE, keep as-is]${flagStr}`);
        } else {
          lines.push(`- "${term.source}" \u2192 "${term.target}"${flagStr}`);
        }
      }
      return lines.join("\n");
    }
  };
}
|
|
391
|
+
/**
 * Measure how many glossary terms found in the source text have their target
 * text present in the translation.
 *
 * Case-sensitive terms require an exact substring match of the target;
 * other terms are compared after lowercasing both sides.
 *
 * @param sourceText - Original text
 * @param translatedText - Translated text
 * @param glossary - Resolved glossary
 * @returns `{ applied, missed, score }` where `applied`/`missed` list term
 *   sources and `score` is the applied percentage (100 when no glossary term
 *   occurs in the source)
 */
function checkGlossaryCompliance(sourceText, translatedText, glossary) {
  const relevantTerms = createGlossaryLookup(glossary).findAll(sourceText);
  const applied = [];
  const missed = [];
  for (const term of relevantTerms) {
    let present;
    if (term.caseSensitive) {
      present = translatedText.includes(term.target);
    } else {
      present = translatedText.toLowerCase().includes(term.target.toLowerCase());
    }
    (present ? applied : missed).push(term.source);
  }
  const considered = relevantTerms.length;
  const score = considered > 0 ? applied.length / considered * 100 : 100;
  return { applied, missed, score };
}
|
|
408
|
+
/** Numeric rank of each log level; higher ranks are more severe. */
var LOG_LEVEL_PRIORITY = { debug: 0, info: 1, warn: 2, error: 3 };

/**
 * Current logger settings. Replaced wholesale by `configureLogger`, which is
 * why this binding is reassignable.
 */
var config = { level: "info", quiet: false, json: false };
|
|
419
|
+
/**
 * Update logger settings; keys absent from `options` keep their current
 * values.
 *
 * @param options - Partial settings (`level`, `quiet`, `json`)
 */
function configureLogger(options) {
  const updated = { ...config, ...options };
  config = updated;
}
|
|
422
|
+
/**
 * Decide whether a message at `level` should be emitted.
 *
 * In quiet mode only "error" passes; otherwise the level's priority must be
 * at least the configured level's priority.
 *
 * @param level - "debug" | "info" | "warn" | "error"
 * @returns `true` when the message should be logged
 */
function shouldLog(level) {
  const mutedByQuiet = config.quiet && level !== "error";
  if (mutedByQuiet) {
    return false;
  }
  const requested = LOG_LEVEL_PRIORITY[level];
  const minimum = LOG_LEVEL_PRIORITY[config.level];
  return requested >= minimum;
}
|
|
428
|
+
/**
 * Render a log line.
 *
 * In JSON mode the result is a JSON string with `level`, `message`, an ISO
 * `timestamp`, and the fields of `data` spread last. Otherwise the result is
 * `[HH:MM:SS] message` (time portion of the ISO timestamp), colored per level; `data` is
 * not included in that form. Levels other than debug/info/warn/error yield
 * `undefined` in the non-JSON form.
 *
 * @param level - Log level
 * @param message - Message text
 * @param data - Optional structured fields (JSON mode only)
 * @returns The formatted line
 */
function formatMessage(level, message, data) {
  if (config.json) {
    const record = {
      level,
      message,
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      ...data
    };
    return JSON.stringify(record);
  }
  // Characters 11..19 of an ISO timestamp are the HH:MM:SS portion.
  const clock = (/* @__PURE__ */ new Date()).toISOString().slice(11, 19);
  const stamp = `[${clock}]`;
  if (level === "debug") {
    return chalk.gray(`${stamp} ${message}`);
  }
  if (level === "info") {
    return `${stamp} ${message}`;
  }
  if (level === "warn") {
    return chalk.yellow(`${stamp} \u26A0 ${message}`);
  }
  if (level === "error") {
    return chalk.red(`${stamp} \u2717 ${message}`);
  }
}
|
|
450
|
+
/**
 * Console logger honoring the module-level logger settings.
 *
 * `debug`/`info` write to stdout via `console.log`, `warn` via
 * `console.warn`, `error` via `console.error`; each is gated by `shouldLog`
 * and rendered by `formatMessage`. `success` and `progress` are suppressed in
 * quiet mode (`progress` also in JSON mode).
 */
var logger = {
  /** Log a debug-level message with optional structured data. */
  debug(message, data) {
    if (shouldLog("debug")) {
      console.log(formatMessage("debug", message, data));
    }
  },
  /** Log an info-level message with optional structured data. */
  info(message, data) {
    if (shouldLog("info")) {
      console.log(formatMessage("info", message, data));
    }
  },
  /** Log a warning with optional structured data. */
  warn(message, data) {
    if (shouldLog("warn")) {
      console.warn(formatMessage("warn", message, data));
    }
  },
  /** Log an error with optional structured data. */
  error(message, data) {
    if (shouldLog("error")) {
      console.error(formatMessage("error", message, data));
    }
  },
  /** Print a green check-marked message unless quiet mode is on. */
  success(message) {
    if (!config.quiet) {
      console.log(chalk.green(`\u2713 ${message}`));
    }
  },
  /**
   * Redraw a 20-cell progress bar on the current line.
   *
   * The completed fraction is clamped to 0..1 so that `current > total`,
   * a zero `total`, or non-finite input cannot produce a negative repeat
   * count (RangeError) or a "NaN%" label. A non-positive `total` is shown as
   * complete. A newline is written once `current` reaches `total`.
   *
   * @param current - Units completed so far
   * @param total - Total units
   * @param message - Text shown after the percentage
   */
  progress(current, total, message) {
    if (config.quiet || config.json) {
      return;
    }
    const barWidth = 20;
    const rawFraction = total > 0 ? current / total : 1;
    const fraction = Number.isFinite(rawFraction) ? Math.min(Math.max(rawFraction, 0), 1) : 0;
    const percent = Math.round(fraction * 100);
    const filled = Math.round(percent / 5);
    const bar = "\u2588".repeat(filled) + "\u2591".repeat(barWidth - filled);
    process.stdout.write(`\r[${bar}] ${percent}% ${message}`);
    if (current >= total) {
      console.log();
    }
  }
};
|
|
487
|
+
/**
 * Start a timer from the moment of the call.
 *
 * Both returned functions close over the start time instead of relying on
 * `this`, so they keep working when destructured or passed as callbacks.
 *
 * @returns `{ elapsed, format }` where `elapsed()` gives milliseconds since
 *   creation and `format()` gives that duration as `"<n>ms"` below one second
 *   or `"<n.n>s"` otherwise
 */
function createTimer() {
  const start = performance.now();
  const elapsed = () => performance.now() - start;
  const format = () => {
    const ms = elapsed();
    if (ms < 1e3) {
      return `${ms.toFixed(0)}ms`;
    }
    return `${(ms / 1e3).toFixed(1)}s`;
  };
  return { elapsed, format };
}
|
|
502
|
+
|
|
503
|
+
// src/services/cache.ts
|
|
504
|
+
/** Version tag for the on-disk cache format. */
var CACHE_VERSION = '1.0';
/** File name of the cache index. */
var INDEX_FILE = 'index.json';
/** Directory name holding individual cache entries. */
var ENTRIES_DIR = 'entries';
|
|
507
|
+
/**
 * Invalidation policy that reacts to a change of the glossary hash.
 *
 * Nothing is invalidated when either hash is missing or both are equal.
 */
var GlossaryChangePolicy = class {
  name = "GlossaryChangePolicy";
  mode;
  /**
   * @param mode - 'all' invalidates entire cache, 'matching' only entries with old glossary hash
   */
  constructor(mode = "all") {
    this.mode = mode;
  }
  /**
   * @param context - Reads `glossaryHash` and `previousGlossaryHash`
   * @returns The invalidation decision; in 'matching' mode it carries a
   *   `filter` selecting entries whose `glossaryHash` equals the previous hash
   */
  check(context) {
    const current = context.glossaryHash;
    const previous = context.previousGlossaryHash;
    const unchanged = !current || !previous || current === previous;
    if (unchanged) {
      return { shouldInvalidate: false, scope: "none" };
    }
    if (this.mode !== "all") {
      return {
        shouldInvalidate: true,
        reason: `Glossary changed, invalidating matching entries`,
        scope: "matching",
        filter: (entry) => entry.glossaryHash === previous
      };
    }
    // Only the first 8 characters of each hash are shown in the reason.
    const from = previous.slice(0, 8);
    const to = current.slice(0, 8);
    return {
      shouldInvalidate: true,
      reason: `Glossary changed (${from} \u2192 ${to})`,
      scope: "all"
    };
  }
};
|
|
539
|
+
/**
 * Invalidation policy selecting entries older than a time-to-live.
 */
var TTLPolicy = class _TTLPolicy {
  name = "TTLPolicy";
  ttlMs;
  /**
   * @param ttlMs - Time-to-live in milliseconds
   */
  constructor(ttlMs) {
    this.ttlMs = ttlMs;
  }
  /**
   * Create policy with TTL in hours
   */
  static hours(hours) {
    const ttlMs = hours * 60 * 60 * 1e3;
    return new _TTLPolicy(ttlMs);
  }
  /**
   * Create policy with TTL in days
   */
  static days(days) {
    const ttlMs = days * 24 * 60 * 60 * 1e3;
    return new _TTLPolicy(ttlMs);
  }
  /**
   * @param context - Optional `currentTime` (a Date); defaults to now
   * @returns A 'matching' decision whose `filter` is true for entries whose
   *   age (`currentTime - createdAt`) is strictly greater than the TTL
   */
  check(context) {
    const now = context.currentTime ?? /* @__PURE__ */ new Date();
    const limit = this.ttlMs;
    const isExpired = (entry) => {
      const createdMs = new Date(entry.createdAt).getTime();
      return now.getTime() - createdMs > limit;
    };
    return {
      shouldInvalidate: true,
      reason: `TTL check (${limit}ms)`,
      scope: "matching",
      filter: isExpired
    };
  }
};
|
|
575
|
+
/**
 * Invalidation policy selecting entries produced by a different provider
 * (and, optionally, a different model) than the one in the context.
 */
var ProviderChangePolicy = class {
  name = "ProviderChangePolicy";
  checkModel;
  /**
   * @param checkModel - If true, also invalidate when model changes within same provider
   */
  constructor(checkModel = true) {
    this.checkModel = checkModel;
  }
  /**
   * @param context - Reads `provider` and `model`
   * @returns No invalidation when the context has no provider; otherwise a
   *   'matching' decision whose `filter` is true for entries whose provider
   *   differs, or whose model differs when model checking is on and the
   *   context names a model
   */
  check(context) {
    const { provider, model } = context;
    if (!provider) {
      return { shouldInvalidate: false, scope: "none" };
    }
    const differs = (entry) =>
      entry.provider !== provider ||
      (Boolean(this.checkModel && model) && entry.model !== model);
    return {
      shouldInvalidate: true,
      reason: `Provider/model mismatch check`,
      scope: "matching",
      filter: differs
    };
  }
};
|
|
601
|
+
/**
 * Invalidation policy selecting entries whose quality score is below a
 * minimum.
 */
var QualityThresholdPolicy = class {
  name = "QualityThresholdPolicy";
  threshold;
  /**
   * @param threshold - Minimum quality score (0-100)
   */
  constructor(threshold) {
    this.threshold = threshold;
  }
  /**
   * @param _context - Unused
   * @returns A 'matching' decision whose `filter` is true for entries with
   *   `qualityScore` strictly below the threshold
   */
  check(_context) {
    const minimum = this.threshold;
    const isBelowMinimum = (entry) => entry.qualityScore < minimum;
    return {
      shouldInvalidate: true,
      reason: `Quality below threshold (${minimum})`,
      scope: "matching",
      filter: isBelowMinimum
    };
  }
};
|
|
620
|
+
/**
 * Combines several invalidation policies into one decision.
 *
 * - Mode 'any': an entry is invalidated when at least one policy selects it;
 *   a policy reporting scope "all" invalidates the whole cache.
 * - Mode 'all': an entry is invalidated only when every policy selects it.
 *   If any policy declines to invalidate, nothing is invalidated. A policy
 *   reporting scope "all" selects every entry, so it does not narrow the
 *   result; the whole cache is invalidated only when no policy supplies a
 *   filter.
 */
var CompositePolicy = class {
  name = "CompositePolicy";
  policies;
  mode;
  /**
   * @param policies - Array of policies to combine
   * @param mode - 'any' triggers if any policy matches, 'all' requires all policies to match
   */
  constructor(policies, mode = "any") {
    this.policies = policies;
    this.mode = mode;
  }
  /**
   * Evaluate every policy against `context` and merge the outcomes.
   *
   * @param context - Passed unchanged to each policy's `check`
   * @returns The combined invalidation decision
   */
  check(context) {
    const noInvalidation = () => ({ shouldInvalidate: false, scope: "none" });
    const outcomes = this.policies.map((p) => ({
      policy: p,
      result: p.check(context)
    }));
    const active = outcomes.filter((o) => o.result.shouldInvalidate);
    if (active.length === 0) {
      return noInvalidation();
    }
    // Any mode other than 'any' is treated as 'all'.
    const requireAll = this.mode !== "any";
    if (requireAll && active.length !== outcomes.length) {
      // At least one policy voted to keep the cache, so "all" cannot hold.
      return noInvalidation();
    }
    const wholeCache = active.find((o) => o.result.scope === "all");
    const filters = active.filter((o) => o.result.scope !== "all" && o.result.filter).map((o) => o.result.filter);
    if (wholeCache && (!requireAll || filters.length === 0)) {
      return {
        shouldInvalidate: true,
        reason: `${wholeCache.policy.name}: ${wholeCache.result.reason}`,
        scope: "all"
      };
    }
    if (filters.length === 0) {
      return noInvalidation();
    }
    const reasons = active.map((o) => `${o.policy.name}`).join(", ");
    if (requireAll) {
      return {
        shouldInvalidate: true,
        reason: `Composite (all): ${reasons}`,
        scope: "matching",
        filter: (entry, key) => filters.every((f) => f(entry, key))
      };
    }
    return {
      shouldInvalidate: true,
      reason: `Composite (any): ${reasons}`,
      scope: "matching",
      filter: (entry, key) => filters.some((f) => f(entry, key))
    };
  }
};
|
|
671
|
+
/**
 * Short content fingerprint: the first 16 hex characters of the SHA-256
 * digest of `content`, with string input interpreted as UTF-8.
 */
function hashContent(content) {
  const hasher = createHash("sha256");
  hasher.update(content, "utf8");
  const fullDigest = hasher.digest("hex");
  return fullDigest.slice(0, 16);
}
|
|
674
|
+
/**
 * Build the cache key for a translation request.
 *
 * The key doubles as the entry's file name (`<key>.json` inside the entries
 * directory), so the free-form parts (languages, provider, model) have path
 * separators and characters that are illegal in file names replaced by "_".
 * Without this, a model id such as "org/model" points into a non-existent
 * sub-directory and "llama2:13b" cannot be created on Windows.
 * Keys built from values without such characters are unchanged.
 *
 * @param key - Object with `content`, `sourceLang`, `targetLang`, optional `glossary`, `provider` and `model`
 * @returns `<contentHash>_<sourceLang>_<targetLang>_<glossaryHash|none>_<provider>_<model>`
 */
function generateCacheKey(key) {
  const toFileSafe = (value) => String(value).replace(/[\\/:*?"<>|\u0000-\u001f]/g, "_");
  const contentHash = hashContent(key.content);
  // An absent (or empty) glossary is encoded as the literal "none".
  const glossaryHash = key.glossary ? hashContent(key.glossary) : "none";
  return [
    contentHash,
    toFileSafe(key.sourceLang),
    toFileSafe(key.targetLang),
    glossaryHash,
    toFileSafe(key.provider),
    toFileSafe(key.model)
  ].join("_");
}
|
|
679
|
+
/**
 * File-backed translation cache.
 *
 * Layout inside `cacheDir`: an index file (INDEX_FILE) mapping cache keys to
 * entries, one `<key>.json` file per entry under ENTRIES_DIR, and a
 * `metadata.json` holding bookkeeping such as the last glossary hash,
 * provider and model. All file access is synchronous, and most failures are
 * treated as best-effort: they are logged when `verbose` is on and otherwise
 * ignored.
 */
var CacheManager = class {
  cacheDir;
  indexPath;
  entriesDir;
  metadataPath;
  verbose;
  index = null;
  policies;
  metadata = null;
  /**
   * @param options - Reads `cacheDir`, optional `verbose` (default false) and
   *   optional `invalidationPolicies` (default: none)
   */
  constructor(options) {
    this.cacheDir = options.cacheDir;
    this.indexPath = join(this.cacheDir, INDEX_FILE);
    this.entriesDir = join(this.cacheDir, ENTRIES_DIR);
    this.metadataPath = join(this.cacheDir, "metadata.json");
    this.verbose = options.verbose ?? false;
    // Copy so addPolicy/removePolicy never mutate the caller's array.
    this.policies = [...(options.invalidationPolicies ?? [])];
  }
  /**
   * Initialize cache directory and load index.
   * Runs once; later calls return immediately because `index` is set.
   */
  ensureInitialized() {
    if (this.index !== null) return;
    if (!existsSync(this.cacheDir)) {
      mkdirSync(this.cacheDir, { recursive: true });
    }
    if (!existsSync(this.entriesDir)) {
      mkdirSync(this.entriesDir, { recursive: true });
    }
    if (existsSync(this.indexPath)) {
      try {
        const data = readFileSync(this.indexPath, "utf-8");
        const parsed = JSON.parse(data);
        if (parsed.version !== CACHE_VERSION) {
          if (this.verbose) {
            logger.warn(`Cache version mismatch (${parsed.version} vs ${CACHE_VERSION}), clearing cache`);
          }
          this.clearSync();
          this.index = { version: CACHE_VERSION, entries: {} };
        } else if (parsed.entries === null || typeof parsed.entries !== "object" || Array.isArray(parsed.entries)) {
          // Right version but no usable entries map: treat as corrupt so
          // later lookups do not crash on `index.entries[...]`.
          throw new Error("Cache index has no entries map");
        } else {
          this.index = parsed;
        }
      } catch {
        if (this.verbose) {
          logger.warn("Failed to load cache index, creating new one");
        }
        this.index = { version: CACHE_VERSION, entries: {} };
      }
    } else {
      this.index = { version: CACHE_VERSION, entries: {} };
    }
    this.loadMetadata();
  }
  /**
   * Load cache metadata from disk.
   * Falls back to an empty object when the file is missing, unreadable, or
   * does not contain a JSON object.
   */
  loadMetadata() {
    this.metadata = {};
    if (!existsSync(this.metadataPath)) return;
    try {
      const data = readFileSync(this.metadataPath, "utf-8");
      const parsed = JSON.parse(data);
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        this.metadata = parsed;
      }
    } catch {
      // Unreadable or malformed metadata: keep the empty default.
    }
  }
  /**
   * Save cache metadata to disk (no-op until metadata has been loaded).
   */
  saveMetadata() {
    if (!this.metadata) return;
    try {
      writeFileSync(this.metadataPath, JSON.stringify(this.metadata, null, 2), "utf-8");
    } catch (error) {
      if (this.verbose) {
        logger.error(`Failed to save cache metadata: ${error}`);
      }
    }
  }
  /**
   * Update cache metadata: shallow-merges `updates` and persists the result.
   */
  updateMetadata(updates) {
    this.ensureInitialized();
    this.metadata = { ...this.metadata, ...updates };
    this.saveMetadata();
  }
  /**
   * Get current cache metadata
   * @returns Shallow copy of the metadata object
   */
  getMetadata() {
    this.ensureInitialized();
    return { ...this.metadata };
  }
  /**
   * Apply all configured invalidation policies
   * @param context - Passed to each policy together with the stored glossary
   *   hash (`previousGlossaryHash`) and `currentTime` (defaults to now)
   * @returns Number of entries invalidated
   */
  applyPolicies(context) {
    this.ensureInitialized();
    if (this.policies.length === 0) {
      return 0;
    }
    let totalInvalidated = 0;
    for (const policy of this.policies) {
      const result = policy.check({
        ...context,
        previousGlossaryHash: this.metadata?.glossaryHash,
        currentTime: context.currentTime ?? /* @__PURE__ */ new Date()
      });
      if (!result.shouldInvalidate) {
        continue;
      }
      if (this.verbose) {
        logger.info(`Applying ${policy.name}: ${result.reason}`);
      }
      if (result.scope === "all") {
        // Nothing is left for later policies once the cache is wiped.
        const count = Object.keys(this.index.entries).length;
        this.clear();
        totalInvalidated += count;
        break;
      }
      if (result.scope === "matching" && result.filter) {
        const invalidated = this.invalidateMatching(result.filter);
        totalInvalidated += invalidated;
      }
    }
    // Remember what this run saw so the next run can detect changes.
    if (context.glossaryHash) {
      this.metadata.glossaryHash = context.glossaryHash;
    }
    if (context.provider) {
      this.metadata.provider = context.provider;
    }
    if (context.model) {
      this.metadata.model = context.model;
    }
    if (totalInvalidated > 0) {
      this.metadata.lastInvalidation = (/* @__PURE__ */ new Date()).toISOString();
    }
    this.saveMetadata();
    return totalInvalidated;
  }
  /**
   * Invalidate entries matching a filter function
   * @param filter - Called as `filter(entry, key)`; truthy means delete
   * @returns Number of entries invalidated
   */
  invalidateMatching(filter) {
    this.ensureInitialized();
    const keysToDelete = [];
    for (const [key, entry] of Object.entries(this.index.entries)) {
      if (filter(entry, key)) {
        keysToDelete.push(key);
      }
    }
    for (const key of keysToDelete) {
      const entryPath = join(this.entriesDir, `${key}.json`);
      try {
        if (existsSync(entryPath)) {
          rmSync(entryPath);
        }
      } catch {
        // Best effort: the index entry is dropped even if the file remains.
      }
      delete this.index.entries[key];
    }
    if (keysToDelete.length > 0) {
      this.saveIndex();
      if (this.verbose) {
        logger.info(`Invalidated ${keysToDelete.length} cache entries`);
      }
    }
    return keysToDelete.length;
  }
  /**
   * Add an invalidation policy at runtime
   */
  addPolicy(policy) {
    this.policies.push(policy);
  }
  /**
   * Remove an invalidation policy by name
   * @returns true when a policy was removed (only the first match is removed)
   */
  removePolicy(name) {
    const index = this.policies.findIndex((p) => p.name === name);
    if (index !== -1) {
      this.policies.splice(index, 1);
      return true;
    }
    return false;
  }
  /**
   * Get all configured policies
   * @returns Copy of the policy list
   */
  getPolicies() {
    return [...this.policies];
  }
  /**
   * Save index to disk, creating the parent directory when needed.
   */
  saveIndex() {
    if (!this.index) return;
    try {
      const dir = dirname(this.indexPath);
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
      }
      writeFileSync(this.indexPath, JSON.stringify(this.index, null, 2), "utf-8");
    } catch (error) {
      if (this.verbose) {
        logger.error(`Failed to save cache index: ${error}`);
      }
    }
  }
  /**
   * Get cached translation if available
   * @returns `{ hit: true, entry }` when the key is indexed and its entry
   *   file exists, otherwise `{ hit: false }`
   */
  get(key) {
    this.ensureInitialized();
    const cacheKey = generateCacheKey(key);
    const entry = this.index.entries[cacheKey];
    if (entry) {
      const entryPath = join(this.entriesDir, `${cacheKey}.json`);
      if (existsSync(entryPath)) {
        if (this.verbose) {
          logger.info(`Cache hit: ${cacheKey.slice(0, 20)}...`);
        }
        return { hit: true, entry };
      } else {
        // Index points at a file that is gone: drop the stale index entry.
        delete this.index.entries[cacheKey];
        this.saveIndex();
      }
    }
    if (this.verbose) {
      logger.debug(`Cache miss: ${cacheKey.slice(0, 20)}...`);
    }
    return { hit: false };
  }
  /**
   * Store translation in cache (entry file first, then the index).
   */
  set(key, translation, qualityScore) {
    this.ensureInitialized();
    const cacheKey = generateCacheKey(key);
    const contentHash = hashContent(key.content);
    const glossaryHash = key.glossary ? hashContent(key.glossary) : "";
    const entry = {
      sourceHash: contentHash,
      sourceLang: key.sourceLang,
      targetLang: key.targetLang,
      glossaryHash,
      translation,
      qualityScore,
      createdAt: (/* @__PURE__ */ new Date()).toISOString(),
      provider: key.provider,
      model: key.model
    };
    const entryPath = join(this.entriesDir, `${cacheKey}.json`);
    try {
      writeFileSync(entryPath, JSON.stringify(entry, null, 2), "utf-8");
      this.index.entries[cacheKey] = entry;
      this.saveIndex();
      if (this.verbose) {
        logger.info(`Cached: ${cacheKey.slice(0, 20)}...`);
      }
    } catch (error) {
      if (this.verbose) {
        logger.error(`Failed to cache entry: ${error}`);
      }
    }
  }
  /**
   * Check if entry exists in cache
   */
  has(key) {
    return this.get(key).hit;
  }
  /**
   * Remove entry from cache
   * @returns true when the key was indexed, false otherwise
   */
  delete(key) {
    this.ensureInitialized();
    const cacheKey = generateCacheKey(key);
    if (this.index.entries[cacheKey]) {
      const entryPath = join(this.entriesDir, `${cacheKey}.json`);
      try {
        if (existsSync(entryPath)) {
          rmSync(entryPath);
        }
      } catch {
        // Best effort: the index entry is dropped even if the file remains.
      }
      delete this.index.entries[cacheKey];
      this.saveIndex();
      return true;
    }
    return false;
  }
  /**
   * Clear entire cache (synchronous): removes the entries directory and the
   * index file, then recreates an empty entries directory. Does not touch
   * the in-memory index.
   */
  clearSync() {
    try {
      if (existsSync(this.entriesDir)) {
        rmSync(this.entriesDir, { recursive: true, force: true });
      }
      if (existsSync(this.indexPath)) {
        rmSync(this.indexPath);
      }
      mkdirSync(this.entriesDir, { recursive: true });
    } catch {
      // Best effort: a partially cleared cache is tolerated.
    }
  }
  /**
   * Clear entire cache and persist a fresh, empty index.
   */
  clear() {
    this.clearSync();
    this.index = { version: CACHE_VERSION, entries: {} };
    this.saveIndex();
    if (this.verbose) {
      logger.info("Cache cleared");
    }
  }
  /**
   * Get cache statistics
   * @returns `entries` (indexed entry count), `sizeBytes` (entry files plus
   *   the index file) and `version`
   */
  getStats() {
    this.ensureInitialized();
    let sizeBytes = 0;
    if (existsSync(this.entriesDir)) {
      try {
        const files = readdirSync(this.entriesDir);
        for (const file of files) {
          const filePath = join(this.entriesDir, file);
          try {
            const stat = statSync(filePath);
            sizeBytes += stat.size;
          } catch {
            // File vanished or is unreadable: leave it out of the total.
          }
        }
      } catch {
        // Directory unreadable: report the size gathered so far.
      }
    }
    if (existsSync(this.indexPath)) {
      try {
        const stat = statSync(this.indexPath);
        sizeBytes += stat.size;
      } catch {
        // Index unreadable: leave it out of the total.
      }
    }
    return {
      entries: Object.keys(this.index?.entries ?? {}).length,
      sizeBytes,
      version: CACHE_VERSION
    };
  }
  /**
   * Get all cached entries (for debugging)
   * @returns Shallow copy of the index's entries map
   */
  getAllEntries() {
    this.ensureInitialized();
    return { ...this.index.entries };
  }
};
|
|
1040
|
+
/**
 * Factory for a file-backed CacheManager.
 * @param options - Forwarded unchanged to the CacheManager constructor
 */
function createCacheManager(options) {
  const manager = new CacheManager(options);
  return manager;
}
|
|
1043
|
+
/**
 * Build a do-nothing cache manager.
 *
 * The returned object exposes `isNull: true` and no-op versions of the cache
 * manager methods listed below: lookups always miss, writes and policy
 * changes are ignored, and counters are zero.
 */
function createNullCacheManager() {
  return {
    isNull: true,
    get() {
      return { hit: false };
    },
    set() {
    },
    has() {
      return false;
    },
    delete() {
      return false;
    },
    clear() {
    },
    getStats() {
      return { entries: 0, sizeBytes: 0, version: CACHE_VERSION };
    },
    getAllEntries() {
      return {};
    },
    updateMetadata() {
    },
    getMetadata() {
      return {};
    },
    applyPolicies() {
      return 0;
    },
    invalidateMatching() {
      return 0;
    },
    addPolicy() {
    },
    removePolicy() {
      return false;
    },
    getPolicies() {
      return [];
    }
  };
}
|
|
1067
|
+
/**
 * Default invalidation policy set: a GlossaryChangePolicy constructed with
 * "all", followed by TTLPolicy.days(30).
 * @returns A new array on every call
 */
function createDefaultPolicies() {
  const onGlossaryChange = new GlossaryChangePolicy("all");
  const expiry = TTLPolicy.days(30);
  return [onGlossaryChange, expiry];
}
|
|
1073
|
+
/**
 * Strict invalidation policy set, in this order: GlossaryChangePolicy("all"),
 * ProviderChangePolicy(true), QualityThresholdPolicy(qualityThreshold) and
 * TTLPolicy.days(7).
 * @param qualityThreshold - Passed to QualityThresholdPolicy (default 85)
 * @returns A new array on every call
 */
function createStrictPolicies(qualityThreshold = 85) {
  const policies = [];
  policies.push(new GlossaryChangePolicy("all"));
  policies.push(new ProviderChangePolicy(true));
  policies.push(new QualityThresholdPolicy(qualityThreshold));
  policies.push(TTLPolicy.days(7));
  return policies;
}
|
|
1081
|
+
/**
 * Minimal invalidation policy set: a single GlossaryChangePolicy constructed
 * with "matching".
 * @returns A new array on every call
 */
function createMinimalPolicies() {
  const onGlossaryChange = new GlossaryChangePolicy("matching");
  return [onGlossaryChange];
}
|
|
1086
|
+
|
|
1087
|
+
// src/utils/tokens.ts
|
|
1088
|
+
/**
 * Rough token estimate based on character classes.
 *
 * Each iterated character is classified by its first UTF-16 code unit:
 * CJK/Hangul/kana count as 1.5 characters per token, ASCII letters and
 * digits as 4 per token, everything else as 3 per token. The sum is rounded
 * up. Falsy input yields 0.
 */
function estimateTokens(text) {
  if (!text) return 0;
  const within = (code, low, high) => code >= low && code <= high;
  const isCjk = (code) =>
    within(code, 0x4e00, 0x9fff) || // CJK Unified Ideographs
    within(code, 0x3400, 0x4dbf) || // CJK Extension A
    within(code, 0xac00, 0xd7af) || // Hangul Syllables
    within(code, 0x3040, 0x309f) || // Hiragana
    within(code, 0x30a0, 0x30ff); // Katakana
  const isAsciiAlphanumeric = (code) =>
    within(code, 65, 90) || // A-Z
    within(code, 97, 122) || // a-z
    within(code, 48, 57); // 0-9
  const counts = { latin: 0, cjk: 0, other: 0 };
  for (const char of text) {
    const code = char.charCodeAt(0);
    if (isCjk(code)) {
      counts.cjk += 1;
    } else if (isAsciiAlphanumeric(code)) {
      counts.latin += 1;
    } else {
      counts.other += 1;
    }
  }
  return Math.ceil(counts.latin / 4 + counts.cjk / 1.5 + counts.other / 3);
}
|
|
1114
|
+
/**
 * Whether the estimated token count of `text` is strictly greater than `limit`.
 */
function exceedsTokenLimit(text, limit) {
  const tokenCount = estimateTokens(text);
  return tokenCount > limit;
}
|
|
1117
|
+
/**
 * Shorten `text` so its estimated token count is roughly within `limit`.
 *
 * Text already within the limit is returned unchanged. Otherwise the text is
 * cut at 95% of the proportional character budget and "..." is appended, so
 * the result may still slightly exceed the limit.
 *
 * @param text - Text to shorten
 * @param limit - Token budget
 * @returns The original text, or a truncated prefix followed by "..."
 */
function truncateToTokenLimit(text, limit) {
  const estimated = estimateTokens(text);
  if (estimated <= limit) {
    return text;
  }
  const avgCharsPerToken = text.length / estimated;
  // Clamp at zero: a negative end index would make slice() count from the
  // end of the string and return most of the text for a negative limit.
  let targetChars = Math.max(0, Math.floor(limit * avgCharsPerToken * 0.95));
  if (targetChars > 0) {
    // Do not cut between the two halves of a surrogate pair.
    const lastUnit = text.charCodeAt(targetChars - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      targetChars -= 1;
    }
  }
  return text.slice(0, targetChars) + "...";
}
|
|
1126
|
+
|
|
1127
|
+
// src/providers/claude.ts
|
|
1128
|
+
// Per-model limits and prices for Claude models. Costs are per 1,000 tokens
// (per-million-token price divided by 1000).
var MODEL_INFO = {
  // Latest Claude 4.5 models
  "claude-sonnet-4-5-20250929": {
    maxContextTokens: 200000,
    supportsStreaming: true,
    costPer1kInput: 0.003,
    costPer1kOutput: 0.015
  },
  "claude-opus-4-5-20251101": {
    maxContextTokens: 200000,
    supportsStreaming: true,
    // Opus 4.5 is priced below Opus 4: 5 / 25 per million tokens.
    costPer1kInput: 0.005,
    costPer1kOutput: 0.025
  },
  "claude-haiku-4-5-20251001": {
    maxContextTokens: 200000,
    supportsStreaming: true,
    costPer1kInput: 0.001,
    costPer1kOutput: 0.005
  },
  // Claude 4 models (previous generation)
  "claude-sonnet-4-20250514": {
    maxContextTokens: 200000,
    supportsStreaming: true,
    costPer1kInput: 0.003,
    costPer1kOutput: 0.015
  },
  "claude-opus-4-20250514": {
    maxContextTokens: 200000,
    supportsStreaming: true,
    costPer1kInput: 0.015,
    costPer1kOutput: 0.075
  },
  // Claude 3.5 models
  "claude-3-5-haiku-20241022": {
    maxContextTokens: 200000,
    supportsStreaming: true,
    // Claude 3.5 Haiku: 0.80 / 4 per million tokens.
    costPer1kInput: 0.0008,
    costPer1kOutput: 0.004
  }
};
// Model used when neither the request nor the provider config names one.
var DEFAULT_MODEL = "claude-haiku-4-5-20251001";
|
|
1170
|
+
/**
 * LLM provider backed by the Anthropic client created with `createAnthropic`.
 * Offers one-shot chat, streaming chat, token estimation and model lookup,
 * and converts failures into TranslationError instances.
 */
var ClaudeProvider = class {
  name = "claude";
  defaultModel;
  client;
  /**
   * @param config2 - Optional `apiKey` (falls back to the ANTHROPIC_API_KEY
   *   environment variable), `baseUrl` and `defaultModel`
   * @throws TranslationError (PROVIDER_AUTH_FAILED) when no API key is available
   */
  constructor(config2 = {}) {
    const apiKey = config2.apiKey ?? process.env["ANTHROPIC_API_KEY"];
    if (!apiKey) {
      throw new TranslationError("PROVIDER_AUTH_FAILED", {
        provider: "claude",
        message: "ANTHROPIC_API_KEY environment variable is not set"
      });
    }
    const clientOptions = { apiKey, baseURL: config2.baseUrl };
    this.client = createAnthropic(clientOptions);
    this.defaultModel = config2.defaultModel ?? DEFAULT_MODEL;
  }
  /**
   * Send a chat request and wait for the full answer.
   * @param request - Reads `messages`, optional `model`, `temperature`
   *   (default 0) and `maxTokens` (default 4096)
   * @returns Content, token usage (including Anthropic cache read/write
   *   counts when reported), the model used and a mapped finish reason
   * @throws TranslationError for any failure
   */
  async chat(request) {
    const model = request.model ?? this.defaultModel;
    try {
      const messages = this.convertMessages(request.messages);
      const options = {
        model: this.client(model),
        messages,
        temperature: request.temperature ?? 0,
        maxTokens: request.maxTokens ?? 4096
      };
      const result = await generateText(options);
      const anthropicMeta = result.providerMetadata?.anthropic;
      const usage = {
        inputTokens: result.usage?.promptTokens ?? 0,
        outputTokens: result.usage?.completionTokens ?? 0,
        cacheReadTokens: anthropicMeta?.cacheReadInputTokens,
        cacheWriteTokens: anthropicMeta?.cacheCreationInputTokens
      };
      return {
        content: result.text,
        usage,
        model,
        finishReason: mapFinishReason(result.finishReason)
      };
    } catch (error) {
      throw this.handleError(error);
    }
  }
  /**
   * Convert messages to Vercel AI SDK format with cache control support.
   * String content passes through; multi-part content becomes text parts,
   * and a part's `cacheControl` is forwarded as an Anthropic provider option.
   */
  convertMessages(messages) {
    const toTextPart = (part) => {
      const textPart = { type: "text", text: part.text };
      if (part.cacheControl) {
        textPart.providerOptions = {
          anthropic: { cacheControl: part.cacheControl }
        };
      }
      return textPart;
    };
    return messages.map((msg) => {
      const content = typeof msg.content === "string" ? msg.content : msg.content.map(toTextPart);
      return { role: msg.role, content };
    });
  }
  /**
   * Stream a chat answer as text chunks.
   * @param request - Same fields as `chat`
   * @throws TranslationError for any failure
   */
  async *stream(request) {
    const model = request.model ?? this.defaultModel;
    try {
      const messages = this.convertMessages(request.messages);
      const { textStream } = streamText({
        model: this.client(model),
        messages,
        temperature: request.temperature ?? 0,
        maxTokens: request.maxTokens ?? 4096
      });
      for await (const chunk of textStream) {
        yield chunk;
      }
    } catch (error) {
      throw this.handleError(error);
    }
  }
  /**
   * Estimate the token count of `text` (delegates to estimateTokens).
   */
  countTokens(text) {
    return estimateTokens(text);
  }
  /**
   * Look up limits and prices for a model.
   * @param model - Model id; defaults to the provider's default model
   * @returns The MODEL_INFO entry, or a 200k-context streaming fallback
   *   without cost fields for unknown models
   */
  getModelInfo(model) {
    const known = MODEL_INFO[model ?? this.defaultModel];
    if (known !== null && known !== undefined) {
      return known;
    }
    return {
      maxContextTokens: 2e5,
      supportsStreaming: true
    };
  }
  /**
   * Turn an arbitrary failure into a TranslationError, classifying it by
   * substrings of the error message. Existing TranslationErrors pass through.
   */
  handleError(error) {
    if (error instanceof TranslationError) {
      return error;
    }
    const errorMessage = error instanceof Error ? error.message : String(error);
    const mentions = (...needles) => needles.some((needle) => errorMessage.includes(needle));
    let code = "PROVIDER_ERROR";
    if (mentions("rate_limit", "429")) {
      code = "PROVIDER_RATE_LIMITED";
    } else if (mentions("authentication", "401", "invalid_api_key")) {
      code = "PROVIDER_AUTH_FAILED";
    }
    return new TranslationError(code, {
      provider: "claude",
      message: errorMessage
    });
  }
};
|
|
1284
|
+
/**
 * Normalise a finish reason to "stop", "length" or "error".
 * "stop"/"end_turn" map to "stop", "length"/"max_tokens" to "length";
 * anything else is reported as "error".
 */
function mapFinishReason(reason) {
  if (reason === "stop" || reason === "end_turn") {
    return "stop";
  }
  if (reason === "length" || reason === "max_tokens") {
    return "length";
  }
  return "error";
}
|
|
1296
|
+
/**
 * Factory for a ClaudeProvider.
 * @param config2 - Forwarded unchanged to the ClaudeProvider constructor (default `{}`)
 */
function createClaudeProvider(config2 = {}) {
  const provider = new ClaudeProvider(config2);
  return provider;
}
|
|
1299
|
+
// Per-model limits and prices for OpenAI models. Costs are per 1,000 tokens.
var MODEL_INFO2 = {
  // GPT-4o models (latest)
  "gpt-4o": {
    maxContextTokens: 128000,
    supportsStreaming: true,
    costPer1kInput: 0.0025,
    costPer1kOutput: 0.01
  },
  "gpt-4o-2024-11-20": {
    maxContextTokens: 128000,
    supportsStreaming: true,
    costPer1kInput: 0.0025,
    costPer1kOutput: 0.01
  },
  "gpt-4o-2024-08-06": {
    maxContextTokens: 128000,
    supportsStreaming: true,
    costPer1kInput: 0.0025,
    costPer1kOutput: 0.01
  },
  // GPT-4o mini (cost-effective)
  "gpt-4o-mini": {
    maxContextTokens: 128000,
    supportsStreaming: true,
    costPer1kInput: 0.00015,
    costPer1kOutput: 0.0006
  },
  "gpt-4o-mini-2024-07-18": {
    maxContextTokens: 128000,
    supportsStreaming: true,
    costPer1kInput: 0.00015,
    costPer1kOutput: 0.0006
  },
  // GPT-4 Turbo
  "gpt-4-turbo": {
    maxContextTokens: 128000,
    supportsStreaming: true,
    costPer1kInput: 0.01,
    costPer1kOutput: 0.03
  },
  "gpt-4-turbo-2024-04-09": {
    maxContextTokens: 128000,
    supportsStreaming: true,
    costPer1kInput: 0.01,
    costPer1kOutput: 0.03
  },
  // GPT-4 (original)
  "gpt-4": {
    maxContextTokens: 8192,
    supportsStreaming: true,
    costPer1kInput: 0.03,
    costPer1kOutput: 0.06
  },
  // GPT-3.5 Turbo
  "gpt-3.5-turbo": {
    maxContextTokens: 16385,
    supportsStreaming: true,
    costPer1kInput: 0.0005,
    costPer1kOutput: 0.0015
  },
  // o1 models (reasoning); marked as non-streaming in this table
  "o1": {
    maxContextTokens: 200000,
    supportsStreaming: false,
    costPer1kInput: 0.015,
    costPer1kOutput: 0.06
  },
  "o1-preview": {
    maxContextTokens: 128000,
    supportsStreaming: false,
    costPer1kInput: 0.015,
    costPer1kOutput: 0.06
  },
  "o1-mini": {
    maxContextTokens: 128000,
    supportsStreaming: false,
    costPer1kInput: 0.003,
    costPer1kOutput: 0.012
  }
};
// Model used when neither the request nor the provider config names one.
var DEFAULT_MODEL2 = "gpt-4o-mini";
|
|
1380
|
+
/**
 * LLM provider backed by the OpenAI client created with `createOpenAI`.
 * Offers one-shot chat, streaming chat (with a non-streaming fallback),
 * token estimation and model lookup, and converts failures into
 * TranslationError instances.
 */
var OpenAIProvider = class {
  name = "openai";
  defaultModel;
  client;
  /**
   * @param config2 - Optional `apiKey` (falls back to the OPENAI_API_KEY
   *   environment variable), `baseUrl` and `defaultModel`
   * @throws TranslationError (PROVIDER_AUTH_FAILED) when no API key is available
   */
  constructor(config2 = {}) {
    const apiKey = config2.apiKey ?? process.env["OPENAI_API_KEY"];
    if (!apiKey) {
      throw new TranslationError("PROVIDER_AUTH_FAILED", {
        provider: "openai",
        message: "OPENAI_API_KEY environment variable is not set"
      });
    }
    const clientOptions = { apiKey, baseURL: config2.baseUrl };
    this.client = createOpenAI(clientOptions);
    this.defaultModel = config2.defaultModel ?? DEFAULT_MODEL2;
  }
  /**
   * Send a chat request and wait for the full answer.
   * @param request - Reads `messages`, optional `model`, `temperature`
   *   (default 0) and `maxTokens` (default 4096)
   * @returns Content, input/output token usage, the model used and a mapped
   *   finish reason
   * @throws TranslationError for any failure
   */
  async chat(request) {
    const model = request.model ?? this.defaultModel;
    try {
      const messages = this.convertMessages(request.messages);
      const options = {
        model: this.client(model),
        messages,
        temperature: request.temperature ?? 0,
        maxTokens: request.maxTokens ?? 4096
      };
      const result = await generateText(options);
      const usage = {
        inputTokens: result.usage?.promptTokens ?? 0,
        outputTokens: result.usage?.completionTokens ?? 0
      };
      return {
        content: result.text,
        usage,
        model,
        finishReason: mapFinishReason2(result.finishReason)
      };
    } catch (error) {
      throw this.handleError(error);
    }
  }
  /**
   * Convert messages to Vercel AI SDK format.
   * OpenAI doesn't support cache control like Claude, so multi-part content
   * is flattened by concatenating the parts' text.
   */
  convertMessages(messages) {
    const flatten = (content) => Array.isArray(content) ? content.map((part) => part.text).join("") : content;
    return messages.map((msg) => ({
      role: msg.role,
      content: flatten(msg.content)
    }));
  }
  /**
   * Stream a chat answer as text chunks. Models flagged as non-streaming in
   * the model table are answered with a single chunk from `chat`.
   * @param request - Same fields as `chat`
   * @throws TranslationError for any failure
   */
  async *stream(request) {
    const model = request.model ?? this.defaultModel;
    const { supportsStreaming } = this.getModelInfo(model);
    if (!supportsStreaming) {
      const { content } = await this.chat(request);
      yield content;
      return;
    }
    try {
      const messages = this.convertMessages(request.messages);
      const { textStream } = streamText({
        model: this.client(model),
        messages,
        temperature: request.temperature ?? 0,
        maxTokens: request.maxTokens ?? 4096
      });
      for await (const chunk of textStream) {
        yield chunk;
      }
    } catch (error) {
      throw this.handleError(error);
    }
  }
  /**
   * Estimate the token count of `text` (delegates to estimateTokens).
   */
  countTokens(text) {
    return estimateTokens(text);
  }
  /**
   * Look up limits and prices for a model.
   * @param model - Model id; defaults to the provider's default model
   * @returns The MODEL_INFO2 entry, or a 128k-context streaming fallback
   *   without cost fields for unknown models
   */
  getModelInfo(model) {
    const known = MODEL_INFO2[model ?? this.defaultModel];
    if (known !== null && known !== undefined) {
      return known;
    }
    return {
      maxContextTokens: 128e3,
      supportsStreaming: true
    };
  }
  /**
   * Turn an arbitrary failure into a TranslationError, classifying it by
   * substrings of the error message. Checks run in order: rate limit,
   * authentication, quota, context length; anything else is a generic
   * provider error. Existing TranslationErrors pass through.
   */
  handleError(error) {
    if (error instanceof TranslationError) {
      return error;
    }
    const errorMessage = error instanceof Error ? error.message : String(error);
    const mentions = (...needles) => needles.some((needle) => errorMessage.includes(needle));
    const fail = (code, message) => new TranslationError(code, { provider: "openai", message });
    if (mentions("rate_limit", "429", "Rate limit")) {
      return fail("PROVIDER_RATE_LIMITED", errorMessage);
    }
    if (mentions("authentication", "401", "invalid_api_key", "Incorrect API key")) {
      return fail("PROVIDER_AUTH_FAILED", errorMessage);
    }
    if (mentions("quota", "insufficient_quota")) {
      return fail("PROVIDER_ERROR", "API quota exceeded. Please check your billing settings.");
    }
    if (mentions("context_length_exceeded", "maximum context length")) {
      return fail("CHUNK_TOO_LARGE", errorMessage);
    }
    return fail("PROVIDER_ERROR", errorMessage);
  }
};
|
|
1504
|
+
/**
 * Normalise a finish reason to "stop", "length" or "error".
 * "stop" maps to "stop", "length"/"max_tokens" to "length"; anything else
 * is reported as "error".
 */
function mapFinishReason2(reason) {
  if (reason === "stop") {
    return "stop";
  }
  if (reason === "length" || reason === "max_tokens") {
    return "length";
  }
  return "error";
}
|
|
1515
|
+
/**
 * Factory for an OpenAIProvider.
 * @param config2 - Forwarded unchanged to the OpenAIProvider constructor (default `{}`)
 */
function createOpenAIProvider(config2 = {}) {
  const provider = new OpenAIProvider(config2);
  return provider;
}
|
|
1518
|
+
// Per-model metadata keyed by model tag. Every entry records a context-window
// size (in tokens) and marks the model as streaming-capable.
var MODEL_INFO3 = {};
for (const [modelTag, maxContextTokens] of [
  // Llama 3.x models
  ["llama3.3", 128e3],
  ["llama3.2", 128e3],
  ["llama3.1", 128e3],
  ["llama3", 8192],
  // Llama 2 models
  ["llama2", 4096],
  ["llama2:13b", 4096],
  ["llama2:70b", 4096],
  // Mistral models
  ["mistral", 32768],
  ["mistral-nemo", 128e3],
  ["mixtral", 32768],
  // Qwen models
  ["qwen2", 32768],
  ["qwen2.5", 128e3],
  ["qwen2.5-coder", 128e3],
  // Gemma models
  ["gemma2", 8192],
  ["gemma", 8192],
  // Phi models
  ["phi3", 128e3],
  ["phi3:mini", 128e3],
  // Code models
  ["codellama", 16384],
  ["deepseek-coder", 16384],
  // Other popular models
  ["neural-chat", 8192],
  ["vicuna", 2048]
]) {
  MODEL_INFO3[modelTag] = { maxContextTokens, supportsStreaming: true };
}
// Model used when a provider config does not name one.
var DEFAULT_MODEL3 = "llama3.2";
// Server address used when neither config nor environment supplies one.
var DEFAULT_BASE_URL = "http://localhost:11434";
|
|
1614
|
+
/**
 * Provider that talks to an Ollama server. Chat requests go through a
 * `createOpenAI` client pointed at `${baseUrl}/v1`; model availability is
 * checked against `${baseUrl}/api/tags` before every request.
 */
var OllamaProvider = class {
  name = "ollama";
  defaultModel;
  client;
  baseUrl;
  /**
   * @param {object} [config2={}] - Optional `baseUrl` and `defaultModel`.
   *   `baseUrl` falls back to the OLLAMA_BASE_URL environment variable and
   *   then to DEFAULT_BASE_URL; `defaultModel` falls back to DEFAULT_MODEL3.
   */
  constructor(config2 = {}) {
    this.baseUrl = config2.baseUrl ?? process.env["OLLAMA_BASE_URL"] ?? DEFAULT_BASE_URL;
    this.client = createOpenAI({
      apiKey: "ollama",
      // Ollama doesn't require an API key
      baseURL: `${this.baseUrl}/v1`
    });
    this.defaultModel = config2.defaultModel ?? DEFAULT_MODEL3;
  }
  /**
   * Runs a single non-streaming completion.
   *
   * @param {object} request - Reads `model`, `messages`, `temperature`
   *   (default 0) and `maxTokens` (default 4096).
   * @returns {Promise<object>} `{ content, usage, model, finishReason }`.
   * @throws {TranslationError} Any failure is converted by `handleError`.
   */
  async chat(request) {
    const model = request.model ?? this.defaultModel;
    try {
      await this.ensureModelAvailable(model);
      const messages = this.convertMessages(request.messages);
      const result = await generateText({
        model: this.client(model),
        messages,
        temperature: request.temperature ?? 0,
        maxTokens: request.maxTokens ?? 4096
      });
      return {
        content: result.text,
        usage: {
          inputTokens: result.usage?.promptTokens ?? 0,
          outputTokens: result.usage?.completionTokens ?? 0
        },
        model,
        finishReason: mapFinishReason3(result.finishReason)
      };
    } catch (error) {
      throw this.handleError(error, model);
    }
  }
  /**
   * Convert messages to Vercel AI SDK format
   * Ollama doesn't support cache control, so we simplify content:
   * array-valued content is flattened by concatenating each part's `text`.
   */
  convertMessages(messages) {
    return messages.map((msg) => {
      if (Array.isArray(msg.content)) {
        return {
          role: msg.role,
          content: msg.content.map((part) => part.text).join("")
        };
      }
      return { role: msg.role, content: msg.content };
    });
  }
  /**
   * Streaming variant of `chat`: yields each chunk of `streamText`'s
   * `textStream` as it arrives.
   *
   * @param {object} request - Same fields as `chat`.
   * @throws {TranslationError} Any failure is converted by `handleError`.
   */
  async *stream(request) {
    const model = request.model ?? this.defaultModel;
    try {
      await this.ensureModelAvailable(model);
      const messages = this.convertMessages(request.messages);
      const result = streamText({
        model: this.client(model),
        messages,
        temperature: request.temperature ?? 0,
        maxTokens: request.maxTokens ?? 4096
      });
      for await (const chunk of result.textStream) {
        yield chunk;
      }
    } catch (error) {
      throw this.handleError(error, model);
    }
  }
  /** Delegates to `estimateTokens`. */
  countTokens(text) {
    return estimateTokens(text);
  }
  /**
   * Looks up metadata for a model: first by the exact tag, then by the part
   * before the first ":", and finally falls back to a 4096-token default.
   *
   * @param {string} [model] - Model tag; defaults to `this.defaultModel`.
   * @returns {{maxContextTokens: number, supportsStreaming: boolean}}
   */
  getModelInfo(model) {
    const modelName = model ?? this.defaultModel;
    if (MODEL_INFO3[modelName]) {
      return MODEL_INFO3[modelName];
    }
    const baseModel = modelName.split(":")[0] ?? modelName;
    if (baseModel && MODEL_INFO3[baseModel]) {
      return MODEL_INFO3[baseModel];
    }
    return {
      maxContextTokens: 4096,
      supportsStreaming: true
    };
  }
  /**
   * Check if the Ollama server is running and the model is available
   *
   * A model counts as available when a listed name equals `model` or starts
   * with `${model}:`.
   *
   * @throws {TranslationError} PROVIDER_ERROR when the server answers with a
   *   non-OK status, when the model is not listed, or when the request fails
   *   for any other reason (reported as a connection failure).
   */
  async ensureModelAvailable(model) {
    try {
      const response = await fetch(`${this.baseUrl}/api/tags`);
      if (!response.ok) {
        throw new TranslationError("PROVIDER_ERROR" /* PROVIDER_ERROR */, {
          provider: "ollama",
          message: `Ollama server not responding at ${this.baseUrl}`
        });
      }
      const data = await response.json();
      const models = data.models ?? [];
      const modelNames = models.map((m) => m.name);
      const modelExists = modelNames.some(
        (name) => name === model || name.startsWith(`${model}:`)
      );
      if (!modelExists) {
        throw new TranslationError("PROVIDER_ERROR" /* PROVIDER_ERROR */, {
          provider: "ollama",
          model,
          availableModels: modelNames.slice(0, 10),
          // Show first 10
          message: `Model "${model}" not found. Pull it with: ollama pull ${model}`
        });
      }
    } catch (error) {
      if (error instanceof TranslationError) {
        throw error;
      }
      throw new TranslationError("PROVIDER_ERROR" /* PROVIDER_ERROR */, {
        provider: "ollama",
        baseUrl: this.baseUrl,
        message: `Cannot connect to Ollama server at ${this.baseUrl}. Is Ollama running?`
      });
    }
  }
  /**
   * Converts an arbitrary failure into a TranslationError by inspecting its
   * message. Existing TranslationErrors are returned unchanged.
   *
   * @param {*} error - The caught value.
   * @param {string} model - Model the failing request targeted.
   * @returns {TranslationError}
   */
  handleError(error, model) {
    if (error instanceof TranslationError) {
      return error;
    }
    const errorMessage = error instanceof Error ? error.message : String(error);
    if (errorMessage.includes("ECONNREFUSED") || errorMessage.includes("fetch failed") || errorMessage.includes("network")) {
      return new TranslationError("PROVIDER_ERROR" /* PROVIDER_ERROR */, {
        provider: "ollama",
        baseUrl: this.baseUrl,
        message: `Cannot connect to Ollama server at ${this.baseUrl}. Is Ollama running?`
      });
    }
    if (errorMessage.includes("model") && errorMessage.includes("not found")) {
      return new TranslationError("PROVIDER_ERROR" /* PROVIDER_ERROR */, {
        provider: "ollama",
        model,
        message: `Model "${model}" not found. Pull it with: ollama pull ${model}`
      });
    }
    // "context canceled" / "context deadline exceeded" describe a cancelled or
    // timed-out request, not an oversized prompt, so they must not be reported
    // as CHUNK_TOO_LARGE; they fall through to the generic branches below.
    const isCancellationOrTimeout = errorMessage.includes("context canceled") || errorMessage.includes("context deadline exceeded");
    if (!isCancellationOrTimeout && (errorMessage.includes("context") || errorMessage.includes("too long"))) {
      return new TranslationError("CHUNK_TOO_LARGE" /* CHUNK_TOO_LARGE */, {
        provider: "ollama",
        model,
        message: errorMessage
      });
    }
    if (errorMessage.includes("out of memory") || errorMessage.includes("OOM")) {
      return new TranslationError("PROVIDER_ERROR" /* PROVIDER_ERROR */, {
        provider: "ollama",
        model,
        message: "Out of memory. Try a smaller model or reduce chunk size."
      });
    }
    return new TranslationError("PROVIDER_ERROR" /* PROVIDER_ERROR */, {
      provider: "ollama",
      message: errorMessage
    });
  }
};
|
|
1779
|
+
/**
 * Reduces a finish-reason value to "stop", "length" or "error".
 * "max_tokens" is treated as an alias of "length"; every value other than
 * those three maps to "error".
 *
 * @param {*} reason - Finish reason to classify.
 * @returns {"stop"|"length"|"error"}
 */
function mapFinishReason3(reason) {
  const canonical = reason === "max_tokens" ? "length" : reason;
  const isKnown = canonical === "stop" || canonical === "length";
  return isKnown ? canonical : "error";
}
|
|
1790
|
+
/**
 * Factory wrapper around the OllamaProvider constructor.
 *
 * @param {object} [config2={}] - Passed to the constructor unchanged.
 * @returns {OllamaProvider} A newly constructed provider.
 */
function createOllamaProvider(config2 = {}) {
  const provider = new OllamaProvider(config2);
  return provider;
}
|
|
1793
|
+
|
|
1794
|
+
// src/providers/registry.ts
|
|
1795
|
+
// Registry of provider factories, keyed by provider name.
var providers = /* @__PURE__ */ new Map();
/**
 * Stores a factory under the given name, replacing any factory that was
 * previously registered under that name.
 *
 * @param {string} name - Registry key.
 * @param {Function} factory - Called later with a config object.
 * @returns {void}
 */
function registerProvider(name, factory) {
  providers.set(name, factory);
}
|
|
1799
|
+
/**
 * Instantiates the provider registered under `name`.
 *
 * @param {string} name - Registry key.
 * @param {object} [config2={}] - Handed to the registered factory.
 * @returns {*} Whatever the factory returns.
 * @throws {TranslationError} PROVIDER_NOT_FOUND (listing the registered
 *   names) when no factory is registered under `name`.
 */
function getProvider(name, config2 = {}) {
  const factory = providers.get(name);
  if (factory) {
    return factory(config2);
  }
  throw new TranslationError("PROVIDER_NOT_FOUND" /* PROVIDER_NOT_FOUND */, {
    provider: name,
    available: Array.from(providers.keys())
  });
}
|
|
1809
|
+
/**
 * Reports whether a factory is registered under `name`.
 *
 * @param {string} name - Registry key.
 * @returns {boolean}
 */
function hasProvider(name) {
  const isRegistered = providers.has(name);
  return isRegistered;
}
|
|
1812
|
+
/**
 * Lists the names of all registered providers, in registration order.
 *
 * @returns {Array} A new array of registry keys.
 */
function getAvailableProviders() {
  return [...providers.keys()];
}
|
|
1815
|
+
/**
 * Builds a default provider config from environment variables.
 *
 * - "claude": `apiKey` from ANTHROPIC_API_KEY (the model default is left to
 *   the provider itself).
 * - "openai": `apiKey` from OPENAI_API_KEY, `defaultModel` "gpt-4o".
 * - "ollama": `baseUrl` from OLLAMA_BASE_URL (or the localhost default),
 *   `defaultModel` "llama3.2".
 * - "custom": `apiKey` from LLM_API_KEY, `baseUrl` from LLM_BASE_URL.
 *
 * @param {string} name - Provider name.
 * @returns {object} Config for that provider, or `{}` for any other name.
 */
function getProviderConfigFromEnv(name) {
  const env = process.env;
  if (name === "claude") {
    // defaultModel is handled by the provider itself
    return { apiKey: env["ANTHROPIC_API_KEY"] };
  }
  if (name === "openai") {
    return { apiKey: env["OPENAI_API_KEY"], defaultModel: "gpt-4o" };
  }
  if (name === "ollama") {
    const baseUrl = env["OLLAMA_BASE_URL"] ?? "http://localhost:11434";
    // Better multilingual support than llama2
    return { baseUrl, defaultModel: "llama3.2" };
  }
  if (name === "custom") {
    return { apiKey: env["LLM_API_KEY"], baseUrl: env["LLM_BASE_URL"] };
  }
  return {};
}
|
|
1842
|
+
// Register the built-in providers, in this order, when the module loads.
for (const [builtinName, builtinFactory] of [
  ["claude", createClaudeProvider],
  ["openai", createOpenAIProvider],
  ["ollama", createOllamaProvider]
]) {
  registerProvider(builtinName, builtinFactory);
}
|
|
1845
|
+
/**
 * Decides whether a provider can be used with the given config.
 *
 * An unregistered provider is never usable. "ollama" is usable without any
 * credentials; every other provider requires a truthy `config2.apiKey`.
 *
 * @param {string} name - Provider name.
 * @param {object} config2 - Config whose `apiKey` is inspected (not read
 *   for "ollama" or for unregistered names).
 * @returns {boolean}
 */
function canUseProvider(name, config2) {
  if (!hasProvider(name)) {
    return false;
  }
  return name === "ollama" || Boolean(config2.apiKey);
}
|
|
1854
|
+
/**
 * Creates the first usable provider, trying `primary` and then each entry of
 * `fallback` in order.
 *
 * For every candidate, the config is the environment-derived defaults
 * overlaid with `options.config[candidate]`.
 *
 * @param {object} options - `{ primary, fallback = [], config = {} }`.
 * @returns {*} The provider built for the first usable candidate.
 * @throws {TranslationError} PROVIDER_AUTH_FAILED when no candidate is usable.
 */
function createProviderWithFallback(options) {
  const { primary, fallback = [], config: config2 = {} } = options;
  // Returns a wrapper so "not usable" stays distinguishable from whatever
  // value the factory itself returns.
  const attempt = (candidate) => {
    const candidateConfig = {
      ...getProviderConfigFromEnv(candidate),
      ...config2[candidate]
    };
    if (!canUseProvider(candidate, candidateConfig)) {
      return null;
    }
    return { provider: getProvider(candidate, candidateConfig) };
  };
  const fromPrimary = attempt(primary);
  if (fromPrimary) {
    return fromPrimary.provider;
  }
  for (const fallbackName of fallback) {
    const fromFallback = attempt(fallbackName);
    if (fromFallback) {
      return fromFallback.provider;
    }
  }
  throw new TranslationError("PROVIDER_AUTH_FAILED" /* PROVIDER_AUTH_FAILED */, {
    primary,
    fallback,
    message: "No API key found for any configured provider"
  });
}
|
|
1878
|
+
|
|
1879
|
+
// src/core/chunker.ts
|
|
1880
|
+
// Chunking defaults applied when the caller does not supply a value.
var DEFAULT_CONFIG = { maxTokens: 1024, overlapTokens: 150 };
|
|
1883
|
+
/**
 * Splits a document into ordered chunks.
 *
 * Segments marked "preserve" become a single chunk each; all other segments
 * are split by `splitIntoChunks`. Every chunk gets an id of the form
 * `chunk-<index>` and the header trail in effect at its start offset.
 * Split chunks additionally carry `previousContext`: the tail (up to 200
 * characters) of the previously emitted split chunk, if any.
 *
 * @param {string} content - Full document text.
 * @param {object} [options={}] - Optional `maxTokens` / `overlapTokens`;
 *   missing values come from DEFAULT_CONFIG.
 * @returns {Array<object>} The chunks, or `[]` for blank content.
 */
function chunkContent(content, options = {}) {
  if (content.trim().length === 0) {
    return [];
  }
  const limits = {
    maxTokens: options.maxTokens ?? DEFAULT_CONFIG.maxTokens,
    overlapTokens: options.overlapTokens ?? DEFAULT_CONFIG.overlapTokens
  };
  const allHeaders = extractHeaderHierarchy(content);
  const { segments } = extractPreservedSections(content);
  const result = [];
  let trailingContext;
  for (const segment of segments) {
    const segmentHeaders = getHeadersForPosition(allHeaders, segment.startOffset);
    if (segment.type === "preserve") {
      result.push({
        id: `chunk-${result.length}`,
        content: segment.content,
        type: "preserve",
        startOffset: segment.startOffset,
        endOffset: segment.endOffset,
        metadata: { headerHierarchy: segmentHeaders }
      });
      continue;
    }
    const pieces = splitIntoChunks(segment.content, limits, segment.startOffset);
    for (const piece of pieces) {
      if (!piece) continue;
      const pieceHeaders = getHeadersForPosition(allHeaders, piece.startOffset);
      result.push({
        ...piece,
        id: `chunk-${result.length}`,
        metadata: {
          // Fall back to the segment's trail when the piece has none of its own.
          headerHierarchy: pieceHeaders.length > 0 ? pieceHeaders : segmentHeaders,
          previousContext: trailingContext
        }
      });
      trailingContext = truncateForContext(piece.content, 200);
    }
  }
  return result;
}
|
|
1938
|
+
/**
 * Finds the Markdown ATX headers ("#" through "######") in a document.
 *
 * Lines inside closed fenced code blocks (``` ... ```) are skipped, so that
 * "#"-prefixed lines in code (for example shell comments) are not mistaken
 * for headers. An unclosed fence is not treated as a code block.
 *
 * @param {string} content - Document text.
 * @returns {Array<{level: number, text: string, offset: number}>} Headers in
 *   document order; `text` is the full matched header line and `offset` its
 *   start index within `content`.
 */
function extractHeaderHierarchy(content) {
  // [start, end) ranges of fenced code blocks, in ascending order.
  const fencedRanges = [];
  for (const fence of content.matchAll(/```[\s\S]*?```/g)) {
    fencedRanges.push([fence.index, fence.index + fence[0].length]);
  }
  const headers = [];
  let fenceCursor = 0;
  for (const match of content.matchAll(/^(#{1,6})\s+(.+)$/gm)) {
    const offset = match.index;
    // Both sequences are ascending, so fences ending at or before this
    // header can be dropped from consideration for good.
    while (fenceCursor < fencedRanges.length && fencedRanges[fenceCursor][1] <= offset) {
      fenceCursor += 1;
    }
    const enclosingFence = fencedRanges[fenceCursor];
    if (enclosingFence && enclosingFence[0] <= offset) {
      continue;
    }
    headers.push({
      level: match[1].length,
      text: match[0],
      offset
    });
  }
  return headers;
}
|
|
1954
|
+
/**
 * Computes the header trail in effect at a given offset.
 *
 * Headers are scanned in order until one starts after `position`. Each
 * header replaces any recorded header of the same or a deeper level.
 *
 * @param {Array<{level: number, text: string, offset: number}>} headers -
 *   Headers ordered by offset (scanning stops at the first later one).
 * @param {number} position - Offset to resolve.
 * @returns {string[]} Header texts for levels 1 through 6, shallowest first,
 *   omitting levels with no active header.
 */
function getHeadersForPosition(headers, position) {
  const activeByLevel = /* @__PURE__ */ new Map();
  for (const { level, text, offset } of headers) {
    if (offset > position) break;
    const superseded = [...activeByLevel.keys()].filter((known) => known >= level);
    superseded.forEach((known) => activeByLevel.delete(known));
    activeByLevel.set(level, text);
  }
  return [1, 2, 3, 4, 5, 6].map((level) => activeByLevel.get(level)).filter(Boolean);
}
|
|
1974
|
+
/**
 * Keeps only the tail of a string, prefixed with "...".
 *
 * Content no longer than `maxChars` is returned untouched. Otherwise the last
 * `maxChars` characters are kept; if the first space in that tail sits at
 * index 1..49, everything up to and including it is dropped.
 *
 * @param {string} content - Text to shorten.
 * @param {number} maxChars - Maximum tail length.
 * @returns {string}
 */
function truncateForContext(content, maxChars) {
  if (content.length <= maxChars) {
    return content;
  }
  const tail = content.slice(-maxChars);
  const spaceAt = tail.indexOf(" ");
  const dropLeadingFragment = spaceAt > 0 && spaceAt < 50;
  return `...${dropLeadingFragment ? tail.slice(spaceAt + 1) : tail}`;
}
|
|
1983
|
+
/**
 * Partitions a document into consecutive segments around fenced code blocks.
 *
 * Each ``` ... ``` block becomes a "preserve" segment. The text between
 * blocks becomes a "translatable" segment, or a "preserve" segment when it is
 * whitespace only. A document with no segments at all (i.e. empty content)
 * yields one "translatable" segment spanning the whole content.
 *
 * @param {string} content - Document text.
 * @returns {{segments: Array<{content: string, type: string,
 *   startOffset: number, endOffset: number}>}}
 */
function extractPreservedSections(content) {
  const segments = [];
  // Records the text between two offsets, if there is any.
  const addGap = (from, to) => {
    if (to <= from) return;
    const gap = content.slice(from, to);
    segments.push({
      content: gap,
      type: gap.trim() ? "translatable" : "preserve",
      startOffset: from,
      endOffset: to
    });
  };
  let cursor = 0;
  for (const fence of content.matchAll(/```[\s\S]*?```/g)) {
    const fenceStart = fence.index;
    const fenceEnd = fenceStart + fence[0].length;
    addGap(cursor, fenceStart);
    segments.push({
      content: fence[0],
      type: "preserve",
      startOffset: fenceStart,
      endOffset: fenceEnd
    });
    cursor = fenceEnd;
  }
  addGap(cursor, content.length);
  if (segments.length === 0) {
    segments.push({
      content,
      type: "translatable",
      startOffset: 0,
      endOffset: content.length
    });
  }
  return { segments };
}
|
|
2038
|
+
/**
 * Splits text into "translatable" chunks at blank-line boundaries.
 *
 * Text whose estimated token count fits within `config2.maxTokens` is
 * returned as one chunk. Otherwise paragraphs (and the blank-line separators
 * between them) are accumulated until adding the next part would exceed the
 * limit, at which point the accumulated text is emitted. A single part is
 * never split, so a chunk can still exceed the limit.
 *
 * @param {string} text - Segment text.
 * @param {object} config2 - Reads `maxTokens`.
 * @param {number} baseOffset - Offset of `text` within the full document.
 * @returns {Array<object>} Chunks with empty `id` and absolute offsets.
 */
function splitIntoChunks(text, config2, baseOffset) {
  const segmentEnd = baseOffset + text.length;
  const makeChunk = (content, startOffset, endOffset) => ({
    id: "",
    content,
    type: "translatable",
    startOffset,
    endOffset
  });
  if (estimateTokens(text) <= config2.maxTokens) {
    return [makeChunk(text, baseOffset, segmentEnd)];
  }
  const chunks = [];
  let pending = "";
  let pendingStart = baseOffset;
  let cursor = baseOffset;
  // The capturing group keeps the separators as parts of their own.
  for (const part of text.split(/(\n\n+)/)) {
    if (part === undefined) continue;
    const candidate = pending + part;
    const candidateTokens = estimateTokens(candidate);
    if (candidateTokens > config2.maxTokens && pending) {
      chunks.push(makeChunk(pending, pendingStart, cursor));
      pending = part;
      pendingStart = cursor;
    } else {
      pending = candidate;
    }
    cursor += part.length;
  }
  if (pending.length > 0) {
    chunks.push(makeChunk(pending, pendingStart, segmentEnd));
  }
  return chunks;
}
|
|
2087
|
+
/**
 * Concatenates chunk contents in ascending `startOffset` order.
 * The input array is not modified.
 *
 * @param {Array<{startOffset: number, content: string}>} chunks
 * @returns {string}
 */
function reassembleChunks(chunks) {
  const ordered = [...chunks];
  ordered.sort((left, right) => left.startOffset - right.startOffset);
  const pieces = ordered.map((chunk) => chunk.content);
  return pieces.join("");
}
|
|
2091
|
+
/**
 * Summarises a list of chunks.
 *
 * @param {Array<{type: string, content: string}>} chunks
 * @returns {{totalChunks: number, translatableChunks: number,
 *   preservedChunks: number, totalTokens: number, averageTokens: number}}
 *   Token figures come from `estimateTokens`; the average is rounded and is
 *   0 for an empty list.
 */
function getChunkStats(chunks) {
  const countByType = { translatable: 0, preserve: 0 };
  let totalTokens = 0;
  chunks.forEach((chunk) => {
    if (chunk.type === "translatable" || chunk.type === "preserve") {
      countByType[chunk.type] += 1;
    }
    totalTokens += estimateTokens(chunk.content);
  });
  const totalChunks = chunks.length;
  return {
    totalChunks,
    translatableChunks: countByType.translatable,
    preservedChunks: countByType.preserve,
    totalTokens,
    averageTokens: totalChunks > 0 ? Math.round(totalTokens / totalChunks) : 0
  };
}
|
|
2106
|
+
|
|
2107
|
+
// src/types/mqm.ts
|
|
2108
|
+
// Penalty points deducted per error, by severity.
var MQM_SEVERITY_WEIGHTS = {
  minor: 1,
  // Noticeable but doesn't affect understanding
  major: 5,
  // Affects understanding or usability
  critical: 25
  // Completely wrong or unusable
};
/**
 * Computes a 0-100 quality score: 100 minus the summed severity penalties,
 * floored at 0.
 *
 * Severity is matched case-insensitively (surrounding whitespace ignored).
 * Errors whose severity is missing or not one of the known keys contribute
 * no penalty, so the result is always a finite number rather than NaN.
 *
 * @param {Array<{severity?: string}>} errors - Errors to score.
 * @returns {number} Score between 0 and 100 inclusive.
 */
function calculateMQMScore(errors) {
  let totalPenalty = 0;
  for (const err of errors) {
    const severity = String(err.severity ?? "").trim().toLowerCase();
    // Own-property check keeps inherited keys such as "constructor" out.
    if (Object.hasOwn(MQM_SEVERITY_WEIGHTS, severity)) {
      totalPenalty += MQM_SEVERITY_WEIGHTS[severity];
    }
  }
  return Math.max(0, 100 - totalPenalty);
}
|
|
2123
|
+
/**
 * Counts errors per top-level category, based on the prefix of `type`
 * ("accuracy/", "fluency/" or "style/"). Other types are not counted.
 *
 * @param {Array<{type: string}>} errors
 * @returns {{accuracy: number, fluency: number, style: number}}
 */
function calculateMQMBreakdown(errors) {
  return errors.reduce(
    (tally, error) => {
      for (const category of Object.keys(tally)) {
        if (error.type.startsWith(`${category}/`)) {
          tally[category] += 1;
        }
      }
      return tally;
    },
    { accuracy: 0, fluency: 0, style: 0 }
  );
}
|
|
2130
|
+
/**
 * Extracts an MQM evaluation from a text response.
 *
 * The span from the first "{" to the last "}" is parsed as JSON. Missing
 * fields are defaulted: `errors` to `[]`, `score` to the value computed from
 * the errors, `summary` to "".
 *
 * @param {string} response - Raw response text.
 * @returns {?{errors: Array, score: *, summary: *, breakdown: object}}
 *   The evaluation, or `null` when no JSON object is found or anything
 *   throws while parsing or processing it.
 */
function parseMQMResponse(response) {
  try {
    const found = response.match(/\{[\s\S]*\}/);
    if (found === null) {
      return null;
    }
    const payload = JSON.parse(found[0]);
    const errors = payload.errors ?? [];
    return {
      errors,
      score: payload.score ?? calculateMQMScore(errors),
      summary: payload.summary ?? "",
      breakdown: calculateMQMBreakdown(errors)
    };
  } catch {
    return null;
  }
}
|
|
2149
|
+
/**
 * Renders MQM errors as a numbered, blank-line-separated list for a prompt.
 *
 * Each entry shows the upper-cased severity, the type, the offending text and
 * the suggested fix, plus a "Reason" line when an explanation is present.
 *
 * @param {Array<object>} errors - Reads `severity`, `type`, `span`,
 *   `suggestion` and optional `explanation`.
 * @returns {string} The list, or "No errors identified." when empty.
 */
function formatMQMErrorsForPrompt(errors) {
  if (errors.length === 0) {
    return "No errors identified.";
  }
  const entries = errors.map((err, position) => {
    const lines = [
      `${position + 1}. [${err.severity.toUpperCase()}] ${err.type}`,
      `Text: "${err.span}"`,
      `Fix: "${err.suggestion}"`
    ];
    if (err.explanation) {
      lines.push(`Reason: ${err.explanation}`);
    }
    return lines.join("\n");
  });
  return entries.join("\n\n");
}
|
|
2161
|
+
|
|
2162
|
+
// src/types/analysis.ts
|
|
2163
|
+
/**
 * Extracts a pre-translation analysis from a text response.
 *
 * The span from the first "{" to the last "}" is parsed as JSON; absent
 * (null/undefined) fields are replaced with defaults.
 *
 * @param {string} response - Raw response text.
 * @returns {?object} `{ keyTerms, ambiguousPhrases, preserveExact,
 *   challenges, domain, registerRecommendation }`, or `null` when no JSON
 *   object is found or parsing throws.
 */
function parseAnalysisResponse(response) {
  try {
    const found = response.match(/\{[\s\S]*\}/);
    if (found === null) {
      return null;
    }
    const payload = JSON.parse(found[0]);
    const listOrEmpty = (value) => value ?? [];
    return {
      keyTerms: listOrEmpty(payload.keyTerms),
      ambiguousPhrases: listOrEmpty(payload.ambiguousPhrases),
      preserveExact: listOrEmpty(payload.preserveExact),
      challenges: listOrEmpty(payload.challenges),
      domain: payload.domain ?? "general",
      registerRecommendation: payload.registerRecommendation ?? "neutral"
    };
  } catch {
    return null;
  }
}
|
|
2182
|
+
/**
 * Renders an analysis object as prompt text.
 *
 * Emits, in order and separated by blank lines: a "Key Terms" list, an
 * "Ambiguous Phrases" list and a "Do NOT translate" list (each only when
 * non-empty), followed by the content type and tone, which are always present.
 *
 * @param {object} analysis - Reads `keyTerms`, `ambiguousPhrases`,
 *   `preserveExact`, `domain` and `registerRecommendation`.
 * @returns {string}
 */
function formatAnalysisForPrompt(analysis) {
  const bulletList = (items, render) => items.map((item) => render(item)).join("\n");
  const sections = [];
  if (analysis.keyTerms.length > 0) {
    const renderTerm = (entry) => {
      const arrow = entry.suggestedTranslation ? ` \u2192 ${entry.suggestedTranslation}` : "";
      const origin = entry.fromGlossary ? " (glossary)" : "";
      return `- "${entry.term}"${arrow}${origin}: ${entry.context}`;
    };
    sections.push(`**Key Terms:**\n${bulletList(analysis.keyTerms, renderTerm)}`);
  }
  if (analysis.ambiguousPhrases.length > 0) {
    const renderPhrase = (entry) => `- "${entry.phrase}": Use interpretation "${entry.recommendation}"`;
    sections.push(
      `**Ambiguous Phrases (use these interpretations):**\n${bulletList(analysis.ambiguousPhrases, renderPhrase)}`
    );
  }
  if (analysis.preserveExact.length > 0) {
    const renderLiteral = (literal) => `- ${literal}`;
    sections.push(
      `**Do NOT translate (keep exactly as-is):**\n${bulletList(analysis.preserveExact, renderLiteral)}`
    );
  }
  sections.push(`**Content Type:** ${analysis.domain}\n**Tone:** ${analysis.registerRecommendation}`);
  return sections.join("\n\n");
}
|
|
2210
|
+
/**
 * Creates an analysis with no findings: empty lists, domain "general" and
 * register "neutral". Every call returns fresh objects.
 *
 * @returns {object}
 */
function createEmptyAnalysis() {
  const emptyAnalysis = {
    keyTerms: [],
    ambiguousPhrases: [],
    preserveExact: [],
    challenges: [],
    domain: "general",
    registerRecommendation: "neutral"
  };
  return emptyAnalysis;
}
|
|
2220
|
+
|
|
2221
|
+
// src/types/modes.ts
|
|
2222
|
+
var MODE_PRESETS = {
  /**
   * Fast mode: Single pass, no evaluation
   * Best for: Quick drafts, large batches, local models
   * Speed: ~1x (fastest)
   */
  fast: {
    enableAnalysis: false,
    useMQMEvaluation: false,
    maxIterations: 1,
    qualityThreshold: 0
    // Skip threshold check
  },
  /**
   * Balanced mode: TEaR with MQM evaluation
   * Best for: General use, good quality with reasonable speed
   * Speed: ~2-3x
   */
  balanced: {
    enableAnalysis: false,
    useMQMEvaluation: true,
    maxIterations: 2,
    qualityThreshold: 75
  },
  /**
   * Quality mode: Full MAPS + TEaR pipeline
   * Best for: Production content, critical documents
   * Speed: ~4-5x
   */
  quality: {
    enableAnalysis: true,
    useMQMEvaluation: true,
    maxIterations: 4,
    qualityThreshold: 85
  }
};
/**
 * Returns the settings for a translation mode.
 *
 * Without `overrides` the shared preset object itself is returned. With
 * `overrides`, a new object is returned in which every override whose value
 * is not `undefined` replaces the preset's value; the preset is not mutated.
 *
 * @param {string} mode - Key of MODE_PRESETS ("fast", "balanced", "quality").
 * @param {object} [overrides] - Optional per-field replacements.
 * @returns {object|undefined} The mode settings; `undefined` when `mode` is
 *   not a preset key and no overrides are given.
 */
function getModeConfig(mode, overrides) {
  const preset = MODE_PRESETS[mode];
  if (overrides == null) {
    return preset;
  }
  const merged = { ...preset };
  for (const [key, value] of Object.entries(overrides)) {
    // An explicitly undefined override must not erase the preset's value.
    if (value !== undefined) {
      merged[key] = value;
    }
  }
  return merged;
}
|
|
2264
|
+
|
|
2265
|
+
// src/core/agent.ts
|
|
2266
|
+
/**
 * Builds the translator role statement followed by the numbered rule list.
 *
 * @param {string} sourceLang - Source language, inserted verbatim.
 * @param {string} targetLang - Target language, inserted verbatim.
 * @returns {string}
 */
function buildSystemInstructions(sourceLang, targetLang) {
  const rules = [
    "Apply glossary terms exactly as specified",
    "Preserve all formatting (markdown, HTML tags, code blocks)",
    "Maintain the same tone and style",
    "Do not translate content inside code blocks",
    "Keep URLs, file paths, and technical identifiers unchanged",
    "Keep placeholders like __CODE_BLOCK_0__ unchanged"
  ];
  const lines = [
    `You are a professional translator specializing in ${sourceLang} to ${targetLang} translation.`,
    "",
    "## Rules:",
    ...rules.map((rule, position) => `${position + 1}. ${rule}`)
  ];
  return lines.join("\n");
}
|
|
2277
|
+
/**
 * Builds the glossary section of a prompt.
 *
 * @param {string} glossaryText - Rendered glossary; any falsy value is
 *   replaced with "No glossary provided.".
 * @returns {string} The section heading followed by the glossary body.
 */
function buildGlossarySection(glossaryText) {
  const body = glossaryText || "No glossary provided.";
  return `## Glossary (MUST use these exact translations):\n${body}`;
}
|
|
2281
|
+
/**
 * Builds the per-request part of a translation prompt: document context,
 * the source text, and the closing instruction.
 *
 * @param {string} sourceText - Text to translate.
 * @param {object} [context] - Optional `documentPurpose`, `styleInstruction`
 *   and `previousContext`. A "Style:" line appears only when
 *   `styleInstruction` is truthy; the other two have textual defaults.
 * @returns {string}
 */
function buildTranslationContent(sourceText, context) {
  const styleLines = context?.styleInstruction ? [`Style: ${context.styleInstruction}`] : [];
  const lines = [
    "## Document Context:",
    `Purpose: ${context?.documentPurpose ?? "General translation"}`,
    ...styleLines,
    `Previous content: ${context?.previousContext ?? "None"}`,
    "",
    "## Source Text:",
    `${sourceText}`,
    "",
    "Provide ONLY the translated text below, with no additional commentary or headers:"
  ];
  return lines.join("\n");
}
|
|
2293
|
+
/**
 * Builds a single user message made of three text parts: the system
 * instructions and the glossary (both tagged with ephemeral cache control)
 * followed by the per-request translation content (untagged, because it
 * changes with every request).
 *
 * @param {string} sourceText - Text to translate.
 * @param {string} sourceLang - Source language.
 * @param {string} targetLang - Target language.
 * @param {string} glossaryText - Rendered glossary.
 * @param {object} [context] - Optional document context.
 * @returns {{role: string, content: Array<object>}}
 */
function buildCacheableTranslationMessage(sourceText, sourceLang, targetLang, glossaryText, context) {
  const cacheablePart = (text) => ({
    type: "text",
    text,
    cacheControl: { type: "ephemeral" }
  });
  const instructionsPart = cacheablePart(buildSystemInstructions(sourceLang, targetLang));
  const glossaryPart = cacheablePart(buildGlossarySection(glossaryText));
  // No cache control - this is dynamic per request
  const requestPart = {
    type: "text",
    text: buildTranslationContent(sourceText, context)
  };
  return {
    role: "user",
    content: [instructionsPart, glossaryPart, requestPart]
  };
}
|
|
2319
|
+
/**
 * Builds the complete single-string prompt for a first-pass translation:
 * role statement, glossary, document context, rules, source text and the
 * closing instruction.
 *
 * @param {string} sourceText - Text to translate.
 * @param {string} sourceLang - Source language.
 * @param {string} targetLang - Target language.
 * @param {string} glossaryText - Rendered glossary; falsy values are shown
 *   as "No glossary provided.".
 * @param {object} [context] - Optional `documentPurpose`, `styleInstruction`
 *   and `previousContext`.
 * @returns {string}
 */
function buildInitialTranslationPrompt(sourceText, sourceLang, targetLang, glossaryText, context) {
  const styleLines = context?.styleInstruction ? [`Style: ${context.styleInstruction}`] : [];
  const lines = [
    `You are a professional translator. Translate the following ${sourceLang} text to ${targetLang}.`,
    "",
    "## Glossary (MUST use these exact translations):",
    `${glossaryText || "No glossary provided."}`,
    "",
    "## Document Context:",
    `Purpose: ${context?.documentPurpose ?? "General translation"}`,
    ...styleLines,
    `Previous content: ${context?.previousContext ?? "None"}`,
    "",
    "## Rules:",
    "1. Apply glossary terms exactly as specified",
    "2. Preserve all formatting (markdown, HTML tags, code blocks)",
    "3. Maintain the same tone and style",
    "4. Do not translate content inside code blocks",
    "5. Keep URLs, file paths, and technical identifiers unchanged",
    "6. Keep placeholders like __CODE_BLOCK_0__ unchanged",
    "",
    "## Source Text:",
    `${sourceText}`,
    "",
    "Provide ONLY the translated text below, with no additional commentary or headers:"
  ];
  return lines.join("\n");
}
|
|
2344
|
+
/**
 * Builds the prompt asking for a review of a translation, listing the five
 * aspects to evaluate and requesting numbered suggestions.
 *
 * @param {string} sourceText - Original text.
 * @param {string} translatedText - Translation under review.
 * @param {string} sourceLang - Source language.
 * @param {string} targetLang - Target language.
 * @param {string} glossaryText - Rendered glossary; falsy values are shown
 *   as "No glossary provided.".
 * @returns {string}
 */
function buildReflectionPrompt(sourceText, translatedText, sourceLang, targetLang, glossaryText) {
  const lines = [
    "Review this translation and provide specific improvement suggestions.",
    "",
    `## Source (${sourceLang}):`,
    `${sourceText}`,
    "",
    `## Translation (${targetLang}):`,
    `${translatedText}`,
    "",
    "## Glossary Requirements:",
    `${glossaryText || "No glossary provided."}`,
    "",
    "## Evaluate and suggest improvements for:",
    "1. **Accuracy**: Does the translation convey the exact meaning?",
    "2. **Glossary Compliance**: Are all glossary terms applied correctly?",
    `3. **Fluency**: Does it read naturally in ${targetLang}?`,
    "4. **Formatting**: Is the structure preserved?",
    "5. **Consistency**: Are terms translated consistently?",
    "",
    "Provide a numbered list of specific, actionable suggestions:"
  ];
  return lines.join("\n");
}
|
|
2365
|
+
/**
 * Build the prompt that asks the model to rewrite a translation using reviewer suggestions.
 *
 * @param {string} sourceText - Original text.
 * @param {string} currentTranslation - Translation to improve.
 * @param {string} suggestions - Improvement suggestions, inserted verbatim.
 * @param {string} glossaryText - Pre-formatted glossary; a fixed notice is used when falsy.
 * @returns {string} Prompt requesting only the improved translation.
 */
function buildImprovementPrompt(sourceText, currentTranslation, suggestions, glossaryText) {
  const sections = [
    ["## Source Text:", `${sourceText}`],
    ["## Current Translation:", `${currentTranslation}`],
    ["## Improvement Suggestions:", `${suggestions}`],
    ["## Glossary (MUST apply):", `${glossaryText || "No glossary provided."}`]
  ];
  const body = sections.map(([heading, content]) => `${heading}\n${content}\n`).join("\n");
  return [
    "Improve this translation based on the following suggestions.",
    "",
    body,
    "Provide ONLY the improved translation below, with no additional commentary or headers:"
  ].join("\n");
}
|
|
2382
|
+
/**
 * Build the prompt that asks the model to score a translation from 0 to 100.
 *
 * The rubric awards points for glossary compliance, so the glossary can be
 * supplied through the optional trailing parameter. When it is omitted (or
 * falsy) the prompt is exactly the historical four-argument output.
 *
 * @param {string} sourceText - Original text.
 * @param {string} translatedText - Translation to score.
 * @param {string} sourceLang - Source language label.
 * @param {string} targetLang - Target language label.
 * @param {string} [glossaryText] - Optional pre-formatted glossary to show the evaluator.
 * @returns {string} Prompt requesting a single JSON object with score, breakdown and issues.
 */
function buildQualityEvaluationPrompt(sourceText, translatedText, sourceLang, targetLang, glossaryText) {
  // Only emitted when a glossary exists, so existing callers get unchanged output.
  const glossarySection = glossaryText
    ? ["## Glossary Terms (must be applied exactly):", `${glossaryText}`, ""]
    : [];
  return [
    "Rate this translation's quality from 0 to 100.",
    "",
    `## Source (${sourceLang}):`,
    `${sourceText}`,
    "",
    `## Translation (${targetLang}):`,
    `${translatedText}`,
    "",
    ...glossarySection,
    "## Evaluation Criteria:",
    "- Semantic accuracy (40 points)",
    "- Fluency and naturalness (25 points)",
    "- Glossary compliance (20 points)",
    "- Format preservation (15 points)",
    "",
    "Respond with only a JSON object:",
    `{"score": <number>, "breakdown": {"accuracy": <n>, "fluency": <n>, "glossary": <n>, "format": <n>}, "issues": ["issue1", "issue2"]}`
  ].join("\n");
}
|
|
2400
|
+
/**
 * Build the prompt that asks the model for an MQM (Multidimensional Quality
 * Metrics) evaluation: a list of typed, severity-weighted errors plus a score.
 *
 * @param {string} sourceText - Original text.
 * @param {string} translatedText - Translation to evaluate.
 * @param {string} sourceLang - Source language label.
 * @param {string} targetLang - Target language label.
 * @param {string} glossaryText - Pre-formatted glossary; a fixed notice is used when falsy.
 * @returns {string} Prompt requesting a single JSON object with errors, score and summary.
 */
function buildMQMEvaluationPrompt(sourceText, translatedText, sourceLang, targetLang, glossaryText) {
  const header = [
    "Evaluate this translation using MQM (Multidimensional Quality Metrics) framework.",
    "",
    `## Source (${sourceLang}):`,
    `${sourceText}`,
    "",
    `## Translation (${targetLang}):`,
    `${translatedText}`,
    "",
    "## Glossary Terms (must be applied exactly):",
    `${glossaryText || "No glossary provided."}`,
    ""
  ];
  const errorCategories = [
    "## MQM Error Categories:",
    "- accuracy/mistranslation: Incorrect meaning",
    "- accuracy/omission: Missing content from source",
    "- accuracy/addition: Extra content not in source",
    "- accuracy/untranslated: Source text left unchanged",
    "- fluency/grammar: Grammatical errors",
    "- fluency/spelling: Spelling/typos",
    "- fluency/register: Inappropriate formality",
    "- fluency/inconsistency: Inconsistent terminology",
    "- style/awkward: Unnatural phrasing",
    "- style/unidiomatic: Non-native expressions",
    ""
  ];
  const severityWeights = [
    "## Severity Weights:",
    `- "minor" (1 point): Noticeable but doesn't affect understanding`,
    `- "major" (5 points): Affects understanding or usability`,
    `- "critical" (25 points): Completely wrong or unusable`,
    ""
  ];
  const instructions = [
    "## Instructions:",
    "1. Identify all translation errors",
    "2. Classify each by type and severity",
    "3. Provide the span and suggested fix",
    "4. Calculate score: 100 - sum(weights)",
    ""
  ];
  const responseFormat = [
    "Respond with only a JSON object:",
    "{",
    `"errors": [`,
    `{"type": "accuracy/mistranslation", "severity": "major", "span": "affected text", "suggestion": "corrected text", "explanation": "reason"}`,
    "],",
    `"score": <100 - sum of weights>,`,
    `"summary": "brief overall assessment"`,
    "}"
  ];
  return [...header, ...errorCategories, ...severityWeights, ...instructions, ...responseFormat].join("\n");
}
|
|
2444
|
+
/**
 * Build the prompt that asks the model to fix a specific list of MQM errors
 * and nothing else.
 *
 * @param {string} sourceText - Original text.
 * @param {string} currentTranslation - Translation containing the errors.
 * @param {Array} errors - MQM error annotations, rendered via formatMQMErrorsForPrompt.
 * @param {string} glossaryText - Pre-formatted glossary; a fixed notice is used when falsy.
 * @returns {string} Prompt requesting only the corrected translation.
 */
function buildMQMRefinementPrompt(sourceText, currentTranslation, errors, glossaryText) {
  const renderedErrors = formatMQMErrorsForPrompt(errors);
  const glossarySection = glossaryText || "No glossary provided.";
  return [
    "Fix the following translation errors.",
    "",
    "## Source Text:",
    `${sourceText}`,
    "",
    "## Current Translation:",
    `${currentTranslation}`,
    "",
    "## Errors to Fix:",
    `${renderedErrors}`,
    "",
    "## Glossary (MUST apply):",
    `${glossarySection}`,
    "",
    "Apply ONLY the fixes listed above. Do not make other changes.",
    "Provide ONLY the corrected translation, with no additional commentary:"
  ].join("\n");
}
|
|
2463
|
+
/**
 * Build the pre-translation analysis prompt: the model is asked to list key
 * terms, ambiguous phrases, content to keep verbatim and expected challenges.
 *
 * @param {string} sourceText - Text that will be translated.
 * @param {string} sourceLang - Source language label.
 * @param {string} targetLang - Target language label.
 * @param {string} glossaryText - Pre-formatted glossary; a fixed notice is used when falsy.
 * @returns {string} Prompt requesting a single JSON object describing the analysis.
 */
function buildPreAnalysisPrompt(sourceText, sourceLang, targetLang, glossaryText) {
  const availableGlossary = glossaryText || "No glossary provided.";
  const task = [
    `Analyze this ${sourceLang} text before translating to ${targetLang}.`,
    "",
    "## Source Text:",
    `${sourceText}`,
    "",
    "## Available Glossary Terms:",
    `${availableGlossary}`,
    "",
    "## Analyze and extract:",
    "1. **Key Terms**: Important domain-specific terms needing careful translation",
    "2. **Ambiguous Phrases**: Phrases with multiple possible interpretations",
    "3. **Preserve Exact**: Code, URLs, names that should NOT be translated",
    `4. **Challenges**: Specific difficulties for ${sourceLang}\u2192${targetLang}`,
    ""
  ];
  const responseFormat = [
    "Respond with only a JSON object:",
    "{",
    `"keyTerms": [{"term": "...", "context": "...", "suggestedTranslation": "...", "fromGlossary": true/false}],`,
    `"ambiguousPhrases": [{"phrase": "...", "interpretations": ["..."], "recommendation": "..."}],`,
    `"preserveExact": ["code snippets", "URLs", "names"],`,
    `"challenges": ["challenge 1", "challenge 2"],`,
    `"domain": "technical|marketing|legal|medical|general",`,
    `"registerRecommendation": "formal|informal|neutral"`,
    "}"
  ];
  return task.concat(responseFormat).join("\n");
}
|
|
2488
|
+
/**
 * Translation agent that produces an initial translation and then runs an
 * evaluate -> refine loop until the quality threshold is met or the iteration
 * budget is exhausted. Optionally performs a pre-translation analysis pass and
 * can evaluate either with a simple 0-100 rubric or with MQM error annotations.
 */
var TranslationAgent = class {
  provider;
  qualityThreshold;
  maxIterations;
  verbose;
  strictQuality;
  enableCaching;
  enableAnalysis;
  useMQMEvaluation;
  /**
   * @param {object} options
   * @param {object} options.provider - LLM provider exposing `name` and `chat({ messages })`.
   * @param {boolean} [options.verbose=false] - Emit progress logs.
   * @param {boolean} [options.strictQuality=false] - Throw when the threshold is not met.
   * @param {string} [options.mode="balanced"] - Mode whose config supplies the defaults below.
   * @param {number} [options.qualityThreshold] - Overrides the mode's threshold.
   * @param {number} [options.maxIterations] - Overrides the mode's iteration budget.
   * @param {boolean} [options.enableAnalysis] - Overrides the mode's analysis setting.
   * @param {boolean} [options.useMQMEvaluation] - Overrides the mode's evaluation style.
   * @param {boolean} [options.enableCaching] - Defaults to true only when the provider is named "claude".
   */
  constructor(options) {
    this.provider = options.provider;
    this.verbose = options.verbose ?? false;
    this.strictQuality = options.strictQuality ?? false;
    const modeConfig = getModeConfig(options.mode ?? "balanced");
    this.qualityThreshold = options.qualityThreshold ?? modeConfig.qualityThreshold;
    this.maxIterations = options.maxIterations ?? modeConfig.maxIterations;
    this.enableAnalysis = options.enableAnalysis ?? modeConfig.enableAnalysis;
    this.useMQMEvaluation = options.useMQMEvaluation ?? modeConfig.useMQMEvaluation;
    this.enableCaching = options.enableCaching ?? options.provider.name === "claude";
    if (this.verbose) {
      logger.info(`Translation mode: ${options.mode ?? "balanced"}`);
      logger.info(` - Analysis: ${this.enableAnalysis ? "enabled" : "disabled"}`);
      logger.info(` - MQM evaluation: ${this.useMQMEvaluation ? "enabled" : "disabled"}`);
      logger.info(` - Quality threshold: ${this.qualityThreshold}`);
      logger.info(` - Max iterations: ${this.maxIterations}`);
    }
  }
  /**
   * Translate content using Self-Refine loop with optional MAPS analysis and MQM evaluation
   *
   * @param {object} request - Uses `content`, `sourceLang`, `targetLang` and the optional `glossary` and `context`.
   * @returns {Promise<object>} `{ content, metadata, glossaryCompliance }`.
   * @throws {TranslationError} When `strictQuality` is set and the final score is below the threshold.
   */
  async translate(request) {
    const timer = createTimer();
    // NOTE(review): only the initial translation and refinement calls are counted here;
    // analysis, evaluation and reflection calls do not report usage back to this method.
    const tokens = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
    const recordUsage = (usage) => {
      tokens.input += usage.inputTokens;
      tokens.output += usage.outputTokens;
      tokens.cacheRead += usage.cacheReadTokens ?? 0;
      tokens.cacheWrite += usage.cacheWriteTokens ?? 0;
    };
    let iterations = 0;
    let currentTranslation = "";
    const glossaryText = request.glossary ? createGlossaryLookup(request.glossary).formatForPrompt() : "";
    // Shared by the fast-mode return and the normal return so both stay in sync.
    const buildResult = (qualityScore, qualityThreshold, thresholdMet) => ({
      content: currentTranslation,
      metadata: {
        qualityScore,
        qualityThreshold,
        thresholdMet,
        iterations,
        tokensUsed: { ...tokens },
        duration: timer.elapsed(),
        provider: this.provider.name,
        model: "default"
      },
      glossaryCompliance: request.glossary ? this.checkGlossaryCompliance(
        request.content,
        currentTranslation,
        request.glossary
      ) : void 0
    });

    let analysis = null;
    if (this.enableAnalysis) {
      if (this.verbose) {
        logger.info("Analyzing source text (MAPS)...");
      }
      analysis = await this.analyzeSource(
        request.content,
        request.sourceLang,
        request.targetLang,
        glossaryText
      );
      if (this.verbose && analysis) {
        logger.info(` - Domain: ${analysis.domain}`);
        logger.info(` - Key terms: ${analysis.keyTerms.length}`);
        logger.info(` - Challenges: ${analysis.challenges.length}`);
      }
    }

    if (this.verbose) {
      logger.info("Starting initial translation...");
    }
    const initialResult = await this.generateInitialTranslation(
      request.content,
      request.sourceLang,
      request.targetLang,
      glossaryText,
      request.context,
      analysis
    );
    currentTranslation = initialResult.content;
    iterations++;
    recordUsage(initialResult.usage);

    if (this.maxIterations <= 1 && this.qualityThreshold <= 0) {
      if (this.verbose) {
        logger.info("Fast mode: Skipping evaluation and refinement");
      }
      return buildResult(0, 0, true);
    }

    let qualityScore = 0;
    let lastEvaluation = null;
    let lastMQMEvaluation = null;
    // Evaluates the current translation with whichever evaluator is configured.
    const evaluateCurrent = async () => {
      if (this.useMQMEvaluation) {
        lastMQMEvaluation = await this.evaluateQualityMQM(
          request.content,
          currentTranslation,
          request.sourceLang,
          request.targetLang,
          glossaryText
        );
        qualityScore = lastMQMEvaluation.score;
      } else {
        lastEvaluation = await this.evaluateQuality(
          request.content,
          currentTranslation,
          request.sourceLang,
          request.targetLang,
          glossaryText
        );
        qualityScore = lastEvaluation.score;
      }
    };

    while (iterations < this.maxIterations) {
      if (this.verbose) {
        logger.info(
          `Evaluating translation quality (iteration ${iterations})...`
        );
      }
      await evaluateCurrent();
      if (this.verbose) {
        if (this.useMQMEvaluation) {
          logger.info(`MQM score: ${qualityScore}/${this.qualityThreshold}`);
          if (lastMQMEvaluation.errors.length > 0) {
            logger.info(` - Errors: ${lastMQMEvaluation.errors.length} (${lastMQMEvaluation.breakdown.accuracy} accuracy, ${lastMQMEvaluation.breakdown.fluency} fluency, ${lastMQMEvaluation.breakdown.style} style)`);
          }
        } else {
          logger.info(`Quality score: ${qualityScore}/${this.qualityThreshold}`);
        }
      }
      if (qualityScore >= this.qualityThreshold) {
        if (this.verbose) {
          logger.success(
            `Quality threshold met after ${iterations} iterations`
          );
        }
        break;
      }
      if (this.verbose) {
        logger.info("Refining translation...");
      }
      let improveResult;
      if (this.useMQMEvaluation && lastMQMEvaluation && lastMQMEvaluation.errors.length > 0) {
        improveResult = await this.refineWithMQM(
          request.content,
          currentTranslation,
          lastMQMEvaluation.errors,
          glossaryText
        );
      } else {
        // No structured errors available: fall back to free-form reflection.
        const suggestions = await this.generateReflection(
          request.content,
          currentTranslation,
          request.sourceLang,
          request.targetLang,
          glossaryText
        );
        improveResult = await this.improveTranslation(
          request.content,
          currentTranslation,
          suggestions,
          glossaryText
        );
      }
      currentTranslation = improveResult.content;
      iterations++;
      recordUsage(improveResult.usage);
    }

    // The loop exits without scoring the latest text when the budget ran out
    // (or never ran at all), so score it once more in those cases.
    const latestEvaluation = this.useMQMEvaluation ? lastMQMEvaluation : lastEvaluation;
    if (!latestEvaluation || iterations === this.maxIterations) {
      await evaluateCurrent();
    }

    const thresholdMet = qualityScore >= this.qualityThreshold;
    if (!thresholdMet && this.strictQuality) {
      throw new TranslationError("QUALITY_THRESHOLD_NOT_MET" /* QUALITY_THRESHOLD_NOT_MET */, {
        score: qualityScore,
        threshold: this.qualityThreshold,
        iterations,
        maxIterations: this.maxIterations,
        issues: lastEvaluation?.issues ?? lastMQMEvaluation?.errors.map((e) => `${e.type}: ${e.span}`) ?? []
      });
    }
    if (!thresholdMet && this.verbose) {
      logger.warn(
        `Quality threshold not met: ${qualityScore}/${this.qualityThreshold} after ${iterations} iterations`
      );
    }
    if (this.verbose && (tokens.cacheRead > 0 || tokens.cacheWrite > 0)) {
      const cacheHitRate = tokens.cacheRead > 0 ? (tokens.cacheRead / (tokens.cacheRead + tokens.input) * 100).toFixed(1) : "0";
      logger.info(
        `Cache stats: ${tokens.cacheRead} read, ${tokens.cacheWrite} written (${cacheHitRate}% hit rate)`
      );
    }
    return buildResult(qualityScore, this.qualityThreshold, thresholdMet);
  }
  // ============================================================================
  // Private Methods
  // ============================================================================
  /**
   * Turn a provider response into `{ content, usage }`: strips prompt
   * artifacts and re-applies the source's leading/trailing whitespace.
   */
  buildTranslationResult(sourceText, response) {
    const cleanedContent = this.cleanTranslationOutput(response.content, sourceText);
    return {
      content: this.preserveWhitespace(sourceText, cleanedContent),
      usage: {
        inputTokens: response.usage.inputTokens,
        outputTokens: response.usage.outputTokens,
        cacheReadTokens: response.usage.cacheReadTokens,
        cacheWriteTokens: response.usage.cacheWriteTokens
      }
    };
  }
  /**
   * Produce the first translation, optionally enriched with the analysis
   * output. Uses a cacheable multi-part message when caching is enabled.
   */
  async generateInitialTranslation(sourceText, sourceLang, targetLang, glossaryText, context, analysis) {
    let messages;
    const analysisContext = analysis ? formatAnalysisForPrompt(analysis) : "";
    const enrichedContext = {
      documentPurpose: context?.documentPurpose,
      styleInstruction: context?.styleInstruction,
      // Only the two most recent chunks are forwarded as context.
      previousContext: context?.previousChunks?.slice(-2).join("\n")
    };
    if (this.enableCaching) {
      const baseMessage = buildCacheableTranslationMessage(
        sourceText,
        sourceLang,
        targetLang,
        glossaryText,
        enrichedContext
      );
      if (analysisContext && Array.isArray(baseMessage.content)) {
        baseMessage.content.splice(2, 0, {
          type: "text",
          text: `\n## Pre-Translation Analysis:\n${analysisContext}\n`
        });
      }
      messages = [baseMessage];
    } else {
      let prompt = buildInitialTranslationPrompt(
        sourceText,
        sourceLang,
        targetLang,
        glossaryText,
        enrichedContext
      );
      if (analysisContext) {
        // Function replacer: a plain string replacement would expand `$&`, `$1`,
        // `$'` etc. if they happen to occur in the analysis text.
        prompt = prompt.replace(
          "## Source Text:",
          () => `## Pre-Translation Analysis:\n${analysisContext}\n\n## Source Text:`
        );
      }
      messages = [{ role: "user", content: prompt }];
    }
    const response = await this.provider.chat({ messages });
    return this.buildTranslationResult(sourceText, response);
  }
  /**
   * Ask the model for free-form improvement suggestions.
   * @returns {Promise<string>} Trimmed suggestion text.
   */
  async generateReflection(sourceText, translatedText, sourceLang, targetLang, glossaryText) {
    const prompt = buildReflectionPrompt(
      sourceText,
      translatedText,
      sourceLang,
      targetLang,
      glossaryText
    );
    const messages = [{ role: "user", content: prompt }];
    const response = await this.provider.chat({ messages });
    return response.content.trim();
  }
  /**
   * Rewrite the translation according to reflection suggestions. With caching
   * enabled the glossary goes in its own part marked with an ephemeral cacheControl.
   */
  async improveTranslation(sourceText, currentTranslation, suggestions, glossaryText) {
    let messages;
    if (this.enableCaching) {
      const contentParts = [
        {
          type: "text",
          text: `Improve this translation based on the following suggestions.\n\n## Glossary (MUST apply):\n${glossaryText || "No glossary provided."}`,
          cacheControl: { type: "ephemeral" }
        },
        {
          type: "text",
          text: `## Source Text:\n${sourceText}\n\n## Current Translation:\n${currentTranslation}\n\n## Improvement Suggestions:\n${suggestions}\n\nProvide ONLY the improved translation below, with no additional commentary or headers:`
        }
      ];
      messages = [{ role: "user", content: contentParts }];
    } else {
      const prompt = buildImprovementPrompt(
        sourceText,
        currentTranslation,
        suggestions,
        glossaryText
      );
      messages = [{ role: "user", content: prompt }];
    }
    const response = await this.provider.chat({ messages });
    return this.buildTranslationResult(sourceText, response);
  }
  /**
   * Score a translation with the 0-100 rubric.
   *
   * A response that is not parseable JSON, or whose `score` is not a finite
   * number, yields the default evaluation (score 75) instead of leaking an
   * undefined/string score into the refinement loop. Scores are clamped to 0-100.
   *
   * @param {string} [glossaryText] - Optional glossary forwarded to the prompt builder.
   * @returns {Promise<{score: number, breakdown: object, issues: string[]}>}
   */
  async evaluateQuality(sourceText, translatedText, sourceLang, targetLang, glossaryText) {
    const prompt = buildQualityEvaluationPrompt(
      sourceText,
      translatedText,
      sourceLang,
      targetLang,
      glossaryText
    );
    const messages = [{ role: "user", content: prompt }];
    const response = await this.provider.chat({ messages });
    try {
      const jsonMatch = response.content.match(/\{[\s\S]*\}/);
      if (!jsonMatch) {
        throw new Error("No JSON found in response");
      }
      const evaluation = JSON.parse(jsonMatch[0]);
      const rawScore = evaluation.score;
      const score = typeof rawScore === "number" ? rawScore : Number.parseFloat(rawScore);
      if (!Number.isFinite(score)) {
        throw new Error("Evaluation response has no numeric score");
      }
      return {
        score: Math.min(100, Math.max(0, score)),
        breakdown: evaluation.breakdown,
        issues: Array.isArray(evaluation.issues) ? evaluation.issues : []
      };
    } catch {
      // Best-effort: an unreadable evaluation must not abort the translation.
      return {
        score: 75,
        // Default score
        breakdown: {
          accuracy: 30,
          fluency: 20,
          glossary: 15,
          format: 10
        },
        issues: ["Failed to parse quality evaluation response"]
      };
    }
  }
  /**
   * Pre-translation analysis using MAPS-style approach
   * Identifies key terms, ambiguous phrases, and translation challenges
   *
   * Any failure (provider or parsing) results in an empty analysis.
   */
  async analyzeSource(sourceText, sourceLang, targetLang, glossaryText) {
    const prompt = buildPreAnalysisPrompt(
      sourceText,
      sourceLang,
      targetLang,
      glossaryText
    );
    const messages = [{ role: "user", content: prompt }];
    try {
      const response = await this.provider.chat({ messages });
      return parseAnalysisResponse(response.content);
    } catch (error) {
      if (this.verbose) {
        logger.warn(`Pre-analysis failed: ${error}`);
      }
      return createEmptyAnalysis();
    }
  }
  /**
   * Evaluate translation quality using MQM framework
   * Returns structured error annotations for targeted refinement
   *
   * Falls back to an error-free evaluation with score 75 when the provider
   * call throws or the response cannot be parsed.
   */
  async evaluateQualityMQM(sourceText, translatedText, sourceLang, targetLang, glossaryText) {
    const fallback = (summary) => ({
      errors: [],
      score: 75,
      summary,
      breakdown: { accuracy: 0, fluency: 0, style: 0 }
    });
    const prompt = buildMQMEvaluationPrompt(
      sourceText,
      translatedText,
      sourceLang,
      targetLang,
      glossaryText
    );
    const messages = [{ role: "user", content: prompt }];
    try {
      const response = await this.provider.chat({ messages });
      const evaluation = parseMQMResponse(response.content);
      return evaluation || fallback("Failed to parse MQM evaluation");
    } catch {
      return fallback("MQM evaluation failed");
    }
  }
  /**
   * Refine translation based on MQM error annotations
   * Applies targeted fixes for identified errors
   */
  async refineWithMQM(sourceText, currentTranslation, errors, glossaryText) {
    const prompt = buildMQMRefinementPrompt(
      sourceText,
      currentTranslation,
      errors,
      glossaryText
    );
    const messages = [{ role: "user", content: prompt }];
    const response = await this.provider.chat({ messages });
    return this.buildTranslationResult(sourceText, response);
  }
  /**
   * Clean up translation output by removing prompt artifacts
   * Uses guardrails to detect and remove any trailing prompt-like content
   *
   * Each guardrail is skipped when `sourceText` itself has the shape the
   * guardrail looks for (ends with a "## Header:" line, ends with a colon, or
   * contains a "1. **Bold**" list item), because in that case the matching
   * content in the translation is expected and removing it would lose text.
   * Without `sourceText` every guardrail applies unconditionally.
   *
   * @param {string} text - Raw model output.
   * @param {string} [sourceText] - Source text the output was translated from.
   * @returns {string} Trimmed, cleaned translation.
   */
  cleanTranslationOutput(text, sourceText) {
    const source = typeof sourceText === "string" ? sourceText : "";
    let cleaned = text.trim();

    const trailingHeaderPattern = /\n+##\s+[A-Z][^:\n]*:\s*$/;
    const sourceEndsWithHeader = /(?:^|\n)##\s+[A-Z][^:\n]*:\s*$/.test(source);
    if (!sourceEndsWithHeader) {
      cleaned = cleaned.replace(trailingHeaderPattern, "");
    }

    const incompletePromptPattern = /:\s*$/;
    const sourceEndsWithColon = incompletePromptPattern.test(source);
    if (!sourceEndsWithColon && incompletePromptPattern.test(cleaned)) {
      const lines = cleaned.split("\n");
      while (lines.length > 0 && incompletePromptPattern.test(lines[lines.length - 1]?.trim() ?? "")) {
        lines.pop();
      }
      cleaned = lines.join("\n");
    }

    const evaluationListPattern = /\n+\d+\.\s*\*\*[^*]+\*\*[\s\S]*$/;
    const sourceHasBoldList = /(?:^|\n)\d+\.\s*\*\*[^*]+\*\*/.test(source);
    if (!sourceHasBoldList) {
      cleaned = cleaned.replace(evaluationListPattern, "");
    }
    return cleaned.trim();
  }
  /**
   * Preserve leading/trailing whitespace from source text in translated text
   * This ensures document structure (line breaks between sections) is maintained
   */
  preserveWhitespace(sourceText, translatedText) {
    const leadingMatch = sourceText.match(/^(\s*)/);
    const leadingWhitespace = leadingMatch ? leadingMatch[1] : "";
    const trailingMatch = sourceText.match(/(\s*)$/);
    const trailingWhitespace = trailingMatch ? trailingMatch[1] : "";
    return leadingWhitespace + translatedText + trailingWhitespace;
  }
  /**
   * Report which glossary terms found in the source have their target form
   * present in the translation (substring match, case-insensitive unless the
   * term is marked caseSensitive).
   *
   * @returns {{applied: string[], missed: string[]}} Source-side terms in each bucket.
   */
  checkGlossaryCompliance(sourceText, translatedText, glossary) {
    const lookup = createGlossaryLookup(glossary);
    const sourceTerms = lookup.findAll(sourceText);
    const applied = [];
    const missed = [];
    for (const term of sourceTerms) {
      const targetInTranslation = term.caseSensitive ? translatedText.includes(term.target) : translatedText.toLowerCase().includes(term.target.toLowerCase());
      if (targetInTranslation) {
        applied.push(term.source);
      } else {
        missed.push(term.source);
      }
    }
    return { applied, missed };
  }
};
|
|
3018
|
+
/**
 * Factory wrapper around the TranslationAgent constructor.
 *
 * @param {object} options - Forwarded unchanged to `new TranslationAgent(options)`.
 * @returns {TranslationAgent} The newly constructed agent.
 */
function createTranslationAgent(options) {
  const agent = new TranslationAgent(options);
  return agent;
}
|
|
3021
|
+
/**
 * Parse markdown (with GFM extensions) into an AST and index its text nodes.
 *
 * @param {string} content - Markdown source.
 * @returns {Promise<{original: string, ast: object, textNodes: Array}>} The
 *   untouched input, the parsed tree, and the result of extractTextNodes(ast).
 */
async function parseMarkdown(content) {
  const ast = unified().use(remarkParse).use(remarkGfm).parse(content);
  return {
    original: content,
    ast,
    textNodes: extractTextNodes(ast)
  };
}
|
|
3031
|
+
/**
 * Write translations into a copy of the document's AST and serialize it back
 * to markdown. The document's own AST is not modified (a structuredClone is used).
 *
 * @param {{ast: object, textNodes: Array}} document - Parsed document.
 * @param {Object<string, string>} translations - Translated text keyed by text-node id.
 * @returns {Promise<string>} Markdown produced by remark-stringify.
 */
async function applyTranslations(document, translations) {
  const workingAst = structuredClone(document.ast);
  // Nodes that are not translatable, or have no (non-empty) translation, stay as they are.
  const pending = document.textNodes.filter(
    (entry) => entry.translatable && translations[entry.id]
  );
  pending.forEach((entry) => {
    const target = getNodeAtPath(workingAst, entry.path);
    if (target && "value" in target) {
      target.value = translations[entry.id];
    }
  });
  const stringifyOptions = {
    bullet: "-",
    emphasis: "*",
    strong: "*",
    fence: "`",
    fences: true,
    listItemIndent: "one"
  };
  const serializer = unified().use(remarkGfm).use(remarkStringify, stringifyOptions);
  return String(serializer.stringify(workingAst));
}
|
|
3053
|
+
/**
 * Walk the AST and list its `code`, `inlineCode` and non-blank `text` nodes.
 * Code nodes are recorded as non-translatable; text nodes as translatable.
 * Ids are assigned sequentially ("node-0", "node-1", ...) in visit order.
 *
 * @param {object} ast - Tree to traverse with `visit`.
 * @returns {Array<{id: string, content: string, type: string, position: object, path: number[], translatable: boolean}>}
 */
function extractTextNodes(ast) {
  const collected = [];
  let nextId = 0;
  const record = (node, index, parent, content, translatable) => {
    collected.push({
      id: `node-${nextId++}`,
      content,
      type: node.type,
      position: node.position,
      path: getNodePath(ast, node, index, parent),
      translatable
    });
  };
  visit(ast, (node, index, parent) => {
    switch (node.type) {
      case "code":
      case "inlineCode":
        record(node, index, parent, node.value, false);
        break;
      case "text":
        // Whitespace-only text carries nothing to translate.
        if (node.value.trim()) {
          record(node, index, parent, node.value, true);
        }
        break;
      default:
        break;
    }
  });
  return collected;
}
|
|
3083
|
+
/**
 * Compute the child-index path from `_root` down to `_node`, i.e. the list of
 * indexes that getNodeAtPath must follow through successive `children` arrays
 * to reach the node.
 *
 * The previous implementation only had the node's index within its direct
 * parent and emitted it twice, which does not describe the node's position
 * for anything nested deeper than that coincidence allows. The path is now
 * found by searching the tree for the node by identity.
 *
 * Each call walks the tree from the root, so it is linear in tree size.
 *
 * @param {object} _root - Tree root the path is relative to.
 * @param {object} _node - Node to locate (compared by identity).
 * @param {number|undefined} index - Index of `_node` within `parent`; used only as a fallback.
 * @param {object|undefined} parent - Direct parent of `_node`; kept for interface compatibility.
 * @returns {number[]} Index path from the root; `[]` for the root itself. If
 *   the node cannot be found under `_root`, falls back to `[index]` (or `[]`
 *   when `index` is undefined).
 */
function getNodePath(_root, _node, index, parent) {
  const trail = [];
  // Depth-first search that keeps `trail` equal to the path of the node being visited.
  const search = (current) => {
    if (current === _node) {
      return true;
    }
    const children = current && Array.isArray(current.children) ? current.children : [];
    for (let i = 0; i < children.length; i++) {
      trail.push(i);
      if (search(children[i])) {
        return true;
      }
      trail.pop();
    }
    return false;
  };
  if (_root && _node && search(_root)) {
    return trail;
  }
  return index !== void 0 ? [index] : [];
}
|
|
3096
|
+
/**
 * Follow a list of child indexes down from `ast` and return the node reached.
 *
 * @param {object} ast - Node to start from.
 * @param {number[]} path - Child index to take at each level.
 * @returns {object|null} The node at the path, `ast` itself for an empty
 *   path, or null when a level has no `children` array or no child at the index.
 */
function getNodeAtPath(ast, path) {
  let cursor = ast;
  for (const childIndex of path) {
    const hasChildList = "children" in cursor && Array.isArray(cursor.children);
    if (!hasChildList) {
      return null;
    }
    const next = cursor.children[childIndex];
    if (!next) {
      return null;
    }
    cursor = next;
  }
  return cursor;
}
|
|
3109
|
+
/**
 * List the content of every translatable text node, in document order.
 *
 * @param {{textNodes: Array<{translatable: boolean, content: string}>}} document
 * @returns {string[]} Contents of the nodes whose `translatable` flag is truthy.
 */
function getTranslatableText(document) {
  const contents = [];
  for (const node of document.textNodes) {
    if (node.translatable) {
      contents.push(node.content);
    }
  }
  return contents;
}
|
|
3112
|
+
/**
 * Pair translatable text nodes with translations by position and key the
 * result by node id. Pairing stops at the shorter of the two lists; a
 * null/undefined translation falls back to the node's original content.
 *
 * @param {{textNodes: Array}} document - Parsed document.
 * @param {Array<string|null|undefined>} translations - Translations in translatable-node order.
 * @returns {Object<string, string>} Map from node id to text.
 */
function createTranslationMap(document, translations) {
  const candidates = document.textNodes.filter(({ translatable }) => translatable);
  const idToTranslation = {};
  let position = 0;
  while (position < candidates.length && position < translations.length) {
    const candidate = candidates[position];
    if (candidate) {
      idToTranslation[candidate.id] = translations[position] ?? candidate.content;
    }
    position += 1;
  }
  return idToTranslation;
}
|
|
3123
|
+
/**
 * Replace content that must survive translation untouched with placeholders.
 *
 * Applied in order: fenced code blocks (`__CODE_BLOCK_n__`), inline code
 * delimited by two or more backticks and then by single backticks
 * (`__INLINE_CODE_n__`), and finally the URL part of markdown links
 * (`__LINK_URL_n__`, link text stays in place). One counter is shared by all
 * placeholder kinds.
 *
 * @param {string} content - Markdown text.
 * @returns {{text: string, preservedSections: Map<string, string>}} Text with
 *   placeholders and the placeholder -> original mapping.
 */
function extractTextForTranslation(content) {
  const preservedSections = /* @__PURE__ */ new Map();
  let placeholderIndex = 0;
  // Registers `original` under a fresh placeholder and returns the placeholder.
  const stash = (prefix, original) => {
    const placeholder = `__${prefix}_${placeholderIndex++}__`;
    preservedSections.set(placeholder, original);
    return placeholder;
  };
  const text = content
    .replace(/^[ \t]*```[^\n]*\n[\s\S]*?^[ \t]*```[ \t]*$/gm, (match) => stash("CODE_BLOCK", match))
    .replace(/(`{2,})(?:[^`\n]|`(?!\1))*?\1/g, (match) => stash("INLINE_CODE", match))
    .replace(/`[^`\n]+`/g, (match) => stash("INLINE_CODE", match))
    .replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, linkText, url) => `[${linkText}](${stash("LINK_URL", url)})`);
  return { text, preservedSections };
}
|
|
3148
|
+
/**
 * Put preserved fragments back in place of their placeholders.
 *
 * Placeholders of the form `__NAME__` are matched loosely: case-insensitively,
 * with any number of surrounding underscores, and together with adjacent
 * spaces/tabs (presumably because translated text may come back with the
 * placeholder slightly altered — TODO confirm with the agent prompts). Because
 * that loose match swallows neighbouring whitespace, `ensureInlineCodeSpacing`
 * is run afterwards to re-insert spaces around inline code.
 *
 * Fenced code blocks (`__CODE_BLOCK_n__`) are restored only AFTER the spacing
 * pass. Previously they were restored first, so the spacing pass also rewrote
 * the contents of code samples (e.g. inserting spaces around template
 * literals).
 *
 * @param {string} translatedText - Text containing placeholders.
 * @param {Map<string, string>} preservedSections - Placeholder -> original.
 * @returns {string} Text with every placeholder replaced by its original.
 */
function restorePreservedSections(translatedText, preservedSections) {
  // Longest placeholder first, so a short placeholder is never substituted
  // inside a longer one that merely starts with the same characters.
  const byLongestPlaceholder = (a, b) => b[0].length - a[0].length;

  const substitute = (text, entries) => {
    let result = text;
    for (const [placeholder, original] of entries) {
      const match = placeholder.match(/^__(.+)__$/);
      if (match && match[1]) {
        const escapedId = match[1].replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        // `(?!\d)` keeps `..._1` from matching the start of `..._10`.
        const flexiblePattern = new RegExp(
          `[ \\t]*_*_*[ \\t]*${escapedId}(?!\\d)[ \\t]*_*_*[ \\t]*`,
          "gi"
        );
        // Function replacer: `$` sequences in `original` stay literal.
        result = result.replace(flexiblePattern, () => original);
      } else {
        result = result.split(placeholder).join(original);
      }
    }
    return result;
  };

  const entries = [...preservedSections.entries()];
  const isFencedBlock = ([placeholder]) => placeholder.startsWith("__CODE_BLOCK_");
  const inlineEntries = entries.filter((entry) => !isFencedBlock(entry)).sort(byLongestPlaceholder);
  const fencedEntries = entries.filter(isFencedBlock).sort(byLongestPlaceholder);

  const withInline = ensureInlineCodeSpacing(substitute(translatedText, inlineEntries));
  return substitute(withInline, fencedEntries);
}
|
|
3170
|
+
/**
 * Make sure inline code spans are separated by a space from adjacent word
 * characters (ASCII word characters plus the U+3000-U+9FFF and U+AC00-U+D7AF
 * ranges) and from a preceding ordered-list marker such as `1.`.
 *
 * Spans are located in a single left-to-right pass, pairing an opening run of
 * N backticks with the next closing run of exactly N backticks. The previous
 * implementation ran three independent regex passes that each required a
 * neighbouring character, which let the CLOSING backtick of one span be taken
 * as the opening backtick of a phantom span: "`foo`:`bar`" became
 * "`foo `:` bar`".
 *
 * @param {string} text
 * @returns {string} `text` with spaces inserted where a span touched a word
 *   character or list marker; span contents are never modified.
 */
function ensureInlineCodeSpacing(text) {
  const wordLike = /[\w\u3000-\u9fff\uac00-\ud7af]/;
  // A word character, or a digit followed by "." (list marker), right before the span.
  const needsSpaceBefore = /(?:[\w\u3000-\u9fff\uac00-\ud7af]|\d\.)$/;
  // Opening run of backticks, content on one line, closing run of equal length.
  const codeSpan = /(?<!`)(`+)(?!`)[^\n]*?[^`\n]\1(?!`)/g;

  return text.replace(codeSpan, (span, _ticks, offset, whole) => {
    const before = whole.slice(Math.max(0, offset - 2), offset);
    const after = whole.charAt(offset + span.length);
    const lead = needsSpaceBefore.test(before) ? " " : "";
    const trail = after !== "" && wordLike.test(after) ? " " : "";
    return `${lead}${span}${trail}`;
  });
}
|
|
3185
|
+
/**
 * Translate markdown while keeping code and link targets untouched.
 *
 * Non-translatable fragments are swapped for placeholders, the remaining text
 * is passed to `translateFn`, and the fragments are restored in its output.
 *
 * @param {string} content - Markdown source.
 * @param {(text: string) => (string|Promise<string>)} translateFn - Receives
 *   the placeholder-bearing text and returns its translation.
 * @returns {Promise<string>} Translated markdown with fragments restored.
 */
async function translateMarkdownContent(content, translateFn) {
  const extracted = extractTextForTranslation(content);
  const translated = await translateFn(extracted.text);
  return restorePreservedSections(translated, extracted.preservedSections);
}
|
|
3190
|
+
|
|
3191
|
+
// src/core/engine.ts
|
|
3192
|
+
/**
 * Coordinates translation of a document: loads the glossary, splits content
 * into chunks, sends each chunk through a translation agent (consulting the
 * cache first), reassembles the result and reports glossary compliance.
 */
var TranslationEngine = class {
  /** Configuration object passed in by the caller. */
  config;
  /** Provider instance handed to the translation agent. */
  provider;
  /** When true, progress is reported through `logger`. */
  verbose;
  /** Cache manager; a null cache manager when caching is disabled. */
  cache;
  /** Number of chunk lookups answered from the cache over this engine's lifetime. */
  cacheHits = 0;
  /** Number of chunk lookups that were not answered from the cache. */
  cacheMisses = 0;

  /**
   * @param {object} options
   * @param {object} options.config - Translation configuration.
   * @param {object} [options.provider] - Provider to use; when omitted one is
   *   built from the environment for `config.provider.default`.
   * @param {boolean} [options.verbose=false]
   * @param {boolean} [options.noCache] - Force-disable the cache.
   */
  constructor(options) {
    this.config = options.config;
    this.verbose = options.verbose ?? false;
    if (options.provider) {
      this.provider = options.provider;
    } else {
      const providerConfig = getProviderConfigFromEnv(this.config.provider.default);
      if (this.config.provider.model) {
        providerConfig.defaultModel = this.config.provider.model;
      }
      this.provider = getProvider(this.config.provider.default, providerConfig);
    }
    // Caching needs both: not explicitly disabled, and a configured cache path.
    const cacheDisabled = options.noCache || !this.config.paths?.cache;
    if (cacheDisabled) {
      this.cache = createNullCacheManager();
      if (this.verbose && options.noCache) {
        logger.info("Cache disabled (--no-cache)");
      }
    } else {
      this.cache = createCacheManager({
        cacheDir: this.config.paths.cache,
        verbose: this.verbose
      });
      if (this.verbose) {
        const stats = this.cache.getStats();
        logger.info(`Cache initialized: ${stats.entries} entries`);
      }
    }
  }

  /**
   * Translate a single file/content.
   *
   * @param {object} options - Must provide `content`, `sourceLang` and
   *   `targetLang`; may provide `format`, `glossaryPath`, `strictGlossary`,
   *   `qualityThreshold`, `maxIterations`, `strictQuality`, `context` and
   *   `styleInstruction`.
   * @returns {Promise<object>} `{ content, chunks, metadata }`, plus
   *   `glossaryCompliance` when a non-empty glossary was loaded.
   * @throws {TranslationError} GLOSSARY_COMPLIANCE_FAILED when
   *   `options.strictGlossary` is set and a glossary term present in the
   *   source has no counterpart in the translation.
   */
  async translateContent(options) {
    const timer = createTimer();
    const format = options.format ?? this.detectFormat(options.content);
    if (this.verbose) {
      logger.info(`Translating content (${format} format)`);
      logger.info(`Source: ${options.sourceLang} \u2192 Target: ${options.targetLang}`);
    }

    // Glossary loading is best-effort: a failure leaves `glossary` undefined
    // and translation continues without it. An explicit path wins over config.
    let glossary;
    if (options.glossaryPath) {
      try {
        const rawGlossary = await loadGlossary(options.glossaryPath);
        glossary = resolveGlossary(rawGlossary, options.targetLang);
        if (this.verbose) {
          logger.info(`Loaded glossary: ${glossary.terms.length} terms`);
        }
      } catch (error) {
        if (this.verbose) {
          logger.warn(`Failed to load glossary: ${error}`);
        }
      }
    } else if (this.config.glossary?.path) {
      try {
        const rawGlossary = await loadGlossary(this.config.glossary.path);
        glossary = resolveGlossary(rawGlossary, options.targetLang);
        if (this.verbose) {
          logger.info(`Loaded glossary from config: ${glossary.terms.length} terms`);
        }
      } catch (error) {
        // Previously swallowed silently; report it like the explicit-path branch.
        if (this.verbose) {
          logger.warn(`Failed to load glossary from config: ${error}`);
        }
      }
    }

    let result;
    switch (format) {
      case "markdown":
        result = await this.translateMarkdown(options, glossary);
        break;
      case "html":
      case "text":
      default:
        // HTML currently goes through the plain-text path.
        result = await this.translatePlainText(options, glossary);
        break;
    }
    result.metadata.totalDuration = timer.elapsed();

    if (glossary && glossary.terms.length > 0) {
      const compliance = this.checkDocumentGlossaryCompliance(
        options.content,
        result.content,
        glossary
      );
      result.glossaryCompliance = compliance;
      if (this.verbose) {
        logger.info(`Glossary compliance: ${compliance.applied.length}/${compliance.applied.length + compliance.missed.length} terms applied`);
        if (compliance.missed.length > 0) {
          logger.warn(`Missed glossary terms: ${compliance.missed.join(", ")}`);
        }
      }
      if (options.strictGlossary && !compliance.compliant) {
        throw new TranslationError("GLOSSARY_COMPLIANCE_FAILED" /* GLOSSARY_COMPLIANCE_FAILED */, {
          missed: compliance.missed.join(", "),
          applied: compliance.applied,
          total: glossary.terms.length
        });
      }
    }

    if (this.verbose) {
      logger.success(`Translation complete in ${timer.format()}`);
      logger.info(`Average quality: ${result.metadata.averageQuality.toFixed(1)}/100`);
    }
    return result;
  }

  /**
   * Check glossary compliance for the entire document.
   *
   * A term counts only when its source text occurs in `sourceContent`
   * (substring match, case-insensitive unless `term.caseSensitive`). It is
   * "applied" when its target text occurs in `translatedContent`, otherwise
   * "missed".
   *
   * @param {string} sourceContent
   * @param {string} translatedContent
   * @param {{terms: Array<{source: string, target: string, caseSensitive?: boolean}>}} glossary
   * @returns {{applied: string[], missed: string[], compliant: boolean}}
   */
  checkDocumentGlossaryCompliance(sourceContent, translatedContent, glossary) {
    const applied = [];
    const missed = [];
    const sourceLower = sourceContent.toLowerCase();
    const translatedLower = translatedContent.toLowerCase();
    for (const term of glossary.terms) {
      const sourceInContent = term.caseSensitive
        ? sourceContent.includes(term.source)
        : sourceLower.includes(term.source.toLowerCase());
      if (!sourceInContent) {
        continue;
      }
      const targetInTranslation = term.caseSensitive
        ? translatedContent.includes(term.target)
        : translatedLower.includes(term.target.toLowerCase());
      if (targetInTranslation) {
        applied.push(term.source);
      } else {
        missed.push(term.source);
      }
    }
    return {
      applied,
      missed,
      compliant: missed.length === 0
    };
  }

  // ============================================================================
  // Format-Specific Translation
  // ============================================================================

  /**
   * Translate markdown: code and link targets are replaced by placeholders
   * before chunking and restored after the translated chunks are joined.
   */
  async translateMarkdown(options, glossary) {
    const { text, preservedSections } = extractTextForTranslation(options.content);
    const chunks = chunkContent(text, {
      maxTokens: this.config.chunking.maxTokens,
      overlapTokens: this.config.chunking.overlapTokens
    });
    if (this.verbose) {
      const stats = getChunkStats(chunks);
      logger.info(`Chunked into ${stats.translatableChunks} translatable sections`);
    }
    const { chunkResults, totals } = await this.translateChunks(chunks, options, glossary);
    const translatedText = chunkResults.map((r) => r.translated).join("");
    const finalContent = restorePreservedSections(translatedText, preservedSections);
    return this.buildDocumentResult(finalContent, chunkResults, totals);
  }

  /** Translate content as-is: chunk, translate each chunk, join. */
  async translatePlainText(options, glossary) {
    const chunks = chunkContent(options.content, {
      maxTokens: this.config.chunking.maxTokens,
      overlapTokens: this.config.chunking.overlapTokens
    });
    const { chunkResults, totals } = await this.translateChunks(chunks, options, glossary);
    const translatedContent = chunkResults.map((r) => r.translated).join("");
    return this.buildDocumentResult(translatedContent, chunkResults, totals);
  }

  /**
   * Run every chunk through `translateChunk`, in order, one at a time.
   *
   * Chunks of type "preserve" are copied through unchanged with a quality
   * score of 100; they never consult the cache, so they are counted neither
   * as cache hits nor as cache misses.
   *
   * @returns {Promise<{chunkResults: object[], totals: object}>}
   */
  async translateChunks(chunks, options, glossary) {
    const agent = createTranslationAgent({
      provider: this.provider,
      qualityThreshold: options.qualityThreshold ?? this.config.quality.threshold,
      maxIterations: options.maxIterations ?? this.config.quality.maxIterations,
      verbose: this.verbose,
      strictQuality: options.strictQuality
    });
    const chunkResults = [];
    const totals = { inputTokens: 0, outputTokens: 0, iterations: 0, cacheHits: 0, cacheMisses: 0 };
    for (let i = 0; i < chunks.length; i++) {
      const chunk = chunks[i];
      if (!chunk) continue;
      if (chunk.type === "preserve") {
        chunkResults.push({
          original: chunk.content,
          translated: chunk.content,
          startOffset: chunk.startOffset,
          endOffset: chunk.endOffset,
          qualityScore: 100
        });
        continue;
      }
      if (this.verbose) {
        logger.info(`Translating chunk ${i + 1}/${chunks.length}...`);
      }
      const result = await this.translateChunk(chunk, options, glossary, agent);
      chunkResults.push(result);
      if (result.tokensUsed) {
        totals.inputTokens += result.tokensUsed.input;
        totals.outputTokens += result.tokensUsed.output;
      }
      if (result.iterations) {
        totals.iterations += result.iterations;
      }
      if (result.cached) {
        totals.cacheHits++;
      } else if (result.qualityScore > 0) {
        // Score 0 marks a failed chunk (see translateChunk); not a cache miss.
        totals.cacheMisses++;
      }
    }
    return { chunkResults, totals };
  }

  /**
   * Assemble the document-level result. `totalDuration` is left at 0 and is
   * filled in by `translateContent`.
   */
  buildDocumentResult(content, chunkResults, totals) {
    // Failed chunks (score 0) are excluded from the average.
    const qualityScores = chunkResults.filter((r) => r.qualityScore > 0).map((r) => r.qualityScore);
    const averageQuality = qualityScores.length > 0
      ? qualityScores.reduce((a, b) => a + b, 0) / qualityScores.length
      : 0;
    return {
      content,
      chunks: chunkResults,
      metadata: {
        totalTokensUsed: totals.inputTokens + totals.outputTokens,
        totalDuration: 0,
        // Will be set by caller
        averageQuality,
        provider: this.provider.name,
        model: this.config.provider.model ?? this.provider.defaultModel,
        totalIterations: totals.iterations,
        tokensUsed: {
          input: totals.inputTokens,
          output: totals.outputTokens
        },
        cache: {
          hits: totals.cacheHits,
          misses: totals.cacheMisses
        }
      }
    };
  }

  /**
   * Translate one chunk, using the cache when possible.
   *
   * The cache key covers the chunk text, both languages, the glossary's
   * source/target pairs, the provider name and the model. If the agent
   * throws, the error is logged and the ORIGINAL text is returned with a
   * quality score of 0, so one failing chunk does not abort the document.
   * NOTE(review): that catch also absorbs any error the agent raises
   * deliberately (e.g. for `strictQuality`) — confirm that is intended.
   */
  async translateChunk(chunk, options, glossary, agent) {
    const glossaryString = glossary
      ? JSON.stringify(glossary.terms.map((t) => ({ s: t.source, t: t.target })))
      : void 0;
    const cacheKey = {
      content: chunk.content,
      sourceLang: options.sourceLang,
      targetLang: options.targetLang,
      glossary: glossaryString,
      provider: this.provider.name,
      model: this.config.provider.model ?? this.provider.defaultModel
    };
    const cacheResult = this.cache.get(cacheKey);
    if (cacheResult.hit && cacheResult.entry) {
      this.cacheHits++;
      if (this.verbose) {
        logger.info(`  \u21B3 Cache hit (quality: ${cacheResult.entry.qualityScore})`);
      }
      return {
        original: chunk.content,
        translated: cacheResult.entry.translation,
        startOffset: chunk.startOffset,
        endOffset: chunk.endOffset,
        qualityScore: cacheResult.entry.qualityScore,
        iterations: 0,
        tokensUsed: { input: 0, output: 0, cacheRead: 1 },
        cached: true
      };
    }
    this.cacheMisses++;

    // Per-call style instruction wins over the per-language configured style.
    const resolvedStyleInstruction = options.styleInstruction ?? this.config.languages.styles?.[options.targetLang];
    const context = {
      documentPurpose: options.context,
      styleInstruction: resolvedStyleInstruction
    };
    if (chunk.metadata?.headerHierarchy && chunk.metadata.headerHierarchy.length > 0) {
      context.documentSummary = `Current section: ${chunk.metadata.headerHierarchy.join(" > ")}`;
    }
    if (chunk.metadata?.previousContext) {
      context.previousChunks = [chunk.metadata.previousContext];
    }
    const request = {
      content: chunk.content,
      sourceLang: options.sourceLang,
      targetLang: options.targetLang,
      // NOTE(review): an auto-detected format is not forwarded here; when the
      // caller gave no `format` the agent is told "text" — confirm intended.
      format: options.format ?? "text",
      glossary,
      context
    };
    try {
      const result = await agent.translate(request);
      this.cache.set(cacheKey, result.content, result.metadata.qualityScore);
      return {
        original: chunk.content,
        translated: result.content,
        startOffset: chunk.startOffset,
        endOffset: chunk.endOffset,
        qualityScore: result.metadata.qualityScore,
        iterations: result.metadata.iterations,
        tokensUsed: result.metadata.tokensUsed
      };
    } catch (error) {
      logger.error(`Failed to translate chunk: ${error}`);
      return {
        original: chunk.content,
        translated: chunk.content,
        // Fallback to original
        startOffset: chunk.startOffset,
        endOffset: chunk.endOffset,
        qualityScore: 0,
        iterations: 0,
        tokensUsed: { input: 0, output: 0 }
      };
    }
  }

  // ============================================================================
  // Utility Methods
  // ============================================================================

  /**
   * Guess the content format from substrings. Markdown markers are checked
   * first ("# ", "## ", "```", "- ", or an inline link), then a few HTML
   * tags; anything else is "text".
   *
   * @param {string} content
   * @returns {"markdown"|"html"|"text"}
   */
  detectFormat(content) {
    if (content.includes("# ") || content.includes("## ") || content.includes("```") || content.includes("- ") || content.match(/\[.+\]\(.+\)/)) {
      return "markdown";
    }
    if (content.includes("<html") || content.includes("<body") || content.includes("<div") || content.includes("<p>")) {
      return "html";
    }
    return "text";
  }
};
|
|
3560
|
+
/**
 * Factory wrapper around the `TranslationEngine` constructor.
 *
 * @param {object} options - Passed to `TranslationEngine` unchanged.
 * @returns {TranslationEngine} A newly constructed engine.
 */
function createTranslationEngine(options) {
  const engine = new TranslationEngine(options);
  return engine;
}
|
|
3563
|
+
/**
 * One-shot convenience API: build an engine with a built-in default
 * configuration and translate `content`.
 *
 * The default configuration names "claude" as the provider, uses a quality
 * threshold of 85 and at most 4 iterations unless overridden, and chunks at
 * 1024 tokens with 150 tokens of overlap.
 *
 * @param {string} content - Text to translate.
 * @param {string} sourceLang
 * @param {string} targetLang
 * @param {object} [options] - Optional `provider`, `verbose`, `glossaryPath`,
 *   `qualityThreshold` and `maxIterations`.
 * @returns {Promise<string>} The translated content only (metadata is dropped).
 */
async function translateText(content, sourceLang, targetLang, options) {
  const qualityThreshold = options?.qualityThreshold;
  const maxIterations = options?.maxIterations;

  const engine = createTranslationEngine({
    config: {
      version: "1.0",
      languages: { source: sourceLang, targets: [targetLang] },
      provider: { default: "claude" },
      quality: {
        threshold: qualityThreshold ?? 85,
        maxIterations: maxIterations ?? 4,
        evaluationMethod: "llm"
      },
      chunking: {
        maxTokens: 1024,
        overlapTokens: 150,
        preserveStructure: true
      },
      paths: { output: "./" }
    },
    provider: options?.provider,
    verbose: options?.verbose
  });

  const { content: translated } = await engine.translateContent({
    content,
    sourceLang,
    targetLang,
    glossaryPath: options?.glossaryPath,
    qualityThreshold,
    maxIterations
  });
  return translated;
}
|
|
3595
|
+
// Display label for each supported locale code, written in that language
// (non-ASCII characters are kept as escapes).
var DEFAULT_LABELS = {
  // East Asian and English
  en: "English",
  ko: "\uD55C\uAD6D\uC5B4",
  ja: "\u65E5\u672C\u8A9E",
  zh: "\u4E2D\u6587",
  // European
  es: "Espa\xF1ol",
  fr: "Fran\xE7ais",
  de: "Deutsch",
  pt: "Portugu\xEAs",
  ru: "\u0420\u0443\u0441\u0441\u043A\u0438\u0439",
  it: "Italiano"
};
|
|
3607
|
+
// Full language tag (language-REGION) used as the `lang` value for each
// two-letter locale code.
var DEFAULT_LANG_CODES = {
  // East Asian and English
  en: "en-US",
  ko: "ko-KR",
  ja: "ja-JP",
  zh: "zh-CN",
  // European
  es: "es-ES",
  fr: "fr-FR",
  de: "de-DE",
  pt: "pt-BR",
  ru: "ru-RU",
  it: "it-IT"
};
|
|
3619
|
+
// Built-in theme UI strings per locale. Each locale provides the same set of
// keys; `generateLocale` copies a subset of them into `themeConfig`.
var DEFAULT_TRANSLATIONS = {
  // Korean
  ko: {
    editLinkText: "GitHub\uC5D0\uC11C \uC774 \uD398\uC774\uC9C0 \uD3B8\uC9D1\uD558\uAE30",
    docFooter: {
      prev: "\uC774\uC804 \uD398\uC774\uC9C0",
      next: "\uB2E4\uC74C \uD398\uC774\uC9C0"
    },
    outline: { label: "\uBAA9\uCC28" },
    lastUpdated: { text: "\uCD5C\uC885 \uC5C5\uB370\uC774\uD2B8" },
    returnToTopLabel: "\uB9E8 \uC704\uB85C",
    sidebarMenuLabel: "\uBA54\uB274",
    darkModeSwitchLabel: "\uB2E4\uD06C \uBAA8\uB4DC"
  },
  // Japanese
  ja: {
    editLinkText: "GitHub\u3067\u3053\u306E\u30DA\u30FC\u30B8\u3092\u7DE8\u96C6\u3059\u308B",
    docFooter: {
      prev: "\u524D\u306E\u30DA\u30FC\u30B8",
      next: "\u6B21\u306E\u30DA\u30FC\u30B8"
    },
    outline: { label: "\u76EE\u6B21" },
    lastUpdated: { text: "\u6700\u7D42\u66F4\u65B0" },
    returnToTopLabel: "\u30C8\u30C3\u30D7\u3078\u623B\u308B",
    sidebarMenuLabel: "\u30E1\u30CB\u30E5\u30FC",
    darkModeSwitchLabel: "\u30C0\u30FC\u30AF\u30E2\u30FC\u30C9"
  },
  // Chinese
  zh: {
    editLinkText: "\u5728 GitHub \u4E0A\u7F16\u8F91\u6B64\u9875",
    docFooter: {
      prev: "\u4E0A\u4E00\u9875",
      next: "\u4E0B\u4E00\u9875"
    },
    outline: { label: "\u76EE\u5F55" },
    lastUpdated: { text: "\u6700\u540E\u66F4\u65B0" },
    returnToTopLabel: "\u8FD4\u56DE\u9876\u90E8",
    sidebarMenuLabel: "\u83DC\u5355",
    darkModeSwitchLabel: "\u6DF1\u8272\u6A21\u5F0F"
  }
};
|
|
3648
|
+
/**
 * Report whether `path` refers to a directory.
 *
 * @param {string} path
 * @returns {boolean} `true` for a directory; `false` for anything else,
 *   including paths that cannot be stat'ed.
 */
function isDirectory(path) {
  let result = false;
  try {
    result = statSync(path).isDirectory();
  } catch {
    // Any stat failure is treated as "not a directory".
  }
  return result;
}
|
|
3655
|
+
/**
 * List the names of the immediate subdirectories of `dir`, leaving out names
 * that start with "." or "_".
 *
 * @param {string} dir
 * @returns {string[]} Subdirectory names (not full paths); empty when `dir`
 *   does not exist.
 */
function getSubdirectories(dir) {
  if (!existsSync(dir)) {
    return [];
  }
  const isVisibleDirectory = (name) => {
    if (!isDirectory(join(dir, name))) {
      return false;
    }
    return !name.startsWith(".") && !name.startsWith("_");
  };
  return readdirSync(dir).filter(isVisibleDirectory);
}
|
|
3659
|
+
/**
 * Read a markdown file and return its title.
 *
 * Lookup order:
 *   1. a top-level `title:` key inside the frontmatter block, i.e. between a
 *      `---` on the very first line and the next line that is just `---`;
 *   2. the first level-1 heading (`# ...`) anywhere in the file.
 *
 * The frontmatter lookup is line-based. The previous single regex matched
 * `title:` as a substring, so `subtitle: X` was returned as the title, and
 * was not confined to the frontmatter, so a `title:` in the body could win
 * as long as some later `---` existed.
 *
 * @param {string} filePath
 * @returns {string|null} The trimmed title, or `null` when the file cannot
 *   be read or has neither a frontmatter title nor a level-1 heading.
 */
function getTitleFromFile(filePath) {
  let content;
  try {
    content = readFileSync(filePath, "utf-8");
  } catch {
    // Unreadable or missing file: callers fall back to a name-derived title.
    return null;
  }

  const lines = content.split(/\r?\n/);
  if (lines[0].trimEnd() === "---") {
    const closing = lines.findIndex((line, index) => index > 0 && line.trimEnd() === "---");
    // Without a closing fence there is no frontmatter block to search.
    for (let i = 1; i < closing; i++) {
      const keyMatch = lines[i].match(/^title:(.*)$/);
      if (!keyMatch) continue;
      let value = keyMatch[1].trim();
      const quote = value.charAt(0);
      // Strip one pair of matching surrounding quotes, keeping inner quotes.
      if (value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote)) {
        value = value.slice(1, -1).trim();
      }
      if (value !== "") {
        return value;
      }
    }
  }

  const headingMatch = content.match(/^#\s+(.+)$/m);
  if (headingMatch?.[1]) {
    return headingMatch[1].trim();
  }
  return null;
}
|
|
3675
|
+
/**
 * Turn a file or directory name into a display title: drop a trailing ".md",
 * turn "-" and "_" into spaces, and upper-case the first character of each
 * word.
 *
 * @param {string} fileName
 * @returns {string}
 */
function fileNameToTitle(fileName) {
  const stem = fileName.replace(/\.md$/, "");
  const spaced = stem.replace(/[-_]/g, " ");
  return spaced.replace(/\b\w/g, (letter) => letter.toUpperCase());
}
|
|
3678
|
+
/**
 * Build sidebar items for one directory, recursing into subdirectories.
 *
 * Entries are ordered with `index.md` first and the rest by `localeCompare`.
 * A subdirectory becomes a group (`{ text, items }`) only when it yields at
 * least one item. A `.md` file becomes a link (`{ text, link }`) whose path
 * is taken relative to `options.basePath`, without the `.md` extension and
 * with a trailing `/index` collapsed to `/`. Other files are ignored.
 *
 * Path separators are normalised to "/" before the link is built:
 * `path.relative` returns backslash-separated paths on Windows, which
 * previously leaked into links and stopped the `/index` collapse from
 * matching.
 *
 * @param {string} dir - Directory to scan.
 * @param {{basePath: string, localePrefix: string, useTitleFromHeading: boolean}} options
 * @returns {Array<object>} Sidebar items; empty when `dir` does not exist.
 */
function generateSidebarItems(dir, options) {
  const { basePath, localePrefix, useTitleFromHeading } = options;
  const items = [];
  if (!existsSync(dir)) return items;

  const entries = readdirSync(dir).sort((a, b) => {
    if (a === "index.md") return -1;
    if (b === "index.md") return 1;
    return a.localeCompare(b);
  });

  for (const entry of entries) {
    const fullPath = join(dir, entry);
    if (isDirectory(fullPath)) {
      const subItems = generateSidebarItems(fullPath, options);
      if (subItems.length === 0) continue;
      // Prefer the title of the directory's own index page when allowed.
      const indexFile = join(fullPath, "index.md");
      const title = existsSync(indexFile) && useTitleFromHeading
        ? getTitleFromFile(indexFile) || fileNameToTitle(entry)
        : fileNameToTitle(entry);
      items.push({
        text: title,
        items: subItems
      });
    } else if (entry.endsWith(".md")) {
      const title = useTitleFromHeading
        ? getTitleFromFile(fullPath) || fileNameToTitle(entry)
        : fileNameToTitle(entry);
      const urlPath = relative(basePath, fullPath)
        .replace(/\\/g, "/") // links always use "/", whatever the OS separator
        .replace(/\.md$/, "")
        .replace(/\/index$/, "/");
      items.push({
        text: title,
        link: `${localePrefix}/${urlPath}`
      });
    }
  }
  return items;
}
|
|
3711
|
+
/**
 * Build the sidebar map for one locale.
 *
 * Each name in `sidebarDirs` that exists under `docsDir` and yields at least
 * one item becomes an entry keyed by `${localePrefix}/${dir}/`, holding a
 * single group titled after the directory name.
 *
 * @param {string} docsDir - Root directory of this locale's docs.
 * @param {Iterable<string>} sidebarDirs - Section directory names.
 * @param {string} localePrefix - URL prefix ("" or "/<locale>").
 * @param {boolean} useTitleFromHeading - Forwarded to `generateSidebarItems`.
 * @returns {Object<string, Array<object>>}
 */
function generateSidebar(docsDir, sidebarDirs, localePrefix, useTitleFromHeading) {
  const sidebar = {};
  for (const section of sidebarDirs) {
    const sectionDir = join(docsDir, section);
    if (existsSync(sectionDir)) {
      const items = generateSidebarItems(sectionDir, {
        basePath: docsDir,
        localePrefix,
        useTitleFromHeading
      });
      if (items.length > 0) {
        sidebar[`${localePrefix}/${section}/`] = [{ text: fileNameToTitle(section), items }];
      }
    }
  }
  return sidebar;
}
|
|
3733
|
+
/**
 * Build the top navigation entries for one locale: one entry per name in
 * `sidebarDirs` that exists under `docsDir`.
 *
 * The entry's text comes from the section's `index.md` when that file exists,
 * `useTitleFromHeading` is set and a title can be read from it; otherwise it
 * is derived from the directory name.
 *
 * @param {string} docsDir
 * @param {Iterable<string>} sidebarDirs
 * @param {string} localePrefix - URL prefix ("" or "/<locale>").
 * @param {boolean} useTitleFromHeading
 * @returns {Array<{text: string, link: string, activeMatch: string}>}
 */
function generateNav(docsDir, sidebarDirs, localePrefix, useTitleFromHeading) {
  const nav = [];
  for (const section of sidebarDirs) {
    const sectionDir = join(docsDir, section);
    if (!existsSync(sectionDir)) {
      continue;
    }
    const indexFile = join(sectionDir, "index.md");
    let fileTitle = null;
    if (existsSync(indexFile) && useTitleFromHeading) {
      fileTitle = getTitleFromFile(indexFile);
    }
    nav.push({
      text: fileTitle || fileNameToTitle(section),
      link: `${localePrefix}/${section}/`,
      activeMatch: `${localePrefix}/${section}/`
    });
  }
  return nav;
}
|
|
3748
|
+
/**
 * Find locale directories directly under `docsDir`: subdirectories named like
 * a locale code ("xx" or "xx-YY"), other than the default locale.
 *
 * @param {string} docsDir
 * @param {string} [defaultLocale="en"] - Locale to leave out of the result.
 * @returns {string[]} Locale directory names.
 */
function detectLocales(docsDir, defaultLocale = "en") {
  const isOtherLocale = (name) => /^[a-z]{2}(-[A-Z]{2})?$/.test(name) && name !== defaultLocale;
  return getSubdirectories(docsDir).filter(isOtherLocale);
}
|
|
3755
|
+
/**
 * Find the content section directories directly under `docsDir`: every
 * subdirectory except locale directories and a few well-known non-content
 * names.
 *
 * @param {string} docsDir
 * @returns {string[]} Section directory names.
 */
function detectSidebarDirs(docsDir) {
  const excludePatterns = [
    // locale dirs
    /^[a-z]{2}(-[A-Z]{2})?$/,
    /^public$/,
    /^\.vitepress$/,
    /^assets?$/,
    /^images?$/
  ];
  const isContentDir = (name) => excludePatterns.every((pattern) => !pattern.test(name));
  return getSubdirectories(docsDir).filter(isContentDir);
}
|
|
3767
|
+
/**
 * Build the configuration object for a single locale.
 *
 * The default locale is served from `docsDir` with no URL prefix; any other
 * locale is served from `docsDir/<locale>` under `/<locale>`. For non-default
 * locales, the UI strings found in `translations[locale]` are copied into
 * `themeConfig` (only keys with a truthy value).
 *
 * @param {string} docsDir
 * @param {string} locale
 * @param {object} [options] - Optional `defaultLocale`, `labels`, `langCodes`,
 *   `descriptions`, `sidebarDirs`, `useTitleFromHeading`, `translations`.
 * @returns {{label: string, lang: string, description: *, themeConfig: object}}
 */
function generateLocale(docsDir, locale, options = {}) {
  const {
    defaultLocale = "en",
    labels = DEFAULT_LABELS,
    langCodes = DEFAULT_LANG_CODES,
    descriptions = {},
    sidebarDirs = detectSidebarDirs(docsDir),
    useTitleFromHeading = true,
    translations = DEFAULT_TRANSLATIONS
  } = options;

  const isDefault = locale === defaultLocale;
  let localeDir = docsDir;
  let localePrefix = "";
  if (!isDefault) {
    localeDir = join(docsDir, locale);
    localePrefix = `/${locale}`;
  }

  const nav = generateNav(localeDir, sidebarDirs, localePrefix, useTitleFromHeading);
  const sidebar = generateSidebar(localeDir, sidebarDirs, localePrefix, useTitleFromHeading);
  const localeTranslations = translations[locale] || {};
  const themeConfig = { nav, sidebar };
  const config2 = {
    label: labels[locale] || locale,
    lang: langCodes[locale] || locale,
    description: descriptions[locale],
    themeConfig
  };

  if (!isDefault) {
    // Copied in this fixed order; keys without a truthy value are skipped.
    const uiKeys = [
      "docFooter",
      "outline",
      "lastUpdated",
      "returnToTopLabel",
      "sidebarMenuLabel",
      "darkModeSwitchLabel"
    ];
    for (const key of uiKeys) {
      if (localeTranslations[key]) {
        themeConfig[key] = localeTranslations[key];
      }
    }
  }
  return config2;
}
|
|
3814
|
+
/**
 * Build the locale configuration map: the default locale under the key
 * `root`, plus one entry per additional locale keyed by its code.
 *
 * @param {string} docsDir
 * @param {object} [options] - Forwarded to `generateLocale`; `defaultLocale`
 *   defaults to "en" and `locales` defaults to the locale directories
 *   detected under `docsDir`.
 * @returns {Object<string, object>}
 */
function generateLocaleConfig(docsDir, options = {}) {
  const { defaultLocale = "en", locales = detectLocales(docsDir, defaultLocale) } = options;
  const byLocale = { root: generateLocale(docsDir, defaultLocale, options) };
  for (const code of locales) {
    byLocale[code] = generateLocale(docsDir, code, options);
  }
  return byLocale;
}
|
|
3823
|
+
/**
 * Build sidebars for every locale: the default locale's sidebar under the key
 * `root` (no URL prefix), plus one per additional locale, generated from
 * `docsDir/<locale>` with the prefix `/<locale>`.
 *
 * @param {string} docsDir
 * @param {object} [options] - Optional `defaultLocale` ("en"), `locales`
 *   (detected under `docsDir`), `sidebarDirs` (detected under `docsDir`) and
 *   `useTitleFromHeading` (true).
 * @returns {Object<string, object>}
 */
function generateSidebarConfig(docsDir, options = {}) {
  const {
    defaultLocale = "en",
    locales = detectLocales(docsDir, defaultLocale),
    sidebarDirs = detectSidebarDirs(docsDir),
    useTitleFromHeading = true
  } = options;

  const byLocale = {
    root: generateSidebar(docsDir, sidebarDirs, "", useTitleFromHeading)
  };
  for (const code of locales) {
    byLocale[code] = generateSidebar(join(docsDir, code), sidebarDirs, `/${code}`, useTitleFromHeading);
  }
  return byLocale;
}
|
|
3838
|
+
|
|
3839
|
+
export { CacheManager, ClaudeProvider, CompositePolicy, ErrorCode, ExitCode, GlossaryChangePolicy, ProviderChangePolicy, QualityThresholdPolicy, TTLPolicy, TranslationAgent, TranslationEngine, TranslationError, applyTranslations, checkGlossaryCompliance, chunkContent, configureLogger, createCacheManager, createClaudeProvider, createDefaultPolicies, createGlossaryLookup, createMinimalPolicies, createNullCacheManager, createProviderWithFallback, createStrictPolicies, createTimer, createTranslationAgent, createTranslationEngine, createTranslationMap, detectLocales, detectSidebarDirs, estimateTokens, exceedsTokenLimit, extractTextForTranslation, generateCacheKey, generateLocale, generateLocaleConfig, generateSidebarConfig, getAvailableProviders, getChunkStats, getExitCode, getProvider, getProviderConfigFromEnv, getTranslatableText, hasProvider, hashContent, isErrorCode, isTranslationError, loadConfig, loadGlossary, logger, mergeConfig, parseMarkdown, reassembleChunks, registerProvider, resolveGlossary, restorePreservedSections, translateMarkdownContent, translateText, truncateToTokenLimit };
|
|
3840
|
+
//# sourceMappingURL=index.js.map
|
|
3841
|
+
//# sourceMappingURL=index.js.map
|