@llm-translate/cli 1.0.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +51 -0
- package/.env.example +33 -0
- package/.github/workflows/docs-pages.yml +57 -0
- package/.github/workflows/release.yml +49 -0
- package/.translaterc.json +44 -0
- package/CLAUDE.md +243 -0
- package/Dockerfile +55 -0
- package/README.md +371 -0
- package/RFC.md +1595 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +4494 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/index.d.ts +1152 -0
- package/dist/index.js +3841 -0
- package/dist/index.js.map +1 -0
- package/docker-compose.yml +56 -0
- package/docs/.vitepress/config.ts +161 -0
- package/docs/api/agent.md +262 -0
- package/docs/api/engine.md +274 -0
- package/docs/api/index.md +171 -0
- package/docs/api/providers.md +304 -0
- package/docs/changelog.md +64 -0
- package/docs/cli/dir.md +243 -0
- package/docs/cli/file.md +213 -0
- package/docs/cli/glossary.md +273 -0
- package/docs/cli/index.md +129 -0
- package/docs/cli/init.md +158 -0
- package/docs/cli/serve.md +211 -0
- package/docs/glossary.json +235 -0
- package/docs/guide/chunking.md +272 -0
- package/docs/guide/configuration.md +139 -0
- package/docs/guide/cost-optimization.md +237 -0
- package/docs/guide/docker.md +371 -0
- package/docs/guide/getting-started.md +150 -0
- package/docs/guide/glossary.md +241 -0
- package/docs/guide/index.md +86 -0
- package/docs/guide/ollama.md +515 -0
- package/docs/guide/prompt-caching.md +221 -0
- package/docs/guide/providers.md +232 -0
- package/docs/guide/quality-control.md +206 -0
- package/docs/guide/vitepress-integration.md +265 -0
- package/docs/index.md +63 -0
- package/docs/ja/api/agent.md +262 -0
- package/docs/ja/api/engine.md +274 -0
- package/docs/ja/api/index.md +171 -0
- package/docs/ja/api/providers.md +304 -0
- package/docs/ja/changelog.md +64 -0
- package/docs/ja/cli/dir.md +243 -0
- package/docs/ja/cli/file.md +213 -0
- package/docs/ja/cli/glossary.md +273 -0
- package/docs/ja/cli/index.md +111 -0
- package/docs/ja/cli/init.md +158 -0
- package/docs/ja/guide/chunking.md +271 -0
- package/docs/ja/guide/configuration.md +139 -0
- package/docs/ja/guide/cost-optimization.md +30 -0
- package/docs/ja/guide/getting-started.md +150 -0
- package/docs/ja/guide/glossary.md +214 -0
- package/docs/ja/guide/index.md +32 -0
- package/docs/ja/guide/ollama.md +410 -0
- package/docs/ja/guide/prompt-caching.md +221 -0
- package/docs/ja/guide/providers.md +232 -0
- package/docs/ja/guide/quality-control.md +137 -0
- package/docs/ja/guide/vitepress-integration.md +265 -0
- package/docs/ja/index.md +58 -0
- package/docs/ko/api/agent.md +262 -0
- package/docs/ko/api/engine.md +274 -0
- package/docs/ko/api/index.md +171 -0
- package/docs/ko/api/providers.md +304 -0
- package/docs/ko/changelog.md +64 -0
- package/docs/ko/cli/dir.md +243 -0
- package/docs/ko/cli/file.md +213 -0
- package/docs/ko/cli/glossary.md +273 -0
- package/docs/ko/cli/index.md +111 -0
- package/docs/ko/cli/init.md +158 -0
- package/docs/ko/guide/chunking.md +271 -0
- package/docs/ko/guide/configuration.md +139 -0
- package/docs/ko/guide/cost-optimization.md +30 -0
- package/docs/ko/guide/getting-started.md +150 -0
- package/docs/ko/guide/glossary.md +214 -0
- package/docs/ko/guide/index.md +32 -0
- package/docs/ko/guide/ollama.md +410 -0
- package/docs/ko/guide/prompt-caching.md +221 -0
- package/docs/ko/guide/providers.md +232 -0
- package/docs/ko/guide/quality-control.md +137 -0
- package/docs/ko/guide/vitepress-integration.md +265 -0
- package/docs/ko/index.md +58 -0
- package/docs/zh/api/agent.md +262 -0
- package/docs/zh/api/engine.md +274 -0
- package/docs/zh/api/index.md +171 -0
- package/docs/zh/api/providers.md +304 -0
- package/docs/zh/changelog.md +64 -0
- package/docs/zh/cli/dir.md +243 -0
- package/docs/zh/cli/file.md +213 -0
- package/docs/zh/cli/glossary.md +273 -0
- package/docs/zh/cli/index.md +111 -0
- package/docs/zh/cli/init.md +158 -0
- package/docs/zh/guide/chunking.md +271 -0
- package/docs/zh/guide/configuration.md +139 -0
- package/docs/zh/guide/cost-optimization.md +30 -0
- package/docs/zh/guide/getting-started.md +150 -0
- package/docs/zh/guide/glossary.md +214 -0
- package/docs/zh/guide/index.md +32 -0
- package/docs/zh/guide/ollama.md +410 -0
- package/docs/zh/guide/prompt-caching.md +221 -0
- package/docs/zh/guide/providers.md +232 -0
- package/docs/zh/guide/quality-control.md +137 -0
- package/docs/zh/guide/vitepress-integration.md +265 -0
- package/docs/zh/index.md +58 -0
- package/package.json +91 -0
- package/release.config.mjs +15 -0
- package/schemas/glossary.schema.json +110 -0
- package/src/cli/commands/dir.ts +469 -0
- package/src/cli/commands/file.ts +291 -0
- package/src/cli/commands/glossary.ts +221 -0
- package/src/cli/commands/init.ts +68 -0
- package/src/cli/commands/serve.ts +60 -0
- package/src/cli/index.ts +64 -0
- package/src/cli/options.ts +59 -0
- package/src/core/agent.ts +1119 -0
- package/src/core/chunker.ts +391 -0
- package/src/core/engine.ts +634 -0
- package/src/errors.ts +188 -0
- package/src/index.ts +147 -0
- package/src/integrations/vitepress.ts +549 -0
- package/src/parsers/markdown.ts +383 -0
- package/src/providers/claude.ts +259 -0
- package/src/providers/interface.ts +109 -0
- package/src/providers/ollama.ts +379 -0
- package/src/providers/openai.ts +308 -0
- package/src/providers/registry.ts +153 -0
- package/src/server/index.ts +152 -0
- package/src/server/middleware/auth.ts +93 -0
- package/src/server/middleware/logger.ts +90 -0
- package/src/server/routes/health.ts +84 -0
- package/src/server/routes/translate.ts +210 -0
- package/src/server/types.ts +138 -0
- package/src/services/cache.ts +899 -0
- package/src/services/config.ts +217 -0
- package/src/services/glossary.ts +247 -0
- package/src/types/analysis.ts +164 -0
- package/src/types/index.ts +265 -0
- package/src/types/modes.ts +121 -0
- package/src/types/mqm.ts +157 -0
- package/src/utils/logger.ts +141 -0
- package/src/utils/tokens.ts +116 -0
- package/tests/fixtures/glossaries/ml-glossary.json +53 -0
- package/tests/fixtures/input/lynq-installation.ko.md +350 -0
- package/tests/fixtures/input/lynq-installation.md +350 -0
- package/tests/fixtures/input/simple.ko.md +27 -0
- package/tests/fixtures/input/simple.md +27 -0
- package/tests/unit/chunker.test.ts +229 -0
- package/tests/unit/glossary.test.ts +146 -0
- package/tests/unit/markdown.test.ts +205 -0
- package/tests/unit/tokens.test.ts +81 -0
- package/tsconfig.json +28 -0
- package/tsup.config.ts +34 -0
- package/vitest.config.ts +16 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import { cosmiconfig } from 'cosmiconfig';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import type { TranslateConfig, ProviderName } from '../types/index.js';
|
|
4
|
+
import { TranslationError, ErrorCode } from '../errors.js';
|
|
5
|
+
|
|
6
|
+
// ============================================================================
|
|
7
|
+
// Zod Schema for Config Validation
|
|
8
|
+
// ============================================================================
|
|
9
|
+
|
|
10
|
+
/** Provider identifiers accepted anywhere a provider name is expected. */
const providerNameSchema = z.enum(['claude', 'openai', 'ollama', 'custom']);

/** Optional free-form project description block. */
const projectSectionSchema = z.object({
  name: z.string(),
  description: z.string(),
  purpose: z.string(),
});

/** Source language, target languages and optional per-language style hints. */
const languagesSectionSchema = z.object({
  source: z.string(),
  targets: z.array(z.string()),
  styles: z.record(z.string(), z.string()).optional(),
});

/** Default provider plus optional model, fallback chain and API keys. */
const providerSectionSchema = z.object({
  default: providerNameSchema,
  model: z.string().optional(),
  fallback: z.array(providerNameSchema).optional(),
  apiKeys: z.record(providerNameSchema, z.string()).optional(),
});

/** Quality gate: threshold in [0, 100], iteration cap in [1, 10]. */
const qualitySectionSchema = z.object({
  threshold: z.number().min(0).max(100),
  maxIterations: z.number().min(1).max(10),
  evaluationMethod: z.enum(['llm', 'embedding', 'hybrid']),
});

/** Chunking limits: chunk size in [100, 8000] tokens, non-negative overlap. */
const chunkingSectionSchema = z.object({
  maxTokens: z.number().min(100).max(8000),
  overlapTokens: z.number().min(0),
  preserveStructure: z.boolean(),
});

/** Optional glossary reference. */
const glossarySectionSchema = z.object({
  path: z.string(),
  strict: z.boolean(),
});

/** Output location and optional cache location. */
const pathsSectionSchema = z.object({
  output: z.string(),
  cache: z.string().optional(),
});

/**
 * Schema a loaded configuration file must satisfy.
 * `project`, `glossary` and `ignore` may be omitted; every other section is required.
 */
const configSchema = z.object({
  version: z.string(),
  project: projectSectionSchema.optional(),
  languages: languagesSectionSchema,
  provider: providerSectionSchema,
  quality: qualitySectionSchema,
  chunking: chunkingSectionSchema,
  glossary: glossarySectionSchema.optional(),
  paths: pathsSectionSchema,
  ignore: z.array(z.string()).optional(),
});
|
|
54
|
+
|
|
55
|
+
// ============================================================================
|
|
56
|
+
// Default Configuration
|
|
57
|
+
// ============================================================================
|
|
58
|
+
|
|
59
|
+
/**
 * Configuration used when no config file is found (or the file found is empty).
 * Note that `languages.targets` starts empty, so a target language has to come
 * from somewhere else (for example a CLI override) before anything is translated.
 */
const defaultConfig: TranslateConfig = {
  version: '1.0',
  languages: { source: 'en', targets: [] },
  provider: { default: 'claude' },
  quality: { threshold: 85, maxIterations: 4, evaluationMethod: 'llm' },
  chunking: { maxTokens: 1024, overlapTokens: 150, preserveStructure: true },
  // `{lang}` is kept verbatim here; this module does not expand it.
  paths: { output: './{lang}' },
};
|
|
82
|
+
|
|
83
|
+
// ============================================================================
|
|
84
|
+
// Config Loader
|
|
85
|
+
// ============================================================================
|
|
86
|
+
|
|
87
|
+
/** File names handed to cosmiconfig as its `searchPlaces`. */
const configSearchPlaces = [
  '.translaterc',
  '.translaterc.json',
  '.translaterc.yaml',
  '.translaterc.yml',
  'translate.config.js',
  'translate.config.mjs',
];

/** Shared cosmiconfig explorer for the `translate` module name. */
const explorer = cosmiconfig('translate', { searchPlaces: configSearchPlaces });
|
|
97
|
+
|
|
98
|
+
/** Options accepted by `loadConfig`. */
export interface LoadConfigOptions {
  /** Explicit config file to load; when set, no directory search is performed. */
  configPath?: string;

  /** Directory the search starts from when `configPath` is not given. Defaults to `process.cwd()`. */
  cwd?: string;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Load and validate the translation configuration.
 *
 * When `options.configPath` is given that file is loaded directly; otherwise
 * the explorer searches starting at `options.cwd` (default: `process.cwd()`).
 * If nothing is found, or the file found is empty, a fresh copy of the built-in
 * defaults is returned.
 *
 * @param options - Where to look for the configuration.
 * @returns The validated configuration, or a copy of the defaults.
 * @throws TranslationError with `CONFIG_NOT_FOUND` when the explorer throws
 *   while loading or searching.
 * @throws TranslationError with `CONFIG_INVALID` when the loaded file does not
 *   satisfy `configSchema`; the payload lists each failing path and message.
 */
export async function loadConfig(
  options: LoadConfigOptions = {}
): Promise<TranslateConfig> {
  const { configPath, cwd = process.cwd() } = options;

  let result;

  try {
    result = configPath
      ? await explorer.load(configPath)
      : await explorer.search(cwd);
  } catch (error) {
    throw new TranslationError(ErrorCode.CONFIG_NOT_FOUND, {
      path: configPath ?? cwd,
      error: error instanceof Error ? error.message : String(error),
    });
  }

  if (!result || result.isEmpty) {
    // Hand out a copy, never the module-level object itself: a caller that
    // mutates the returned config (e.g. pushes a target language) would
    // otherwise silently change the defaults seen by every later call.
    return {
      ...defaultConfig,
      languages: {
        ...defaultConfig.languages,
        targets: [...defaultConfig.languages.targets],
      },
      provider: { ...defaultConfig.provider },
      quality: { ...defaultConfig.quality },
      chunking: { ...defaultConfig.chunking },
      paths: { ...defaultConfig.paths },
    };
  }

  const parseResult = configSchema.safeParse(result.config);

  if (!parseResult.success) {
    throw new TranslationError(ErrorCode.CONFIG_INVALID, {
      path: result.filepath,
      // `issues` is the canonical ZodError field; `errors` is only an alias
      // and is not available in newer zod releases.
      errors: parseResult.error.issues.map((issue) => ({
        path: issue.path.join('.'),
        message: issue.message,
      })),
    });
  }

  return parseResult.data as TranslateConfig;
}
|
|
143
|
+
|
|
144
|
+
// ============================================================================
|
|
145
|
+
// Config Merger (CLI options override config file)
|
|
146
|
+
// ============================================================================
|
|
147
|
+
|
|
148
|
+
/**
 * Values taken from the command line that take precedence over the config
 * file when passed to `mergeConfig`. Every field is optional.
 */
export interface CLIOverrides {
  /** Replaces `languages.source`. */
  sourceLang?: string;

  /** Replaces `languages.targets` with a single-element list. */
  targetLang?: string;

  /** Replaces `provider.default`. */
  provider?: ProviderName;

  /** Replaces `provider.model`. */
  model?: string;

  /** Replaces `quality.threshold`. */
  quality?: number;

  /** Replaces `quality.maxIterations`. */
  maxIterations?: number;

  /** Replaces `chunking.maxTokens`. */
  chunkSize?: number;

  /** Replaces `glossary.path`. */
  glossary?: string;

  /** Replaces `paths.output`. */
  output?: string;

  /** When true, clears `paths.cache`. */
  noCache?: boolean;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Produce a new configuration in which CLI overrides win over the given config.
 *
 * The input `config` is never mutated. Sections that receive no override are
 * shared by reference with the input; sections that do are replaced by a
 * shallow copy carrying the overridden fields.
 *
 * String overrides and `noCache` are applied only when truthy; numeric
 * overrides are applied whenever they are not `undefined` (so `0` counts).
 *
 * @param config - Base configuration, typically from `loadConfig`.
 * @param overrides - Values supplied on the command line.
 * @returns The merged configuration.
 */
export function mergeConfig(
  config: TranslateConfig,
  overrides: CLIOverrides
): TranslateConfig {
  const {
    sourceLang,
    targetLang,
    provider,
    model,
    quality,
    maxIterations,
    chunkSize,
    glossary,
    output,
    noCache,
  } = overrides;

  const result: TranslateConfig = { ...config };
  const hasEntries = (patch: object): boolean => Object.keys(patch).length > 0;

  // languages
  const languagePatch: Partial<TranslateConfig['languages']> = {};
  if (sourceLang) languagePatch.source = sourceLang;
  if (targetLang) languagePatch.targets = [targetLang];
  if (hasEntries(languagePatch)) {
    result.languages = { ...result.languages, ...languagePatch };
  }

  // provider
  const providerPatch: Partial<TranslateConfig['provider']> = {};
  if (provider) providerPatch.default = provider;
  if (model) providerPatch.model = model;
  if (hasEntries(providerPatch)) {
    result.provider = { ...result.provider, ...providerPatch };
  }

  // quality
  const qualityPatch: Partial<TranslateConfig['quality']> = {};
  if (quality !== undefined) qualityPatch.threshold = quality;
  if (maxIterations !== undefined) qualityPatch.maxIterations = maxIterations;
  if (hasEntries(qualityPatch)) {
    result.quality = { ...result.quality, ...qualityPatch };
  }

  // chunking
  if (chunkSize !== undefined) {
    result.chunking = { ...result.chunking, maxTokens: chunkSize };
  }

  // glossary: the path is replaced, strictness is inherited (false if there was none)
  if (glossary) {
    result.glossary = {
      path: glossary,
      strict: result.glossary?.strict ?? false,
    };
  }

  // paths: `cache` is set to an explicit `undefined` when caching is disabled
  const pathsPatch: Partial<TranslateConfig['paths']> = {};
  if (output) pathsPatch.output = output;
  if (noCache) pathsPatch.cache = undefined;
  if (hasEntries(pathsPatch)) {
    result.paths = { ...result.paths, ...pathsPatch };
  }

  return result;
}
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import { readFile } from 'node:fs/promises';
|
|
2
|
+
import type {
|
|
3
|
+
Glossary,
|
|
4
|
+
GlossaryTerm,
|
|
5
|
+
ResolvedGlossary,
|
|
6
|
+
ResolvedGlossaryTerm,
|
|
7
|
+
} from '../types/index.js';
|
|
8
|
+
import { TranslationError, ErrorCode } from '../errors.js';
|
|
9
|
+
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// Glossary Loading
|
|
12
|
+
// ============================================================================
|
|
13
|
+
|
|
14
|
+
/**
 * Describe the first structural problem in a parsed glossary, if any.
 * Only the parts the rest of this module dereferences unconditionally are
 * checked: a `metadata` object, a `terms` array, and a string `source` on
 * every term.
 */
function findGlossaryShapeProblem(value: unknown): string | undefined {
  const isPlainObject = (v: unknown): v is Record<string, unknown> =>
    typeof v === 'object' && v !== null && !Array.isArray(v);

  if (!isPlainObject(value)) {
    return 'glossary root must be a JSON object';
  }
  if (!isPlainObject(value.metadata)) {
    return '"metadata" must be an object';
  }
  const terms = value.terms;
  if (!Array.isArray(terms)) {
    return '"terms" must be an array';
  }
  for (let index = 0; index < terms.length; index += 1) {
    const entry: unknown = terms[index];
    if (!isPlainObject(entry) || typeof entry.source !== 'string') {
      return `"terms[${index}].source" must be a string`;
    }
  }
  return undefined;
}

/**
 * Read a glossary JSON file from disk.
 *
 * @param path - Location of the glossary file.
 * @returns The parsed glossary.
 * @throws TranslationError with `GLOSSARY_NOT_FOUND` when the file cannot be read.
 * @throws TranslationError with `GLOSSARY_INVALID` when the content is not valid
 *   JSON or lacks the basic structure (`metadata` object, `terms` array of
 *   entries with a string `source`).
 */
export async function loadGlossary(path: string): Promise<Glossary> {
  let content: string;

  try {
    content = await readFile(path, 'utf-8');
  } catch (error) {
    throw new TranslationError(ErrorCode.GLOSSARY_NOT_FOUND, {
      path,
      error: error instanceof Error ? error.message : String(error),
    });
  }

  let parsed: unknown;

  try {
    parsed = JSON.parse(content);
  } catch (error) {
    throw new TranslationError(ErrorCode.GLOSSARY_INVALID, {
      path,
      error: error instanceof Error ? error.message : String(error),
    });
  }

  // Syntactically valid JSON can still be `{}` or `[]`; reject it here with a
  // meaningful error instead of letting a later step crash with a TypeError.
  const problem = findGlossaryShapeProblem(parsed);
  if (problem !== undefined) {
    throw new TranslationError(ErrorCode.GLOSSARY_INVALID, {
      path,
      error: problem,
    });
  }

  return parsed as Glossary;
}
|
|
35
|
+
|
|
36
|
+
// ============================================================================
|
|
37
|
+
// Glossary Resolution
|
|
38
|
+
// ============================================================================
|
|
39
|
+
|
|
40
|
+
/**
 * Narrow a multi-language glossary down to a single target language.
 *
 * Metadata is copied over with `targetLang` added. Terms that cannot be
 * resolved for the target language are dropped.
 *
 * @param glossary - Glossary as loaded from disk.
 * @param targetLang - Language to resolve every term for.
 * @returns The glossary restricted to `targetLang`.
 */
export function resolveGlossary(
  glossary: Glossary,
  targetLang: string
): ResolvedGlossary {
  const { name, sourceLang, version, domain } = glossary.metadata;

  const terms: ResolvedGlossaryTerm[] = [];
  for (const entry of glossary.terms) {
    const resolved = resolveGlossaryTerm(entry, targetLang);
    if (resolved !== null) {
      terms.push(resolved);
    }
  }

  return {
    metadata: { name, sourceLang, targetLang, version, domain },
    terms,
  };
}
|
|
57
|
+
|
|
58
|
+
/**
 * Resolve one glossary term for a target language.
 *
 * @returns The resolved term, or `null` when there is neither a translation
 *   for `targetLang` nor a do-not-translate rule covering it.
 */
function resolveGlossaryTerm(
  term: GlossaryTerm,
  targetLang: string
): ResolvedGlossaryTerm | null {
  const target = resolveTarget(term, targetLang);

  return target === undefined
    ? null
    : {
        source: term.source,
        target,
        context: term.context,
        // Matching is case-insensitive unless the term says otherwise.
        caseSensitive: term.caseSensitive ?? false,
        doNotTranslate: resolveDoNotTranslate(term, targetLang),
      };
}
|
|
77
|
+
|
|
78
|
+
/**
 * Determine the text a term should become in `targetLang`.
 *
 * - Terms flagged `doNotTranslate`, or listing `targetLang` in
 *   `doNotTranslateFor`, keep their source text.
 * - Otherwise the entry for `targetLang` in `targets` is used.
 *
 * @returns The target text, or `undefined` when no non-empty translation exists.
 */
function resolveTarget(term: GlossaryTerm, targetLang: string): string | undefined {
  if (term.doNotTranslate || term.doNotTranslateFor?.includes(targetLang)) {
    return term.source;
  }

  // Glossaries come from user-edited JSON, so a term may lack `targets`
  // altogether; treat that as "no translation" rather than throwing.
  const translation = term.targets?.[targetLang];

  return translation ? translation : undefined;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Tell whether a term must be kept untranslated for `targetLang`: either it is
 * flagged `doNotTranslate: true`, or `targetLang` is listed in `doNotTranslateFor`.
 */
function resolveDoNotTranslate(term: GlossaryTerm, targetLang: string): boolean {
  if (term.doNotTranslate === true) {
    return true;
  }

  const listedForLanguage = term.doNotTranslateFor?.includes(targetLang);
  return listedForLanguage === true;
}
|
|
102
|
+
|
|
103
|
+
// ============================================================================
|
|
104
|
+
// Glossary Lookup
|
|
105
|
+
// ============================================================================
|
|
106
|
+
|
|
107
|
+
/** Query interface over a glossary already resolved for one target language. */
export interface GlossaryLookup {
  /**
   * Look up a single term by its source text.
   * @returns The matching term, or `undefined` when there is none.
   */
  find(text: string): ResolvedGlossaryTerm | undefined;

  /**
   * Collect every glossary term whose source text occurs in `text`.
   */
  findAll(text: string): ResolvedGlossaryTerm[];

  /**
   * List every term in the glossary.
   */
  getTerms(): ResolvedGlossaryTerm[];

  /**
   * Render the glossary as text suitable for embedding in an LLM prompt.
   */
  formatForPrompt(): string;
}
|
|
128
|
+
|
|
129
|
+
/**
 * Build a lookup over a resolved glossary.
 *
 * Case-sensitive and case-insensitive terms are indexed separately so that a
 * case-sensitive term can only ever be matched by its exact spelling, and so
 * that the two kinds cannot overwrite each other when their keys coincide.
 *
 * @param glossary - Glossary already resolved for one target language.
 * @returns Lookup helpers bound to `glossary`.
 */
export function createGlossaryLookup(glossary: ResolvedGlossary): GlossaryLookup {
  // Exact-spelling index for case-sensitive terms.
  const exactTerms = new Map<string, ResolvedGlossaryTerm>();
  // Lower-cased index for case-insensitive terms.
  const foldedTerms = new Map<string, ResolvedGlossaryTerm>();
  const caseSensitiveTerms: ResolvedGlossaryTerm[] = [];
  const caseInsensitiveTerms: ResolvedGlossaryTerm[] = [];

  for (const term of glossary.terms) {
    if (term.caseSensitive) {
      exactTerms.set(term.source, term);
      caseSensitiveTerms.push(term);
    } else {
      foldedTerms.set(term.source.toLowerCase(), term);
      caseInsensitiveTerms.push(term);
    }
  }

  return {
    find(text: string): ResolvedGlossaryTerm | undefined {
      // An exact case-sensitive hit wins; otherwise only case-insensitive
      // terms may be matched through the lower-cased key.
      return exactTerms.get(text) ?? foldedTerms.get(text.toLowerCase());
    },

    findAll(text: string): ResolvedGlossaryTerm[] {
      const matches: ResolvedGlossaryTerm[] = [];

      // Case-sensitive terms are reported first, in glossary order.
      for (const term of caseSensitiveTerms) {
        if (text.includes(term.source)) {
          matches.push(term);
        }
      }

      const lowerText = text.toLowerCase();
      for (const term of caseInsensitiveTerms) {
        if (lowerText.includes(term.source.toLowerCase())) {
          matches.push(term);
        }
      }

      return matches;
    },

    getTerms(): ResolvedGlossaryTerm[] {
      return glossary.terms;
    },

    formatForPrompt(): string {
      const lines: string[] = [];

      for (const term of glossary.terms) {
        const flags: string[] = [
          term.caseSensitive ? 'case-sensitive' : 'case-insensitive',
        ];

        if (term.context) {
          flags.push(`context: ${term.context}`);
        }

        const flagStr = ` (${flags.join(', ')})`;

        if (term.doNotTranslate) {
          lines.push(`- "${term.source}" → [DO NOT TRANSLATE, keep as-is]${flagStr}`);
        } else {
          lines.push(`- "${term.source}" → "${term.target}"${flagStr}`);
        }
      }

      return lines.join('\n');
    },
  };
}
|
|
209
|
+
|
|
210
|
+
// ============================================================================
|
|
211
|
+
// Glossary Compliance Check
|
|
212
|
+
// ============================================================================
|
|
213
|
+
|
|
214
|
+
/** Outcome of `checkGlossaryCompliance`. */
export interface ComplianceResult {
  /** Source text of each glossary term whose target was found in the translation. */
  applied: string[];

  /** Source text of each glossary term whose target was not found in the translation. */
  missed: string[];

  /** Percentage (0-100) of relevant terms that were applied; 100 when no term was relevant. */
  score: number;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Measure how well a translation honours the glossary.
 *
 * Every glossary term whose source occurs in `sourceText` is considered
 * relevant. A relevant term counts as applied when its target occurs in
 * `translatedText` (compared case-insensitively unless the term is
 * case-sensitive), and as missed otherwise.
 *
 * @param sourceText - Original text.
 * @param translatedText - Translation to check.
 * @param glossary - Glossary resolved for the translation's language.
 * @returns Applied and missed term sources plus a 0-100 score (100 when no
 *   glossary term occurs in the source).
 */
export function checkGlossaryCompliance(
  sourceText: string,
  translatedText: string,
  glossary: ResolvedGlossary
): ComplianceResult {
  const relevantTerms = createGlossaryLookup(glossary).findAll(sourceText);
  const foldedTranslation = translatedText.toLowerCase();

  const applied: string[] = [];
  const missed: string[] = [];

  relevantTerms.forEach((term) => {
    const found = term.caseSensitive
      ? translatedText.includes(term.target)
      : foldedTranslation.includes(term.target.toLowerCase());

    (found ? applied : missed).push(term.source);
  });

  const score =
    relevantTerms.length === 0
      ? 100
      : (applied.length / relevantTerms.length) * 100;

  return { applied, missed, score };
}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pre-Translation Analysis Types
|
|
3
|
+
* Based on MAPS (Multi-Aspect Prompting and Selection) framework
|
|
4
|
+
*
|
|
5
|
+
* Reference: TACL 2024
|
|
6
|
+
* https://direct.mit.edu/tacl/article/doi/10.1162/tacl_a_00642/119992
|
|
7
|
+
* https://github.com/zwhe99/MAPS-mt
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/**
 * A domain-relevant term surfaced by the pre-translation analysis.
 */
export interface AnalyzedTerm {
  /** Term as written in the source language. */
  term: string;

  /** How the term is used in the text. */
  context: string;

  /** Proposed translation, for terms the glossary does not cover. */
  suggestedTranslation?: string;

  /** True when the term was found in the glossary. */
  fromGlossary: boolean;
}
|
|
26
|
+
|
|
27
|
+
/**
 * A phrase that can be read in more than one way, with the reading to use.
 */
export interface AmbiguousPhrase {
  /** Phrase as it appears in the source. */
  phrase: string;

  /** Candidate readings of the phrase. */
  interpretations: string[];

  /** Reading the translator should adopt. */
  recommendation: string;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Subject-matter category assigned to the content being translated.
 */
export type ContentDomain = 'technical' | 'marketing' | 'legal' | 'medical' | 'general';
|
|
50
|
+
|
|
51
|
+
/**
 * Formality level recommended for the translation.
 */
export type RegisterLevel =
  | 'formal'
  | 'informal'
  | 'neutral';
|
|
55
|
+
|
|
56
|
+
/**
 * Result of the MAPS-style analysis pass that runs before translation.
 */
export interface PreTranslationAnalysis {
  /** Domain-specific terms found in the text. */
  keyTerms: AnalyzedTerm[];

  /** Phrases that admit several readings, each with a recommended one. */
  ambiguousPhrases: AmbiguousPhrase[];

  /** Strings to carry over untranslated (code, URLs, names). */
  preserveExact: string[];

  /** Translation difficulties noted for this language pair. */
  challenges: string[];

  /** Content domain the text was classified into. */
  domain: ContentDomain;

  /** Formality level the translation should use. */
  registerRecommendation: RegisterLevel;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Parse the pre-analysis JSON embedded in an LLM response.
 *
 * The text between the first `{` and the last `}` is parsed as JSON. Because
 * the content is model-generated, every field is checked at runtime instead of
 * being trusted: non-array lists become `[]`, malformed list entries are
 * dropped, and unknown `domain` / `registerRecommendation` values fall back to
 * `'general'` / `'neutral'`.
 *
 * @param response - Raw LLM response text.
 * @returns The normalised analysis, or `null` when the response contains no
 *   JSON object or the JSON cannot be parsed.
 */
export function parseAnalysisResponse(
  response: string
): PreTranslationAnalysis | null {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null && !Array.isArray(value);
  const asList = (value: unknown): unknown[] =>
    Array.isArray(value) ? value : [];
  const asStrings = (value: unknown): string[] =>
    asList(value).filter((item): item is string => typeof item === 'string');

  const knownDomains: readonly string[] = [
    'technical',
    'marketing',
    'legal',
    'medical',
    'general',
  ];
  const knownRegisters: readonly string[] = ['formal', 'informal', 'neutral'];

  try {
    // Greedy match: from the first "{" to the last "}" in the response.
    const jsonMatch = response.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      return null;
    }

    const parsed: unknown = JSON.parse(jsonMatch[0]);
    if (!isRecord(parsed)) {
      return null;
    }

    const keyTerms: PreTranslationAnalysis['keyTerms'] = [];
    for (const item of asList(parsed.keyTerms)) {
      if (!isRecord(item)) continue;
      const { term, context, suggestedTranslation, fromGlossary } = item;
      if (typeof term !== 'string') continue;
      keyTerms.push({
        term,
        context: typeof context === 'string' ? context : '',
        ...(typeof suggestedTranslation === 'string'
          ? { suggestedTranslation }
          : {}),
        fromGlossary: fromGlossary === true,
      });
    }

    const ambiguousPhrases: PreTranslationAnalysis['ambiguousPhrases'] = [];
    for (const item of asList(parsed.ambiguousPhrases)) {
      if (!isRecord(item)) continue;
      const { phrase, interpretations, recommendation } = item;
      if (typeof phrase !== 'string' || typeof recommendation !== 'string') {
        continue;
      }
      ambiguousPhrases.push({
        phrase,
        interpretations: asStrings(interpretations),
        recommendation,
      });
    }

    const rawDomain = parsed.domain;
    const domain =
      typeof rawDomain === 'string' && knownDomains.includes(rawDomain)
        ? (rawDomain as PreTranslationAnalysis['domain'])
        : 'general';

    const rawRegister = parsed.registerRecommendation;
    const registerRecommendation =
      typeof rawRegister === 'string' && knownRegisters.includes(rawRegister)
        ? (rawRegister as PreTranslationAnalysis['registerRecommendation'])
        : 'neutral';

    return {
      keyTerms,
      ambiguousPhrases,
      preserveExact: asStrings(parsed.preserveExact),
      challenges: asStrings(parsed.challenges),
      domain,
      registerRecommendation,
    };
  } catch {
    // Malformed JSON (or a non-string response) is reported as "no analysis".
    return null;
  }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Render an analysis as Markdown-style text for the translation prompt.
 *
 * Emits, in order and separated by blank lines: key terms, ambiguous phrases
 * and do-not-translate items (each only when non-empty), followed by a
 * content-type / tone section that is always present.
 *
 * @param analysis - Analysis to render.
 * @returns The prompt fragment.
 */
export function formatAnalysisForPrompt(
  analysis: PreTranslationAnalysis
): string {
  const {
    keyTerms,
    ambiguousPhrases,
    preserveExact,
    domain,
    registerRecommendation,
  } = analysis;
  const blocks: string[] = [];

  if (keyTerms.length > 0) {
    const bullets: string[] = [];
    for (const entry of keyTerms) {
      let bullet = `- "${entry.term}"`;
      if (entry.suggestedTranslation) {
        bullet += ` → ${entry.suggestedTranslation}`;
      }
      if (entry.fromGlossary) {
        bullet += ' (glossary)';
      }
      bullets.push(`${bullet}: ${entry.context}`);
    }
    blocks.push(['**Key Terms:**', ...bullets].join('\n'));
  }

  if (ambiguousPhrases.length > 0) {
    const bullets = ambiguousPhrases.map(
      (entry) => `- "${entry.phrase}": Use interpretation "${entry.recommendation}"`
    );
    blocks.push(
      ['**Ambiguous Phrases (use these interpretations):**', ...bullets].join('\n')
    );
  }

  if (preserveExact.length > 0) {
    const bullets = preserveExact.map((item) => `- ${item}`);
    blocks.push(
      ['**Do NOT translate (keep exactly as-is):**', ...bullets].join('\n')
    );
  }

  blocks.push(`**Content Type:** ${domain}\n**Tone:** ${registerRecommendation}`);

  return blocks.join('\n\n');
}
|
|
151
|
+
|
|
152
|
+
/**
 * Build a blank analysis (used by fast mode): empty lists, `'general'`
 * domain and `'neutral'` register. A new object is returned on every call.
 */
export function createEmptyAnalysis(): PreTranslationAnalysis {
  const blank: PreTranslationAnalysis = {
    keyTerms: [],
    ambiguousPhrases: [],
    preserveExact: [],
    challenges: [],
    domain: 'general',
    registerRecommendation: 'neutral',
  };

  return blank;
}
|