@llm-translate/cli 1.0.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. package/.dockerignore +51 -0
  2. package/.env.example +33 -0
  3. package/.github/workflows/docs-pages.yml +57 -0
  4. package/.github/workflows/release.yml +49 -0
  5. package/.translaterc.json +44 -0
  6. package/CLAUDE.md +243 -0
  7. package/Dockerfile +55 -0
  8. package/README.md +371 -0
  9. package/RFC.md +1595 -0
  10. package/dist/cli/index.d.ts +2 -0
  11. package/dist/cli/index.js +4494 -0
  12. package/dist/cli/index.js.map +1 -0
  13. package/dist/index.d.ts +1152 -0
  14. package/dist/index.js +3841 -0
  15. package/dist/index.js.map +1 -0
  16. package/docker-compose.yml +56 -0
  17. package/docs/.vitepress/config.ts +161 -0
  18. package/docs/api/agent.md +262 -0
  19. package/docs/api/engine.md +274 -0
  20. package/docs/api/index.md +171 -0
  21. package/docs/api/providers.md +304 -0
  22. package/docs/changelog.md +64 -0
  23. package/docs/cli/dir.md +243 -0
  24. package/docs/cli/file.md +213 -0
  25. package/docs/cli/glossary.md +273 -0
  26. package/docs/cli/index.md +129 -0
  27. package/docs/cli/init.md +158 -0
  28. package/docs/cli/serve.md +211 -0
  29. package/docs/glossary.json +235 -0
  30. package/docs/guide/chunking.md +272 -0
  31. package/docs/guide/configuration.md +139 -0
  32. package/docs/guide/cost-optimization.md +237 -0
  33. package/docs/guide/docker.md +371 -0
  34. package/docs/guide/getting-started.md +150 -0
  35. package/docs/guide/glossary.md +241 -0
  36. package/docs/guide/index.md +86 -0
  37. package/docs/guide/ollama.md +515 -0
  38. package/docs/guide/prompt-caching.md +221 -0
  39. package/docs/guide/providers.md +232 -0
  40. package/docs/guide/quality-control.md +206 -0
  41. package/docs/guide/vitepress-integration.md +265 -0
  42. package/docs/index.md +63 -0
  43. package/docs/ja/api/agent.md +262 -0
  44. package/docs/ja/api/engine.md +274 -0
  45. package/docs/ja/api/index.md +171 -0
  46. package/docs/ja/api/providers.md +304 -0
  47. package/docs/ja/changelog.md +64 -0
  48. package/docs/ja/cli/dir.md +243 -0
  49. package/docs/ja/cli/file.md +213 -0
  50. package/docs/ja/cli/glossary.md +273 -0
  51. package/docs/ja/cli/index.md +111 -0
  52. package/docs/ja/cli/init.md +158 -0
  53. package/docs/ja/guide/chunking.md +271 -0
  54. package/docs/ja/guide/configuration.md +139 -0
  55. package/docs/ja/guide/cost-optimization.md +30 -0
  56. package/docs/ja/guide/getting-started.md +150 -0
  57. package/docs/ja/guide/glossary.md +214 -0
  58. package/docs/ja/guide/index.md +32 -0
  59. package/docs/ja/guide/ollama.md +410 -0
  60. package/docs/ja/guide/prompt-caching.md +221 -0
  61. package/docs/ja/guide/providers.md +232 -0
  62. package/docs/ja/guide/quality-control.md +137 -0
  63. package/docs/ja/guide/vitepress-integration.md +265 -0
  64. package/docs/ja/index.md +58 -0
  65. package/docs/ko/api/agent.md +262 -0
  66. package/docs/ko/api/engine.md +274 -0
  67. package/docs/ko/api/index.md +171 -0
  68. package/docs/ko/api/providers.md +304 -0
  69. package/docs/ko/changelog.md +64 -0
  70. package/docs/ko/cli/dir.md +243 -0
  71. package/docs/ko/cli/file.md +213 -0
  72. package/docs/ko/cli/glossary.md +273 -0
  73. package/docs/ko/cli/index.md +111 -0
  74. package/docs/ko/cli/init.md +158 -0
  75. package/docs/ko/guide/chunking.md +271 -0
  76. package/docs/ko/guide/configuration.md +139 -0
  77. package/docs/ko/guide/cost-optimization.md +30 -0
  78. package/docs/ko/guide/getting-started.md +150 -0
  79. package/docs/ko/guide/glossary.md +214 -0
  80. package/docs/ko/guide/index.md +32 -0
  81. package/docs/ko/guide/ollama.md +410 -0
  82. package/docs/ko/guide/prompt-caching.md +221 -0
  83. package/docs/ko/guide/providers.md +232 -0
  84. package/docs/ko/guide/quality-control.md +137 -0
  85. package/docs/ko/guide/vitepress-integration.md +265 -0
  86. package/docs/ko/index.md +58 -0
  87. package/docs/zh/api/agent.md +262 -0
  88. package/docs/zh/api/engine.md +274 -0
  89. package/docs/zh/api/index.md +171 -0
  90. package/docs/zh/api/providers.md +304 -0
  91. package/docs/zh/changelog.md +64 -0
  92. package/docs/zh/cli/dir.md +243 -0
  93. package/docs/zh/cli/file.md +213 -0
  94. package/docs/zh/cli/glossary.md +273 -0
  95. package/docs/zh/cli/index.md +111 -0
  96. package/docs/zh/cli/init.md +158 -0
  97. package/docs/zh/guide/chunking.md +271 -0
  98. package/docs/zh/guide/configuration.md +139 -0
  99. package/docs/zh/guide/cost-optimization.md +30 -0
  100. package/docs/zh/guide/getting-started.md +150 -0
  101. package/docs/zh/guide/glossary.md +214 -0
  102. package/docs/zh/guide/index.md +32 -0
  103. package/docs/zh/guide/ollama.md +410 -0
  104. package/docs/zh/guide/prompt-caching.md +221 -0
  105. package/docs/zh/guide/providers.md +232 -0
  106. package/docs/zh/guide/quality-control.md +137 -0
  107. package/docs/zh/guide/vitepress-integration.md +265 -0
  108. package/docs/zh/index.md +58 -0
  109. package/package.json +91 -0
  110. package/release.config.mjs +15 -0
  111. package/schemas/glossary.schema.json +110 -0
  112. package/src/cli/commands/dir.ts +469 -0
  113. package/src/cli/commands/file.ts +291 -0
  114. package/src/cli/commands/glossary.ts +221 -0
  115. package/src/cli/commands/init.ts +68 -0
  116. package/src/cli/commands/serve.ts +60 -0
  117. package/src/cli/index.ts +64 -0
  118. package/src/cli/options.ts +59 -0
  119. package/src/core/agent.ts +1119 -0
  120. package/src/core/chunker.ts +391 -0
  121. package/src/core/engine.ts +634 -0
  122. package/src/errors.ts +188 -0
  123. package/src/index.ts +147 -0
  124. package/src/integrations/vitepress.ts +549 -0
  125. package/src/parsers/markdown.ts +383 -0
  126. package/src/providers/claude.ts +259 -0
  127. package/src/providers/interface.ts +109 -0
  128. package/src/providers/ollama.ts +379 -0
  129. package/src/providers/openai.ts +308 -0
  130. package/src/providers/registry.ts +153 -0
  131. package/src/server/index.ts +152 -0
  132. package/src/server/middleware/auth.ts +93 -0
  133. package/src/server/middleware/logger.ts +90 -0
  134. package/src/server/routes/health.ts +84 -0
  135. package/src/server/routes/translate.ts +210 -0
  136. package/src/server/types.ts +138 -0
  137. package/src/services/cache.ts +899 -0
  138. package/src/services/config.ts +217 -0
  139. package/src/services/glossary.ts +247 -0
  140. package/src/types/analysis.ts +164 -0
  141. package/src/types/index.ts +265 -0
  142. package/src/types/modes.ts +121 -0
  143. package/src/types/mqm.ts +157 -0
  144. package/src/utils/logger.ts +141 -0
  145. package/src/utils/tokens.ts +116 -0
  146. package/tests/fixtures/glossaries/ml-glossary.json +53 -0
  147. package/tests/fixtures/input/lynq-installation.ko.md +350 -0
  148. package/tests/fixtures/input/lynq-installation.md +350 -0
  149. package/tests/fixtures/input/simple.ko.md +27 -0
  150. package/tests/fixtures/input/simple.md +27 -0
  151. package/tests/unit/chunker.test.ts +229 -0
  152. package/tests/unit/glossary.test.ts +146 -0
  153. package/tests/unit/markdown.test.ts +205 -0
  154. package/tests/unit/tokens.test.ts +81 -0
  155. package/tsconfig.json +28 -0
  156. package/tsup.config.ts +34 -0
  157. package/vitest.config.ts +16 -0
@@ -0,0 +1,217 @@
1
+ import { cosmiconfig } from 'cosmiconfig';
2
+ import { z } from 'zod';
3
+ import type { TranslateConfig, ProviderName } from '../types/index.js';
4
+ import { TranslationError, ErrorCode } from '../errors.js';
5
+
6
+ // ============================================================================
7
+ // Zod Schema for Config Validation
8
+ // ============================================================================
9
+
10
/** Provider identifiers accepted wherever the config names a provider. */
const providerNameSchema = z.enum(['claude', 'openai', 'ollama', 'custom']);

/** Optional descriptive block about the project being translated. */
const projectSchema = z.object({
  name: z.string(),
  description: z.string(),
  purpose: z.string(),
});

/** Source language, target languages and optional per-key style strings. */
const languagesSchema = z.object({
  source: z.string(),
  targets: z.array(z.string()),
  styles: z.record(z.string(), z.string()).optional(),
});

/** Default provider plus optional model, fallback list and API keys. */
const providerSectionSchema = z.object({
  default: providerNameSchema,
  model: z.string().optional(),
  fallback: z.array(providerNameSchema).optional(),
  apiKeys: z.record(providerNameSchema, z.string()).optional(),
});

/** Quality gate: threshold in [0, 100], between 1 and 10 iterations. */
const qualitySchema = z.object({
  threshold: z.number().min(0).max(100),
  maxIterations: z.number().min(1).max(10),
  evaluationMethod: z.enum(['llm', 'embedding', 'hybrid']),
});

/** Chunking limits: 100 to 8000 tokens per chunk, non-negative overlap. */
const chunkingSchema = z.object({
  maxTokens: z.number().min(100).max(8000),
  overlapTokens: z.number().min(0),
  preserveStructure: z.boolean(),
});

/** Optional glossary reference. */
const glossarySectionSchema = z.object({
  path: z.string(),
  strict: z.boolean(),
});

/** Output location and optional cache location. */
const pathsSchema = z.object({
  output: z.string(),
  cache: z.string().optional(),
});

/**
 * Schema a loaded config file must satisfy. Only `project`, `glossary` and
 * `ignore` (and a few nested fields) are optional; every other section has to
 * be present in the file.
 */
const configSchema = z.object({
  version: z.string(),
  project: projectSchema.optional(),
  languages: languagesSchema,
  provider: providerSectionSchema,
  quality: qualitySchema,
  chunking: chunkingSchema,
  glossary: glossarySectionSchema.optional(),
  paths: pathsSchema,
  ignore: z.array(z.string()).optional(),
});
54
+
55
+ // ============================================================================
56
+ // Default Configuration
57
+ // ============================================================================
58
+
59
/**
 * Built-in configuration that `loadConfig` falls back to when no config file
 * is found or the file that was found is empty.
 */
const defaultConfig: TranslateConfig = {
  version: '1.0',
  // English source; no target languages are preselected.
  languages: { source: 'en', targets: [] },
  provider: { default: 'claude' },
  quality: { threshold: 85, maxIterations: 4, evaluationMethod: 'llm' },
  chunking: { maxTokens: 1024, overlapTokens: 150, preserveStructure: true },
  // NOTE(review): "{lang}" looks like a placeholder for the target language,
  // substituted by whoever consumes `paths.output` — confirm against callers.
  paths: { output: './{lang}' },
};
82
+
83
+ // ============================================================================
84
+ // Config Loader
85
+ // ============================================================================
86
+
87
/** File names handed to cosmiconfig as its `searchPlaces` option. */
const configSearchPlaces = [
  '.translaterc',
  '.translaterc.json',
  '.translaterc.yaml',
  '.translaterc.yml',
  'translate.config.js',
  'translate.config.mjs',
];

/** Shared cosmiconfig explorer for the "translate" module name. */
const explorer = cosmiconfig('translate', {
  searchPlaces: configSearchPlaces,
});
97
+
98
/** Options accepted by `loadConfig`. */
export interface LoadConfigOptions {
  /** Explicit config file to load; when set, no directory search is done. */
  configPath?: string;

  /** Directory the search starts from; defaults to `process.cwd()`. */
  cwd?: string;
}
102
+
103
/**
 * Load and validate the translation config.
 *
 * When `options.configPath` is given that file is loaded directly; otherwise
 * the explorer searches starting at `options.cwd` (default: `process.cwd()`).
 *
 * @param options - Optional explicit config path and/or search directory.
 * @returns The validated config, or a fresh copy of the built-in defaults when
 *   no config file is found or the file is empty.
 * @throws TranslationError with `ErrorCode.CONFIG_NOT_FOUND` when the explorer
 *   throws while loading/searching. Note that this wraps every explorer
 *   failure, not only a missing file.
 * @throws TranslationError with `ErrorCode.CONFIG_INVALID` when the file's
 *   content does not satisfy `configSchema`; the details list each issue's
 *   dotted path and message.
 */
export async function loadConfig(
  options: LoadConfigOptions = {}
): Promise<TranslateConfig> {
  const { configPath, cwd = process.cwd() } = options;

  let result: Awaited<ReturnType<typeof explorer.search>>;

  try {
    result = configPath
      ? await explorer.load(configPath)
      : await explorer.search(cwd);
  } catch (error) {
    throw new TranslationError(ErrorCode.CONFIG_NOT_FOUND, {
      path: configPath ?? cwd,
      error: error instanceof Error ? error.message : String(error),
    });
  }

  if (!result || result.isEmpty) {
    // Hand out a copy rather than the module-level object: a caller that
    // mutates the returned config (e.g. pushes a target language) must not
    // change the defaults seen by later calls.
    return {
      ...defaultConfig,
      languages: {
        ...defaultConfig.languages,
        targets: [...defaultConfig.languages.targets],
      },
      provider: { ...defaultConfig.provider },
      quality: { ...defaultConfig.quality },
      chunking: { ...defaultConfig.chunking },
      paths: { ...defaultConfig.paths },
    };
  }

  const parseResult = configSchema.safeParse(result.config);

  if (!parseResult.success) {
    throw new TranslationError(ErrorCode.CONFIG_INVALID, {
      path: result.filepath,
      // `issues` is the canonical ZodError field; `errors` is only a legacy
      // alias for it.
      errors: parseResult.error.issues.map((issue) => ({
        path: issue.path.join('.'),
        message: issue.message,
      })),
    });
  }

  return parseResult.data as TranslateConfig;
}
143
+
144
+ // ============================================================================
145
+ // Config Merger (CLI options override config file)
146
+ // ============================================================================
147
+
148
/**
 * Values supplied on the command line that `mergeConfig` layers on top of a
 * loaded config. Every field is optional; an omitted field leaves the
 * corresponding config value untouched.
 */
export interface CLIOverrides {
  /** Replaces `languages.source`. */
  sourceLang?: string;
  /** Replaces `languages.targets` with a single-element list. */
  targetLang?: string;
  /** Replaces `provider.default`. */
  provider?: ProviderName;
  /** Replaces `provider.model`. */
  model?: string;
  /** Replaces `quality.threshold`. */
  quality?: number;
  /** Replaces `quality.maxIterations`. */
  maxIterations?: number;
  /** Replaces `chunking.maxTokens`. */
  chunkSize?: number;
  /** Replaces `glossary.path`. */
  glossary?: string;
  /** Replaces `paths.output`. */
  output?: string;
  /** When true, `paths.cache` is set to `undefined`. */
  noCache?: boolean;
}
160
+
161
/**
 * Layer CLI overrides on top of a config without mutating the input.
 *
 * String overrides apply only when non-empty; numeric overrides apply whenever
 * they are not `undefined` (so `0` is honoured). Sections that receive no
 * override are carried over by reference.
 *
 * @param config - Base configuration (left untouched).
 * @param overrides - Command-line values that take precedence.
 * @returns A new config object with the overrides applied.
 */
export function mergeConfig(
  config: TranslateConfig,
  overrides: CLIOverrides
): TranslateConfig {
  const {
    sourceLang,
    targetLang,
    provider,
    model,
    quality,
    maxIterations,
    chunkSize,
    glossary,
    output,
    noCache,
  } = overrides;

  const result = { ...config };

  // Languages
  if (sourceLang) {
    result.languages = { ...result.languages, source: sourceLang };
  }
  if (targetLang) {
    result.languages = { ...result.languages, targets: [targetLang] };
  }

  // Provider
  if (provider) {
    result.provider = { ...result.provider, default: provider };
  }
  if (model) {
    result.provider = { ...result.provider, model };
  }

  // Quality
  if (quality !== undefined) {
    result.quality = { ...result.quality, threshold: quality };
  }
  if (maxIterations !== undefined) {
    result.quality = { ...result.quality, maxIterations };
  }

  // Chunking
  if (chunkSize !== undefined) {
    result.chunking = { ...result.chunking, maxTokens: chunkSize };
  }

  // Glossary: keep an existing `strict` flag, otherwise default to non-strict.
  if (glossary) {
    const strict = result.glossary?.strict ?? false;
    result.glossary = { path: glossary, strict };
  }

  // Paths
  if (output) {
    result.paths = { ...result.paths, output };
  }
  if (noCache) {
    result.paths = { ...result.paths, cache: undefined };
  }

  return result;
}
@@ -0,0 +1,247 @@
1
+ import { readFile } from 'node:fs/promises';
2
+ import type {
3
+ Glossary,
4
+ GlossaryTerm,
5
+ ResolvedGlossary,
6
+ ResolvedGlossaryTerm,
7
+ } from '../types/index.js';
8
+ import { TranslationError, ErrorCode } from '../errors.js';
9
+
10
+ // ============================================================================
11
+ // Glossary Loading
12
+ // ============================================================================
13
+
14
/**
 * Read a glossary JSON file from disk.
 *
 * @param path - Location of the glossary file.
 * @returns The parsed glossary.
 * @throws TranslationError with `ErrorCode.GLOSSARY_NOT_FOUND` when the file
 *   cannot be read.
 * @throws TranslationError with `ErrorCode.GLOSSARY_INVALID` when the content
 *   is not valid JSON, or is not an object carrying a `terms` array.
 */
export async function loadGlossary(path: string): Promise<Glossary> {
  let content: string;

  try {
    content = await readFile(path, 'utf-8');
  } catch (error) {
    throw new TranslationError(ErrorCode.GLOSSARY_NOT_FOUND, {
      path,
      error: error instanceof Error ? error.message : String(error),
    });
  }

  let parsed: unknown;

  try {
    // A leading UTF-8 byte-order mark survives the 'utf-8' decode and would
    // make JSON.parse reject an otherwise valid file.
    parsed = JSON.parse(content.replace(/^\uFEFF/, ''));
  } catch (error) {
    throw new TranslationError(ErrorCode.GLOSSARY_INVALID, {
      path,
      error: error instanceof Error ? error.message : String(error),
    });
  }

  // Minimal structural check: without it, values such as `null`, `[]` or an
  // object lacking `terms` would only fail later with an unrelated TypeError
  // when the glossary is resolved.
  const hasTermList =
    typeof parsed === 'object' &&
    parsed !== null &&
    !Array.isArray(parsed) &&
    Array.isArray((parsed as { terms?: unknown }).terms);

  if (!hasTermList) {
    throw new TranslationError(ErrorCode.GLOSSARY_INVALID, {
      path,
      error: 'Glossary must be a JSON object with a "terms" array',
    });
  }

  return parsed as Glossary;
}
35
+
36
+ // ============================================================================
37
+ // Glossary Resolution
38
+ // ============================================================================
39
+
40
/**
 * Narrow a multi-language glossary down to a single target language.
 *
 * Terms for which `resolveGlossaryTerm` returns `null` are left out; the
 * remaining terms keep their original order.
 *
 * @param glossary - Glossary as loaded from disk.
 * @param targetLang - Language the terms are resolved for.
 * @returns Glossary metadata (with `targetLang` filled in) and resolved terms.
 */
export function resolveGlossary(
  glossary: Glossary,
  targetLang: string
): ResolvedGlossary {
  const { name, sourceLang, version, domain } = glossary.metadata;

  const terms: ResolvedGlossaryTerm[] = [];
  for (const entry of glossary.terms) {
    const resolved = resolveGlossaryTerm(entry, targetLang);
    if (resolved !== null) {
      terms.push(resolved);
    }
  }

  return {
    metadata: { name, sourceLang, targetLang, version, domain },
    terms,
  };
}
57
+
58
/**
 * Resolve one glossary term for a target language.
 *
 * @returns The resolved term, or `null` when `resolveTarget` yields no target
 *   text for this language.
 */
function resolveGlossaryTerm(
  term: GlossaryTerm,
  targetLang: string
): ResolvedGlossaryTerm | null {
  const target = resolveTarget(term, targetLang);
  if (target === undefined) {
    // Nothing to enforce for this language.
    return null;
  }

  const { source, context, caseSensitive } = term;
  return {
    source,
    target,
    context,
    caseSensitive: caseSensitive ?? false,
    doNotTranslate: resolveDoNotTranslate(term, targetLang),
  };
}
77
+
78
/**
 * Work out the text a term must appear as in the target language.
 *
 * @param term - Glossary term to resolve.
 * @param targetLang - Language being translated into.
 * @returns The source text itself when the term is marked do-not-translate
 *   (globally or for this language), the non-empty translation registered for
 *   `targetLang`, or `undefined` when there is none.
 */
function resolveTarget(term: GlossaryTerm, targetLang: string): string | undefined {
  if (term.doNotTranslate || term.doNotTranslateFor?.includes(targetLang)) {
    return term.source;
  }

  // Glossaries come from unvalidated JSON: `targets` may be missing entirely
  // (e.g. a term that only lists `doNotTranslateFor`), and a lookup such as
  // "toString" would otherwise pick up an inherited, non-string value.
  const translation: unknown = term.targets?.[targetLang];
  if (typeof translation === 'string' && translation !== '') {
    return translation;
  }

  // No translation available for this language
  return undefined;
}
95
+
96
/**
 * Tell whether a term must be kept untranslated for the given language:
 * either it is flagged `doNotTranslate` outright, or `targetLang` appears in
 * its `doNotTranslateFor` list.
 */
function resolveDoNotTranslate(term: GlossaryTerm, targetLang: string): boolean {
  if (term.doNotTranslate === true) {
    return true;
  }

  const excludedForLang = term.doNotTranslateFor?.includes(targetLang);
  return excludedForLang === true;
}
102
+
103
+ // ============================================================================
104
+ // Glossary Lookup
105
+ // ============================================================================
106
+
107
/** Query helpers over a resolved glossary, as built by `createGlossaryLookup`. */
export interface GlossaryLookup {
  /** Look up a single term by its source text; `undefined` when unknown. */
  find(text: string): ResolvedGlossaryTerm | undefined;

  /** Collect every glossary term whose source text occurs in `text`. */
  findAll(text: string): ResolvedGlossaryTerm[];

  /** Return all terms of the glossary. */
  getTerms(): ResolvedGlossaryTerm[];

  /** Render the glossary as text suitable for injecting into a prompt. */
  formatForPrompt(): string;
}
128
+
129
/**
 * Build lookup helpers over a resolved glossary.
 *
 * Case-sensitive and case-insensitive terms are indexed separately, so a
 * case-sensitive term is only ever found by its exact spelling and the two
 * kinds cannot overwrite each other when their lower-cased sources coincide.
 * Within one kind, a later term with the same key replaces an earlier one.
 *
 * @param glossary - Glossary already resolved for one target language.
 * @returns An object exposing `find`, `findAll`, `getTerms` and
 *   `formatForPrompt`.
 */
export function createGlossaryLookup(glossary: ResolvedGlossary): GlossaryLookup {
  // Exact-spelling index for case-sensitive terms.
  const exactIndex = new Map<string, ResolvedGlossaryTerm>();
  // Lower-cased index for case-insensitive terms.
  const foldedIndex = new Map<string, ResolvedGlossaryTerm>();

  const caseSensitiveTerms: ResolvedGlossaryTerm[] = [];
  // The lower-cased needle is computed once here instead of on every search.
  const caseInsensitiveTerms: Array<{ term: ResolvedGlossaryTerm; needle: string }> = [];

  for (const term of glossary.terms) {
    if (term.caseSensitive) {
      exactIndex.set(term.source, term);
      caseSensitiveTerms.push(term);
    } else {
      const needle = term.source.toLowerCase();
      foldedIndex.set(needle, term);
      caseInsensitiveTerms.push({ term, needle });
    }
  }

  return {
    find(text: string): ResolvedGlossaryTerm | undefined {
      // An exact-spelling hit on a case-sensitive term wins; otherwise fall
      // back to the case-insensitive terms only.
      return exactIndex.get(text) ?? foldedIndex.get(text.toLowerCase());
    },

    findAll(text: string): ResolvedGlossaryTerm[] {
      // Plain substring search: case-sensitive matches first, then
      // case-insensitive ones, each group in glossary order.
      const matches = caseSensitiveTerms.filter((term) =>
        text.includes(term.source)
      );

      const lowerText = text.toLowerCase();
      for (const { term, needle } of caseInsensitiveTerms) {
        if (lowerText.includes(needle)) {
          matches.push(term);
        }
      }

      return matches;
    },

    getTerms(): ResolvedGlossaryTerm[] {
      return glossary.terms;
    },

    formatForPrompt(): string {
      const lines = glossary.terms.map((term) => {
        // There is always at least the case flag, so the parenthesised suffix
        // is never empty.
        const flags = [term.caseSensitive ? 'case-sensitive' : 'case-insensitive'];
        if (term.context) {
          flags.push(`context: ${term.context}`);
        }
        const flagStr = ` (${flags.join(', ')})`;

        return term.doNotTranslate
          ? `- "${term.source}" → [DO NOT TRANSLATE, keep as-is]${flagStr}`
          : `- "${term.source}" → "${term.target}"${flagStr}`;
      });

      return lines.join('\n');
    },
  };
}
209
+
210
+ // ============================================================================
211
+ // Glossary Compliance Check
212
+ // ============================================================================
213
+
214
/** Outcome of `checkGlossaryCompliance`. */
export interface ComplianceResult {
  /** Source texts of the terms whose target was found in the translation. */
  applied: string[];

  /** Source texts of the terms whose target was not found. */
  missed: string[];

  /** Percentage of applied terms, 0 to 100. */
  score: number;
}
219
+
220
/**
 * Check how many glossary terms present in the source text show up, in their
 * target form, in the translation.
 *
 * A term counts as applied when its target text is a substring of the
 * translation; the comparison ignores case unless the term is case-sensitive.
 *
 * @param sourceText - Original text.
 * @param translatedText - Translation to check.
 * @param glossary - Glossary resolved for the translation's language.
 * @returns Applied and missed term sources plus a 0 to 100 score; the score is
 *   100 when no glossary term occurs in the source text.
 */
export function checkGlossaryCompliance(
  sourceText: string,
  translatedText: string,
  glossary: ResolvedGlossary
): ComplianceResult {
  const sourceTerms = createGlossaryLookup(glossary).findAll(sourceText);

  const applied: string[] = [];
  const missed: string[] = [];

  // Lower-case the translation once instead of once per case-insensitive term.
  const lowerTranslated = translatedText.toLowerCase();

  for (const term of sourceTerms) {
    const targetInTranslation = term.caseSensitive
      ? translatedText.includes(term.target)
      : lowerTranslated.includes(term.target.toLowerCase());

    if (targetInTranslation) {
      applied.push(term.source);
    } else {
      missed.push(term.source);
    }
  }

  const total = sourceTerms.length;
  const score = total > 0 ? (applied.length / total) * 100 : 100;

  return { applied, missed, score };
}
@@ -0,0 +1,164 @@
1
+ /**
2
+ * Pre-Translation Analysis Types
3
+ * Based on MAPS (Multi-Aspect Prompting and Selection) framework
4
+ *
5
+ * Reference: TACL 2024
6
+ * https://direct.mit.edu/tacl/article/doi/10.1162/tacl_a_00642/119992
7
+ * https://github.com/zwhe99/MAPS-mt
8
+ */
9
+
10
/** A key term picked out by the pre-translation analysis. */
export interface AnalyzedTerm {
  /** Term as written in the source language. */
  term: string;

  /** How the term is used in the text. */
  context: string;

  /** Proposed translation, for terms the glossary does not cover. */
  suggestedTranslation?: string;

  /** True when the term was found in the glossary. */
  fromGlossary: boolean;
}
26
+
27
/** A phrase the analysis flagged as open to more than one reading. */
export interface AmbiguousPhrase {
  /** The phrase in question. */
  phrase: string;

  /** Candidate readings of the phrase. */
  interpretations: string[];

  /** The reading the translator should go with. */
  recommendation: string;
}
40
+
41
/** Subject area the content is classified into. */
export type ContentDomain = 'technical' | 'marketing' | 'legal' | 'medical' | 'general';
50
+
51
/** Formality level recommended for the translation. */
export type RegisterLevel =
  | 'formal'
  | 'informal'
  | 'neutral';
55
+
56
/** Result of the MAPS-style analysis performed before translating. */
export interface PreTranslationAnalysis {
  /** Domain-specific key terms found in the text. */
  keyTerms: AnalyzedTerm[];

  /** Phrases that can be read in more than one way. */
  ambiguousPhrases: AmbiguousPhrase[];

  /** Items to carry over untranslated (code, URLs, names). */
  preserveExact: string[];

  /** Translation difficulties noted for this language pair. */
  challenges: string[];

  /** Content domain that was detected. */
  domain: ContentDomain;

  /** Formality level recommended for the output. */
  registerRecommendation: RegisterLevel;
}
78
+
79
/**
 * Parse the pre-analysis JSON an LLM returned, tolerating surrounding prose
 * and malformed fields.
 *
 * The JSON object is taken from the first `{` to the last `}` of the response.
 * Because model output is untrusted, every field is validated: non-array list
 * fields become empty lists, list entries of the wrong shape are dropped,
 * missing string fields become `''`, and an unknown `domain` or
 * `registerRecommendation` falls back to `'general'` / `'neutral'`.
 *
 * @param response - Raw text returned by the model.
 * @returns The normalised analysis, or `null` when the response contains no
 *   parseable JSON object.
 */
export function parseAnalysisResponse(
  response: string
): PreTranslationAnalysis | null {
  // Shape with the expected keys but nothing trusted about the values yet.
  type Loose<T> = { [K in keyof T]?: unknown };

  const isObject = (value: unknown): value is object =>
    typeof value === 'object' && value !== null;
  const asArray = (value: unknown): unknown[] =>
    Array.isArray(value) ? value : [];
  const asString = (value: unknown): string =>
    typeof value === 'string' ? value : '';
  const asStrings = (value: unknown): string[] =>
    asArray(value).filter((item): item is string => typeof item === 'string');

  // Extract JSON from response
  const jsonMatch = response.match(/\{[\s\S]*\}/);
  if (!jsonMatch) {
    return null;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonMatch[0]);
  } catch {
    return null;
  }
  if (!isObject(parsed)) {
    return null;
  }
  const raw = parsed as Loose<PreTranslationAnalysis>;

  const keyTerms: PreTranslationAnalysis['keyTerms'] = [];
  for (const entry of asArray(raw.keyTerms)) {
    if (!isObject(entry)) continue;
    const item = entry as Loose<PreTranslationAnalysis['keyTerms'][number]>;
    // A key term without its source text is of no use downstream.
    if (typeof item.term !== 'string') continue;
    keyTerms.push({
      term: item.term,
      context: asString(item.context),
      ...(typeof item.suggestedTranslation === 'string'
        ? { suggestedTranslation: item.suggestedTranslation }
        : {}),
      fromGlossary: item.fromGlossary === true,
    });
  }

  const ambiguousPhrases: PreTranslationAnalysis['ambiguousPhrases'] = [];
  for (const entry of asArray(raw.ambiguousPhrases)) {
    if (!isObject(entry)) continue;
    const item = entry as Loose<PreTranslationAnalysis['ambiguousPhrases'][number]>;
    if (typeof item.phrase !== 'string') continue;
    ambiguousPhrases.push({
      phrase: item.phrase,
      interpretations: asStrings(item.interpretations),
      recommendation: asString(item.recommendation),
    });
  }

  const knownDomains: ReadonlyArray<PreTranslationAnalysis['domain']> = [
    'technical',
    'marketing',
    'legal',
    'medical',
    'general',
  ];
  const knownRegisters: ReadonlyArray<PreTranslationAnalysis['registerRecommendation']> = [
    'formal',
    'informal',
    'neutral',
  ];

  return {
    keyTerms,
    ambiguousPhrases,
    preserveExact: asStrings(raw.preserveExact),
    challenges: asStrings(raw.challenges),
    domain: knownDomains.find((d) => d === raw.domain) ?? 'general',
    registerRecommendation:
      knownRegisters.find((r) => r === raw.registerRecommendation) ?? 'neutral',
  };
}
106
+
107
/**
 * Render an analysis as markdown-style sections for the translation prompt.
 *
 * Sections for key terms, ambiguous phrases and preserved items are emitted
 * only when non-empty; the content type / tone section is always emitted.
 * Sections are separated by a blank line. `challenges` is not rendered.
 *
 * @param analysis - Analysis to render.
 * @returns The prompt fragment.
 */
export function formatAnalysisForPrompt(
  analysis: PreTranslationAnalysis
): string {
  const blocks: string[] = [];

  if (analysis.keyTerms.length > 0) {
    const bullets: string[] = [];
    for (const entry of analysis.keyTerms) {
      let bullet = `- "${entry.term}"`;
      if (entry.suggestedTranslation) {
        bullet += ` → ${entry.suggestedTranslation}`;
      }
      if (entry.fromGlossary) {
        bullet += ' (glossary)';
      }
      bullets.push(`${bullet}: ${entry.context}`);
    }
    blocks.push(`**Key Terms:**\n${bullets.join('\n')}`);
  }

  if (analysis.ambiguousPhrases.length > 0) {
    const bullets: string[] = [];
    for (const entry of analysis.ambiguousPhrases) {
      bullets.push(`- "${entry.phrase}": Use interpretation "${entry.recommendation}"`);
    }
    blocks.push(
      `**Ambiguous Phrases (use these interpretations):**\n${bullets.join('\n')}`
    );
  }

  if (analysis.preserveExact.length > 0) {
    const bullets = analysis.preserveExact.map((item) => `- ${item}`);
    blocks.push(`**Do NOT translate (keep exactly as-is):**\n${bullets.join('\n')}`);
  }

  // Always present, even when every list above is empty.
  blocks.push(
    `**Content Type:** ${analysis.domain}\n**Tone:** ${analysis.registerRecommendation}`
  );

  return blocks.join('\n\n');
}
151
+
152
/**
 * Build an analysis with no findings (used by fast mode): empty lists, domain
 * `'general'`, register `'neutral'`. Each call returns fresh arrays.
 */
export function createEmptyAnalysis(): PreTranslationAnalysis {
  const blank: PreTranslationAnalysis = {
    keyTerms: [],
    ambiguousPhrases: [],
    preserveExact: [],
    challenges: [],
    domain: 'general',
    registerRecommendation: 'neutral',
  };
  return blank;
}