@llm-translate/cli 1.0.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +51 -0
- package/.env.example +33 -0
- package/.github/workflows/docs-pages.yml +57 -0
- package/.github/workflows/release.yml +49 -0
- package/.translaterc.json +44 -0
- package/CLAUDE.md +243 -0
- package/Dockerfile +55 -0
- package/README.md +371 -0
- package/RFC.md +1595 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +4494 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/index.d.ts +1152 -0
- package/dist/index.js +3841 -0
- package/dist/index.js.map +1 -0
- package/docker-compose.yml +56 -0
- package/docs/.vitepress/config.ts +161 -0
- package/docs/api/agent.md +262 -0
- package/docs/api/engine.md +274 -0
- package/docs/api/index.md +171 -0
- package/docs/api/providers.md +304 -0
- package/docs/changelog.md +64 -0
- package/docs/cli/dir.md +243 -0
- package/docs/cli/file.md +213 -0
- package/docs/cli/glossary.md +273 -0
- package/docs/cli/index.md +129 -0
- package/docs/cli/init.md +158 -0
- package/docs/cli/serve.md +211 -0
- package/docs/glossary.json +235 -0
- package/docs/guide/chunking.md +272 -0
- package/docs/guide/configuration.md +139 -0
- package/docs/guide/cost-optimization.md +237 -0
- package/docs/guide/docker.md +371 -0
- package/docs/guide/getting-started.md +150 -0
- package/docs/guide/glossary.md +241 -0
- package/docs/guide/index.md +86 -0
- package/docs/guide/ollama.md +515 -0
- package/docs/guide/prompt-caching.md +221 -0
- package/docs/guide/providers.md +232 -0
- package/docs/guide/quality-control.md +206 -0
- package/docs/guide/vitepress-integration.md +265 -0
- package/docs/index.md +63 -0
- package/docs/ja/api/agent.md +262 -0
- package/docs/ja/api/engine.md +274 -0
- package/docs/ja/api/index.md +171 -0
- package/docs/ja/api/providers.md +304 -0
- package/docs/ja/changelog.md +64 -0
- package/docs/ja/cli/dir.md +243 -0
- package/docs/ja/cli/file.md +213 -0
- package/docs/ja/cli/glossary.md +273 -0
- package/docs/ja/cli/index.md +111 -0
- package/docs/ja/cli/init.md +158 -0
- package/docs/ja/guide/chunking.md +271 -0
- package/docs/ja/guide/configuration.md +139 -0
- package/docs/ja/guide/cost-optimization.md +30 -0
- package/docs/ja/guide/getting-started.md +150 -0
- package/docs/ja/guide/glossary.md +214 -0
- package/docs/ja/guide/index.md +32 -0
- package/docs/ja/guide/ollama.md +410 -0
- package/docs/ja/guide/prompt-caching.md +221 -0
- package/docs/ja/guide/providers.md +232 -0
- package/docs/ja/guide/quality-control.md +137 -0
- package/docs/ja/guide/vitepress-integration.md +265 -0
- package/docs/ja/index.md +58 -0
- package/docs/ko/api/agent.md +262 -0
- package/docs/ko/api/engine.md +274 -0
- package/docs/ko/api/index.md +171 -0
- package/docs/ko/api/providers.md +304 -0
- package/docs/ko/changelog.md +64 -0
- package/docs/ko/cli/dir.md +243 -0
- package/docs/ko/cli/file.md +213 -0
- package/docs/ko/cli/glossary.md +273 -0
- package/docs/ko/cli/index.md +111 -0
- package/docs/ko/cli/init.md +158 -0
- package/docs/ko/guide/chunking.md +271 -0
- package/docs/ko/guide/configuration.md +139 -0
- package/docs/ko/guide/cost-optimization.md +30 -0
- package/docs/ko/guide/getting-started.md +150 -0
- package/docs/ko/guide/glossary.md +214 -0
- package/docs/ko/guide/index.md +32 -0
- package/docs/ko/guide/ollama.md +410 -0
- package/docs/ko/guide/prompt-caching.md +221 -0
- package/docs/ko/guide/providers.md +232 -0
- package/docs/ko/guide/quality-control.md +137 -0
- package/docs/ko/guide/vitepress-integration.md +265 -0
- package/docs/ko/index.md +58 -0
- package/docs/zh/api/agent.md +262 -0
- package/docs/zh/api/engine.md +274 -0
- package/docs/zh/api/index.md +171 -0
- package/docs/zh/api/providers.md +304 -0
- package/docs/zh/changelog.md +64 -0
- package/docs/zh/cli/dir.md +243 -0
- package/docs/zh/cli/file.md +213 -0
- package/docs/zh/cli/glossary.md +273 -0
- package/docs/zh/cli/index.md +111 -0
- package/docs/zh/cli/init.md +158 -0
- package/docs/zh/guide/chunking.md +271 -0
- package/docs/zh/guide/configuration.md +139 -0
- package/docs/zh/guide/cost-optimization.md +30 -0
- package/docs/zh/guide/getting-started.md +150 -0
- package/docs/zh/guide/glossary.md +214 -0
- package/docs/zh/guide/index.md +32 -0
- package/docs/zh/guide/ollama.md +410 -0
- package/docs/zh/guide/prompt-caching.md +221 -0
- package/docs/zh/guide/providers.md +232 -0
- package/docs/zh/guide/quality-control.md +137 -0
- package/docs/zh/guide/vitepress-integration.md +265 -0
- package/docs/zh/index.md +58 -0
- package/package.json +91 -0
- package/release.config.mjs +15 -0
- package/schemas/glossary.schema.json +110 -0
- package/src/cli/commands/dir.ts +469 -0
- package/src/cli/commands/file.ts +291 -0
- package/src/cli/commands/glossary.ts +221 -0
- package/src/cli/commands/init.ts +68 -0
- package/src/cli/commands/serve.ts +60 -0
- package/src/cli/index.ts +64 -0
- package/src/cli/options.ts +59 -0
- package/src/core/agent.ts +1119 -0
- package/src/core/chunker.ts +391 -0
- package/src/core/engine.ts +634 -0
- package/src/errors.ts +188 -0
- package/src/index.ts +147 -0
- package/src/integrations/vitepress.ts +549 -0
- package/src/parsers/markdown.ts +383 -0
- package/src/providers/claude.ts +259 -0
- package/src/providers/interface.ts +109 -0
- package/src/providers/ollama.ts +379 -0
- package/src/providers/openai.ts +308 -0
- package/src/providers/registry.ts +153 -0
- package/src/server/index.ts +152 -0
- package/src/server/middleware/auth.ts +93 -0
- package/src/server/middleware/logger.ts +90 -0
- package/src/server/routes/health.ts +84 -0
- package/src/server/routes/translate.ts +210 -0
- package/src/server/types.ts +138 -0
- package/src/services/cache.ts +899 -0
- package/src/services/config.ts +217 -0
- package/src/services/glossary.ts +247 -0
- package/src/types/analysis.ts +164 -0
- package/src/types/index.ts +265 -0
- package/src/types/modes.ts +121 -0
- package/src/types/mqm.ts +157 -0
- package/src/utils/logger.ts +141 -0
- package/src/utils/tokens.ts +116 -0
- package/tests/fixtures/glossaries/ml-glossary.json +53 -0
- package/tests/fixtures/input/lynq-installation.ko.md +350 -0
- package/tests/fixtures/input/lynq-installation.md +350 -0
- package/tests/fixtures/input/simple.ko.md +27 -0
- package/tests/fixtures/input/simple.md +27 -0
- package/tests/unit/chunker.test.ts +229 -0
- package/tests/unit/glossary.test.ts +146 -0
- package/tests/unit/markdown.test.ts +205 -0
- package/tests/unit/tokens.test.ts +81 -0
- package/tsconfig.json +28 -0
- package/tsup.config.ts +34 -0
- package/vitest.config.ts +16 -0
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
import { unified } from 'unified';
|
|
2
|
+
import remarkParse from 'remark-parse';
|
|
3
|
+
import remarkStringify from 'remark-stringify';
|
|
4
|
+
import remarkGfm from 'remark-gfm';
|
|
5
|
+
import type { Root, RootContent, Text, Code, InlineCode } from 'mdast';
|
|
6
|
+
import { visit } from 'unist-util-visit';
|
|
7
|
+
|
|
8
|
+
// ============================================================================
|
|
9
|
+
// Types
|
|
10
|
+
// ============================================================================
|
|
11
|
+
|
|
12
|
+
/**
 * Everything produced by parsing one markdown document.
 */
export interface ParsedDocument {
  /** The markdown source exactly as it was handed to the parser */
  original: string;
  /** Syntax tree (mdast `Root`) built from `original` */
  ast: Root;
  /** Nodes collected from the tree; each one carries its own `translatable` flag */
  textNodes: TextNode[];
}
|
|
20
|
+
|
|
21
|
+
/**
 * A single value-carrying node lifted out of the markdown AST.
 */
export interface TextNode {
  /** Identifier that is unique within one parsed document */
  id: string;
  /** The node's textual value (the string handed to the translator) */
  content: string;
  /** AST node type this entry was created from */
  type: string;
  /** Source location of the node, when the parser recorded one */
  position?: {
    start: { line: number; column: number; offset?: number };
    end: { line: number; column: number; offset?: number };
  };
  /** Child indices used to locate the node in the AST again when restoring */
  path: number[];
  /** `false` for nodes that must be left untouched by translation */
  translatable: boolean;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Lookup table from a text node's `id` to its translated string.
 */
export interface TranslationMap {
  [nodeId: string]: string;
}
|
|
42
|
+
|
|
43
|
+
// ============================================================================
|
|
44
|
+
// Parser Implementation
|
|
45
|
+
// ============================================================================
|
|
46
|
+
|
|
47
|
+
/**
 * Parse a markdown string (with GFM extensions) and collect its text nodes.
 *
 * @param content - Markdown source to parse
 * @returns The untouched source, the parsed AST, and the nodes extracted from it
 */
export async function parseMarkdown(content: string): Promise<ParsedDocument> {
  const tree = unified().use(remarkParse).use(remarkGfm).parse(content) as Root;

  return {
    original: content,
    ast: tree,
    textNodes: extractTextNodes(tree),
  };
}
|
|
64
|
+
|
|
65
|
+
/**
 * Apply translations to a copy of the document's AST and serialize it back
 * to markdown.
 *
 * Only nodes flagged `translatable` that have a non-empty entry in
 * `translations` are touched; everything else keeps its source text.
 *
 * @param document - Parsed document whose `textNodes` carry the AST paths
 * @param translations - Translated strings keyed by text-node id
 * @returns The markdown produced from the translated AST
 */
export async function applyTranslations(
  document: ParsedDocument,
  translations: TranslationMap
): Promise<string> {
  // Work on a deep copy so the caller's parsed document stays reusable.
  const ast = structuredClone(document.ast);

  for (const textNode of document.textNodes) {
    if (!textNode.translatable) continue;

    // A missing or empty translation means "keep the source text".
    const translation = translations[textNode.id];
    if (!translation) continue;

    const node = getNodeAtPath(ast, textNode.path);

    // Translatable entries are always created from `text` nodes. Refusing to
    // write into any other node type guarantees that a stale or wrong path
    // can never overwrite code, inline code, or raw HTML.
    if (node && node.type === 'text') {
      (node as Text).value = translation;
    }
  }

  // Serialize with fixed markers so the output style is stable across runs.
  const processor = unified()
    .use(remarkGfm)
    .use(remarkStringify, {
      bullet: '-',
      emphasis: '*',
      strong: '*',
      fence: '`',
      fences: true,
      listItemIndent: 'one',
    });

  const result = processor.stringify(ast);
  return String(result);
}
|
|
104
|
+
|
|
105
|
+
// ============================================================================
|
|
106
|
+
// Text Node Extraction
|
|
107
|
+
// ============================================================================
|
|
108
|
+
|
|
109
|
+
/**
 * Walk the AST and collect every `text`, `code`, and `inlineCode` node.
 *
 * Code nodes are recorded with `translatable: false`; text nodes that are
 * empty or whitespace-only are skipped entirely.
 */
function extractTextNodes(ast: Root): TextNode[] {
  const collected: TextNode[] = [];
  let counter = 0;

  visit(ast, (node, index, parent) => {
    const isCode = node.type === 'code' || node.type === 'inlineCode';
    if (!isCode && node.type !== 'text') return;

    const value = (node as Text | Code | InlineCode).value;

    // Blank text carries nothing to translate; code is always recorded.
    if (!isCode && !value.trim()) return;

    collected.push({
      id: `node-${counter++}`,
      content: value,
      type: node.type,
      position: node.position,
      path: getNodePath(ast, node, index, parent),
      translatable: !isCode,
    });
  });

  return collected;
}
|
|
147
|
+
|
|
148
|
+
// ============================================================================
|
|
149
|
+
// AST Navigation Helpers
|
|
150
|
+
// ============================================================================
|
|
151
|
+
|
|
152
|
+
/**
 * Compute the chain of child indices that leads from `root` to `node`.
 *
 * The visitor callback only supplies the immediate parent and the node's
 * index inside it, which is not enough to address a nested node. The full
 * path is therefore resolved by an identity search through the tree.
 *
 * @param root - Tree the path is relative to
 * @param node - Node to locate (compared by reference)
 * @param index - Position of `node` within `parent.children`, if known
 * @param parent - Immediate parent of `node`, if known
 * @returns Indices to follow through successive `children` arrays, starting
 *          at `root`; `[]` when `node` is the root itself
 */
function getNodePath(
  root: Root,
  node: unknown,
  index: number | undefined,
  parent: unknown
): number[] {
  // Returns the `children` array of a value when it has one.
  const childrenOf = (value: unknown): unknown[] | undefined => {
    if (typeof value !== 'object' || value === null) return undefined;
    const children = (value as { children?: unknown }).children;
    return Array.isArray(children) ? children : undefined;
  };

  // Depth-first search for `target` by reference; null when it is not in the tree.
  const findPath = (target: unknown): number[] | null => {
    const trail: number[] = [];
    const descend = (current: unknown): boolean => {
      if (current === target) return true;
      const children = childrenOf(current);
      if (!children) return false;
      for (let i = 0; i < children.length; i++) {
        trail.push(i);
        if (descend(children[i])) return true;
        trail.pop();
      }
      return false;
    };
    return descend(root) ? trail : null;
  };

  if (node === root) return [];

  // Use the (parent, index) hint when it is consistent: locate the parent
  // and append the known index.
  const siblings = childrenOf(parent);
  if (index !== undefined && siblings && siblings[index] === node) {
    const parentPath = findPath(parent);
    if (parentPath) return [...parentPath, index];
  }

  // Hint missing or stale: search for the node itself.
  const directPath = findPath(node);
  if (directPath) return directPath;

  // Node is not reachable from root; fall back to the bare index if any.
  return index !== undefined ? [index] : [];
}
|
|
177
|
+
|
|
178
|
+
/**
 * Follow a list of child indices down from the root of the AST.
 *
 * @param ast - Tree to navigate
 * @param path - Index to take at each level, outermost first
 * @returns The node reached, or `null` when a step lands on a node without
 *          children or on an index that does not exist
 */
function getNodeAtPath(ast: Root, path: number[]): RootContent | null {
  let cursor: Root | RootContent = ast;

  for (const step of path) {
    if (!('children' in cursor) || !Array.isArray(cursor.children)) {
      return null;
    }

    const next: RootContent | undefined = cursor.children[step];
    if (!next) return null;

    cursor = next;
  }

  return cursor as RootContent;
}
|
|
193
|
+
|
|
194
|
+
// ============================================================================
|
|
195
|
+
// Utility Functions
|
|
196
|
+
// ============================================================================
|
|
197
|
+
|
|
198
|
+
/**
 * List the content of every translatable node, in document order.
 *
 * @param document - Parsed document to read from
 * @returns One string per node whose `translatable` flag is set
 */
export function getTranslatableText(document: ParsedDocument): string[] {
  const texts: string[] = [];

  for (const entry of document.textNodes) {
    if (entry.translatable) {
      texts.push(entry.content);
    }
  }

  return texts;
}
|
|
206
|
+
|
|
207
|
+
/**
 * Pair translated strings with the ids of the document's translatable nodes.
 *
 * `translations` is expected in the same order as the output of
 * `getTranslatableText`. Surplus entries on either side are ignored, and a
 * missing (`undefined`/`null`) translation falls back to the node's source text.
 *
 * @param document - Parsed document supplying the node ids
 * @param translations - Translated strings, positionally aligned
 * @returns Map from node id to translated text
 */
export function createTranslationMap(
  document: ParsedDocument,
  translations: string[]
): TranslationMap {
  const targets = document.textNodes.filter((entry) => entry.translatable);
  const pairCount = Math.min(targets.length, translations.length);
  const result: TranslationMap = {};

  targets.slice(0, pairCount).forEach((target, position) => {
    result[target.id] = translations[position] ?? target.content;
  });

  return result;
}
|
|
227
|
+
|
|
228
|
+
/**
 * Swap the parts of a markdown string that must survive translation
 * unchanged for placeholders, and remember what each placeholder stands for.
 *
 * The passes run in a fixed order because later patterns would otherwise
 * match inside earlier ones:
 * 1. fenced code blocks (opening fence at line start, followed by a newline)
 * 2. inline code delimited by two or more backticks, on a single line
 * 3. inline code delimited by single backticks
 * 4. the URL part of `[text](url)` links (the link text stays translatable)
 *
 * All placeholders share one running counter.
 *
 * @param content - Markdown source
 * @returns The text with placeholders plus the placeholder → original map
 */
export function extractTextForTranslation(content: string): {
  text: string;
  preservedSections: Map<string, string>;
} {
  const preservedSections = new Map<string, string>();
  let nextIndex = 0;

  // Record `value` under a fresh placeholder of the given kind and return it.
  const stash = (kind: string, value: string): string => {
    const token = `__${kind}_${nextIndex++}__`;
    preservedSections.set(token, value);
    return token;
  };

  // Pass 1: fenced code blocks.
  const withoutFences = content.replace(
    /^[ \t]*```[^\n]*\n[\s\S]*?^[ \t]*```[ \t]*$/gm,
    (block) => stash('CODE_BLOCK', block)
  );

  // Pass 2: multi-backtick inline code, never crossing a line break.
  const withoutMultiTick = withoutFences.replace(
    /(`{2,})(?:[^`\n]|`(?!\1))*?\1/g,
    (span) => stash('INLINE_CODE', span)
  );

  // Pass 3: whatever single-backtick inline code is left.
  const withoutInlineCode = withoutMultiTick.replace(
    /`[^`\n]+`/g,
    (span) => stash('INLINE_CODE', span)
  );

  // Pass 4: link destinations.
  const text = withoutInlineCode.replace(
    /\[([^\]]+)\]\(([^)]+)\)/g,
    (_whole, label: string, url: string) => `[${label}](${stash('LINK_URL', url)})`
  );

  return { text, preservedSections };
}
|
|
277
|
+
|
|
278
|
+
/**
 * Put preserved sections back in place of their placeholders.
 *
 * Matching is deliberately tolerant of what an LLM may do to a placeholder:
 * - change its case,
 * - add or drop the surrounding underscores,
 * - insert spaces/tabs between the underscores and the identifier.
 *
 * Whitespace OUTSIDE the placeholder is left exactly as the translator wrote
 * it. Consuming it would glue the restored content to markdown markers
 * (`- __X__` would become `-code`), breaking list items and headings.
 *
 * @param translatedText - Translator output still containing placeholders
 * @param preservedSections - Placeholder → original text
 * @returns Text with all recognised placeholders replaced, and with spaces
 *          inserted where an inline code span touches a word character
 */
export function restorePreservedSections(
  translatedText: string,
  preservedSections: Map<string, string>
): string {
  let result = translatedText;

  // Longest keys first so CODE_BLOCK_12 is handled before CODE_BLOCK_1.
  const sortedEntries = [...preservedSections.entries()].sort(
    (a, b) => b[0].length - a[0].length
  );

  for (const [placeholder, original] of sortedEntries) {
    // Core identifier, e.g. "CODE_BLOCK_12" from "__CODE_BLOCK_12__".
    const identifier = /^__(.+)__$/.exec(placeholder)?.[1];

    if (identifier === undefined) {
      // Key is not in the __NAME__ form: exact, literal replacement only.
      result = result.split(placeholder).join(original);
      continue;
    }

    const escapedId = identifier.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    // Optional underscore runs on either side, optionally separated from the
    // identifier by spaces/tabs (never newlines). (?!\d) keeps CODE_BLOCK_1
    // from matching the start of CODE_BLOCK_12.
    const flexiblePattern = new RegExp(
      `(?:_+[ \\t]*)?${escapedId}(?!\\d)(?:[ \\t]*_+)?`,
      'gi'
    );

    // Function replacement so `$&`, `$'` etc. in the original stay literal.
    result = result.replace(flexiblePattern, () => original);
  }

  // LLMs often drop the spaces around placeholders; repair that afterwards.
  return ensureInlineCodeSpacing(result);
}

/**
 * Ensure inline code spans are separated from adjacent words by a space.
 *
 * Rules:
 * - insert a space before a span preceded by a word character or CJK character
 * - insert a space before a span preceded by digit + period ("1.`code`")
 * - insert a space after a span followed by a word character or CJK character
 * - never insert anything at the start or end of a line
 *
 * Spans are found by pairing a backtick run with the next run of the same
 * length on the same line, so the closing delimiter of one span is never
 * mistaken for the opening delimiter of the next. Lines inside fenced code
 * blocks are returned untouched.
 */
function ensureInlineCodeSpacing(text: string): string {
  // Word characters plus the CJK ranges \u3000-\u9fff and \uac00-\ud7af.
  const wordOrCjk = /[\w\u3000-\u9fff\uac00-\ud7af]/;
  const endsWithNumberedMarker = /\d\.$/;
  // Backtick fences cannot have backticks in their info string.
  const fenceOpener = /^[ \t]*(?:(`{3,})[^`]*|(~{3,})[\s\S]*)$/;
  const fenceCloser = /^[ \t]*(`{3,}|~{3,})[ \t]*\r?$/;

  // Index just past the run of backticks starting at `from`.
  const runEnd = (line: string, from: number): number => {
    let end = from;
    while (line[end] === '`') end++;
    return end;
  };

  // Apply the spacing rules to one line that is not part of a fenced block.
  const spaceLine = (line: string): string => {
    let out = '';
    let pos = 0;

    while (pos < line.length) {
      const tick = line.indexOf('`', pos);
      if (tick === -1) {
        out += line.slice(pos);
        break;
      }
      out += line.slice(pos, tick);

      const openEnd = runEnd(line, tick);
      const runLength = openEnd - tick;

      // Look for the closing run: the next backtick run of equal length.
      let closeStart = -1;
      let scan = line.indexOf('`', openEnd);
      while (scan !== -1) {
        const scanEnd = runEnd(line, scan);
        if (scanEnd - scan === runLength) {
          closeStart = scan;
          break;
        }
        scan = line.indexOf('`', scanEnd);
      }

      if (closeStart === -1) {
        // Unpaired backticks are ordinary text.
        out += line.slice(tick, openEnd);
        pos = openEnd;
        continue;
      }

      const spanEnd = closeStart + runLength;
      const before = out.slice(-1);
      if (before !== '' && (wordOrCjk.test(before) || endsWithNumberedMarker.test(out))) {
        out += ' ';
      }
      out += line.slice(tick, spanEnd);

      const after = line[spanEnd];
      if (after !== undefined && wordOrCjk.test(after)) {
        out += ' ';
      }
      pos = spanEnd;
    }

    return out;
  };

  const output: string[] = [];
  let fenceMarker = ''; // '' while outside a fenced block, else '`' or '~'
  let fenceLength = 0;

  for (const line of text.split('\n')) {
    if (fenceMarker !== '') {
      const closing = fenceCloser.exec(line)?.[1];
      if (
        closing !== undefined &&
        closing.startsWith(fenceMarker) &&
        closing.length >= fenceLength
      ) {
        fenceMarker = '';
      }
      output.push(line);
      continue;
    }

    const opening = fenceOpener.exec(line);
    const openRun = opening?.[1] ?? opening?.[2];
    if (openRun !== undefined) {
      fenceMarker = openRun.charAt(0);
      fenceLength = openRun.length;
      output.push(line);
      continue;
    }

    output.push(spaceLine(line));
  }

  return output.join('\n');
}
|
|
366
|
+
|
|
367
|
+
/**
 * Translate markdown while keeping code and link URLs intact.
 *
 * Protected sections are replaced by placeholders, the remaining text is
 * passed to `translateFn`, and the placeholders are then swapped back.
 *
 * @param content - Markdown source
 * @param translateFn - Async translator applied to the placeholder text
 * @returns The translated markdown with preserved sections restored
 */
export async function translateMarkdownContent(
  content: string,
  translateFn: (text: string) => Promise<string>
): Promise<string> {
  const extracted = extractTextForTranslation(content);
  const translated = await translateFn(extracted.text);

  return restorePreservedSections(translated, extracted.preservedSections);
}
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
import { createAnthropic } from '@ai-sdk/anthropic';
|
|
2
|
+
import { generateText, streamText } from 'ai';
|
|
3
|
+
import type { ProviderName } from '../types/index.js';
|
|
4
|
+
import type {
|
|
5
|
+
LLMProvider,
|
|
6
|
+
ProviderConfig,
|
|
7
|
+
ChatRequest,
|
|
8
|
+
ChatResponse,
|
|
9
|
+
ModelInfo,
|
|
10
|
+
CacheableTextPart,
|
|
11
|
+
} from './interface.js';
|
|
12
|
+
import { TranslationError, ErrorCode } from '../errors.js';
|
|
13
|
+
import { estimateTokens } from '../utils/tokens.js';
|
|
14
|
+
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// Model Information
|
|
17
|
+
// ============================================================================
|
|
18
|
+
|
|
19
|
+
/**
 * Static capabilities and list prices for known Claude models.
 *
 * Costs are USD per 1,000 tokens (published per-million price / 1000) for
 * standard, non-cached, non-batch usage. Models missing from this table are
 * still usable; lookups simply find no entry for them.
 */
const MODEL_INFO: Record<string, ModelInfo> = {
  // Latest Claude 4.5 models
  'claude-sonnet-4-5-20250929': {
    maxContextTokens: 200000,
    supportsStreaming: true,
    costPer1kInput: 0.003,
    costPer1kOutput: 0.015,
  },
  'claude-opus-4-5-20251101': {
    maxContextTokens: 200000,
    supportsStreaming: true,
    // Opus 4.5 is priced at $5 / $25 per million tokens, not the Opus 4 rate.
    costPer1kInput: 0.005,
    costPer1kOutput: 0.025,
  },
  'claude-haiku-4-5-20251001': {
    maxContextTokens: 200000,
    supportsStreaming: true,
    costPer1kInput: 0.001,
    costPer1kOutput: 0.005,
  },
  // Claude 4 models (previous generation)
  'claude-sonnet-4-20250514': {
    maxContextTokens: 200000,
    supportsStreaming: true,
    costPer1kInput: 0.003,
    costPer1kOutput: 0.015,
  },
  'claude-opus-4-20250514': {
    maxContextTokens: 200000,
    supportsStreaming: true,
    costPer1kInput: 0.015,
    costPer1kOutput: 0.075,
  },
  // Claude 3.5 models
  'claude-3-5-haiku-20241022': {
    maxContextTokens: 200000,
    supportsStreaming: true,
    // Claude 3.5 Haiku is priced at $0.80 / $4 per million tokens.
    costPer1kInput: 0.0008,
    costPer1kOutput: 0.004,
  },
};

// Use Claude Haiku 4.5 as default for cost-efficiency
const DEFAULT_MODEL = 'claude-haiku-4-5-20251001';
|
|
63
|
+
|
|
64
|
+
// ============================================================================
|
|
65
|
+
// Claude Provider Implementation
|
|
66
|
+
// ============================================================================
|
|
67
|
+
|
|
68
|
+
/**
 * `LLMProvider` backed by Anthropic's Claude models via the Vercel AI SDK.
 */
export class ClaudeProvider implements LLMProvider {
  readonly name: ProviderName = 'claude';
  readonly defaultModel: string;
  private readonly client: ReturnType<typeof createAnthropic>;

  /**
   * @param config - Optional API key, base URL and default model. The key
   *                 falls back to the `ANTHROPIC_API_KEY` environment variable.
   * @throws TranslationError with `PROVIDER_AUTH_FAILED` when no key is available
   */
  constructor(config: ProviderConfig = {}) {
    const resolvedKey = config.apiKey ?? process.env['ANTHROPIC_API_KEY'];

    if (!resolvedKey) {
      throw new TranslationError(ErrorCode.PROVIDER_AUTH_FAILED, {
        provider: 'claude',
        message: 'ANTHROPIC_API_KEY environment variable is not set',
      });
    }

    this.defaultModel = config.defaultModel ?? DEFAULT_MODEL;
    this.client = createAnthropic({
      apiKey: resolvedKey,
      baseURL: config.baseUrl,
    });
  }

  /**
   * Run a single, non-streaming completion.
   *
   * Defaults: temperature 0 and a 4096-token output cap when the request
   * does not specify them.
   *
   * @throws TranslationError for any failure raised by the SDK call
   */
  async chat(request: ChatRequest): Promise<ChatResponse> {
    const model = request.model ?? this.defaultModel;

    try {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const sdkMessages = this.convertMessages(request.messages) as any;

      const { text, usage, finishReason, providerMetadata } = await generateText({
        model: this.client(model),
        messages: sdkMessages,
        temperature: request.temperature ?? 0,
        maxTokens: request.maxTokens ?? 4096,
      });

      // Cache token counts are reported through provider-specific metadata.
      const cacheStats = providerMetadata?.anthropic as
        | {
            cacheCreationInputTokens?: number;
            cacheReadInputTokens?: number;
          }
        | undefined;

      return {
        content: text,
        usage: {
          inputTokens: usage?.promptTokens ?? 0,
          outputTokens: usage?.completionTokens ?? 0,
          cacheReadTokens: cacheStats?.cacheReadInputTokens,
          cacheWriteTokens: cacheStats?.cacheCreationInputTokens,
        },
        model,
        finishReason: mapFinishReason(finishReason),
      };
    } catch (error) {
      throw this.handleError(error);
    }
  }

  /**
   * Translate provider-neutral messages into the AI SDK message shape.
   *
   * String content passes through unchanged. Part arrays become text parts,
   * and a part's `cacheControl` is forwarded under
   * `providerOptions.anthropic.cacheControl`.
   */
  private convertMessages(
    messages: Array<{
      role: 'system' | 'user' | 'assistant';
      content: string | CacheableTextPart[];
    }>
  ) {
    return messages.map(({ role, content }) => {
      if (typeof content !== 'string') {
        const parts = content.map((part) =>
          part.cacheControl
            ? {
                type: 'text' as const,
                text: part.text,
                providerOptions: {
                  anthropic: { cacheControl: part.cacheControl },
                },
              }
            : { type: 'text' as const, text: part.text }
        );

        return { role, content: parts };
      }

      // Plain string content carries no cache hints.
      return { role, content };
    });
  }

  /**
   * Stream a completion as text chunks.
   *
   * Uses the same defaults as `chat`.
   *
   * @throws TranslationError for failures raised while starting or reading the stream
   */
  async *stream(request: ChatRequest): AsyncIterable<string> {
    const model = request.model ?? this.defaultModel;

    try {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const sdkMessages = this.convertMessages(request.messages) as any;

      const { textStream } = streamText({
        model: this.client(model),
        messages: sdkMessages,
        temperature: request.temperature ?? 0,
        maxTokens: request.maxTokens ?? 4096,
      });

      for await (const piece of textStream) {
        yield piece;
      }
    } catch (error) {
      throw this.handleError(error);
    }
  }

  /** Estimate the token count of `text` locally (no API round trip). */
  countTokens(text: string): number {
    return estimateTokens(text);
  }

  /**
   * Look up static information for a model.
   *
   * @param model - Model id; the provider's default model when omitted
   * @returns The table entry, or a generic 200k-context streaming profile
   *          for models that are not in the table
   */
  getModelInfo(model?: string): ModelInfo {
    const known = MODEL_INFO[model ?? this.defaultModel];
    if (known) return known;

    return {
      maxContextTokens: 200000,
      supportsStreaming: true,
    };
  }

  /**
   * Normalise any thrown value into a `TranslationError`.
   *
   * Classification is based on substrings of the error message: rate-limit
   * markers are checked first, then authentication markers; anything else
   * becomes a generic provider error.
   */
  private handleError(error: unknown): TranslationError {
    if (error instanceof TranslationError) {
      return error;
    }

    const errorMessage = error instanceof Error ? error.message : String(error);
    const mentions = (...needles: string[]): boolean =>
      needles.some((needle) => errorMessage.includes(needle));

    const code = mentions('rate_limit', '429')
      ? ErrorCode.PROVIDER_RATE_LIMITED
      : mentions('authentication', '401', 'invalid_api_key')
        ? ErrorCode.PROVIDER_AUTH_FAILED
        : ErrorCode.PROVIDER_ERROR;

    return new TranslationError(code, {
      provider: 'claude',
      message: errorMessage,
    });
  }
}
|
|
233
|
+
|
|
234
|
+
// ============================================================================
|
|
235
|
+
// Helper Functions
|
|
236
|
+
// ============================================================================
|
|
237
|
+
|
|
238
|
+
/**
 * Collapse a finish reason into the three values used by `ChatResponse`.
 *
 * @param reason - Finish reason reported for the completion
 * @returns `'stop'` for `stop`/`end_turn`, `'length'` for
 *          `length`/`max_tokens`, and `'error'` for everything else
 *          (including `null` and `undefined`)
 */
function mapFinishReason(
  reason: string | null | undefined
): 'stop' | 'length' | 'error' {
  if (reason === 'stop' || reason === 'end_turn') {
    return 'stop';
  }

  if (reason === 'length' || reason === 'max_tokens') {
    return 'length';
  }

  return 'error';
}
|
|
252
|
+
|
|
253
|
+
// ============================================================================
|
|
254
|
+
// Factory Function
|
|
255
|
+
// ============================================================================
|
|
256
|
+
|
|
257
|
+
/**
 * Factory for a Claude-backed provider.
 *
 * @param config - Forwarded unchanged to the `ClaudeProvider` constructor
 * @returns The new provider, typed as the generic `LLMProvider` interface
 */
export function createClaudeProvider(config: ProviderConfig = {}): LLMProvider {
  const provider: LLMProvider = new ClaudeProvider(config);
  return provider;
}
|