astro-llm-translator 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +674 -0
- package/README.md +156 -0
- package/dist/integration/index.d.ts +50 -0
- package/dist/integration/index.js +488 -0
- package/dist/starlight/index.d.ts +5 -0
- package/dist/starlight/index.js +182 -0
- package/package.json +65 -0
package/README.md
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# astro-llm-translator
|
|
2
|
+
|
|
3
|
+
An [Astro](https://astro.build) integration that automatically translates your Markdown and MDX content using LLMs (OpenAI, Anthropic, DeepSeek, etc.). Translation runs when `astro build` or `astro dev` starts.
|
|
4
|
+
|
|
5
|
+
It features incremental builds, smart caching, and automatic synchronization of deletions, ensuring you only pay for what needs to be translated.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install astro-llm-translator openai
|
|
11
|
+
# or
|
|
12
|
+
pnpm add astro-llm-translator openai
|
|
13
|
+
# or
|
|
14
|
+
bun add astro-llm-translator openai
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
> **Note**: You also need to install the `openai` package as a peer dependency.
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
Add the integration to your `astro.config.mjs`:
|
|
22
|
+
|
|
23
|
+
```javascript
|
|
24
|
+
import { defineConfig } from 'astro/config';
|
|
25
|
+
import llmTranslator from 'astro-llm-translator';
|
|
26
|
+
|
|
27
|
+
export default defineConfig({
|
|
28
|
+
integrations: [
|
|
29
|
+
llmTranslator({
|
|
30
|
+
sourceLang: 'en', // or 'root'
|
|
31
|
+
targetLangs: ['es', 'fr', 'de'],
|
|
32
|
+
contentDir: 'src/content/docs',
|
|
33
|
+
openai: {
|
|
34
|
+
model: 'gpt-4o-mini', // Optional: defaults to gpt-4o-mini
|
|
35
|
+
}
|
|
36
|
+
})
|
|
37
|
+
]
|
|
38
|
+
});
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Configuration
|
|
42
|
+
|
|
43
|
+
### Options
|
|
44
|
+
|
|
45
|
+
| Option | Type | Default | Description |
|
|
46
|
+
| :--- | :--- | :--- | :--- |
|
|
47
|
+
| `sourceLang` | `string` | **Required** | The language code of your source content (e.g., `'en'`). Set to `'root'` if your source files are directly inside `contentDir`. |
|
|
48
|
+
| `targetLangs` | `string[]` | **Required** | Array of language codes to translate into (e.g., `['es', 'fr']`). |
|
|
49
|
+
| `contentDir` | `string` | `'src/content/docs'` | Directory relative to project root containing your content. |
|
|
50
|
+
| `customInstructions` | `string` | `undefined` | Additional instructions to guide the translator (e.g. "Use a friendly tone"). |
|
|
51
|
+
| `dictionary` | `Record<string, string>` | `{}` | Key-value pairs of terms to force translation for (e.g. `{'Astro': 'AstroFramework'}`). |
|
|
52
|
+
| `openai` | `object` | `{}` | Optional configuration for the LLM client. |
|
|
53
|
+
|
|
54
|
+
### OpenAI Options (`openai`)
|
|
55
|
+
|
|
56
|
+
| Option | Type | Default | Description |
|
|
57
|
+
| :--- | :--- | :--- | :--- |
|
|
58
|
+
| `apiKey` | `string` | `process.env.OPENAI_API_KEY` | Your API key. Falls back to `process.env.OPENROUTER_API_KEY` when `OPENAI_API_KEY` is not set. |
|
|
59
|
+
| `baseURL` | `string` | `process.env.OPENAI_BASE_URL` | Base URL for the API (useful for OpenRouter or local models). Falls back to `process.env.OPENROUTER_BASE_URL` when `OPENAI_BASE_URL` is not set. |
|
|
60
|
+
| `model` | `string` | `process.env.OPENAI_MODEL`, otherwise `'gpt-4o-mini'` | The model identifier to use. |
|
|
61
|
+
|
|
62
|
+
## Environment Variables
|
|
63
|
+
|
|
64
|
+
You can configure the LLM client using environment variables in your `.env` file:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# Standard OpenAI
|
|
68
|
+
OPENAI_API_KEY=sk-...
|
|
69
|
+
|
|
70
|
+
# Or OpenRouter / Custom Provider
|
|
71
|
+
OPENROUTER_API_KEY=sk-or-...
|
|
72
|
+
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
|
|
73
|
+
OPENAI_MODEL=anthropic/claude-3-haiku
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
The integration checks for keys in this order:
|
|
77
|
+
1. `options.openai.apiKey`
|
|
78
|
+
2. `OPENAI_API_KEY`
|
|
79
|
+
3. `OPENROUTER_API_KEY`
|
|
80
|
+
|
|
81
|
+
## Starlight Support
|
|
82
|
+
|
|
83
|
+
If you are using [Starlight](https://starlight.astro.build/), you can use the companion plugin to automatically translate sidebar directory labels.
|
|
84
|
+
|
|
85
|
+
First, the main integration collects and translates directory names during the build process. Then, the Starlight plugin uses this data to reconstruct the sidebar with translated labels.
|
|
86
|
+
|
|
87
|
+
```javascript
|
|
88
|
+
// astro.config.mjs
|
|
89
|
+
import { defineConfig } from 'astro/config';
|
|
90
|
+
import starlight from '@astrojs/starlight';
|
|
91
|
+
import llmTranslator from 'astro-llm-translator';
|
|
92
|
+
import starlightTranslator from 'astro-llm-translator/starlight'; // Import the plugin
|
|
93
|
+
|
|
94
|
+
export default defineConfig({
|
|
95
|
+
integrations: [
|
|
96
|
+
llmTranslator({
|
|
97
|
+
sourceLang: 'en',
|
|
98
|
+
targetLangs: ['es', 'fr'],
|
|
99
|
+
// ... other options
|
|
100
|
+
}),
|
|
101
|
+
starlight({
|
|
102
|
+
title: 'My Docs',
|
|
103
|
+
defaultLocale: 'en',
|
|
104
|
+
locales: {
|
|
105
|
+
en: { label: 'English', lang: 'en' },
|
|
106
|
+
es: { label: 'Español', lang: 'es' },
|
|
107
|
+
fr: { label: 'Français', lang: 'fr' },
|
|
108
|
+
},
|
|
109
|
+
plugins: [
|
|
110
|
+
starlightTranslator(), // Add the plugin here
|
|
111
|
+
],
|
|
112
|
+
sidebar: [
|
|
113
|
+
// Works with autogenerate!
|
|
114
|
+
{
|
|
115
|
+
label: 'Guides',
|
|
116
|
+
autogenerate: { directory: 'guides' },
|
|
117
|
+
},
|
|
118
|
+
],
|
|
119
|
+
}),
|
|
120
|
+
],
|
|
121
|
+
});
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
> **Note**: The main `llmTranslator` integration must be present to generate the translation data (`src/.translator-segments.json`) that the Starlight plugin relies on.
|
|
125
|
+
|
|
126
|
+
## Directory Structure Examples
|
|
127
|
+
|
|
128
|
+
### 1. Subdirectory Mode (Default)
|
|
129
|
+
|
|
130
|
+
Config: `sourceLang: 'en'`
|
|
131
|
+
|
|
132
|
+
```text
|
|
133
|
+
src/content/docs/
|
|
134
|
+
├── en/ <-- Source
|
|
135
|
+
│ └── index.md
|
|
136
|
+
├── es/ <-- Auto-generated
|
|
137
|
+
│ └── index.md
|
|
138
|
+
└── fr/ <-- Auto-generated
|
|
139
|
+
└── index.md
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### 2. Root Mode
|
|
143
|
+
|
|
144
|
+
Config: `sourceLang: 'root'`
|
|
145
|
+
|
|
146
|
+
```text
|
|
147
|
+
src/content/docs/
|
|
148
|
+
├── index.md <-- Source
|
|
149
|
+
├── about.md <-- Source
|
|
150
|
+
├── es/ <-- Auto-generated (ignored by scanner)
|
|
151
|
+
│ ├── index.md
|
|
152
|
+
│ └── about.md
|
|
153
|
+
└── fr/ <-- Auto-generated (ignored by scanner)
|
|
154
|
+
├── index.md
|
|
155
|
+
└── about.md
|
|
156
|
+
```
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { AstroIntegration } from 'astro';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
|
|
4
|
+
/** Plain-object form of the nested `openai` client options; every field is optional. */
type OpenAIClientOptions = {
    baseURL?: string | undefined;
    apiKey?: string | undefined;
    model?: string | undefined;
};

/** Plain-object form of the integration options described by the schema below. */
type TranslatorOptionsShape = {
    sourceLang: string;
    targetLangs: string[];
    contentDir?: string | undefined;
    customInstructions?: string | undefined;
    dictionary?: Record<string, string> | undefined;
    openai?: OpenAIClientOptions | undefined;
};

/**
 * Zod schema for the options accepted by `llmTranslator`.
 * `sourceLang` and `targetLangs` are required; all other keys are optional.
 */
declare const TranslatorOptionsSchema: z.ZodObject<{
    sourceLang: z.ZodString;
    targetLangs: z.ZodArray<z.ZodString, "many">;
    contentDir: z.ZodOptional<z.ZodString>;
    customInstructions: z.ZodOptional<z.ZodString>;
    dictionary: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
    openai: z.ZodOptional<z.ZodObject<{
        baseURL: z.ZodOptional<z.ZodString>;
        apiKey: z.ZodOptional<z.ZodString>;
        model: z.ZodOptional<z.ZodString>;
    }, "strip", z.ZodTypeAny, OpenAIClientOptions, OpenAIClientOptions>>;
}, "strip", z.ZodTypeAny, TranslatorOptionsShape, TranslatorOptionsShape>;
|
|
46
|
+
/** Options object type inferred from `TranslatorOptionsSchema`. */
type TranslatorOptions = z.infer<typeof TranslatorOptionsSchema>;
|
|
47
|
+
|
|
48
|
+
/**
 * Creates the translator integration.
 *
 * @param inputOptions Translator configuration (see `TranslatorOptions`).
 * @returns An `AstroIntegration` object.
 */
declare function llmTranslator(inputOptions: TranslatorOptions): AstroIntegration;
|
|
49
|
+
|
|
50
|
+
export { llmTranslator as default };
|
|
@@ -0,0 +1,488 @@
|
|
|
1
|
+
// src/integration/index.ts
|
|
2
|
+
import fs2 from "fs/promises";
|
|
3
|
+
import path3 from "path";
|
|
4
|
+
import { glob } from "glob";
|
|
5
|
+
import "dotenv/config";
|
|
6
|
+
|
|
7
|
+
// src/integration/state-manager.ts
|
|
8
|
+
import fs from "fs/promises";
|
|
9
|
+
import path from "path";
|
|
10
|
+
/**
 * Keeps the translator's bookkeeping in two JSON files under `<rootDir>/src`:
 *
 * - `.translator.json`: per-source-file entries of the form
 *   `{ sourceHash, relativePath, completedLangs }`, keyed by file key.
 * - `.translator-segments.json`: translated directory-name segments, stored
 *   as `segment -> lang -> translation`.
 *
 * All mutators only touch the in-memory copies; call the `save*` methods to
 * persist them.
 */
class StateManager {
  rootDir;
  stateFilePath;
  segmentStateFilePath;
  translationState = {};
  segmentState = {};

  /**
   * @param {string} rootDir Project root; state files live in its `src` folder.
   */
  constructor(rootDir) {
    this.rootDir = rootDir;
    this.stateFilePath = path.join(rootDir, "src", ".translator.json");
    this.segmentStateFilePath = path.join(
      rootDir,
      "src",
      ".translator-segments.json"
    );
  }

  /**
   * Reads a JSON file that is expected to contain a plain object.
   *
   * A file that cannot be read (e.g. it does not exist yet) yields an empty
   * object silently. A file that exists but holds invalid JSON, or valid JSON
   * that is not a plain object (`null`, an array, a string, ...), also yields
   * an empty object but is reported, because using such a value as the state
   * map would make later property accesses throw or misbehave.
   *
   * @param {string} filePath Absolute path of the JSON file.
   * @returns {Promise<object>} The parsed object, or `{}`.
   */
  static async #readJsonObject(filePath) {
    let raw;
    try {
      raw = await fs.readFile(filePath, "utf-8");
    } catch {
      // Unreadable/missing file: start from an empty state.
      return {};
    }
    try {
      const parsed = JSON.parse(raw);
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        return parsed;
      }
      console.warn(`Ignoring state file ${filePath}: expected a JSON object.`);
    } catch (e) {
      console.warn(`Ignoring unreadable state file ${filePath}: ${e}`);
    }
    return {};
  }

  /** Loads both state files into memory, falling back to empty objects. */
  async load() {
    this.translationState = await StateManager.#readJsonObject(this.stateFilePath);
    this.segmentState = await StateManager.#readJsonObject(this.segmentStateFilePath);
  }

  /** Writes the translation state to disk; failures are logged, not thrown. */
  async saveTranslationState() {
    try {
      await fs.writeFile(
        this.stateFilePath,
        JSON.stringify(this.translationState, null, 2)
      );
    } catch (e) {
      console.error(`Failed to save translation state: ${e}`);
    }
  }

  /** Writes the segment state to disk; failures are logged, not thrown. */
  async saveSegmentState() {
    try {
      await fs.writeFile(
        this.segmentStateFilePath,
        JSON.stringify(this.segmentState, null, 2)
      );
    } catch (e) {
      console.error(`Failed to save segment state: ${e}`);
    }
  }

  /**
   * Creates or refreshes the entry for `key`. In both cases the list of
   * completed languages is reset to empty.
   *
   * @param {string} key File key.
   * @param {string} hash Hash of the current source content.
   * @param {string} relativePath Source path relative to the source directory.
   */
  updateTranslationEntry(key, hash, relativePath) {
    const existing = this.translationState[key];
    if (!existing) {
      this.translationState[key] = {
        sourceHash: hash,
        relativePath,
        completedLangs: []
      };
      return;
    }
    // Mutate in place so references held by callers observe the update.
    existing.sourceHash = hash;
    existing.relativePath = relativePath;
    existing.completedLangs = [];
  }

  /** Records `lang` as completed for `key` (no-op if unknown key or already recorded). */
  markLanguageCompleted(key, lang) {
    const entry = this.translationState[key];
    if (entry && !entry.completedLangs.includes(lang)) {
      entry.completedLangs.push(lang);
    }
  }

  /** @returns The entry stored for `key`, or `undefined`. */
  getEntry(key) {
    return this.translationState[key];
  }

  /** Drops the entry stored for `key`. */
  removeEntry(key) {
    delete this.translationState[key];
  }

  /** @returns The cached translation of `segment` for `lang`, or `undefined`. */
  getSegmentTranslation(segment, lang) {
    return this.segmentState[segment]?.[lang];
  }

  /** Caches `translation` as the `lang` rendering of `segment`. */
  setSegmentTranslation(segment, lang, translation) {
    if (!this.segmentState[segment]) {
      this.segmentState[segment] = {};
    }
    this.segmentState[segment][lang] = translation;
  }
}
|
|
93
|
+
|
|
94
|
+
// src/integration/translator.ts
|
|
95
|
+
import matter2 from "gray-matter";
|
|
96
|
+
import OpenAI from "openai";
|
|
97
|
+
|
|
98
|
+
// src/integration/utils.ts
|
|
99
|
+
import crypto from "crypto";
|
|
100
|
+
import path2 from "path";
|
|
101
|
+
import matter from "gray-matter";
|
|
102
|
+
/**
 * Returns the SHA-256 digest of `content` as a lowercase hex string.
 *
 * @param {string} content Text to fingerprint.
 * @returns {string} Hex-encoded SHA-256 hash.
 */
function computeHash(content) {
  const hasher = crypto.createHash("sha256");
  hasher.update(content);
  return hasher.digest("hex");
}
|
|
105
|
+
/**
 * Re-expresses a URL that is relative to `sourceFilePath` so that it points at
 * the same file when used from `targetFilePath`.
 *
 * The result always uses `/` separators and always starts with `./` or `../`
 * (or is exactly `..`), so it is a valid relative URL / import specifier even
 * when the first path component is a dot-directory such as `.assets`.
 *
 * @param {string} sourceFilePath Path of the file the URL was written in.
 * @param {string} targetFilePath Path of the file the URL will be used from.
 * @param {string} relativeUrl URL relative to the source file's directory.
 * @returns {string} The rebased relative URL, or `relativeUrl` unchanged if
 *   the path computation throws (e.g. a non-string argument).
 */
function rebaseRelativePath(sourceFilePath, targetFilePath, relativeUrl) {
  try {
    const absoluteTarget = path2.resolve(path2.dirname(sourceFilePath), relativeUrl);
    const rebased = path2
      .relative(path2.dirname(targetFilePath), absoluteTarget)
      // URLs and import specifiers use forward slashes on every platform.
      .split(path2.sep)
      .join("/");
    // A plain startsWith(".") check would wrongly accept ".assets/x.png".
    const isExplicitlyRelative =
      rebased === "." ||
      rebased === ".." ||
      rebased.startsWith("./") ||
      rebased.startsWith("../");
    return isExplicitlyRelative ? rebased : `./${rebased}`;
  } catch (_e) {
    return relativeUrl;
  }
}
|
|
119
|
+
/**
 * Derives the state key for a source file.
 *
 * Resolution order:
 *   1. the `slug` found by parsing the frontmatter with `matter`;
 *   2. a `slug:` line found by a regex scan of the leading `---` block
 *      (used when step 1 throws or yields no slug);
 *   3. the file's relative path.
 *
 * The returned string is always NFC-normalized.
 *
 * @param {string} content Raw file content.
 * @param {string} relativePath Path of the file relative to the source dir.
 * @returns {string} The key identifying this file.
 */
function getKey(content, relativePath) {
  try {
    const { data } = matter(content);
    if (data.slug) {
      return data.slug.normalize("NFC");
    }
  } catch (e) {
    // Intentionally ignored: any failure here (including a slug value that
    // has no normalize method) falls through to the regex-based lookup.
  }

  const frontmatterBlock = content.match(/^---([\s\S]*?)---/);
  const slugLine = frontmatterBlock
    ? frontmatterBlock[1].match(/^\s*slug:\s*(.+)$/m)
    : null;
  if (slugLine) {
    const rawSlug = slugLine[1].trim();
    // Drop one surrounding quote character on each side, if present.
    const unquoted = rawSlug.replace(/^['"]|['"]$/g, "");
    return unquoted.normalize("NFC");
  }

  return relativePath.normalize("NFC");
}
|
|
137
|
+
|
|
138
|
+
// src/integration/markdown-processor.ts
|
|
139
|
+
/**
 * Rewrites relative image URLs in Markdown image syntax (`![alt](url)`) so
 * they still resolve after the document is written to `targetFilePath`.
 *
 * URLs that start with `/`, `http` or `data:` are left untouched, as are
 * images with an empty destination. An optional title after the URL
 * (`![alt](url "title")`) is preserved verbatim.
 *
 * @param {string} content Markdown text.
 * @param {string} sourceFilePath Path of the original document.
 * @param {string} targetFilePath Path the document will be written to.
 * @returns {string} Markdown with rebased image URLs.
 */
var adjustImagePaths = (content, sourceFilePath, targetFilePath) => {
  const imagePattern = /!\[(.*?)\]\((.*?)\)/g;
  return content.replace(imagePattern, (match, alt, destination) => {
    // Separate the URL from an optional trailing title so only the URL is rebased.
    const [, url, title] = /^(\S*)([\s\S]*)$/.exec(destination);
    if (url === "" || url.startsWith("/") || url.startsWith("http") || url.startsWith("data:")) {
      return match;
    }
    const newRelPath = rebaseRelativePath(sourceFilePath, targetFilePath, url);
    // Re-emit the image; returning an empty string here would delete it.
    return `![${alt}](${newRelPath}${title})`;
  });
};
|
|
149
|
+
/**
 * Rewrites the specifier of every line-leading `import` statement whose
 * specifier starts with `.` so that it resolves from `targetFilePath`
 * instead of `sourceFilePath`. Other imports are left untouched.
 *
 * @param {string} content Markdown/MDX text.
 * @param {string} sourceFilePath Path of the original document.
 * @param {string} targetFilePath Path the document will be written to.
 * @returns {string} Text with rebased relative import specifiers.
 */
var adjustImportPaths = (content, sourceFilePath, targetFilePath) => {
  // Groups: (1) everything up to and including the opening quote,
  //         (2) the relative specifier, (3) the closing quote.
  const relativeImportPattern = /(^import\s+(?:(?:[\w*\s{},]*)\s+from\s+)?['"])(\.[^'"]+)(['"])/gm;
  const rewriteSpecifier = (_statement, opening, specifier, closing) => {
    const rebased = rebaseRelativePath(sourceFilePath, targetFilePath, specifier);
    return opening + rebased + closing;
  };
  return content.replace(relativeImportPattern, rewriteSpecifier);
};
|
|
156
|
+
// Ordered list of content transforms; each receives the output of the previous one.
var processors = [adjustImagePaths, adjustImportPaths];

/**
 * Runs every registered processor over `content`, in order.
 *
 * @param {string} content Markdown/MDX text to process.
 * @param {string} sourceFilePath Path of the original document.
 * @param {string} targetFilePath Path the document will be written to.
 * @returns {string} The content after all processors have been applied.
 */
function processMarkdownContent(content, sourceFilePath, targetFilePath) {
  let result = content;
  for (const applyStep of processors) {
    result = applyStep(result, sourceFilePath, targetFilePath);
  }
  return result;
}
|
|
163
|
+
|
|
164
|
+
// src/integration/translator.ts
|
|
165
|
+
/**
 * Thin wrapper around an OpenAI-compatible chat client that translates
 * directory labels and Markdown documents.
 *
 * Both translate methods reject when the API call fails, so callers can avoid
 * caching untranslated text as if it were a finished translation.
 */
class Translator {
  openai;
  model;
  customInstructions;
  dictionary;

  /**
   * @param {object} options Integration options. Reads `openai.apiKey`,
   *   `openai.baseURL`, `openai.model`, `customInstructions` and `dictionary`,
   *   falling back to the `OPENAI_*` / `OPENROUTER_*` environment variables.
   * @throws {Error} If no API key can be found.
   */
  constructor(options) {
    const apiKey = options.openai?.apiKey || process.env.OPENAI_API_KEY || process.env.OPENROUTER_API_KEY;
    if (!apiKey) {
      throw new Error("No OpenAI API Key found.");
    }
    const baseURL = options.openai?.baseURL || process.env.OPENAI_BASE_URL || process.env.OPENROUTER_BASE_URL;
    this.model = options.openai?.model || process.env.OPENAI_MODEL || "gpt-4o-mini";
    this.openai = new OpenAI({ apiKey, baseURL });
    this.customInstructions = options.customInstructions;
    this.dictionary = options.dictionary;
  }

  /**
   * Translates a single directory name / menu label.
   *
   * @param {string} segment Label to translate.
   * @param {string} targetLang Target language code.
   * @returns {Promise<string>} The trimmed translation, or `segment` itself
   *   when the model returns an empty answer.
   * @throws {Error} If the API request fails (original error in `cause`).
   */
  async translateSegment(segment, targetLang) {
    const prompt = `
Translate the following directory name/category label into ${targetLang}.
Return ONLY the translated string. Keep it short and title-cased if appropriate for a menu label.
Preserve any emojis or special symbols from the original text.
Do not include quotes or extra text.

Original: "${segment}"
`;
    let completion;
    try {
      completion = await this.openai.chat.completions.create({
        messages: [{ role: "user", content: prompt }],
        model: this.model
      });
    } catch (e) {
      // Propagate instead of returning the source text: a silently returned
      // fallback would be stored by the caller as a completed translation.
      const reason = e instanceof Error ? e.message : String(e);
      throw new Error(`Segment translation failed for ${targetLang}: ${reason}`, { cause: e });
    }
    return completion.choices?.[0]?.message?.content?.trim() || segment;
  }

  /**
   * Translates a Markdown/MDX document, including the `title`, `description`
   * and `summary` frontmatter fields when present.
   *
   * @param {string} content Raw source document.
   * @param {string} sourceLang Source language code, or `"root"`.
   * @param {string} targetLang Target language code.
   * @param {string} sourceFilePath Path of the source document.
   * @param {string} targetFilePath Path the translation will be written to.
   * @param {object} [logger] Logger with an `error` method; `console` is used
   *   when omitted.
   * @returns {Promise<string>} The translated document. If the source cannot
   *   be parsed, the error is logged and `content` is returned unchanged.
   * @throws {Error} If the API request fails or the reply is not a usable
   *   JSON object (original error in `cause`).
   */
  async translateContent(content, sourceLang, targetLang, sourceFilePath, targetFilePath, logger) {
    let parsed;
    try {
      parsed = matter2(content);
    } catch (e) {
      const msg = `Failed to parse source content: ${e}`;
      logger ? logger.error(msg) : console.error(msg);
      return content;
    }
    const { data, content: body } = parsed;

    // Only non-empty frontmatter fields are sent for translation.
    const translatableKeys = ["title", "description", "summary"];
    const payload = { body };
    for (const field of translatableKeys) {
      if (data[field]) payload[field] = data[field];
    }

    const sourceLangName = sourceLang === "root" ? "the original language" : sourceLang;
    let dictionaryPrompt = "";
    if (this.dictionary && Object.keys(this.dictionary).length > 0) {
      dictionaryPrompt = `
USE THE FOLLOWING DICTIONARY FOR SPECIFIC TERMS:
${Object.entries(this.dictionary).map(([key, value]) => `- "${key}": "${value}"`).join("\n")}
`;
    }
    const prompt = `
You are a professional translator. Translate the content in the following JSON object from ${sourceLangName} to ${targetLang}.

${this.customInstructions || ""}
${dictionaryPrompt}

INSTRUCTIONS:
1. Return ONLY a valid JSON object. Do not include markdown code blocks.
2. The JSON keys must match the input exactly ("title", "description", "body", etc.).
3. Translate the VALUES associated with the keys.
4. For the "body" key, preserve all Markdown formatting (headers, lists, links) exactly.
5. Do NOT translate technical terms if inappropriate.

Input JSON:
${JSON.stringify(payload, null, 2)}
`;

    let translatedPayload;
    try {
      const completion = await this.openai.chat.completions.create({
        messages: [{ role: "user", content: prompt }],
        model: this.model,
        response_format: { type: "json_object" }
      });
      translatedPayload = JSON.parse(completion.choices[0].message.content || "{}");
    } catch (e) {
      // Propagate so the caller does not write and mark an untranslated file
      // as completed; it will be retried on the next run instead.
      const reason = e instanceof Error ? e.message : String(e);
      throw new Error(`LLM Translation failed for ${targetLang}: ${reason}`, { cause: e });
    }
    if (translatedPayload === null || typeof translatedPayload !== "object" || Array.isArray(translatedPayload)) {
      throw new Error(`LLM Translation failed for ${targetLang}: response is not a JSON object`);
    }
    // A non-empty body must come back translated; otherwise treat as failure.
    if (typeof translatedPayload.body !== "string" && body.trim() !== "") {
      throw new Error(`LLM Translation failed for ${targetLang}: response has no "body" string`);
    }

    const translatedData = { ...data };
    for (const field of translatableKeys) {
      const translated = translatedPayload[field];
      // Copy back only fields that were actually sent and came back as text.
      if (field in payload && typeof translated === "string" && translated) {
        translatedData[field] = translated;
      }
    }
    if (typeof translatedData.slug === "string" && translatedData.slug) {
      // Prefix the slug with the target language, dropping leading slashes.
      const cleanSlug = translatedData.slug.replace(/^\/+/, "");
      translatedData.slug = `${targetLang}/${cleanSlug}`;
    }

    const finalBody = processMarkdownContent(
      translatedPayload.body || body,
      sourceFilePath,
      targetFilePath
    );
    return matter2.stringify(finalBody, translatedData);
  }
}
|
|
269
|
+
|
|
270
|
+
// src/integration/types.ts
|
|
271
|
+
import { z } from "zod";
|
|
272
|
+
// Schema for the nested `openai` option group; all fields are optional and
// `baseURL`, when given, must be a valid URL.
var openAIClientOptionsSchema = z.object({
  baseURL: z.string().url().optional(),
  apiKey: z.string().optional(),
  model: z.string().optional()
});

// Schema for the integration options: a non-empty source language, at least
// one non-empty target language, and optional tuning fields.
var TranslatorOptionsSchema = z.object({
  sourceLang: z.string().min(1),
  targetLangs: z.array(z.string().min(1)).min(1),
  contentDir: z.string().optional(),
  customInstructions: z.string().optional(),
  dictionary: z.record(z.string()).optional(),
  openai: openAIClientOptionsSchema.optional()
});
|
|
284
|
+
|
|
285
|
+
// src/integration/index.ts
|
|
286
|
+
/**
 * Translates every directory name on `file`'s path that has no cached
 * translation yet, persisting each new translation as soon as it is obtained.
 */
async function translateDirectorySegments(file, targetLangs, translator, stateManager, logger) {
  const dir = path3.dirname(file);
  if (dir === "." || dir === "") return;
  for (const segment of dir.split(path3.sep)) {
    for (const lang of targetLangs) {
      if (stateManager.getSegmentTranslation(segment, lang)) continue;
      logger.info(
        `Translating directory segment "${segment}" to ${lang}...`
      );
      try {
        const translatedSeg = await translator.translateSegment(segment, lang);
        stateManager.setSegmentTranslation(segment, lang, translatedSeg);
        await stateManager.saveSegmentState();
      } catch (e) {
        logger.warn(`Failed to translate segment ${segment}: ${e}`);
      }
    }
  }
}

/**
 * Moves the translated copies for `langs` from `oldRelativePath` to
 * `newRelativePath`; a failed move is logged and does not abort the rest.
 */
async function relocateTranslations(langs, resolveTarget, oldRelativePath, newRelativePath, logger) {
  for (const lang of langs) {
    const newTargetPath = resolveTarget(lang, newRelativePath);
    try {
      await fs2.mkdir(path3.dirname(newTargetPath), {
        recursive: true
      });
      await fs2.rename(resolveTarget(lang, oldRelativePath), newTargetPath);
    } catch (e) {
      logger.warn(`Failed to move translation: ${e}`);
    }
  }
}

/**
 * Deletes the translated copies of `relativePath` for `langs`. Files that
 * cannot be removed (typically because they do not exist) are skipped.
 */
async function removeTranslations(langs, resolveTarget, relativePath, onDeleted) {
  for (const lang of langs) {
    const targetPath = resolveTarget(lang, relativePath);
    try {
      await fs2.unlink(targetPath);
      onDeleted?.(targetPath);
    } catch {
      // Best effort: nothing to clean up for this language.
    }
  }
}

/**
 * Makes sure an up-to-date translation of one source file exists for `lang`,
 * translating it when it is not recorded as completed or its file is missing.
 */
async function syncTranslation({ file, key, lang, sourceContent, sourceLang, sourceFilePath, targetFilePath, translator, stateManager, logger }) {
  let isCompleted = stateManager.getEntry(key)?.completedLangs.includes(lang);
  if (isCompleted) {
    try {
      await fs2.access(targetFilePath);
    } catch {
      isCompleted = false;
      logger.warn(
        `Missing file for ${lang}/${file}. Re-translating...`
      );
    }
  }
  if (isCompleted) {
    logger.info(`Skipping ${file} (${lang}) - up to date.`);
    return;
  }
  logger.info(`Translating ${file} to ${lang}...`);
  try {
    await fs2.mkdir(path3.dirname(targetFilePath), {
      recursive: true
    });
    const translatedContent = await translator.translateContent(
      sourceContent,
      sourceLang,
      lang,
      sourceFilePath,
      targetFilePath,
      logger
    );
    await fs2.writeFile(targetFilePath, translatedContent);
    stateManager.markLanguageCompleted(key, lang);
    await stateManager.saveTranslationState();
    logger.info(`\u2713 Translated ${file} to ${lang}`);
  } catch (error) {
    logger.error(
      `Failed to translate ${file} to ${lang}: ${error}`
    );
  }
}

/**
 * Creates the `astro-llm-translator` integration.
 *
 * On `astro:config:setup` (for the `build` and `dev` commands only) the hook:
 *   1. scans the source directory for `.md` / `.mdx` files;
 *   2. translates directory names that have no cached translation;
 *   3. detects moved files (same key, different path) and moves or removes
 *      their existing translations;
 *   4. translates each file into every target language that is not recorded
 *      as completed for the current source hash;
 *   5. deletes translations whose source file no longer exists.
 *
 * @param {object} inputOptions Options validated with `TranslatorOptionsSchema`.
 * @returns {{name: string, hooks: object}} The integration object.
 * @throws If `inputOptions` fails schema validation.
 */
function llmTranslator(inputOptions) {
  const options = TranslatorOptionsSchema.parse(inputOptions);
  const { sourceLang, targetLangs, contentDir = "src/content/docs" } = options;
  return {
    name: "astro-llm-translator",
    hooks: {
      "astro:config:setup": async ({ command, logger }) => {
        if (command !== "build" && command !== "dev") return;
        logger.info("Starting LLM Translation...");
        let translator;
        try {
          translator = new Translator(options);
        } catch (e) {
          logger.error(
            `Translator init failed: ${e instanceof Error ? e.message : String(e)}`
          );
          return;
        }
        const rootDir = process.cwd();
        const resolveTarget = (lang, relativePath) => path3.join(rootDir, contentDir, lang, relativePath);
        const stateManager = new StateManager(rootDir);
        await stateManager.load();

        // In "root" mode the sources live directly in contentDir, so the
        // generated language folders must be excluded from the scan.
        const isRootSource = sourceLang === "root";
        const sourceDir = isRootSource ? path3.join(rootDir, contentDir) : path3.join(rootDir, contentDir, sourceLang);
        const ignore = isRootSource ? targetLangs.map((lang) => `${lang}/**`) : [];
        const sourceFiles = await glob("**/*.{md,mdx}", {
          cwd: sourceDir,
          ignore
        });
        if (sourceFiles.length === 0) {
          logger.warn(`No source files found in ${sourceDir}`);
        }

        const foundKeys = new Set();
        for (const rawFile of sourceFiles) {
          const file = rawFile.normalize("NFC");
          const sourceFilePath = path3.join(sourceDir, file);
          let sourceContent = "";
          try {
            sourceContent = await fs2.readFile(sourceFilePath, "utf-8");
          } catch (_e) {
            logger.error(`Failed to read source file: ${file}`);
            continue;
          }
          const key = getKey(sourceContent, file);
          foundKeys.add(key);

          await translateDirectorySegments(file, targetLangs, translator, stateManager, logger);

          const currentHash = computeHash(sourceContent);
          const stateEntry = stateManager.getEntry(key);
          if (stateEntry && stateEntry.relativePath !== file) {
            const oldRelativePath = stateEntry.relativePath;
            // Snapshot first: updateTranslationEntry resets completedLangs.
            const completedLangs = [...stateEntry.completedLangs];
            if (stateEntry.sourceHash === currentHash) {
              logger.info(
                `Detected file move for slug "${key}": ${oldRelativePath} -> ${file}. Moving translations...`
              );
              await relocateTranslations(completedLangs, resolveTarget, oldRelativePath, file, logger);
              stateManager.updateTranslationEntry(key, currentHash, file);
              completedLangs.forEach(
                (lang) => stateManager.markLanguageCompleted(key, lang)
              );
              await stateManager.saveTranslationState();
              // Deliberately no `continue`: languages that were never
              // completed (newly added or previously failed) still need to
              // be translated in this run, and a failed move is detected by
              // the missing-file check below.
            } else {
              logger.info(
                `Detected file move with update for slug "${key}". Cleaning up old path...`
              );
              await removeTranslations(completedLangs, resolveTarget, oldRelativePath);
            }
          }
          if (!stateEntry || currentHash !== stateEntry.sourceHash) {
            stateManager.updateTranslationEntry(key, currentHash, file);
            await stateManager.saveTranslationState();
          }

          for (const lang of targetLangs) {
            await syncTranslation({
              file,
              key,
              lang,
              sourceContent,
              sourceLang,
              sourceFilePath,
              targetFilePath: resolveTarget(lang, file),
              translator,
              stateManager,
              logger
            });
          }
        }

        // Remove translations whose source file disappeared since the last run.
        for (const key of Object.keys(stateManager.translationState)) {
          if (foundKeys.has(key)) continue;
          logger.info(`Source file for key "${key}" deleted. Cleaning up...`);
          const entry = stateManager.getEntry(key);
          if (!entry) continue;
          await removeTranslations(
            targetLangs,
            resolveTarget,
            entry.relativePath,
            (targetPath) => logger.info(`Deleted orphan translation: ${targetPath}`)
          );
          stateManager.removeEntry(key);
          await stateManager.saveTranslationState();
        }
        logger.info("LLM Translation complete.");
      }
    }
  };
}
|
|
486
|
+
export {
|
|
487
|
+
llmTranslator as default
|
|
488
|
+
};
|