docs-i18n 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/admin/dist/server/server.js +20 -20
- package/package.json +1 -1
- package/template/app/utils/content-loader.ts +4 -0
- package/template/app/utils/docs.server.ts +7 -0
- package/template/content/blog/en/announcing-query-v5.md +110 -0
- package/template/content/blog/en/hello-world.md +26 -0
- package/template/content/blog/en/i18n-best-practices.md +57 -0
- package/template/content/blog/en/react-query-vs-swr.md +100 -0
- package/template/content/blog/en/state-management-2024.md +143 -0
- package/template/content/blog/en/tanstack-router-1.0.md +121 -0
- package/template/content/blog/ja/announcing-query-v5.md +110 -0
- package/template/content/blog/ja/hello-world.md +26 -0
- package/template/content/blog/zh-hans/announcing-query-v5.md +93 -0
- package/template/content/blog/zh-hans/hello-world.md +26 -0
- package/template/content/docs-i18n/docs.config.json +25 -0
- package/template/content/docs-i18n/en/architecture.md +222 -0
- package/template/content/docs-i18n/en/configuration.md +331 -0
- package/template/content/docs-i18n/en/deployment.md +209 -0
- package/template/content/docs-i18n/en/getting-started.md +168 -0
- package/template/content/docs.config.json +25 -0
- package/template/content/en/admin.md +151 -0
- package/template/content/en/architecture.md +222 -0
- package/template/content/en/cli.md +269 -0
- package/template/content/en/configuration.md +331 -0
- package/template/content/en/deployment.md +209 -0
- package/template/content/en/getting-started.md +168 -0
- package/template/content/form/docs.config.json +18 -0
- package/template/content/form/en/guides/validation.md +175 -0
- package/template/content/form/en/installation.md +63 -0
- package/template/content/form/en/overview.md +71 -0
- package/template/content/form/en/quick-start.md +121 -0
- package/template/content/form/ja/installation.md +63 -0
- package/template/content/form/ja/overview.md +71 -0
- package/template/content/form/zh-hans/installation.md +63 -0
- package/template/content/form/zh-hans/overview.md +71 -0
- package/template/content/query/docs.config.json +32 -0
- package/template/content/query/en/guides/mutations.md +126 -0
- package/template/content/query/en/guides/pagination.md +98 -0
- package/template/content/query/en/guides/queries.md +120 -0
- package/template/content/query/en/installation.md +78 -0
- package/template/content/query/en/overview.md +72 -0
- package/template/content/query/en/quick-start.md +108 -0
- package/template/content/query/ja/installation.md +78 -0
- package/template/content/query/ja/overview.md +72 -0
- package/template/content/query/zh-hans/guides/mutations.md +126 -0
- package/template/content/query/zh-hans/guides/pagination.md +98 -0
- package/template/content/query/zh-hans/guides/queries.md +120 -0
- package/template/content/query/zh-hans/installation.md +95 -0
- package/template/content/query/zh-hans/overview.md +72 -0
- package/template/content/query/zh-hans/quick-start.md +108 -0
- package/template/content/router/docs.config.json +18 -0
- package/template/content/router/en/guides/routing-concepts.md +131 -0
- package/template/content/router/en/installation.md +57 -0
- package/template/content/router/en/overview.md +74 -0
- package/template/content/router/en/quick-start.md +88 -0
- package/template/content/router/ja/installation.md +57 -0
- package/template/content/router/ja/overview.md +78 -0
- package/template/content/router/zh-hans/guides/routing-concepts.md +131 -0
- package/template/content/router/zh-hans/installation.md +57 -0
- package/template/content/router/zh-hans/overview.md +81 -0
- package/template/content/router/zh-hans/quick-start.md +88 -0
- package/template/content/table/docs.config.json +18 -0
- package/template/content/table/en/guides/column-definitions.md +135 -0
- package/template/content/table/en/installation.md +56 -0
- package/template/content/table/en/overview.md +79 -0
- package/template/content/table/en/quick-start.md +112 -0
- package/template/content/table/ja/installation.md +56 -0
- package/template/content/table/ja/overview.md +79 -0
- package/template/content/table/zh-hans/installation.md +56 -0
- package/template/content/table/zh-hans/overview.md +79 -0
- package/template/content/virtual/docs.config.json +18 -0
- package/template/content/virtual/en/guides/dynamic-sizing.md +129 -0
- package/template/content/virtual/en/installation.md +57 -0
- package/template/content/virtual/en/overview.md +74 -0
- package/template/content/virtual/en/quick-start.md +114 -0
- package/template/content/virtual/ja/installation.md +57 -0
- package/template/content/virtual/ja/overview.md +74 -0
- package/template/content/virtual/zh-hans/installation.md +57 -0
- package/template/content/virtual/zh-hans/overview.md +74 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"sections": [
|
|
3
|
+
{
|
|
4
|
+
"label": "Getting Started",
|
|
5
|
+
"children": [
|
|
6
|
+
{ "label": "Introduction", "to": "getting-started" }
|
|
7
|
+
]
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"label": "Usage",
|
|
11
|
+
"children": [
|
|
12
|
+
{ "label": "CLI Commands", "to": "cli" },
|
|
13
|
+
{ "label": "Configuration", "to": "configuration" },
|
|
14
|
+
{ "label": "Admin Dashboard", "to": "admin" }
|
|
15
|
+
]
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"label": "Advanced",
|
|
19
|
+
"children": [
|
|
20
|
+
{ "label": "Architecture", "to": "architecture" },
|
|
21
|
+
{ "label": "Deployment", "to": "deployment" }
|
|
22
|
+
]
|
|
23
|
+
}
|
|
24
|
+
]
|
|
25
|
+
}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Admin Dashboard
|
|
3
|
+
description: Web UI for monitoring translation progress, managing translation jobs, previewing files, and browsing LLM models.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Admin Dashboard
|
|
7
|
+
|
|
8
|
+
The docs-i18n admin dashboard is a web-based UI for managing your translations. It is built with TanStack Start and React, and runs as a local development server.
|
|
9
|
+
|
|
10
|
+
## Starting the Dashboard
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
npx docs-i18n admin
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
The dashboard opens at `http://localhost:3456`. Use `--port` to change the port:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npx docs-i18n admin --port 4000
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**Prerequisites:**
|
|
23
|
+
|
|
24
|
+
Your project must have `vite` and `@vitejs/plugin-react` installed:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
npm install -D vite @vitejs/plugin-react
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
The dashboard reads your `docs-i18n.config.ts` to discover projects, versions, and languages.
|
|
31
|
+
|
|
32
|
+
## Features
|
|
33
|
+
|
|
34
|
+
### Translation Overview
|
|
35
|
+
|
|
36
|
+
The main dashboard page shows a grid of all versions and languages with translation progress. For each version/language pair, you see:
|
|
37
|
+
|
|
38
|
+
- Total number of source nodes (EN content units).
|
|
39
|
+
- Number of translated nodes.
|
|
40
|
+
- Percentage complete.
|
|
41
|
+
- Breakdown by section (e.g., `docs/`, `blog/`, `learn/`), showing file counts and node counts per section.
|
|
42
|
+
|
|
43
|
+
English is always shown as 100% complete since it is the source language.
|
|
44
|
+
|
|
45
|
+
The overview auto-scans source files on load. If source files have not been scanned yet, the dashboard triggers a scan automatically.
|
|
46
|
+
|
|
47
|
+
### File Browser
|
|
48
|
+
|
|
49
|
+
Clicking on a version/language cell opens a file-level coverage list. Each file shows:
|
|
50
|
+
|
|
51
|
+
- File path (relative to the source directory).
|
|
52
|
+
- Total translatable nodes in that file.
|
|
53
|
+
- Number of translated nodes.
|
|
54
|
+
|
|
55
|
+
Files are sorted by path. You can click on any file to open the block-level preview.
|
|
56
|
+
|
|
57
|
+
### File Preview
|
|
58
|
+
|
|
59
|
+
The file preview shows every block (AST node) in a file side by side:
|
|
60
|
+
|
|
61
|
+
- **Source** -- the original English text.
|
|
62
|
+
- **Translation** -- the cached translation for the selected language, or empty if not yet translated.
|
|
63
|
+
|
|
64
|
+
Each block displays its type (heading, paragraph, list, blockquote, frontmatter, code, html) and MD5 key. Non-translatable blocks (code, pure HTML tags, gaps between nodes) are shown but clearly distinguished.
|
|
65
|
+
|
|
66
|
+
### Cache Management
|
|
67
|
+
|
|
68
|
+
From the file preview, you can delete individual cache entries. This is useful when a translation is incorrect and you want to re-translate a specific node. After deleting, run the translate command again to get a fresh translation for that key.
|
|
69
|
+
|
|
70
|
+
The dashboard also supports rescanning source files for a specific version via the UI. This rebuilds the source index and cleans orphaned entries.
|
|
71
|
+
|
|
72
|
+
### Translation Jobs
|
|
73
|
+
|
|
74
|
+
The dashboard includes a job management system for running translations directly from the UI instead of the command line.
|
|
75
|
+
|
|
76
|
+
#### Creating a Job
|
|
77
|
+
|
|
78
|
+
Click the job creation button and configure:
|
|
79
|
+
|
|
80
|
+
- **Language** -- target language code.
|
|
81
|
+
- **Version** -- which version to translate.
|
|
82
|
+
- **Project** -- optionally filter to a specific project.
|
|
83
|
+
- **Model** -- LLM model to use (can be selected from the model browser).
|
|
84
|
+
- **Model rotation** -- optionally provide multiple models to rotate through.
|
|
85
|
+
- **Max chunks** -- limit the number of API call chunks.
|
|
86
|
+
- **Concurrency** -- number of parallel API calls (default: 3).
|
|
87
|
+
- **Files** -- optionally select specific files to translate.
|
|
88
|
+
|
|
89
|
+
#### Job Status
|
|
90
|
+
|
|
91
|
+
Each job in the list shows:
|
|
92
|
+
|
|
93
|
+
- Status: `running`, `completed`, `failed`, or `cancelled`.
|
|
94
|
+
- Start time and finish time.
|
|
95
|
+
- Number of translated chunks and total chunks.
|
|
96
|
+
- Current chunk being processed.
|
|
97
|
+
- Live log output (last 20 lines displayed, up to 500 lines stored).
|
|
98
|
+
|
|
99
|
+
You can cancel a running job, which sends SIGTERM to the translation process. Completed or failed jobs can be removed from the list.
|
|
100
|
+
|
|
101
|
+
#### How Jobs Work
|
|
102
|
+
|
|
103
|
+
Under the hood, the job manager spawns a child process running the `docs-i18n translate` CLI command with the configured options. It captures stdout and stderr, parses progress information from the output, and exposes it through the dashboard API. The child process inherits the API key from your config or environment variables.
|
|
104
|
+
|
|
105
|
+
### Model Browser
|
|
106
|
+
|
|
107
|
+
The dashboard includes an OpenRouter model browser that fetches the list of available models from the OpenRouter API. For each model, it displays:
|
|
108
|
+
|
|
109
|
+
- Model ID and name.
|
|
110
|
+
- Pricing (prompt and completion per million tokens).
|
|
111
|
+
- Context length and maximum output tokens.
|
|
112
|
+
- Whether the model supports JSON response format and tool use.
|
|
113
|
+
- Provider name.
|
|
114
|
+
- Whether the model is free.
|
|
115
|
+
|
|
116
|
+
The model list is cached for 5 minutes. It only shows text-to-text models (filtered by architecture modality) and excludes models with negative pricing. Models are sorted by prompt price (cheapest first).
|
|
117
|
+
|
|
118
|
+
This is useful for selecting a model when creating a translation job.
|
|
119
|
+
|
|
120
|
+
### Open in Editor
|
|
121
|
+
|
|
122
|
+
The dashboard can open source files in your local editor. It tries the following editors in order:
|
|
123
|
+
|
|
124
|
+
1. The value of the `EDITOR_CMD` environment variable (if set).
|
|
125
|
+
2. `code` (VS Code)
|
|
126
|
+
3. `cursor` (Cursor)
|
|
127
|
+
4. `zed` (Zed)
|
|
128
|
+
|
|
129
|
+
If none are found, it falls back to the system default (`open` on macOS, `xdg-open` on Linux, `start` on Windows).
|
|
130
|
+
|
|
131
|
+
## Architecture
|
|
132
|
+
|
|
133
|
+
The admin dashboard uses:
|
|
134
|
+
|
|
135
|
+
- **TanStack Start** -- Full-stack React framework with server functions.
|
|
136
|
+
- **TanStack React Query** -- For data fetching and cache management.
|
|
137
|
+
- **TanStack Router** -- For client-side routing.
|
|
138
|
+
- **Vite** -- Dev server and build tool.
|
|
139
|
+
- **Hono** -- HTTP server (used by TanStack Start internally).
|
|
140
|
+
|
|
141
|
+
Server functions are defined in `src/admin/server/functions/` and handle:
|
|
142
|
+
|
|
143
|
+
- `fetchStatus` / `fetchFileCoverage` / `fetchFileBlocks` -- Read translation status from SQLite.
|
|
144
|
+
- `deleteCacheEntry` -- Delete a specific translation from the cache.
|
|
145
|
+
- `rescanVersion` -- Rescan source files for a version.
|
|
146
|
+
- `createJob` / `fetchJobs` / `fetchJob` / `deleteJob` -- Manage translation jobs.
|
|
147
|
+
- `fetchModels` -- Fetch available models from OpenRouter.
|
|
148
|
+
- `fetchVersion` / `fetchConfig` -- Get docs-i18n version and project root.
|
|
149
|
+
- `openFile` -- Open a file in the local editor.
|
|
150
|
+
|
|
151
|
+
The dashboard shares the same `TranslationCache` and `parseMdx` functions as the CLI, ensuring consistent behavior.
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Architecture
|
|
3
|
+
description: How docs-i18n works internally -- the translation pipeline, AST parsing, caching, and chunking strategies.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Architecture
|
|
7
|
+
|
|
8
|
+
This document explains how docs-i18n works internally. Understanding the pipeline helps you tune configuration, debug issues, and contribute to the project.
|
|
9
|
+
|
|
10
|
+
## Translation Pipeline
|
|
11
|
+
|
|
12
|
+
The end-to-end flow is:
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
Source files (EN)
|
|
16
|
+
|
|
|
17
|
+
v
|
|
18
|
+
[1. Normalize] -- Ensure JSX tags are separated by blank lines
|
|
19
|
+
|
|
|
20
|
+
v
|
|
21
|
+
[2. Parse] -- remark AST -> flat list of typed nodes
|
|
22
|
+
|
|
|
23
|
+
v
|
|
24
|
+
[3. Hash] -- MD5 of each translatable node's text
|
|
25
|
+
|
|
|
26
|
+
v
|
|
27
|
+
[4. Chunk] -- Group nodes into chunks that fit the LLM context window
|
|
28
|
+
|
|
|
29
|
+
v
|
|
30
|
+
[5. Translate] -- Send JSON to LLM, receive JSON translations
|
|
31
|
+
|
|
|
32
|
+
v
|
|
33
|
+
[6. Cache] -- Store translations in SQLite keyed by (lang, md5)
|
|
34
|
+
|
|
|
35
|
+
v
|
|
36
|
+
[7. Assemble] -- EN source + cache -> translated output files
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Step 1: Normalization
|
|
40
|
+
|
|
41
|
+
The `normalize()` function (`src/core/normalize.ts`) preprocesses MDX content to ensure that JSX tags like `<AppOnly>`, `<PagesOnly>`, `<details>`, and `<div>` are separated from surrounding content by blank lines. This ensures remark parses them as independent HTML nodes rather than merging them with adjacent text.
|
|
42
|
+
|
|
43
|
+
For example, this input:
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
<AppOnly>
|
|
47
|
+
Some text here
|
|
48
|
+
</AppOnly>
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Becomes:
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
<AppOnly>
|
|
55
|
+
|
|
56
|
+
Some text here
|
|
57
|
+
|
|
58
|
+
</AppOnly>
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Step 2: AST Parsing
|
|
62
|
+
|
|
63
|
+
The `parseMdx()` function (`src/core/parser.ts`) uses remark to parse markdown into a flat list of `ParsedNode` objects. Each node has:
|
|
64
|
+
|
|
65
|
+
- `type` -- The AST node type: `paragraph`, `heading`, `list`, `blockquote`, `code`, `html`, `thematicBreak`, or `frontmatter`.
|
|
66
|
+
- `rawText` -- The raw text content from the normalized source.
|
|
67
|
+
- `needsTranslation` -- Whether this node contains human-readable text.
|
|
68
|
+
- `md5` -- MD5 hash of the raw text (only for translatable nodes).
|
|
69
|
+
- `startOffset` / `endOffset` -- Character offsets in the normalized content.
|
|
70
|
+
|
|
71
|
+
**Translatable node types:** `paragraph`, `heading`, `list`, `blockquote`, and `html` nodes that contain non-tag text (e.g., `<summary>Examples</summary>`).
|
|
72
|
+
|
|
73
|
+
**Non-translatable:** `code` blocks, `thematicBreak`, and pure HTML/JSX tags (self-closing tags like `<Check size={18} />`, opening/closing tags like `<AppOnly></AppOnly>`).
|
|
74
|
+
|
|
75
|
+
**Frontmatter handling:** If the content starts with `---`, the parser detects YAML frontmatter and emits it as a single `frontmatter` node spanning from the opening `---` to the closing `---`. The frontmatter module (`src/core/frontmatter.ts`) then extracts only the configured translatable fields (e.g., `title`, `description`) using the `yaml` library, sends them to the LLM as plain text, and reconstructs the YAML with translated values while preserving all other fields and formatting.
|
|
76
|
+
|
|
77
|
+
## Step 3: MD5 Hashing
|
|
78
|
+
|
|
79
|
+
Each translatable node's raw text is hashed with MD5 to produce a stable key. This key is used for:
|
|
80
|
+
|
|
81
|
+
- **Deduplication** -- identical content appearing in multiple files (or even multiple projects) shares a single translation.
|
|
82
|
+
- **Incremental updates** -- when source content changes, only the nodes with new MD5 hashes need translation. Unchanged nodes reuse their cached translations.
|
|
83
|
+
- **Heading differentiation** -- heading nodes include their level markers (`##`, `###`) in the hash, so "## Installation" and "### Installation" produce different keys.
|
|
84
|
+
|
|
85
|
+
## Step 4: Smart Chunking
|
|
86
|
+
|
|
87
|
+
The translate command groups untranslated nodes into chunks that fit within the LLM's context window. The chunking algorithm (`src/commands/translate.ts`) accounts for:
|
|
88
|
+
|
|
89
|
+
- **Input budget** -- system prompt tokens + source text tokens. Estimated at `text.length / 4 + 80` tokens per node (accounting for JSON structure overhead).
|
|
90
|
+
- **Output budget** -- translated text tokens. Scaled by a per-language multiplier since different languages use tokens differently:
|
|
91
|
+
|
|
92
|
+
| Language | Multiplier | Reason |
|
|
93
|
+
| --- | --- | --- |
|
|
94
|
+
| Japanese, Korean, Hindi, Thai | 2.5x | CJK/Indic/Thai script tokenization |
|
|
95
|
+
| Russian, Arabic, Ukrainian, Hebrew | 2.0x | Cyrillic/Arabic/Hebrew scripts |
|
|
96
|
+
| Chinese (Simplified/Traditional) | 1.5x | CJK but more concise |
|
|
97
|
+
| German, French, Portuguese | 1.3x | Slightly longer than English |
|
|
98
|
+
| Spanish, Vietnamese | 1.2x | Close to English length |
|
|
99
|
+
| Other languages | 2.0x | Safe default |
|
|
100
|
+
|
|
101
|
+
- **System prompt overhead** -- approximately 700 tokens for the translation prompt.
|
|
102
|
+
- **JSON structure overhead** -- approximately 80 tokens per key for JSON schema properties.
|
|
103
|
+
- **Safety margin** -- only 85% of the input budget and 75% of the output budget are used, leaving room for estimation errors.
|
|
104
|
+
|
|
105
|
+
When a chunk reaches its budget, a new chunk is started. This prevents context window overflow and output truncation.
|
|
106
|
+
|
|
107
|
+
## Step 5: LLM Translation
|
|
108
|
+
|
|
109
|
+
The translator (`src/core/translator.ts`) uses structured JSON mode for translations:
|
|
110
|
+
|
|
111
|
+
**Input format:** A JSON object with a `nodes` array. Each node has a `key` (MD5), `type` (heading/paragraph/list/etc.), and `text` (content to translate).
|
|
112
|
+
|
|
113
|
+
```json
|
|
114
|
+
{
|
|
115
|
+
"nodes": [
|
|
116
|
+
{ "key": "a1b2c3...", "type": "heading", "text": "## Installation" },
|
|
117
|
+
{ "key": "d4e5f6...", "type": "paragraph", "text": "Run the following command:" }
|
|
118
|
+
]
|
|
119
|
+
}
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
**Output format:** A flat JSON object mapping each key to its translation.
|
|
123
|
+
|
|
124
|
+
```json
|
|
125
|
+
{
|
|
126
|
+
"a1b2c3...": "## \u5b89\u88c5",
|
|
127
|
+
"d4e5f6...": "\u8fd0\u884c\u4ee5\u4e0b\u547d\u4ee4\uff1a"
|
|
128
|
+
}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
**Robustness features:**
|
|
132
|
+
|
|
133
|
+
- **JSON repair** -- Handles common LLM JSON errors: unescaped newlines in strings, trailing commas, missing closing braces.
|
|
134
|
+
- **Thinking block stripping** -- Removes `<think>...</think>` blocks from reasoning models.
|
|
135
|
+
- **Unwrapping** -- If the model wraps output in `{"nodes": {...}}` or `{"translations": {...}}`, it is automatically unwrapped.
|
|
136
|
+
- **Key recovery** -- If the model corrupts an MD5 key (e.g., truncates it), the translator attempts fuzzy matching to recover the translation (up to 3 character differences).
|
|
137
|
+
- **Garbage detection** -- If more than 50% of translation values are identical, the model output is rejected.
|
|
138
|
+
- **Retry with backoff** -- Retries on 429 (rate limit), 503, 405, timeout, and connection errors. Uses exponential backoff starting at 2 seconds.
|
|
139
|
+
- **Model rotation** -- Supports a list of models to rotate through. Dead models (400/404 errors) are skipped. Rate-limited models (429) are deprioritized.
|
|
140
|
+
- **Truncation detection** -- If `finish_reason` is `'length'`, the output was truncated and the request is retried.
|
|
141
|
+
|
|
142
|
+
**Frontmatter translation:**
|
|
143
|
+
|
|
144
|
+
Frontmatter nodes are handled specially. Instead of sending the entire YAML block, the translator extracts individual translatable fields (e.g., `title`, `description`) and sends them as plain `paragraph` type nodes with virtual keys like `fm:<md5>:title`. After translation, the fields are reassembled into the original YAML structure using `reconstructFrontmatter()`.
|
|
145
|
+
|
|
146
|
+
## Step 6: SQLite Cache
|
|
147
|
+
|
|
148
|
+
The `TranslationCache` class (`src/core/cache.ts`) manages all persistent state in a single SQLite database at `<cacheDir>/translations.db`.
|
|
149
|
+
|
|
150
|
+
**Schema:**
|
|
151
|
+
|
|
152
|
+
```sql
|
|
153
|
+
-- EN source texts (deduplicated by MD5)
|
|
154
|
+
CREATE TABLE sources (
|
|
155
|
+
key TEXT PRIMARY KEY NOT NULL, -- MD5 hash
|
|
156
|
+
text TEXT NOT NULL, -- original English text
|
|
157
|
+
type TEXT NOT NULL DEFAULT 'paragraph'
|
|
158
|
+
);
|
|
159
|
+
|
|
160
|
+
-- Which files use each source node
|
|
161
|
+
CREATE TABLE source_files (
|
|
162
|
+
key TEXT NOT NULL, -- MD5 hash
|
|
163
|
+
file TEXT NOT NULL, -- relative file path
|
|
164
|
+
line INTEGER NOT NULL, -- line number
|
|
165
|
+
version TEXT NOT NULL DEFAULT 'latest',
|
|
166
|
+
PRIMARY KEY (version, key, file, line)
|
|
167
|
+
);
|
|
168
|
+
|
|
169
|
+
-- Translated texts per language
|
|
170
|
+
CREATE TABLE translations (
|
|
171
|
+
lang TEXT NOT NULL, -- language code
|
|
172
|
+
key TEXT NOT NULL, -- MD5 hash
|
|
173
|
+
value TEXT NOT NULL, -- translated text
|
|
174
|
+
created_at INTEGER NOT NULL DEFAULT (unixepoch()),
|
|
175
|
+
updated_at INTEGER NOT NULL DEFAULT (unixepoch()),
|
|
176
|
+
PRIMARY KEY (lang, key)
|
|
177
|
+
);
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
**Performance configuration:**
|
|
181
|
+
|
|
182
|
+
- **WAL mode** -- Write-Ahead Logging allows concurrent readers alongside a single writer, which avoids lock contention during parallel translation and assembly.
|
|
183
|
+
- **busy_timeout = 5000** -- Wait up to 5 seconds for a lock before failing.
|
|
184
|
+
- **synchronous = NORMAL** -- Balanced between safety and performance.
|
|
185
|
+
- **WITHOUT ROWID** -- Tables use the primary key directly, avoiding an extra rowid column.
|
|
186
|
+
- **Immediate writes** -- All `set()` calls write directly to disk. No explicit save step needed.
|
|
187
|
+
|
|
188
|
+
**Key operations:**
|
|
189
|
+
|
|
190
|
+
- `get(lang, md5)` -- Look up a cached translation.
|
|
191
|
+
- `set(lang, md5, translation)` -- Store or update a translation (upsert).
|
|
192
|
+
- `untranslatedKeys(lang, version)` -- Find all source keys that have no translation for a given language.
|
|
193
|
+
- `fileCoverage(version, lang)` -- Get per-file translation coverage (used by the admin dashboard).
|
|
194
|
+
- `prune(lang, usedMd5s)` -- Remove translations whose keys are no longer referenced.
|
|
195
|
+
- `exportJsonl(lang, outputPath)` / `importJsonl(lang, inputPath)` -- Export/import translations in JSONL format for backup or migration.
|
|
196
|
+
|
|
197
|
+
**SQLite compatibility:**
|
|
198
|
+
|
|
199
|
+
The `openDatabase()` function (`src/core/sqlite.ts`) automatically detects the runtime environment. Under Bun, it uses `bun:sqlite`. Under Node.js, it uses `better-sqlite3`. Both expose the same interface.
|
|
200
|
+
|
|
201
|
+
## Step 7: Assembly
|
|
202
|
+
|
|
203
|
+
The `assemble()` function (`src/core/assembler.ts`) produces a translated file from English source content and cached translations:
|
|
204
|
+
|
|
205
|
+
1. Normalizes the source content.
|
|
206
|
+
2. Parses it into AST nodes.
|
|
207
|
+
3. For each node:
|
|
208
|
+
- **Non-translatable nodes** (code blocks, HTML tags) are kept as-is.
|
|
209
|
+
- **Translatable nodes with a cached translation** are replaced with the cached value.
|
|
210
|
+
- **Translatable nodes without a cache hit** are either wrapped in `<!-- NEEDS_TRANSLATION -->` markers (for the legacy whole-file translation mode) or fall back to the original English text (for assembled output files).
|
|
211
|
+
4. Preserves all whitespace and newlines between nodes.
|
|
212
|
+
|
|
213
|
+
The `AssembleResult` includes statistics: `cachedCount`, `uncachedCount`, `totalTranslatable`, and whether all nodes were cached (`allCached`).
|
|
214
|
+
|
|
215
|
+
## Validation
|
|
216
|
+
|
|
217
|
+
The `validate()` function (`src/core/validator.ts`) compares LLM output against the translation cache to detect and correct modifications to already-cached translations. It uses two alignment strategies:
|
|
218
|
+
|
|
219
|
+
- **Fast path** -- When the number of translatable nodes in the source and output match, nodes are aligned by index.
|
|
220
|
+
- **Anchor-based alignment** -- When node counts differ (the LLM merged or split paragraphs), cached translations serve as anchor points. The validator finds exact matches between output text and cached translations, then aligns nodes between anchors by type matching.
|
|
221
|
+
|
|
222
|
+
Cached translations always override LLM modifications, ensuring translation consistency across runs.
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: CLI Reference
|
|
3
|
+
description: Complete reference for all docs-i18n CLI commands, flags, and common workflows.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# CLI Reference
|
|
7
|
+
|
|
8
|
+
docs-i18n provides a command-line interface for managing documentation translations. All commands read from `docs-i18n.config.ts` in the current working directory by default.
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
docs-i18n <command> [options]
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Global Options
|
|
15
|
+
|
|
16
|
+
| Flag | Description |
|
|
17
|
+
| --- | --- |
|
|
18
|
+
| `--config <path>` | Path to config file (default: `docs-i18n.config.ts`) |
|
|
19
|
+
| `--version`, `-v` | Print the docs-i18n version |
|
|
20
|
+
| `--help`, `-h` | Show help text |
|
|
21
|
+
|
|
22
|
+
## Commands
|
|
23
|
+
|
|
24
|
+
### `translate`
|
|
25
|
+
|
|
26
|
+
Translate content to a target language. Sends untranslated nodes to the configured LLM and caches the results.
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
docs-i18n translate --lang <code> [options]
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
**Required:**
|
|
33
|
+
|
|
34
|
+
| Flag | Description |
|
|
35
|
+
| --- | --- |
|
|
36
|
+
| `--lang <code>` | Target language code (e.g., `zh-hans`, `ja`, `es`) |
|
|
37
|
+
|
|
38
|
+
**Optional:**
|
|
39
|
+
|
|
40
|
+
| Flag | Default | Description |
|
|
41
|
+
| --- | --- | --- |
|
|
42
|
+
| `--project <id>` | all projects | Filter to a specific project |
|
|
43
|
+
| `--version <ver>` | all versions | Filter to a specific version |
|
|
44
|
+
| `--files <paths>` | all files | Comma-separated list of relative file paths to translate |
|
|
45
|
+
| `--max <n>` | 999 | Maximum number of API call chunks to process |
|
|
46
|
+
| `--concurrency <n>` | 3 | Number of parallel API calls |
|
|
47
|
+
| `--model <model>` | config value | Override the LLM model |
|
|
48
|
+
| `--api-key <key>` | config/env | Override the API key |
|
|
49
|
+
| `--max-tokens <n>` | 16384 | Max output tokens per API call |
|
|
50
|
+
| `--context-length <n>` | 32768 | Model context window size |
|
|
51
|
+
| `--dry-run` | false | Preview what would be translated without making API calls |
|
|
52
|
+
|
|
53
|
+
**Examples:**
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# Translate everything to Simplified Chinese
|
|
57
|
+
docs-i18n translate --lang zh-hans
|
|
58
|
+
|
|
59
|
+
# Translate a specific version with dry run
|
|
60
|
+
docs-i18n translate --lang zh-hans --version latest --dry-run
|
|
61
|
+
|
|
62
|
+
# Translate specific files only
|
|
63
|
+
docs-i18n translate --lang zh-hans --files docs/intro.mdx,docs/guide.mdx
|
|
64
|
+
|
|
65
|
+
# Use a different model with higher token limits
|
|
66
|
+
docs-i18n translate --lang ja --model qwen/qwen3.5-flash-02-23 --max-tokens 65536 --context-length 1000000
|
|
67
|
+
|
|
68
|
+
# Limit API calls for testing
|
|
69
|
+
docs-i18n translate --lang es --max 5 --concurrency 1
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
**How it works:**
|
|
73
|
+
|
|
74
|
+
1. Loads the SQLite cache and finds all untranslated keys for the given language and version.
|
|
75
|
+
2. Groups untranslated nodes into chunks that fit within the model's context window, accounting for input tokens, output tokens, and language-specific token multipliers (e.g., Japanese output is budgeted at 2.5x the token count of the English source).
|
|
76
|
+
3. Sends each chunk to the LLM as structured JSON: an array of typed nodes with MD5 keys.
|
|
77
|
+
4. Parses the JSON response, validates keys, and stores translations in the cache.
|
|
78
|
+
5. Runs chunks in parallel up to the concurrency limit.
|
|
79
|
+
|
|
80
|
+
Translation logs are written to `.logs/` with timestamps for debugging.
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
### `assemble`
|
|
85
|
+
|
|
86
|
+
Assemble translated files by combining English source content with cached translations. For any untranslated nodes, the original English text is used as a fallback.
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
docs-i18n assemble [options]
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**Optional:**
|
|
93
|
+
|
|
94
|
+
| Flag | Default | Description |
|
|
95
|
+
| --- | --- | --- |
|
|
96
|
+
| `--project <id>` | all projects | Filter to a specific project |
|
|
97
|
+
| `--version <ver>` | all versions | Filter to a specific version |
|
|
98
|
+
| `--lang <code>` | all languages | Filter to a specific language |
|
|
99
|
+
|
|
100
|
+
**Examples:**
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
# Assemble all projects, versions, and languages
|
|
104
|
+
docs-i18n assemble
|
|
105
|
+
|
|
106
|
+
# Assemble only Chinese for the latest version
|
|
107
|
+
docs-i18n assemble --lang zh-hans --version latest
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Output is written to `.cache/content/<version>/<lang>/` by default. Each file mirrors the structure of the English source directory.
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
### `rescan`
|
|
115
|
+
|
|
116
|
+
Rescan source files and rebuild the source index. Also removes orphaned translations and source entries whose content no longer appears in any source file.
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
docs-i18n rescan [options]
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
**Optional:**
|
|
123
|
+
|
|
124
|
+
| Flag | Default | Description |
|
|
125
|
+
| --- | --- | --- |
|
|
126
|
+
| `--project <id>` | all projects | Filter to a specific project |
|
|
127
|
+
| `--version <ver>` | all versions | Filter to a specific version |
|
|
128
|
+
|
|
129
|
+
**Examples:**
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
# Rescan all source files
|
|
133
|
+
docs-i18n rescan
|
|
134
|
+
|
|
135
|
+
# Rescan a specific version only
|
|
136
|
+
docs-i18n rescan --version latest
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
**What it does:**
|
|
140
|
+
|
|
141
|
+
1. Walks all source directories and parses every Markdown/MDX file.
|
|
142
|
+
2. Extracts translatable nodes and stores their MD5 keys, source text, node types, file paths, and line numbers in the SQLite cache.
|
|
143
|
+
3. Deletes orphaned translations whose source keys no longer exist in any source file.
|
|
144
|
+
4. Deletes orphaned source entries that are no longer referenced by any file.
|
|
145
|
+
|
|
146
|
+
Run `rescan` after adding, removing, or significantly editing source files.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
### `status`
|
|
151
|
+
|
|
152
|
+
Show translation coverage for all projects and versions.
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
docs-i18n status [options]
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
**Optional:**
|
|
159
|
+
|
|
160
|
+
| Flag | Default | Description |
|
|
161
|
+
| --- | --- | --- |
|
|
162
|
+
| `--lang <code>` | all languages | Show status for a specific language only |
|
|
163
|
+
|
|
164
|
+
**Examples:**
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
# Show status for all languages
|
|
168
|
+
docs-i18n status
|
|
169
|
+
|
|
170
|
+
# Show status for Chinese only
|
|
171
|
+
docs-i18n status --lang zh-hans
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
**Output:**
|
|
175
|
+
|
|
176
|
+
```
|
|
177
|
+
Translation Status
|
|
178
|
+
|
|
179
|
+
latest (1523 keys):
|
|
180
|
+
zh-hans ████████████████████ 100% (1523/1523)
|
|
181
|
+
ja ████████████░░░░░░░░ 62% (944/1523)
|
|
182
|
+
es ██░░░░░░░░░░░░░░░░░░ 10% (152/1523)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
If a version shows "no source files (run rescan first)", you need to run `docs-i18n rescan` before checking status.
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
### `admin`
|
|
190
|
+
|
|
191
|
+
Start the admin dashboard web UI for managing translations.
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
docs-i18n admin [options]
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
**Optional:**
|
|
198
|
+
|
|
199
|
+
| Flag | Default | Description |
|
|
200
|
+
| --- | --- | --- |
|
|
201
|
+
| `--port <n>` | 3456 | Port to run the dashboard on |
|
|
202
|
+
|
|
203
|
+
**Examples:**
|
|
204
|
+
|
|
205
|
+
```bash
|
|
206
|
+
# Start on default port
|
|
207
|
+
docs-i18n admin
|
|
208
|
+
|
|
209
|
+
# Start on custom port
|
|
210
|
+
docs-i18n admin --port 4000
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
The dashboard opens at `http://localhost:3456` (or your custom port). See the [Admin Dashboard](./admin.md) documentation for details on its features.
|
|
214
|
+
|
|
215
|
+
## Common Workflows
|
|
216
|
+
|
|
217
|
+
### Full translation pipeline
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
# 1. Scan source files
|
|
221
|
+
docs-i18n rescan
|
|
222
|
+
|
|
223
|
+
# 2. Check what needs translating
|
|
224
|
+
docs-i18n status
|
|
225
|
+
|
|
226
|
+
# 3. Translate
|
|
227
|
+
docs-i18n translate --lang zh-hans
|
|
228
|
+
|
|
229
|
+
# 4. Assemble output
|
|
230
|
+
docs-i18n assemble --lang zh-hans
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### Translate a single file for review
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
docs-i18n translate --lang zh-hans --files docs/getting-started.mdx --max 1
|
|
237
|
+
docs-i18n assemble --lang zh-hans
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
### Preview translation volume before spending API credits
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
docs-i18n translate --lang ja --dry-run
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
This reports the number of untranslated keys and chunks without making any API calls.
|
|
247
|
+
|
|
248
|
+
### Multi-project translation
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
# Translate only the "query" project
|
|
252
|
+
docs-i18n translate --lang zh-hans --project query
|
|
253
|
+
|
|
254
|
+
# Assemble only the "table" project
|
|
255
|
+
docs-i18n assemble --project table --lang zh-hans
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
### After editing source files
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
# Rescan to detect changes and clean orphans
|
|
262
|
+
docs-i18n rescan
|
|
263
|
+
|
|
264
|
+
# Translate only new/changed content (cached translations are reused)
|
|
265
|
+
docs-i18n translate --lang zh-hans
|
|
266
|
+
|
|
267
|
+
# Rebuild output files
|
|
268
|
+
docs-i18n assemble
|
|
269
|
+
```
|