@llm-translate/cli 1.0.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +51 -0
- package/.env.example +33 -0
- package/.github/workflows/docs-pages.yml +57 -0
- package/.github/workflows/release.yml +49 -0
- package/.translaterc.json +44 -0
- package/CLAUDE.md +243 -0
- package/Dockerfile +55 -0
- package/README.md +371 -0
- package/RFC.md +1595 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +4494 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/index.d.ts +1152 -0
- package/dist/index.js +3841 -0
- package/dist/index.js.map +1 -0
- package/docker-compose.yml +56 -0
- package/docs/.vitepress/config.ts +161 -0
- package/docs/api/agent.md +262 -0
- package/docs/api/engine.md +274 -0
- package/docs/api/index.md +171 -0
- package/docs/api/providers.md +304 -0
- package/docs/changelog.md +64 -0
- package/docs/cli/dir.md +243 -0
- package/docs/cli/file.md +213 -0
- package/docs/cli/glossary.md +273 -0
- package/docs/cli/index.md +129 -0
- package/docs/cli/init.md +158 -0
- package/docs/cli/serve.md +211 -0
- package/docs/glossary.json +235 -0
- package/docs/guide/chunking.md +272 -0
- package/docs/guide/configuration.md +139 -0
- package/docs/guide/cost-optimization.md +237 -0
- package/docs/guide/docker.md +371 -0
- package/docs/guide/getting-started.md +150 -0
- package/docs/guide/glossary.md +241 -0
- package/docs/guide/index.md +86 -0
- package/docs/guide/ollama.md +515 -0
- package/docs/guide/prompt-caching.md +221 -0
- package/docs/guide/providers.md +232 -0
- package/docs/guide/quality-control.md +206 -0
- package/docs/guide/vitepress-integration.md +265 -0
- package/docs/index.md +63 -0
- package/docs/ja/api/agent.md +262 -0
- package/docs/ja/api/engine.md +274 -0
- package/docs/ja/api/index.md +171 -0
- package/docs/ja/api/providers.md +304 -0
- package/docs/ja/changelog.md +64 -0
- package/docs/ja/cli/dir.md +243 -0
- package/docs/ja/cli/file.md +213 -0
- package/docs/ja/cli/glossary.md +273 -0
- package/docs/ja/cli/index.md +111 -0
- package/docs/ja/cli/init.md +158 -0
- package/docs/ja/guide/chunking.md +271 -0
- package/docs/ja/guide/configuration.md +139 -0
- package/docs/ja/guide/cost-optimization.md +30 -0
- package/docs/ja/guide/getting-started.md +150 -0
- package/docs/ja/guide/glossary.md +214 -0
- package/docs/ja/guide/index.md +32 -0
- package/docs/ja/guide/ollama.md +410 -0
- package/docs/ja/guide/prompt-caching.md +221 -0
- package/docs/ja/guide/providers.md +232 -0
- package/docs/ja/guide/quality-control.md +137 -0
- package/docs/ja/guide/vitepress-integration.md +265 -0
- package/docs/ja/index.md +58 -0
- package/docs/ko/api/agent.md +262 -0
- package/docs/ko/api/engine.md +274 -0
- package/docs/ko/api/index.md +171 -0
- package/docs/ko/api/providers.md +304 -0
- package/docs/ko/changelog.md +64 -0
- package/docs/ko/cli/dir.md +243 -0
- package/docs/ko/cli/file.md +213 -0
- package/docs/ko/cli/glossary.md +273 -0
- package/docs/ko/cli/index.md +111 -0
- package/docs/ko/cli/init.md +158 -0
- package/docs/ko/guide/chunking.md +271 -0
- package/docs/ko/guide/configuration.md +139 -0
- package/docs/ko/guide/cost-optimization.md +30 -0
- package/docs/ko/guide/getting-started.md +150 -0
- package/docs/ko/guide/glossary.md +214 -0
- package/docs/ko/guide/index.md +32 -0
- package/docs/ko/guide/ollama.md +410 -0
- package/docs/ko/guide/prompt-caching.md +221 -0
- package/docs/ko/guide/providers.md +232 -0
- package/docs/ko/guide/quality-control.md +137 -0
- package/docs/ko/guide/vitepress-integration.md +265 -0
- package/docs/ko/index.md +58 -0
- package/docs/zh/api/agent.md +262 -0
- package/docs/zh/api/engine.md +274 -0
- package/docs/zh/api/index.md +171 -0
- package/docs/zh/api/providers.md +304 -0
- package/docs/zh/changelog.md +64 -0
- package/docs/zh/cli/dir.md +243 -0
- package/docs/zh/cli/file.md +213 -0
- package/docs/zh/cli/glossary.md +273 -0
- package/docs/zh/cli/index.md +111 -0
- package/docs/zh/cli/init.md +158 -0
- package/docs/zh/guide/chunking.md +271 -0
- package/docs/zh/guide/configuration.md +139 -0
- package/docs/zh/guide/cost-optimization.md +30 -0
- package/docs/zh/guide/getting-started.md +150 -0
- package/docs/zh/guide/glossary.md +214 -0
- package/docs/zh/guide/index.md +32 -0
- package/docs/zh/guide/ollama.md +410 -0
- package/docs/zh/guide/prompt-caching.md +221 -0
- package/docs/zh/guide/providers.md +232 -0
- package/docs/zh/guide/quality-control.md +137 -0
- package/docs/zh/guide/vitepress-integration.md +265 -0
- package/docs/zh/index.md +58 -0
- package/package.json +91 -0
- package/release.config.mjs +15 -0
- package/schemas/glossary.schema.json +110 -0
- package/src/cli/commands/dir.ts +469 -0
- package/src/cli/commands/file.ts +291 -0
- package/src/cli/commands/glossary.ts +221 -0
- package/src/cli/commands/init.ts +68 -0
- package/src/cli/commands/serve.ts +60 -0
- package/src/cli/index.ts +64 -0
- package/src/cli/options.ts +59 -0
- package/src/core/agent.ts +1119 -0
- package/src/core/chunker.ts +391 -0
- package/src/core/engine.ts +634 -0
- package/src/errors.ts +188 -0
- package/src/index.ts +147 -0
- package/src/integrations/vitepress.ts +549 -0
- package/src/parsers/markdown.ts +383 -0
- package/src/providers/claude.ts +259 -0
- package/src/providers/interface.ts +109 -0
- package/src/providers/ollama.ts +379 -0
- package/src/providers/openai.ts +308 -0
- package/src/providers/registry.ts +153 -0
- package/src/server/index.ts +152 -0
- package/src/server/middleware/auth.ts +93 -0
- package/src/server/middleware/logger.ts +90 -0
- package/src/server/routes/health.ts +84 -0
- package/src/server/routes/translate.ts +210 -0
- package/src/server/types.ts +138 -0
- package/src/services/cache.ts +899 -0
- package/src/services/config.ts +217 -0
- package/src/services/glossary.ts +247 -0
- package/src/types/analysis.ts +164 -0
- package/src/types/index.ts +265 -0
- package/src/types/modes.ts +121 -0
- package/src/types/mqm.ts +157 -0
- package/src/utils/logger.ts +141 -0
- package/src/utils/tokens.ts +116 -0
- package/tests/fixtures/glossaries/ml-glossary.json +53 -0
- package/tests/fixtures/input/lynq-installation.ko.md +350 -0
- package/tests/fixtures/input/lynq-installation.md +350 -0
- package/tests/fixtures/input/simple.ko.md +27 -0
- package/tests/fixtures/input/simple.md +27 -0
- package/tests/unit/chunker.test.ts +229 -0
- package/tests/unit/glossary.test.ts +146 -0
- package/tests/unit/markdown.test.ts +205 -0
- package/tests/unit/tokens.test.ts +81 -0
- package/tsconfig.json +28 -0
- package/tsup.config.ts +34 -0
- package/vitest.config.ts +16 -0
package/.dockerignore
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Dependencies
|
|
2
|
+
node_modules
|
|
3
|
+
|
|
4
|
+
# Build output (will be created during build)
|
|
5
|
+
dist
|
|
6
|
+
|
|
7
|
+
# Git
|
|
8
|
+
.git
|
|
9
|
+
.github
|
|
10
|
+
.gitignore
|
|
11
|
+
|
|
12
|
+
# Documentation (not needed in container)
|
|
13
|
+
docs
|
|
14
|
+
*.md
|
|
15
|
+
!README.md
|
|
16
|
+
|
|
17
|
+
# Test and coverage
|
|
18
|
+
coverage
|
|
19
|
+
.nyc_output
|
|
20
|
+
**/*.test.ts
|
|
21
|
+
**/*.spec.ts
|
|
22
|
+
**/__tests__
|
|
23
|
+
|
|
24
|
+
# Environment files (secrets should be passed via env vars)
|
|
25
|
+
.env
|
|
26
|
+
.env.*
|
|
27
|
+
!.env.example
|
|
28
|
+
|
|
29
|
+
# IDE and editor
|
|
30
|
+
.vscode
|
|
31
|
+
.idea
|
|
32
|
+
*.swp
|
|
33
|
+
*.swo
|
|
34
|
+
*~
|
|
35
|
+
|
|
36
|
+
# OS files
|
|
37
|
+
.DS_Store
|
|
38
|
+
Thumbs.db
|
|
39
|
+
|
|
40
|
+
# Logs
|
|
41
|
+
*.log
|
|
42
|
+
npm-debug.log*
|
|
43
|
+
|
|
44
|
+
# Cache
|
|
45
|
+
.translate-cache
|
|
46
|
+
.cache
|
|
47
|
+
|
|
48
|
+
# Misc
|
|
49
|
+
.eslintcache
|
|
50
|
+
.prettierrc
|
|
51
|
+
tsconfig.tsbuildinfo
|
package/.env.example
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# llm-translate Server Configuration
|
|
3
|
+
# Copy this file to .env and fill in your values
|
|
4
|
+
# ==============================================================================
|
|
5
|
+
|
|
6
|
+
# ------------------------------------------------------------------------------
|
|
7
|
+
# Server Configuration
|
|
8
|
+
# ------------------------------------------------------------------------------
|
|
9
|
+
|
|
10
|
+
# Server port (default: 3000)
|
|
11
|
+
TRANSLATE_PORT=3000
|
|
12
|
+
|
|
13
|
+
# API key for authenticating requests to the translation API
|
|
14
|
+
# Generate a secure random key: openssl rand -base64 32
|
|
15
|
+
TRANSLATE_API_KEY=your-api-key-here
|
|
16
|
+
|
|
17
|
+
# ------------------------------------------------------------------------------
|
|
18
|
+
# LLM Provider API Keys
|
|
19
|
+
# At least one provider key is required for translation to work
|
|
20
|
+
# ------------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
# Anthropic Claude API key (recommended)
|
|
23
|
+
# Get your key at: https://console.anthropic.com/
|
|
24
|
+
ANTHROPIC_API_KEY=sk-ant-xxxxx
|
|
25
|
+
|
|
26
|
+
# OpenAI API key
|
|
27
|
+
# Get your key at: https://platform.openai.com/api-keys
|
|
28
|
+
OPENAI_API_KEY=sk-xxxxx
|
|
29
|
+
|
|
30
|
+
# Ollama server URL (for self-hosted models)
|
|
31
|
+
# Default: http://localhost:11434
|
|
32
|
+
# Use http://ollama:11434 when running with docker-compose
|
|
33
|
+
OLLAMA_BASE_URL=http://localhost:11434
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
name: Deploy Docs
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
pages: write
|
|
12
|
+
id-token: write
|
|
13
|
+
|
|
14
|
+
concurrency:
|
|
15
|
+
group: pages
|
|
16
|
+
cancel-in-progress: true
|
|
17
|
+
|
|
18
|
+
jobs:
|
|
19
|
+
build:
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
steps:
|
|
22
|
+
- name: Checkout
|
|
23
|
+
uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- name: Setup Node.js
|
|
26
|
+
uses: actions/setup-node@v4
|
|
27
|
+
with:
|
|
28
|
+
node-version: 24 # match the Node.js 24+ runtime the project targets (Dockerfile uses node:24-alpine)
|
|
29
|
+
cache: npm
|
|
30
|
+
|
|
31
|
+
- name: Install dependencies
|
|
32
|
+
run: npm ci
|
|
33
|
+
|
|
34
|
+
- name: Build package (docs config depends on dist)
|
|
35
|
+
run: npm run build
|
|
36
|
+
|
|
37
|
+
- name: Build docs
|
|
38
|
+
run: npm run docs:build
|
|
39
|
+
|
|
40
|
+
- name: Setup Pages
|
|
41
|
+
uses: actions/configure-pages@v5
|
|
42
|
+
with:
|
|
43
|
+
enablement: true
|
|
44
|
+
continue-on-error: true # skip failure if repo doesn't allow auto-enablement; enable Pages manually in settings
|
|
45
|
+
|
|
46
|
+
- name: Upload artifact
|
|
47
|
+
uses: actions/upload-pages-artifact@v3
|
|
48
|
+
with:
|
|
49
|
+
path: docs/.vitepress/dist
|
|
50
|
+
|
|
51
|
+
deploy:
|
|
52
|
+
needs: build
|
|
53
|
+
runs-on: ubuntu-latest
|
|
54
|
+
environment: github-pages
|
|
55
|
+
steps:
|
|
56
|
+
- name: Deploy to GitHub Pages
|
|
57
|
+
uses: actions/deploy-pages@v4
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
- develop
|
|
8
|
+
paths-ignore:
|
|
9
|
+
- "docs/**"
|
|
10
|
+
- "*.md"
|
|
11
|
+
|
|
12
|
+
permissions:
|
|
13
|
+
contents: read
|
|
14
|
+
|
|
15
|
+
jobs:
|
|
16
|
+
release:
|
|
17
|
+
name: Release
|
|
18
|
+
runs-on: ubuntu-latest
|
|
19
|
+
permissions:
|
|
20
|
+
contents: write # to be able to publish a GitHub release
|
|
21
|
+
issues: write # to be able to comment on released issues
|
|
22
|
+
pull-requests: write # to be able to comment on released pull requests
|
|
23
|
+
id-token: write # to enable use of OIDC for npm provenance
|
|
24
|
+
steps:
|
|
25
|
+
- name: Checkout
|
|
26
|
+
uses: actions/checkout@v4
|
|
27
|
+
with:
|
|
28
|
+
fetch-depth: 0 # fetch all history for proper commit analysis
|
|
29
|
+
|
|
30
|
+
- name: Setup Node.js
|
|
31
|
+
uses: actions/setup-node@v4
|
|
32
|
+
with:
|
|
33
|
+
node-version: "lts/*"
|
|
34
|
+
cache: npm
|
|
35
|
+
|
|
36
|
+
- name: Install dependencies
|
|
37
|
+
run: npm clean-install
|
|
38
|
+
|
|
39
|
+
- name: Verify integrity
|
|
40
|
+
run: npm audit signatures
|
|
41
|
+
|
|
42
|
+
- name: Build
|
|
43
|
+
run: npm run build
|
|
44
|
+
|
|
45
|
+
- name: Release
|
|
46
|
+
env:
|
|
47
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
48
|
+
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
|
|
49
|
+
run: npx semantic-release
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "1.0",
|
|
3
|
+
"project": {
|
|
4
|
+
"name": "Example Project",
|
|
5
|
+
"description": "Example project configuration for llm-translate",
|
|
6
|
+
"purpose": "Technical documentation translation"
|
|
7
|
+
},
|
|
8
|
+
"languages": {
|
|
9
|
+
"source": "en",
|
|
10
|
+
"targets": ["ko", "ja", "zh-CN"],
|
|
11
|
+
"styles": {
|
|
12
|
+
"ko": "기술 문서로서 경어체(존댓말, ~습니다/~됩니다 체)를 사용하세요",
|
|
13
|
+
"ja": "技術文書として敬語(です・ます調)を使用してください"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"provider": {
|
|
17
|
+
"default": "claude",
|
|
18
|
+
"model": "claude-sonnet-4-20250514",
|
|
19
|
+
"fallback": ["openai"]
|
|
20
|
+
},
|
|
21
|
+
"quality": {
|
|
22
|
+
"threshold": 85,
|
|
23
|
+
"maxIterations": 4,
|
|
24
|
+
"evaluationMethod": "llm"
|
|
25
|
+
},
|
|
26
|
+
"chunking": {
|
|
27
|
+
"maxTokens": 1024,
|
|
28
|
+
"overlapTokens": 150,
|
|
29
|
+
"preserveStructure": true
|
|
30
|
+
},
|
|
31
|
+
"glossary": {
|
|
32
|
+
"path": "./glossary.json",
|
|
33
|
+
"strict": false
|
|
34
|
+
},
|
|
35
|
+
"paths": {
|
|
36
|
+
"output": "./docs/{lang}",
|
|
37
|
+
"cache": "./.translate-cache"
|
|
38
|
+
},
|
|
39
|
+
"ignore": [
|
|
40
|
+
"**/node_modules/**",
|
|
41
|
+
"**/*.test.md",
|
|
42
|
+
"**/drafts/**"
|
|
43
|
+
]
|
|
44
|
+
}
|
package/CLAUDE.md
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
# Project Guidelines for llm-translate
|
|
2
|
+
|
|
3
|
+
This document provides essential guidelines for AI assistants working on the llm-translate project. All implementations MUST adhere to the specifications defined in `RFC.md`.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
**llm-translate** is a CLI-based document translation tool powered by Large Language Models. It ensures translation consistency through glossary enforcement, context-aware chunking, and iterative quality refinement.
|
|
8
|
+
|
|
9
|
+
## Core Principles
|
|
10
|
+
|
|
11
|
+
1. **RFC.md is the source of truth** - All implementations must follow the RFC specification
|
|
12
|
+
2. **Glossary enforcement is mandatory** - Domain-specific terminology must be translated consistently
|
|
13
|
+
3. **Quality-aware refinement** - Use iterative Self-Refine loop until quality threshold is met
|
|
14
|
+
4. **Structure preservation** - AST-based processing must maintain document formatting integrity
|
|
15
|
+
5. **Provider-agnostic design** - Support Claude, OpenAI, Ollama via plugin architecture
|
|
16
|
+
|
|
17
|
+
## Technology Stack (MUST use)
|
|
18
|
+
|
|
19
|
+
| Component | Technology |
|
|
20
|
+
|-----------|------------|
|
|
21
|
+
| Runtime | Node.js 24+ |
|
|
22
|
+
| Language | TypeScript 5.x |
|
|
23
|
+
| CLI Framework | Commander.js |
|
|
24
|
+
| Markdown Parser | unified/remark |
|
|
25
|
+
| HTML Parser | cheerio |
|
|
26
|
+
| LLM SDK | Vercel AI SDK |
|
|
27
|
+
| Testing | Vitest |
|
|
28
|
+
| Build | tsup |
|
|
29
|
+
| Module System | ESM only |
|
|
30
|
+
|
|
31
|
+
## Project Structure
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
src/
|
|
35
|
+
├── cli/ # CLI entry point and commands
|
|
36
|
+
│ ├── commands/ # file, dir, init, glossary, serve commands
|
|
37
|
+
│ └── options.ts # Shared CLI options
|
|
38
|
+
├── core/ # Translation engine components
|
|
39
|
+
│ ├── engine.ts # Main translation engine
|
|
40
|
+
│ ├── agent.ts # Self-refine translation agent
|
|
41
|
+
│ ├── chunker.ts # Semantic document chunker
|
|
42
|
+
│ └── evaluator.ts # Quality evaluation
|
|
43
|
+
├── parsers/ # Format-specific parsers
|
|
44
|
+
│ ├── markdown.ts
|
|
45
|
+
│ ├── html.ts
|
|
46
|
+
│ └── plaintext.ts
|
|
47
|
+
├── providers/ # LLM provider adapters
|
|
48
|
+
│ ├── interface.ts # Provider interface (LLMProvider)
|
|
49
|
+
│ ├── registry.ts # Provider registry with fallback
|
|
50
|
+
│ ├── claude.ts
|
|
51
|
+
│ ├── openai.ts
|
|
52
|
+
│ └── ollama.ts
|
|
53
|
+
├── services/ # Supporting services
|
|
54
|
+
│ ├── glossary.ts # Glossary loading and resolution
|
|
55
|
+
│ ├── cache.ts # Translation cache
|
|
56
|
+
│ └── config.ts # Configuration loader
|
|
57
|
+
├── types/ # Shared type definitions
|
|
58
|
+
│ ├── index.ts
|
|
59
|
+
│ ├── mqm.ts # MQM quality evaluation types
|
|
60
|
+
│ ├── analysis.ts # Pre-translation analysis types
|
|
61
|
+
│ └── modes.ts # Translation mode configurations
|
|
62
|
+
├── utils/ # Utilities
|
|
63
|
+
│ ├── tokens.ts # Token counting
|
|
64
|
+
│ └── logger.ts # Logging
|
|
65
|
+
└── errors.ts # Error types and codes
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Key Interfaces
|
|
69
|
+
|
|
70
|
+
### LLMProvider Interface
|
|
71
|
+
|
|
72
|
+
All providers MUST implement this interface:
|
|
73
|
+
|
|
74
|
+
```typescript
|
|
75
|
+
interface LLMProvider {
|
|
76
|
+
readonly name: ProviderName;
|
|
77
|
+
chat(request: ChatRequest): Promise<ChatResponse>;
|
|
78
|
+
stream(request: ChatRequest): AsyncIterable<string>;
|
|
79
|
+
countTokens(text: string): number;
|
|
80
|
+
getModelInfo(model?: string): ModelInfo;
|
|
81
|
+
}
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Translation Agent Flow
|
|
85
|
+
|
|
86
|
+
The translation pipeline follows a research-backed, multi-step approach:
|
|
87
|
+
- **MAPS** (Multi-Aspect Prompting and Selection) - TACL 2024
|
|
88
|
+
- **TEaR** (Translate, Estimate, Refine) - NAACL 2025
|
|
89
|
+
- **MQM** (Multidimensional Quality Metrics) - WMT Standard
|
|
90
|
+
|
|
91
|
+
**Pipeline Steps:**
|
|
92
|
+
|
|
93
|
+
1. **PREPARE** - Load glossary, build context
|
|
94
|
+
2. **ANALYZE** (Optional, MAPS-style) - Pre-translation analysis
|
|
95
|
+
- Extract key terms and potential challenges
|
|
96
|
+
- Identify ambiguous phrases
|
|
97
|
+
- Skip in `fast` mode
|
|
98
|
+
3. **TRANSLATE** - Generate translation with glossary + analysis context
|
|
99
|
+
4. **EVALUATE** (MQM-based) - Structured error annotation
|
|
100
|
+
- Error types: accuracy/fluency/style
|
|
101
|
+
- Severity: minor(1), major(5), critical(25)
|
|
102
|
+
- Score = max(0, 100 - Σ(error_weights))
|
|
103
|
+
5. **REFINE** - Apply MQM error fixes directly
|
|
104
|
+
6. **REPEAT** - Loop until quality >= threshold OR max iterations is reached
|
|
105
|
+
|
|
106
|
+
**Translation Modes:**
|
|
107
|
+
|
|
108
|
+
| Mode | Analysis | MQM | Iterations | Threshold |
|
|
109
|
+
|------|----------|-----|------------|-----------|
|
|
110
|
+
| `fast` | ❌ | ❌ | 1 | 0 |
|
|
111
|
+
| `balanced` | ❌ | ✅ | 2 | 75 |
|
|
112
|
+
| `quality` | ✅ | ✅ | 4 | 85 |
|
|
113
|
+
|
|
114
|
+
### Glossary Resolution
|
|
115
|
+
|
|
116
|
+
When resolving glossary terms for a target language:
|
|
117
|
+
|
|
118
|
+
- `doNotTranslate: true` - Keep source term for ALL languages
|
|
119
|
+
- `doNotTranslateFor: ["ko", "ja"]` - Keep source for SPECIFIC languages
|
|
120
|
+
- Otherwise use `targets[targetLang]` or fallback to source
|
|
121
|
+
|
|
122
|
+
## CLI Commands
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
llm-translate file <input> [output] # Single file translation
|
|
126
|
+
llm-translate dir <input> <output> # Batch directory translation
|
|
127
|
+
llm-translate init # Initialize configuration
|
|
128
|
+
llm-translate glossary <subcommand> # Manage glossary (list, validate, add, remove)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Exit Codes
|
|
132
|
+
|
|
133
|
+
| Code | Meaning |
|
|
134
|
+
|------|---------|
|
|
135
|
+
| 0 | Success |
|
|
136
|
+
| 1 | General error |
|
|
137
|
+
| 2 | Invalid arguments |
|
|
138
|
+
| 3 | File not found |
|
|
139
|
+
| 4 | Quality threshold not met |
|
|
140
|
+
| 5 | Provider/API error |
|
|
141
|
+
| 6 | Glossary validation failed |
|
|
142
|
+
|
|
143
|
+
## Configuration
|
|
144
|
+
|
|
145
|
+
Configuration is resolved from the following sources, in priority order (the config file is `.translaterc.json`):
|
|
146
|
+
1. CLI arguments (highest)
|
|
147
|
+
2. Environment variables
|
|
148
|
+
3. Config file
|
|
149
|
+
4. Defaults (lowest)
|
|
150
|
+
|
|
151
|
+
## Environment Variables
|
|
152
|
+
|
|
153
|
+
| Variable | Description |
|
|
154
|
+
|----------|-------------|
|
|
155
|
+
| `ANTHROPIC_API_KEY` | Claude API key |
|
|
156
|
+
| `OPENAI_API_KEY` | OpenAI API key |
|
|
157
|
+
| `OLLAMA_BASE_URL` | Ollama server URL |
|
|
158
|
+
|
|
159
|
+
## Quality Evaluation (MQM-Based)
|
|
160
|
+
|
|
161
|
+
**MQM Error Types:**
|
|
162
|
+
```
|
|
163
|
+
Accuracy: mistranslation, omission, addition, untranslated
|
|
164
|
+
Fluency: grammar, spelling, register, inconsistency
|
|
165
|
+
Style: awkward, unidiomatic
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
**MQM Severity Weights:**
|
|
169
|
+
| Severity | Weight | Description |
|
|
170
|
+
|----------|--------|-------------|
|
|
171
|
+
| Minor | 1 | Noticeable but doesn't affect understanding |
|
|
172
|
+
| Major | 5 | Affects understanding or usability |
|
|
173
|
+
| Critical | 25 | Completely wrong or unusable |
|
|
174
|
+
|
|
175
|
+
**Score Calculation:** `score = max(0, 100 - Σ(error_weights))`
|
|
176
|
+
|
|
177
|
+
**Default Thresholds:**
|
|
178
|
+
- Quality threshold: 85/100 (quality mode), 75/100 (balanced), 0 (fast)
|
|
179
|
+
- Max iterations: 4 (quality), 2 (balanced), 1 (fast)
|
|
180
|
+
|
|
181
|
+
## Critical Rules
|
|
182
|
+
|
|
183
|
+
1. **NEVER translate content inside code blocks**
|
|
184
|
+
2. **ALWAYS apply glossary terms exactly as specified**
|
|
185
|
+
3. **PRESERVE all formatting** (markdown, HTML tags, links, tables)
|
|
186
|
+
4. **Keep URLs, file paths, and technical identifiers unchanged**
|
|
187
|
+
5. **Maintain header hierarchy in chunking**
|
|
188
|
+
|
|
189
|
+
## Testing Requirements
|
|
190
|
+
|
|
191
|
+
- Unit tests for: chunker, glossary, markdown parser, providers
|
|
192
|
+
- Integration tests for full translation pipeline
|
|
193
|
+
- Test coverage target: > 80%
|
|
194
|
+
- Use Vitest with ESM configuration
|
|
195
|
+
|
|
196
|
+
## Error Handling
|
|
197
|
+
|
|
198
|
+
Use the `TranslationError` class with appropriate `ErrorCode`:
|
|
199
|
+
|
|
200
|
+
```typescript
|
|
201
|
+
throw new TranslationError(ErrorCode.GLOSSARY_NOT_FOUND, {
|
|
202
|
+
path: glossaryPath,
|
|
203
|
+
});
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## Implementation Phases
|
|
207
|
+
|
|
208
|
+
### Phase 1 (MVP)
|
|
209
|
+
- Single file translation with Claude
|
|
210
|
+
- Basic glossary support
|
|
211
|
+
- Markdown parser
|
|
212
|
+
- stdin/stdout support
|
|
213
|
+
|
|
214
|
+
### Phase 2
|
|
215
|
+
- OpenAI and Ollama providers
|
|
216
|
+
- Quality evaluator with LLM scoring
|
|
217
|
+
- Full Self-Refine loop
|
|
218
|
+
- HTML parser
|
|
219
|
+
- Cache manager
|
|
220
|
+
|
|
221
|
+
### Phase 3
|
|
222
|
+
- Batch directory processing
|
|
223
|
+
- Parallel file translation
|
|
224
|
+
- Progress reporting
|
|
225
|
+
- Integration tests
|
|
226
|
+
|
|
227
|
+
## Development Commands
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
npm run build # Build with tsup
|
|
231
|
+
npm run dev # Watch mode
|
|
232
|
+
npm test # Run Vitest
|
|
233
|
+
npm run typecheck # TypeScript check
|
|
234
|
+
npm run lint # ESLint
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Code Style
|
|
238
|
+
|
|
239
|
+
- Use strict TypeScript (strict: true in tsconfig)
|
|
240
|
+
- Prefer async/await over callbacks
|
|
241
|
+
- Use Zod for runtime validation of config/glossary
|
|
242
|
+
- Export types from `src/types/index.ts`
|
|
243
|
+
- All public APIs must have JSDoc comments
|
package/Dockerfile
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# llm-translate Docker Image
|
|
3
|
+
# Multi-stage build for optimized production image
|
|
4
|
+
# ==============================================================================
|
|
5
|
+
|
|
6
|
+
# ==============================================================================
|
|
7
|
+
# Stage 1: Build
|
|
8
|
+
# ==============================================================================
|
|
9
|
+
FROM node:24-alpine AS builder
|
|
10
|
+
|
|
11
|
+
WORKDIR /app
|
|
12
|
+
|
|
13
|
+
# Install dependencies first (cache layer)
|
|
14
|
+
COPY package*.json ./
|
|
15
|
+
RUN npm ci
|
|
16
|
+
|
|
17
|
+
# Copy source and build
|
|
18
|
+
COPY tsconfig.json tsup.config.ts ./
|
|
19
|
+
COPY src ./src
|
|
20
|
+
RUN npm run build
|
|
21
|
+
|
|
22
|
+
# Prune dev dependencies for smaller image
|
|
23
|
+
RUN npm prune --omit=dev
|
|
24
|
+
|
|
25
|
+
# ==============================================================================
|
|
26
|
+
# Stage 2: Production
|
|
27
|
+
# ==============================================================================
|
|
28
|
+
FROM node:24-alpine AS production
|
|
29
|
+
|
|
30
|
+
# Security: run as non-root user
|
|
31
|
+
RUN addgroup -g 1001 -S nodejs && \
|
|
32
|
+
adduser -S -G nodejs -u 1001 llmtranslate
|
|
33
|
+
|
|
34
|
+
WORKDIR /app
|
|
35
|
+
|
|
36
|
+
# Copy only production artifacts
|
|
37
|
+
COPY --from=builder --chown=llmtranslate:nodejs /app/node_modules ./node_modules
|
|
38
|
+
COPY --from=builder --chown=llmtranslate:nodejs /app/dist ./dist
|
|
39
|
+
COPY --from=builder --chown=llmtranslate:nodejs /app/package.json ./
|
|
40
|
+
|
|
41
|
+
# Environment
|
|
42
|
+
ENV NODE_ENV=production
|
|
43
|
+
ENV TRANSLATE_PORT=3000
|
|
44
|
+
|
|
45
|
+
# Switch to non-root user
|
|
46
|
+
USER llmtranslate
|
|
47
|
+
|
|
48
|
+
EXPOSE 3000
|
|
49
|
+
|
|
50
|
+
# Health check for container orchestration
|
|
51
|
+
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
|
52
|
+
CMD node -e "fetch('http://localhost:' + (process.env.TRANSLATE_PORT || 3000) + '/health/live').then(r => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))"
|
|
53
|
+
|
|
54
|
+
# Start server with JSON logging for container environments
|
|
55
|
+
CMD ["node", "dist/cli/index.js", "serve", "--json"]
|