@llm-translate/cli 1.0.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. package/.dockerignore +51 -0
  2. package/.env.example +33 -0
  3. package/.github/workflows/docs-pages.yml +57 -0
  4. package/.github/workflows/release.yml +49 -0
  5. package/.translaterc.json +44 -0
  6. package/CLAUDE.md +243 -0
  7. package/Dockerfile +55 -0
  8. package/README.md +371 -0
  9. package/RFC.md +1595 -0
  10. package/dist/cli/index.d.ts +2 -0
  11. package/dist/cli/index.js +4494 -0
  12. package/dist/cli/index.js.map +1 -0
  13. package/dist/index.d.ts +1152 -0
  14. package/dist/index.js +3841 -0
  15. package/dist/index.js.map +1 -0
  16. package/docker-compose.yml +56 -0
  17. package/docs/.vitepress/config.ts +161 -0
  18. package/docs/api/agent.md +262 -0
  19. package/docs/api/engine.md +274 -0
  20. package/docs/api/index.md +171 -0
  21. package/docs/api/providers.md +304 -0
  22. package/docs/changelog.md +64 -0
  23. package/docs/cli/dir.md +243 -0
  24. package/docs/cli/file.md +213 -0
  25. package/docs/cli/glossary.md +273 -0
  26. package/docs/cli/index.md +129 -0
  27. package/docs/cli/init.md +158 -0
  28. package/docs/cli/serve.md +211 -0
  29. package/docs/glossary.json +235 -0
  30. package/docs/guide/chunking.md +272 -0
  31. package/docs/guide/configuration.md +139 -0
  32. package/docs/guide/cost-optimization.md +237 -0
  33. package/docs/guide/docker.md +371 -0
  34. package/docs/guide/getting-started.md +150 -0
  35. package/docs/guide/glossary.md +241 -0
  36. package/docs/guide/index.md +86 -0
  37. package/docs/guide/ollama.md +515 -0
  38. package/docs/guide/prompt-caching.md +221 -0
  39. package/docs/guide/providers.md +232 -0
  40. package/docs/guide/quality-control.md +206 -0
  41. package/docs/guide/vitepress-integration.md +265 -0
  42. package/docs/index.md +63 -0
  43. package/docs/ja/api/agent.md +262 -0
  44. package/docs/ja/api/engine.md +274 -0
  45. package/docs/ja/api/index.md +171 -0
  46. package/docs/ja/api/providers.md +304 -0
  47. package/docs/ja/changelog.md +64 -0
  48. package/docs/ja/cli/dir.md +243 -0
  49. package/docs/ja/cli/file.md +213 -0
  50. package/docs/ja/cli/glossary.md +273 -0
  51. package/docs/ja/cli/index.md +111 -0
  52. package/docs/ja/cli/init.md +158 -0
  53. package/docs/ja/guide/chunking.md +271 -0
  54. package/docs/ja/guide/configuration.md +139 -0
  55. package/docs/ja/guide/cost-optimization.md +30 -0
  56. package/docs/ja/guide/getting-started.md +150 -0
  57. package/docs/ja/guide/glossary.md +214 -0
  58. package/docs/ja/guide/index.md +32 -0
  59. package/docs/ja/guide/ollama.md +410 -0
  60. package/docs/ja/guide/prompt-caching.md +221 -0
  61. package/docs/ja/guide/providers.md +232 -0
  62. package/docs/ja/guide/quality-control.md +137 -0
  63. package/docs/ja/guide/vitepress-integration.md +265 -0
  64. package/docs/ja/index.md +58 -0
  65. package/docs/ko/api/agent.md +262 -0
  66. package/docs/ko/api/engine.md +274 -0
  67. package/docs/ko/api/index.md +171 -0
  68. package/docs/ko/api/providers.md +304 -0
  69. package/docs/ko/changelog.md +64 -0
  70. package/docs/ko/cli/dir.md +243 -0
  71. package/docs/ko/cli/file.md +213 -0
  72. package/docs/ko/cli/glossary.md +273 -0
  73. package/docs/ko/cli/index.md +111 -0
  74. package/docs/ko/cli/init.md +158 -0
  75. package/docs/ko/guide/chunking.md +271 -0
  76. package/docs/ko/guide/configuration.md +139 -0
  77. package/docs/ko/guide/cost-optimization.md +30 -0
  78. package/docs/ko/guide/getting-started.md +150 -0
  79. package/docs/ko/guide/glossary.md +214 -0
  80. package/docs/ko/guide/index.md +32 -0
  81. package/docs/ko/guide/ollama.md +410 -0
  82. package/docs/ko/guide/prompt-caching.md +221 -0
  83. package/docs/ko/guide/providers.md +232 -0
  84. package/docs/ko/guide/quality-control.md +137 -0
  85. package/docs/ko/guide/vitepress-integration.md +265 -0
  86. package/docs/ko/index.md +58 -0
  87. package/docs/zh/api/agent.md +262 -0
  88. package/docs/zh/api/engine.md +274 -0
  89. package/docs/zh/api/index.md +171 -0
  90. package/docs/zh/api/providers.md +304 -0
  91. package/docs/zh/changelog.md +64 -0
  92. package/docs/zh/cli/dir.md +243 -0
  93. package/docs/zh/cli/file.md +213 -0
  94. package/docs/zh/cli/glossary.md +273 -0
  95. package/docs/zh/cli/index.md +111 -0
  96. package/docs/zh/cli/init.md +158 -0
  97. package/docs/zh/guide/chunking.md +271 -0
  98. package/docs/zh/guide/configuration.md +139 -0
  99. package/docs/zh/guide/cost-optimization.md +30 -0
  100. package/docs/zh/guide/getting-started.md +150 -0
  101. package/docs/zh/guide/glossary.md +214 -0
  102. package/docs/zh/guide/index.md +32 -0
  103. package/docs/zh/guide/ollama.md +410 -0
  104. package/docs/zh/guide/prompt-caching.md +221 -0
  105. package/docs/zh/guide/providers.md +232 -0
  106. package/docs/zh/guide/quality-control.md +137 -0
  107. package/docs/zh/guide/vitepress-integration.md +265 -0
  108. package/docs/zh/index.md +58 -0
  109. package/package.json +91 -0
  110. package/release.config.mjs +15 -0
  111. package/schemas/glossary.schema.json +110 -0
  112. package/src/cli/commands/dir.ts +469 -0
  113. package/src/cli/commands/file.ts +291 -0
  114. package/src/cli/commands/glossary.ts +221 -0
  115. package/src/cli/commands/init.ts +68 -0
  116. package/src/cli/commands/serve.ts +60 -0
  117. package/src/cli/index.ts +64 -0
  118. package/src/cli/options.ts +59 -0
  119. package/src/core/agent.ts +1119 -0
  120. package/src/core/chunker.ts +391 -0
  121. package/src/core/engine.ts +634 -0
  122. package/src/errors.ts +188 -0
  123. package/src/index.ts +147 -0
  124. package/src/integrations/vitepress.ts +549 -0
  125. package/src/parsers/markdown.ts +383 -0
  126. package/src/providers/claude.ts +259 -0
  127. package/src/providers/interface.ts +109 -0
  128. package/src/providers/ollama.ts +379 -0
  129. package/src/providers/openai.ts +308 -0
  130. package/src/providers/registry.ts +153 -0
  131. package/src/server/index.ts +152 -0
  132. package/src/server/middleware/auth.ts +93 -0
  133. package/src/server/middleware/logger.ts +90 -0
  134. package/src/server/routes/health.ts +84 -0
  135. package/src/server/routes/translate.ts +210 -0
  136. package/src/server/types.ts +138 -0
  137. package/src/services/cache.ts +899 -0
  138. package/src/services/config.ts +217 -0
  139. package/src/services/glossary.ts +247 -0
  140. package/src/types/analysis.ts +164 -0
  141. package/src/types/index.ts +265 -0
  142. package/src/types/modes.ts +121 -0
  143. package/src/types/mqm.ts +157 -0
  144. package/src/utils/logger.ts +141 -0
  145. package/src/utils/tokens.ts +116 -0
  146. package/tests/fixtures/glossaries/ml-glossary.json +53 -0
  147. package/tests/fixtures/input/lynq-installation.ko.md +350 -0
  148. package/tests/fixtures/input/lynq-installation.md +350 -0
  149. package/tests/fixtures/input/simple.ko.md +27 -0
  150. package/tests/fixtures/input/simple.md +27 -0
  151. package/tests/unit/chunker.test.ts +229 -0
  152. package/tests/unit/glossary.test.ts +146 -0
  153. package/tests/unit/markdown.test.ts +205 -0
  154. package/tests/unit/tokens.test.ts +81 -0
  155. package/tsconfig.json +28 -0
  156. package/tsup.config.ts +34 -0
  157. package/vitest.config.ts +16 -0
package/README.md ADDED
@@ -0,0 +1,371 @@
1
+ # llm-translate
2
+
3
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
4
+ [![Node.js](https://img.shields.io/badge/Node.js-20%2B-green.svg)](https://nodejs.org/)
5
+ [![TypeScript](https://img.shields.io/badge/TypeScript-5.x-blue.svg)](https://www.typescriptlang.org/)
6
+
7
+ A CLI-based document translation tool powered by large language models, with glossary enforcement and quality-aware refinement.
8
+
9
+ ## Highlights
10
+
11
+ - **Glossary-Enforced Consistency** - Domain-specific terminology is translated consistently across documents
12
+ - **Self-Refine Quality Loop** - Iterative improvement until target quality threshold is met
13
+ - **Structure Preservation** - AST-based processing maintains Markdown formatting integrity
14
+ - **Unix-Friendly** - Supports stdin/stdout for pipeline integration
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ npm install -g @llm-translate/cli
20
+ ```
21
+
22
+ Or run directly with npx:
23
+
24
+ ```bash
25
+ npx @llm-translate/cli file README.md -s en -t ko
26
+ ```
27
+
28
+ ## Quick Start
29
+
30
+ ```bash
31
+ # Set your API key
32
+ export ANTHROPIC_API_KEY=your-api-key
33
+
34
+ # Translate a single file
35
+ llm-translate file README.md -s en -t ko -o README.ko.md
36
+
37
+ # Use stdin/stdout pipeline
38
+ cat doc.md | llm-translate file - -s en -t ja > doc.ja.md
39
+
40
+ # With glossary for consistent terminology
41
+ llm-translate file guide.md -s en -t ko -g glossary.json -o guide.ko.md
42
+ ```
43
+
44
+ ## How It Works
45
+
46
+ llm-translate uses a **Self-Refine** translation algorithm:
47
+
48
+ ```mermaid
49
+ flowchart TD
50
+ A[📄 Input Document] --> B[1. PREPARE]
51
+ B --> |Load glossary & extract text| C[2. TRANSLATE]
52
+ C --> |Initial translation with glossary| D[3. EVALUATE]
53
+ D --> E{Quality >= Threshold?}
54
+ E --> |Yes| F[✅ Output Translation]
55
+ E --> |No| G[4. REFLECT]
56
+ G --> |Generate critique| H[5. IMPROVE]
57
+ H --> |Apply suggestions| I{Max iterations?}
58
+ I --> |No| D
59
+ I --> |Yes| F
60
+
61
+ style A fill:#e1f5fe
62
+ style F fill:#c8e6c9
63
+ style E fill:#fff9c4
64
+ style I fill:#fff9c4
65
+ ```
66
+
67
+ **Quality Evaluation Criteria:**
68
+
69
+ | Criterion | Weight | Description |
70
+ |----------|--------|-------------|
71
+ | Semantic Accuracy | 40% | Meaning preservation from source |
72
+ | Fluency | 25% | Natural expression in target language |
73
+ | Glossary Compliance | 20% | Correct use of glossary terms |
74
+ | Format Preservation | 15% | Markdown structure integrity |
75
+
76
+ ## CLI Usage
77
+
78
+ ### `llm-translate file <input> [output]`
79
+
80
+ Translate a single file.
81
+
82
+ ```bash
83
+ # Basic usage
84
+ llm-translate file guide.md -s en -t ko -o guide.ko.md
85
+
86
+ # With glossary
87
+ llm-translate file doc.md -s en -t ja -g ./glossary.json
88
+
89
+ # Dry run (preview without API calls)
90
+ llm-translate file doc.md -s en -t ko --dry-run
91
+
92
+ # JSON output format
93
+ llm-translate file doc.md -s en -t ko --json
94
+
95
+ # Adjust quality threshold
96
+ llm-translate file doc.md -s en -t ko --quality 90 --max-iterations 5
97
+
98
+ # Verbose output
99
+ llm-translate file doc.md -s en -t ko -v
100
+ ```
101
+
102
+ **Options:**
103
+
104
+ | Option | Description |
105
+ |--------|-------------|
106
+ | `-s, --source-lang <lang>` | Source language code (required) |
107
+ | `-t, --target-lang <lang>` | Target language code (required) |
108
+ | `-o, --output <path>` | Output file path |
109
+ | `-g, --glossary <path>` | Path to glossary JSON file |
110
+ | `-p, --provider <name>` | LLM provider (default: claude) |
111
+ | `-m, --model <name>` | Model name |
112
+ | `-f, --format <type>` | Force format: markdown, html, text |
113
+ | `--quality <number>` | Quality threshold 0-100 (default: 85) |
114
+ | `--max-iterations <n>` | Max refinement iterations (default: 4) |
115
+ | `--chunk-size <tokens>` | Chunk size in tokens (default: 1024) |
116
+ | `--context <text>` | Additional context for translation |
117
+ | `--strict-quality` | Fail if quality threshold not met |
118
+ | `--strict-glossary` | Fail if glossary terms not applied |
119
+ | `--dry-run` | Preview without making API calls |
120
+ | `--json` | Output result as JSON |
121
+ | `--no-cache` | Disable translation cache |
122
+ | `-v, --verbose` | Verbose output |
123
+ | `-q, --quiet` | Suppress non-essential output |
124
+
125
+ ### `llm-translate init`
126
+
127
+ Initialize project configuration.
128
+
129
+ ```bash
130
+ llm-translate init
131
+ # Creates .translaterc.json with default settings
132
+ ```
133
+
134
+ ### `llm-translate glossary <subcommand>`
135
+
136
+ Manage glossary files.
137
+
138
+ ```bash
139
+ # List all terms
140
+ llm-translate glossary list ./glossary.json
141
+
142
+ # List terms for specific language
143
+ llm-translate glossary list ./glossary.json -t ko
144
+
145
+ # Validate glossary file
146
+ llm-translate glossary validate ./glossary.json
147
+
148
+ # Add a term
149
+ llm-translate glossary add ./glossary.json "machine learning" --target ko:머신러닝
150
+
151
+ # Remove a term
152
+ llm-translate glossary remove ./glossary.json "machine learning"
153
+ ```
154
+
155
+ ## Configuration
156
+
157
+ Create a `.translaterc.json` file in your project root:
158
+
159
+ ```json
160
+ {
161
+ "version": "1.0",
162
+ "languages": {
163
+ "source": "en",
164
+ "targets": ["ko", "ja", "zh"]
165
+ },
166
+ "provider": {
167
+ "default": "claude",
168
+ "model": "claude-haiku-4-5-20251001"
169
+ },
170
+ "quality": {
171
+ "threshold": 85,
172
+ "maxIterations": 4
173
+ },
174
+ "glossary": {
175
+ "path": "./glossary.json",
176
+ "strict": false
177
+ },
178
+ "chunking": {
179
+ "maxTokens": 1024,
180
+ "overlap": 150
181
+ }
182
+ }
183
+ ```
184
+
185
+ Configuration values are resolved in the following order of precedence:
186
+ 1. CLI arguments (highest)
187
+ 2. Environment variables
188
+ 3. `.translaterc.json`
189
+ 4. Defaults (lowest)
190
+
191
+ ## Glossary Format
192
+
193
+ The glossary enforces consistent translation of domain-specific terminology:
194
+
195
+ ```json
196
+ {
197
+ "metadata": {
198
+ "name": "Technical Documentation Glossary",
199
+ "sourceLang": "en",
200
+ "targetLangs": ["ko", "ja"],
201
+ "version": "1.0.0"
202
+ },
203
+ "terms": [
204
+ {
205
+ "source": "machine learning",
206
+ "targets": {
207
+ "ko": "머신러닝",
208
+ "ja": "機械学習"
209
+ },
210
+ "context": "AI/ML domain terminology"
211
+ },
212
+ {
213
+ "source": "API",
214
+ "targets": {},
215
+ "doNotTranslate": true
216
+ },
217
+ {
218
+ "source": "frontend",
219
+ "targets": {
220
+ "ja": "フロントエンド"
221
+ },
222
+ "doNotTranslateFor": ["ko"],
223
+ "caseSensitive": false
224
+ }
225
+ ]
226
+ }
227
+ ```
228
+
229
+ **Term Options:**
230
+
231
+ | Field | Description |
232
+ |-------|-------------|
233
+ | `source` | Source term to match |
234
+ | `targets` | Translation map by language code |
235
+ | `doNotTranslate` | Keep source term for all languages |
236
+ | `doNotTranslateFor` | Keep source term for specific languages |
237
+ | `caseSensitive` | Case-sensitive matching (default: true) |
238
+ | `context` | Usage hint for LLM |
239
+
240
+ ## Environment Variables
241
+
242
+ | Variable | Description |
243
+ |----------|-------------|
244
+ | `ANTHROPIC_API_KEY` | Claude API key |
245
+ | `OPENAI_API_KEY` | OpenAI API key |
246
+ | `OLLAMA_BASE_URL` | Ollama server URL (default: `http://localhost:11434`) |
247
+
248
+ ## Supported Formats
249
+
250
+ | Format | Status | Notes |
251
+ |--------|--------|-------|
252
+ | Markdown | Stable | Full AST-based processing, preserves code blocks, links, tables |
253
+ | Plain Text | Stable | Direct translation |
254
+ | HTML | Planned | Coming soon |
255
+
256
+ ## Exit Codes
257
+
258
+ | Code | Meaning |
259
+ |------|---------|
260
+ | 0 | Success |
261
+ | 1 | General error |
262
+ | 2 | Invalid arguments |
263
+ | 3 | File not found |
264
+ | 4 | Quality threshold not met (with `--strict-quality`) |
265
+ | 5 | Provider/API error |
266
+ | 6 | Glossary validation failed |
267
+
268
+ ## Development
269
+
270
+ ### Prerequisites
271
+
272
+ - Node.js 20+
273
+ - npm or yarn
274
+
275
+ ### Setup
276
+
277
+ ```bash
278
+ # Clone the repository
279
+ git clone https://github.com/selenehyun/llm-translate.git
280
+ cd llm-translate
281
+
282
+ # Install dependencies
283
+ npm install
284
+
285
+ # Build
286
+ npm run build
287
+
288
+ # Run in development mode
289
+ npm run dev
290
+ ```
291
+
292
+ ### Scripts
293
+
294
+ ```bash
295
+ npm run build # Build with tsup
296
+ npm run dev # Watch mode
297
+ npm test # Run tests
298
+ npm run test:run # Run tests once
299
+ npm run typecheck # TypeScript check
300
+ npm run lint # ESLint
301
+ ```
302
+
303
+ ### Project Structure
304
+
305
+ ```
306
+ src/
307
+ ├── cli/ # CLI entry point and commands
308
+ │ ├── commands/ # file, dir, init, glossary, serve commands
309
+ │ └── options.ts # Shared CLI options
310
+ ├── core/ # Translation engine
311
+ │ ├── engine.ts # Main translation orchestrator
312
+ │ ├── agent.ts # Self-refine translation agent
313
+ │ ├── chunker.ts # Semantic document chunker
314
+ │ └── evaluator.ts # Quality evaluation
315
+ ├── parsers/ # Format-specific parsers
316
+ │ └── markdown.ts # Markdown AST parser
317
+ ├── providers/ # LLM provider adapters
318
+ │ ├── interface.ts # Provider interface
319
+ │ ├── registry.ts # Provider registry
320
+ │ └── claude.ts # Claude provider
321
+ ├── services/ # Supporting services
322
+ │ ├── glossary.ts # Glossary loading and resolution
323
+ │ └── config.ts # Configuration loader
324
+ ├── types/ # Type definitions
325
+ └── utils/ # Utilities
326
+ ```
327
+
328
+ ## Roadmap
329
+
330
+ ### v0.1.0 (Current)
331
+ - [x] Single file translation
332
+ - [x] Claude provider
333
+ - [x] Markdown parser with structure preservation
334
+ - [x] Self-Refine quality loop
335
+ - [x] Glossary enforcement
336
+ - [x] stdin/stdout support
337
+ - [x] Configuration system
338
+
339
+ ### v0.2.0 (Planned)
340
+ - [x] OpenAI provider
341
+ - [x] Ollama provider (local LLMs)
342
+ - [ ] HTML parser
343
+ - [x] Translation cache
344
+ - [x] Batch directory processing
345
+
346
+ ### v0.3.0 (Future)
347
+ - [ ] Parallel file processing
348
+ - [ ] Progress reporting
349
+ - [ ] MCP server integration
350
+
351
+ ## Contributing
352
+
353
+ Contributions are welcome! Please read our contributing guidelines before submitting a PR.
354
+
355
+ 1. Fork the repository
356
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
357
+ 3. Commit your changes (`git commit -m 'Add amazing feature'`)
358
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
359
+ 5. Open a Pull Request
360
+
361
+ ## License
362
+
363
+ MIT License - see [LICENSE](LICENSE) for details.
364
+
365
+ ## Acknowledgments
366
+
367
+ Built with:
368
+ - [Vercel AI SDK](https://sdk.vercel.ai/) - LLM integration
369
+ - [Commander.js](https://github.com/tj/commander.js/) - CLI framework
370
+ - [unified/remark](https://unifiedjs.com/) - Markdown processing
371
+ - [Zod](https://zod.dev/) - Schema validation