@llm-translate/cli 1.0.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +51 -0
- package/.env.example +33 -0
- package/.github/workflows/docs-pages.yml +57 -0
- package/.github/workflows/release.yml +49 -0
- package/.translaterc.json +44 -0
- package/CLAUDE.md +243 -0
- package/Dockerfile +55 -0
- package/README.md +371 -0
- package/RFC.md +1595 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +4494 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/index.d.ts +1152 -0
- package/dist/index.js +3841 -0
- package/dist/index.js.map +1 -0
- package/docker-compose.yml +56 -0
- package/docs/.vitepress/config.ts +161 -0
- package/docs/api/agent.md +262 -0
- package/docs/api/engine.md +274 -0
- package/docs/api/index.md +171 -0
- package/docs/api/providers.md +304 -0
- package/docs/changelog.md +64 -0
- package/docs/cli/dir.md +243 -0
- package/docs/cli/file.md +213 -0
- package/docs/cli/glossary.md +273 -0
- package/docs/cli/index.md +129 -0
- package/docs/cli/init.md +158 -0
- package/docs/cli/serve.md +211 -0
- package/docs/glossary.json +235 -0
- package/docs/guide/chunking.md +272 -0
- package/docs/guide/configuration.md +139 -0
- package/docs/guide/cost-optimization.md +237 -0
- package/docs/guide/docker.md +371 -0
- package/docs/guide/getting-started.md +150 -0
- package/docs/guide/glossary.md +241 -0
- package/docs/guide/index.md +86 -0
- package/docs/guide/ollama.md +515 -0
- package/docs/guide/prompt-caching.md +221 -0
- package/docs/guide/providers.md +232 -0
- package/docs/guide/quality-control.md +206 -0
- package/docs/guide/vitepress-integration.md +265 -0
- package/docs/index.md +63 -0
- package/docs/ja/api/agent.md +262 -0
- package/docs/ja/api/engine.md +274 -0
- package/docs/ja/api/index.md +171 -0
- package/docs/ja/api/providers.md +304 -0
- package/docs/ja/changelog.md +64 -0
- package/docs/ja/cli/dir.md +243 -0
- package/docs/ja/cli/file.md +213 -0
- package/docs/ja/cli/glossary.md +273 -0
- package/docs/ja/cli/index.md +111 -0
- package/docs/ja/cli/init.md +158 -0
- package/docs/ja/guide/chunking.md +271 -0
- package/docs/ja/guide/configuration.md +139 -0
- package/docs/ja/guide/cost-optimization.md +30 -0
- package/docs/ja/guide/getting-started.md +150 -0
- package/docs/ja/guide/glossary.md +214 -0
- package/docs/ja/guide/index.md +32 -0
- package/docs/ja/guide/ollama.md +410 -0
- package/docs/ja/guide/prompt-caching.md +221 -0
- package/docs/ja/guide/providers.md +232 -0
- package/docs/ja/guide/quality-control.md +137 -0
- package/docs/ja/guide/vitepress-integration.md +265 -0
- package/docs/ja/index.md +58 -0
- package/docs/ko/api/agent.md +262 -0
- package/docs/ko/api/engine.md +274 -0
- package/docs/ko/api/index.md +171 -0
- package/docs/ko/api/providers.md +304 -0
- package/docs/ko/changelog.md +64 -0
- package/docs/ko/cli/dir.md +243 -0
- package/docs/ko/cli/file.md +213 -0
- package/docs/ko/cli/glossary.md +273 -0
- package/docs/ko/cli/index.md +111 -0
- package/docs/ko/cli/init.md +158 -0
- package/docs/ko/guide/chunking.md +271 -0
- package/docs/ko/guide/configuration.md +139 -0
- package/docs/ko/guide/cost-optimization.md +30 -0
- package/docs/ko/guide/getting-started.md +150 -0
- package/docs/ko/guide/glossary.md +214 -0
- package/docs/ko/guide/index.md +32 -0
- package/docs/ko/guide/ollama.md +410 -0
- package/docs/ko/guide/prompt-caching.md +221 -0
- package/docs/ko/guide/providers.md +232 -0
- package/docs/ko/guide/quality-control.md +137 -0
- package/docs/ko/guide/vitepress-integration.md +265 -0
- package/docs/ko/index.md +58 -0
- package/docs/zh/api/agent.md +262 -0
- package/docs/zh/api/engine.md +274 -0
- package/docs/zh/api/index.md +171 -0
- package/docs/zh/api/providers.md +304 -0
- package/docs/zh/changelog.md +64 -0
- package/docs/zh/cli/dir.md +243 -0
- package/docs/zh/cli/file.md +213 -0
- package/docs/zh/cli/glossary.md +273 -0
- package/docs/zh/cli/index.md +111 -0
- package/docs/zh/cli/init.md +158 -0
- package/docs/zh/guide/chunking.md +271 -0
- package/docs/zh/guide/configuration.md +139 -0
- package/docs/zh/guide/cost-optimization.md +30 -0
- package/docs/zh/guide/getting-started.md +150 -0
- package/docs/zh/guide/glossary.md +214 -0
- package/docs/zh/guide/index.md +32 -0
- package/docs/zh/guide/ollama.md +410 -0
- package/docs/zh/guide/prompt-caching.md +221 -0
- package/docs/zh/guide/providers.md +232 -0
- package/docs/zh/guide/quality-control.md +137 -0
- package/docs/zh/guide/vitepress-integration.md +265 -0
- package/docs/zh/index.md +58 -0
- package/package.json +91 -0
- package/release.config.mjs +15 -0
- package/schemas/glossary.schema.json +110 -0
- package/src/cli/commands/dir.ts +469 -0
- package/src/cli/commands/file.ts +291 -0
- package/src/cli/commands/glossary.ts +221 -0
- package/src/cli/commands/init.ts +68 -0
- package/src/cli/commands/serve.ts +60 -0
- package/src/cli/index.ts +64 -0
- package/src/cli/options.ts +59 -0
- package/src/core/agent.ts +1119 -0
- package/src/core/chunker.ts +391 -0
- package/src/core/engine.ts +634 -0
- package/src/errors.ts +188 -0
- package/src/index.ts +147 -0
- package/src/integrations/vitepress.ts +549 -0
- package/src/parsers/markdown.ts +383 -0
- package/src/providers/claude.ts +259 -0
- package/src/providers/interface.ts +109 -0
- package/src/providers/ollama.ts +379 -0
- package/src/providers/openai.ts +308 -0
- package/src/providers/registry.ts +153 -0
- package/src/server/index.ts +152 -0
- package/src/server/middleware/auth.ts +93 -0
- package/src/server/middleware/logger.ts +90 -0
- package/src/server/routes/health.ts +84 -0
- package/src/server/routes/translate.ts +210 -0
- package/src/server/types.ts +138 -0
- package/src/services/cache.ts +899 -0
- package/src/services/config.ts +217 -0
- package/src/services/glossary.ts +247 -0
- package/src/types/analysis.ts +164 -0
- package/src/types/index.ts +265 -0
- package/src/types/modes.ts +121 -0
- package/src/types/mqm.ts +157 -0
- package/src/utils/logger.ts +141 -0
- package/src/utils/tokens.ts +116 -0
- package/tests/fixtures/glossaries/ml-glossary.json +53 -0
- package/tests/fixtures/input/lynq-installation.ko.md +350 -0
- package/tests/fixtures/input/lynq-installation.md +350 -0
- package/tests/fixtures/input/simple.ko.md +27 -0
- package/tests/fixtures/input/simple.md +27 -0
- package/tests/unit/chunker.test.ts +229 -0
- package/tests/unit/glossary.test.ts +146 -0
- package/tests/unit/markdown.test.ts +205 -0
- package/tests/unit/tokens.test.ts +81 -0
- package/tsconfig.json +28 -0
- package/tsup.config.ts +34 -0
- package/vitest.config.ts +16 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
resolveGlossary,
|
|
4
|
+
createGlossaryLookup,
|
|
5
|
+
checkGlossaryCompliance,
|
|
6
|
+
} from '../../src/services/glossary.js';
|
|
7
|
+
import type { Glossary } from '../../src/types/index.js';
|
|
8
|
+
|
|
9
|
+
// Shared fixture: an English-source glossary with Korean and Japanese targets.
// Its four terms each exercise a different option: a plain translation,
// `doNotTranslate`, `caseSensitive: false`, and `doNotTranslateFor`.
const testGlossary: Glossary = {
  metadata: {
    name: 'Test Glossary',
    sourceLang: 'en',
    targetLangs: ['ko', 'ja'],
    version: '1.0.0',
  },
  terms: [
    // Plain term with a translation for each target language.
    { source: 'machine learning', targets: { ko: '머신러닝', ja: '機械学習' } },
    // No per-language targets; flagged do-not-translate.
    { source: 'API', targets: {}, doNotTranslate: true },
    // Explicitly case-insensitive term.
    {
      source: 'neural network',
      targets: { ko: '신경망', ja: 'ニューラルネットワーク' },
      caseSensitive: false,
    },
    // Do-not-translate scoped to specific target languages.
    { source: 'SDK', targets: { ko: 'SDK', ja: 'SDK' }, doNotTranslateFor: ['ko', 'ja'] },
  ],
};
|
|
38
|
+
|
|
39
|
+
// Tests for resolveGlossary: resolving the shared fixture for the Korean target.
describe('resolveGlossary', () => {
  // Resolve the fixture for Korean and pick a single term by its source text.
  const koreanTerm = (source: string) =>
    resolveGlossary(testGlossary, 'ko').terms.find((term) => term.source === source);

  it('should resolve glossary for Korean target language', () => {
    const { metadata, terms } = resolveGlossary(testGlossary, 'ko');

    expect(metadata.targetLang).toBe('ko');
    expect(terms).toHaveLength(4);

    const machineLearning = terms.find((term) => term.source === 'machine learning');
    expect(machineLearning?.target).toBe('머신러닝');
  });

  it('should handle doNotTranslate terms', () => {
    const api = koreanTerm('API');

    // The fixture gives 'API' no targets; the resolved target is the source itself.
    expect(api?.target).toBe('API');
    expect(api?.doNotTranslate).toBe(true);
  });

  it('should handle doNotTranslateFor specific languages', () => {
    const sdk = koreanTerm('SDK');

    // 'ko' is listed in doNotTranslateFor, so the resolved term is flagged.
    expect(sdk?.target).toBe('SDK');
    expect(sdk?.doNotTranslate).toBe(true);
  });
});
|
|
66
|
+
|
|
67
|
+
// Tests for createGlossaryLookup: single-term lookup, text scanning, prompt formatting.
describe('createGlossaryLookup', () => {
  // Every test works on a fresh lookup built from the Korean-resolved fixture.
  const buildKoreanLookup = () => createGlossaryLookup(resolveGlossary(testGlossary, 'ko'));

  it('should find exact term matches', () => {
    const lookup = buildKoreanLookup();
    const hit = lookup.find('machine learning');

    expect(hit?.source).toBe('machine learning');
    expect(hit?.target).toBe('머신러닝');
  });

  it('should handle case-insensitive lookup', () => {
    const lookup = buildKoreanLookup();
    // Query casing differs from the glossary entry ('neural network').
    const hit = lookup.find('Neural Network');

    expect(hit?.source).toBe('neural network');
    expect(hit?.target).toBe('신경망');
  });

  it('should find all matching terms in text', () => {
    const lookup = buildKoreanLookup();
    const sentence = 'Learn about machine learning and neural network using our API.';

    const found = lookup.findAll(sentence);
    expect(found).toHaveLength(3);

    const foundSources = found.map((match) => match.source);
    expect(foundSources).toContain('machine learning');
    expect(foundSources).toContain('neural network');
    expect(foundSources).toContain('API');
  });

  it('should format glossary for prompt injection', () => {
    const lookup = buildKoreanLookup();
    const promptBlock = lookup.formatForPrompt();

    expect(promptBlock).toContain('"machine learning" → "머신러닝"');
    expect(promptBlock).toContain('[DO NOT TRANSLATE, keep as-is]');
  });
});
|
|
109
|
+
|
|
110
|
+
// Tests for checkGlossaryCompliance: applied terms, missed terms, and the score.
describe('checkGlossaryCompliance', () => {
  // Check a source/translation pair against the Korean-resolved fixture.
  const checkAgainstKorean = (source: string, translation: string) => {
    const koreanGlossary = resolveGlossary(testGlossary, 'ko');
    return checkGlossaryCompliance(source, translation, koreanGlossary);
  };

  it('should detect applied glossary terms', () => {
    const outcome = checkAgainstKorean(
      'Machine learning is a field of artificial intelligence.',
      '머신러닝은 인공지능의 한 분야입니다.',
    );

    expect(outcome.applied).toContain('machine learning');
    expect(outcome.score).toBe(100);
  });

  it('should detect missed glossary terms', () => {
    // The translation deliberately avoids the glossary's Korean targets.
    const outcome = checkAgainstKorean(
      'Machine learning uses neural networks.',
      '기계 학습은 뉴럴 네트워크를 사용합니다.',
    );

    expect(outcome.missed).toContain('machine learning');
    expect(outcome.missed).toContain('neural network');
    expect(outcome.score).toBe(0);
  });

  it('should return 100% for text without glossary terms', () => {
    const outcome = checkAgainstKorean('Hello world!', '안녕하세요!');

    expect(outcome.score).toBe(100);
  });
});
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
parseMarkdown,
|
|
4
|
+
applyTranslations,
|
|
5
|
+
getTranslatableText,
|
|
6
|
+
createTranslationMap,
|
|
7
|
+
extractTextForTranslation,
|
|
8
|
+
restorePreservedSections,
|
|
9
|
+
} from '../../src/parsers/markdown.js';
|
|
10
|
+
|
|
11
|
+
// Tests for parseMarkdown: document shape and the translatable flag on text nodes.
describe('parseMarkdown', () => {
  it('should parse simple markdown content', async () => {
    const markdown = '# Hello World\n\nThis is a paragraph.';
    const parsed = await parseMarkdown(markdown);

    expect(parsed.original).toBe(markdown);
    expect(parsed.ast).toBeDefined();
    expect(parsed.textNodes.length).toBeGreaterThan(0);
  });

  it('should identify text nodes as translatable', async () => {
    const { textNodes } = await parseMarkdown('This is translatable text.');

    const translatable = textNodes.filter((node) => node.translatable);
    expect(translatable.length).toBeGreaterThan(0);
  });

  it('should mark code blocks as non-translatable', async () => {
    const { textNodes } = await parseMarkdown('```javascript\nconst x = 1;\n```');

    // NOTE(review): every() also passes when no 'code' nodes are produced at all;
    // confirm whether a non-empty check is wanted here.
    const fencedNodes = textNodes.filter((node) => node.type === 'code');
    expect(fencedNodes.every((node) => !node.translatable)).toBe(true);
  });

  it('should mark inline code as non-translatable', async () => {
    const { textNodes } = await parseMarkdown('Use the `console.log()` function.');

    // NOTE(review): same vacuous-pass caveat as the fenced-code test above.
    const inlineNodes = textNodes.filter((node) => node.type === 'inlineCode');
    expect(inlineNodes.every((node) => !node.translatable)).toBe(true);
  });
});
|
|
45
|
+
|
|
46
|
+
// Tests for getTranslatableText: heading and paragraph text must be returned.
describe('getTranslatableText', () => {
  it('should return only translatable text', async () => {
    const markdown = '# Title\n\nSome text.\n\n```code\nblock\n```\n\nMore text.';
    const parsed = await parseMarkdown(markdown);
    const fragments = getTranslatableText(parsed);

    expect(fragments.length).toBeGreaterThan(0);

    // Both the heading and the first paragraph should show up in some fragment.
    const mentions = (needle: string) => fragments.some((fragment) => fragment.includes(needle));
    expect(mentions('Title')).toBe(true);
    expect(mentions('Some text')).toBe(true);
  });
});
|
|
57
|
+
|
|
58
|
+
// Tests for createTranslationMap: one map entry per translatable text.
describe('createTranslationMap', () => {
  it('should create map from translations array', async () => {
    const parsed = await parseMarkdown('Hello. World.');
    const sourceTexts = getTranslatableText(parsed);

    // Fake translations: each source text with a marker prefix.
    const fakeTranslations = sourceTexts.map((sourceText) => `[TRANSLATED] ${sourceText}`);
    const translationMap = createTranslationMap(parsed, fakeTranslations);

    const entryCount = Object.keys(translationMap).length;
    expect(entryCount).toBe(sourceTexts.length);
  });
});
|
|
70
|
+
|
|
71
|
+
// Tests for applyTranslations: translated text must appear in the rendered output.
describe('applyTranslations', () => {
  it('should apply translations to document', async () => {
    const parsed = await parseMarkdown('Hello World');

    // Map every translatable node id to the same Korean string.
    const byNodeId: Record<string, string> = Object.fromEntries(
      parsed.textNodes
        .filter((node) => node.translatable)
        .map((node) => [node.id, '안녕하세요 세계'] as const),
    );

    const rendered = await applyTranslations(parsed, byNodeId);
    expect(rendered).toContain('안녕하세요');
  });
});
|
|
86
|
+
|
|
87
|
+
// Tests for extractTextForTranslation: code and link URLs are swapped for
// placeholders, and the originals are kept in `preservedSections`.
describe('extractTextForTranslation', () => {
  it('should preserve code blocks with placeholders', () => {
    const extracted = extractTextForTranslation(
      'Before code\n\n```js\nconst x = 1;\n```\n\nAfter code',
    );

    // Surrounding prose stays; the fenced block is replaced by a placeholder.
    expect(extracted.text).toContain('Before code');
    expect(extracted.text).toContain('After code');
    expect(extracted.text).toContain('__CODE_BLOCK_');
    expect(extracted.text).not.toContain('const x = 1');
    expect(extracted.preservedSections.size).toBeGreaterThan(0);
  });

  it('should preserve inline code with placeholders', () => {
    const extracted = extractTextForTranslation('Use `npm install` to install.');

    expect(extracted.text).toContain('Use');
    expect(extracted.text).toContain('to install');
    expect(extracted.text).toContain('__INLINE_CODE_');
    expect(extracted.text).not.toContain('`npm install`');
    expect(extracted.preservedSections.size).toBe(1);
  });

  it('should preserve URLs in links with placeholders', () => {
    const extracted = extractTextForTranslation(
      'Visit [our site](https://example.com) for more.',
    );

    // The link label remains translatable; only the URL is hidden.
    expect(extracted.text).toContain('[our site]');
    expect(extracted.text).toContain('__LINK_URL_');
    expect(extracted.text).not.toContain('https://example.com');

    // The hidden URL must be recoverable from the preserved sections.
    const storedValues = [...extracted.preservedSections.values()];
    expect(storedValues.some((value) => value.includes('example.com'))).toBe(true);
  });

  it('should handle multiple code blocks', () => {
    const markdown = `\`\`\`python
print("hello")
\`\`\`

Some text

\`\`\`javascript
console.log("world");
\`\`\``;

    const extracted = extractTextForTranslation(markdown);

    expect(extracted.text).toContain('Some text');
    expect(extracted.preservedSections.size).toBe(2);
  });
});
|
|
140
|
+
|
|
141
|
+
// Tests for restorePreservedSections: placeholders are swapped back for the
// preserved originals, with spacing repaired around inline code.
describe('restorePreservedSections', () => {
  it('should restore code blocks from placeholders', () => {
    const extracted = extractTextForTranslation('Text with ```js\ncode\n```');

    // Stand-in for a translation step that leaves placeholders untouched.
    const fakeTranslation = extracted.text.replace('Text with', '텍스트와 함께');
    const output = restorePreservedSections(fakeTranslation, extracted.preservedSections);

    expect(output).toContain('텍스트와 함께');
    expect(output).toContain('```js');
    expect(output).toContain('code');
  });

  it('should restore URLs in links', () => {
    const extracted = extractTextForTranslation('Check [this link](https://test.com).');

    const withVerb = extracted.text.replace('Check', '확인하세요');
    const fakeTranslation = withVerb.replace('this link', '이 링크');
    const output = restorePreservedSections(fakeTranslation, extracted.preservedSections);

    expect(output).toContain('확인하세요');
    expect(output).toContain('[이 링크]');
    expect(output).toContain('https://test.com');
  });

  it('should handle empty preserved sections', () => {
    const plain = 'Simple text without code';
    const nothingPreserved = new Map<string, string>();

    expect(restorePreservedSections(plain, nothingPreserved)).toBe(plain);
  });

  it('should ensure proper spacing around inline code after restoration', () => {
    const extracted = extractTextForTranslation('1. Scan your `./docs` directory');

    // Mimic an LLM that drops the spaces on both sides of the placeholder:
    //   extracted:  "1. Scan your __INLINE_CODE_0__ directory"
    //   translated: "1.__INLINE_CODE_0__디렉토리를 스캔합니다"
    const leadingCollapsed = extracted.text.replace('1. Scan your ', '1.');
    const squashed = leadingCollapsed.replace(' directory', '디렉토리를 스캔합니다');

    const output = restorePreservedSections(squashed, extracted.preservedSections);

    // A space is expected on each side of the restored inline code.
    expect(output).toContain('1. `./docs`');
    expect(output).toContain('`./docs` 디렉토리');
  });

  it('should add space between Korean text and inline code', () => {
    const preservedInline = new Map<string, string>([['__INLINE_CODE_0__', '`code`']]);

    // Korean text touches the placeholder on both sides with no whitespace.
    const squashed = '스캔합니다__INLINE_CODE_0__디렉토리';
    const output = restorePreservedSections(squashed, preservedInline);

    expect(output).toBe('스캔합니다 `code` 디렉토리');
  });
});
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
estimateTokens,
|
|
4
|
+
exceedsTokenLimit,
|
|
5
|
+
truncateToTokenLimit,
|
|
6
|
+
} from '../../src/utils/tokens.js';
|
|
7
|
+
|
|
8
|
+
// Tests for estimateTokens: rough bounds for English, Korean, mixed text and code.
describe('estimateTokens', () => {
  it('should return 0 for empty string', () => {
    const emptyEstimate = estimateTokens('');
    expect(emptyEstimate).toBe(0);
  });

  it('should estimate tokens for English text', () => {
    const estimate = estimateTokens('Hello world, this is a test sentence for token estimation.');

    // About 60 characters at roughly 4 characters per token is ~15 tokens;
    // the band is kept wide on purpose.
    expect(estimate).toBeGreaterThan(10);
    expect(estimate).toBeLessThan(25);
  });

  it('should estimate tokens for Korean text', () => {
    const estimate = estimateTokens('안녕하세요. 이것은 토큰 추정을 위한 테스트 문장입니다.');

    // CJK text is expected at ~1.5 characters per token, hence a higher count.
    expect(estimate).toBeGreaterThan(15);
  });

  it('should estimate tokens for mixed text', () => {
    const estimate = estimateTokens('Hello 세계! This is 테스트입니다.');

    expect(estimate).toBeGreaterThan(5);
  });

  it('should handle code snippets', () => {
    // NOTE(review): leading whitespace inside this snippet is not visible in the
    // reviewed view; it is reproduced exactly as shown. Confirm against the repo.
    const snippet = `function hello() {
console.log("Hello, World!");
}`;
    const estimate = estimateTokens(snippet);

    expect(estimate).toBeGreaterThan(10);
    expect(estimate).toBeLessThan(50);
  });
});
|
|
47
|
+
|
|
48
|
+
// Tests for exceedsTokenLimit: a short string stays under 100 tokens, a long one does not.
describe('exceedsTokenLimit', () => {
  const LIMIT = 100;

  it('should return false when under limit', () => {
    const overLimit = exceedsTokenLimit('Short text', LIMIT);
    expect(overLimit).toBe(false);
  });

  it('should return true when over limit', () => {
    // 1000 characters, i.e. roughly 250 tokens.
    const longText = 'a'.repeat(1000);
    const overLimit = exceedsTokenLimit(longText, LIMIT);
    expect(overLimit).toBe(true);
  });
});
|
|
59
|
+
|
|
60
|
+
// Tests for truncateToTokenLimit: pass-through under the limit, ellipsis-terminated
// truncation over it, and an approximate bound on the resulting token count.
describe('truncateToTokenLimit', () => {
  it('should return original text when under limit', () => {
    const shortText = 'Short text';
    const output = truncateToTokenLimit(shortText, 100);
    expect(output).toBe(shortText);
  });

  it('should truncate text when over limit', () => {
    // 1000 characters, i.e. roughly 250 tokens, cut down to a 50-token budget.
    const longText = 'a'.repeat(1000);
    const output = truncateToTokenLimit(longText, 50);

    expect(output.length).toBeLessThan(longText.length);
    expect(output.endsWith('...')).toBe(true);
  });

  it('should preserve approximately the requested token count', () => {
    const sentence =
      'This is a longer piece of text that should be truncated to fit within the token limit.';
    const output = truncateToTokenLimit(sentence, 10);

    // Budget is 10 tokens; up to 15 is tolerated to leave room for the ellipsis.
    const tokensAfter = estimateTokens(output);
    expect(tokensAfter).toBeLessThanOrEqual(15);
  });
});
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "ESNext",
|
|
5
|
+
"moduleResolution": "bundler",
|
|
6
|
+
"lib": ["ES2022"],
|
|
7
|
+
"outDir": "./dist",
|
|
8
|
+
"rootDir": "./src",
|
|
9
|
+
"strict": true,
|
|
10
|
+
"esModuleInterop": true,
|
|
11
|
+
"skipLibCheck": true,
|
|
12
|
+
"forceConsistentCasingInFileNames": true,
|
|
13
|
+
"resolveJsonModule": true,
|
|
14
|
+
"declaration": true,
|
|
15
|
+
"declarationMap": true,
|
|
16
|
+
"sourceMap": true,
|
|
17
|
+
"noUncheckedIndexedAccess": true,
|
|
18
|
+
"noImplicitReturns": true,
|
|
19
|
+
"noFallthroughCasesInSwitch": true,
|
|
20
|
+
"noUnusedLocals": true,
|
|
21
|
+
"noUnusedParameters": true,
|
|
22
|
+
"exactOptionalPropertyTypes": false,
|
|
23
|
+
"isolatedModules": true,
|
|
24
|
+
"verbatimModuleSyntax": true
|
|
25
|
+
},
|
|
26
|
+
"include": ["src/**/*"],
|
|
27
|
+
"exclude": ["node_modules", "dist", "tests"]
|
|
28
|
+
}
|
package/tsup.config.ts
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { defineConfig } from 'tsup';
|
|
2
|
+
|
|
3
|
+
// Two independent ESM bundles targeting Node 20: the library and the CLI.
export default defineConfig([
  // Library bundle, built from src/index.ts.
  {
    entry: { index: 'src/index.ts' },
    target: 'node20',
    format: ['esm'],
    dts: true,
    sourcemap: true,
    treeshake: true,
    splitting: false,
    shims: false,
    // NOTE(review): only this config sets `clean`; confirm it cannot remove the
    // CLI bundle's output when both configs build into the same directory.
    clean: true,
  },
  // CLI bundle, built from src/cli/index.ts; the banner adds the Node shebang.
  {
    entry: { 'cli/index': 'src/cli/index.ts' },
    target: 'node20',
    format: ['esm'],
    dts: true,
    sourcemap: true,
    treeshake: true,
    splitting: false,
    shims: false,
    banner: { js: '#!/usr/bin/env node' },
  },
]);
|
package/vitest.config.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { defineConfig } from 'vitest/config';
|
|
2
|
+
|
|
3
|
+
// Vitest setup: Node environment, global test APIs, and V8 coverage over src/.
export default defineConfig({
  test: {
    environment: 'node',
    globals: true,
    // Only files under tests/ ending in .test.ts are collected.
    include: ['tests/**/*.test.ts'],
    // 30-second per-test timeout.
    testTimeout: 30000,
    coverage: {
      provider: 'v8',
      reporter: ['text', 'json', 'html'],
      include: ['src/**/*.ts'],
      // Declaration files and the CLI entry point are left out of coverage.
      exclude: ['src/**/*.d.ts', 'src/cli/index.ts'],
    },
  },
});
|