@intlayer/cli 7.0.6 → 7.0.8-canary.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/translation-alignment/ARCHITECTURE.md +518 -0
- package/dist/assets/translation-alignment/IMPROVEMENTS.md +550 -0
- package/dist/assets/translation-alignment/INTEGRATION_EXAMPLE.md +682 -0
- package/dist/assets/translation-alignment/QUICK_START.md +494 -0
- package/dist/assets/translation-alignment/README.md +485 -0
- package/dist/assets/translation-alignment/SUMMARY.md +440 -0
- package/dist/cjs/IntlayerEventListener.cjs +0 -3
- package/dist/cjs/IntlayerEventListener.cjs.map +1 -1
- package/dist/cjs/_virtual/_utils_asset.cjs +0 -3
- package/dist/cjs/build.cjs +0 -2
- package/dist/cjs/build.cjs.map +1 -1
- package/dist/cjs/cli.cjs +6 -7
- package/dist/cjs/cli.cjs.map +1 -1
- package/dist/cjs/config.cjs +0 -1
- package/dist/cjs/config.cjs.map +1 -1
- package/dist/cjs/editor.cjs +0 -4
- package/dist/cjs/editor.cjs.map +1 -1
- package/dist/cjs/fill/fill.cjs +0 -3
- package/dist/cjs/fill/fill.cjs.map +1 -1
- package/dist/cjs/fill/formatAutoFilledFilePath.cjs +0 -1
- package/dist/cjs/fill/formatAutoFilledFilePath.cjs.map +1 -1
- package/dist/cjs/fill/listTranslationsTasks.cjs +0 -6
- package/dist/cjs/fill/listTranslationsTasks.cjs.map +1 -1
- package/dist/cjs/fill/translateDictionary.cjs +0 -6
- package/dist/cjs/fill/translateDictionary.cjs.map +1 -1
- package/dist/cjs/fill/writeFill.cjs +0 -4
- package/dist/cjs/fill/writeFill.cjs.map +1 -1
- package/dist/cjs/getTargetDictionary.cjs +0 -4
- package/dist/cjs/getTargetDictionary.cjs.map +1 -1
- package/dist/cjs/index.cjs +0 -1
- package/dist/cjs/listContentDeclaration.cjs +0 -4
- package/dist/cjs/listContentDeclaration.cjs.map +1 -1
- package/dist/cjs/liveSync.cjs +0 -6
- package/dist/cjs/liveSync.cjs.map +1 -1
- package/dist/cjs/pull.cjs +0 -5
- package/dist/cjs/pull.cjs.map +1 -1
- package/dist/cjs/push/pullLog.cjs +0 -1
- package/dist/cjs/push/pullLog.cjs.map +1 -1
- package/dist/cjs/push/push.cjs +0 -5
- package/dist/cjs/push/push.cjs.map +1 -1
- package/dist/cjs/pushConfig.cjs +0 -2
- package/dist/cjs/pushConfig.cjs.map +1 -1
- package/dist/cjs/pushLog.cjs +0 -1
- package/dist/cjs/pushLog.cjs.map +1 -1
- package/dist/cjs/reviewDoc.cjs +8 -131
- package/dist/cjs/reviewDoc.cjs.map +1 -1
- package/dist/cjs/reviewDocBlockAware.cjs +90 -0
- package/dist/cjs/reviewDocBlockAware.cjs.map +1 -0
- package/dist/cjs/test/index.cjs +0 -2
- package/dist/cjs/test/index.cjs.map +1 -1
- package/dist/cjs/test/listMissingTranslations.cjs +0 -4
- package/dist/cjs/test/listMissingTranslations.cjs.map +1 -1
- package/dist/cjs/translateDoc.cjs +8 -8
- package/dist/cjs/translateDoc.cjs.map +1 -1
- package/dist/cjs/translation-alignment/alignBlocks.cjs +67 -0
- package/dist/cjs/translation-alignment/alignBlocks.cjs.map +1 -0
- package/dist/cjs/translation-alignment/computeSimilarity.cjs +25 -0
- package/dist/cjs/translation-alignment/computeSimilarity.cjs.map +1 -0
- package/dist/cjs/translation-alignment/fingerprintBlock.cjs +23 -0
- package/dist/cjs/translation-alignment/fingerprintBlock.cjs.map +1 -0
- package/dist/cjs/translation-alignment/index.cjs +21 -0
- package/dist/cjs/translation-alignment/mapChangedLinesToBlocks.cjs +18 -0
- package/dist/cjs/translation-alignment/mapChangedLinesToBlocks.cjs.map +1 -0
- package/dist/cjs/translation-alignment/normalizeBlock.cjs +22 -0
- package/dist/cjs/translation-alignment/normalizeBlock.cjs.map +1 -0
- package/dist/cjs/translation-alignment/pipeline.cjs +37 -0
- package/dist/cjs/translation-alignment/pipeline.cjs.map +1 -0
- package/dist/cjs/translation-alignment/planActions.cjs +48 -0
- package/dist/cjs/translation-alignment/planActions.cjs.map +1 -0
- package/dist/cjs/translation-alignment/rebuildDocument.cjs +49 -0
- package/dist/cjs/translation-alignment/rebuildDocument.cjs.map +1 -0
- package/dist/cjs/translation-alignment/segmentDocument.cjs +132 -0
- package/dist/cjs/translation-alignment/segmentDocument.cjs.map +1 -0
- package/dist/cjs/translation-alignment/types.cjs +0 -0
- package/dist/cjs/utils/calculateChunks.cjs +0 -1
- package/dist/cjs/utils/calculateChunks.cjs.map +1 -1
- package/dist/cjs/utils/checkAccess.cjs +0 -2
- package/dist/cjs/utils/checkAccess.cjs.map +1 -1
- package/dist/cjs/utils/checkLastUpdateTime.cjs +0 -1
- package/dist/cjs/utils/checkLastUpdateTime.cjs.map +1 -1
- package/dist/cjs/utils/chunkInference.cjs +0 -2
- package/dist/cjs/utils/chunkInference.cjs.map +1 -1
- package/dist/cjs/utils/getIsFileUpdatedRecently.cjs +0 -1
- package/dist/cjs/utils/getIsFileUpdatedRecently.cjs.map +1 -1
- package/dist/cjs/utils/getParentPackageJSON.cjs +0 -2
- package/dist/cjs/utils/getParentPackageJSON.cjs.map +1 -1
- package/dist/cjs/utils/mapChunksBetweenFiles.cjs +0 -1
- package/dist/cjs/utils/mapChunksBetweenFiles.cjs.map +1 -1
- package/dist/cjs/watch.cjs +0 -2
- package/dist/cjs/watch.cjs.map +1 -1
- package/dist/esm/cli.mjs +6 -3
- package/dist/esm/cli.mjs.map +1 -1
- package/dist/esm/index.mjs +2 -2
- package/dist/esm/reviewDoc.mjs +13 -128
- package/dist/esm/reviewDoc.mjs.map +1 -1
- package/dist/esm/reviewDocBlockAware.mjs +89 -0
- package/dist/esm/reviewDocBlockAware.mjs.map +1 -0
- package/dist/esm/translateDoc.mjs +8 -3
- package/dist/esm/translateDoc.mjs.map +1 -1
- package/dist/esm/translation-alignment/alignBlocks.mjs +67 -0
- package/dist/esm/translation-alignment/alignBlocks.mjs.map +1 -0
- package/dist/esm/translation-alignment/computeSimilarity.mjs +23 -0
- package/dist/esm/translation-alignment/computeSimilarity.mjs.map +1 -0
- package/dist/esm/translation-alignment/fingerprintBlock.mjs +21 -0
- package/dist/esm/translation-alignment/fingerprintBlock.mjs.map +1 -0
- package/dist/esm/translation-alignment/index.mjs +11 -0
- package/dist/esm/translation-alignment/mapChangedLinesToBlocks.mjs +17 -0
- package/dist/esm/translation-alignment/mapChangedLinesToBlocks.mjs.map +1 -0
- package/dist/esm/translation-alignment/normalizeBlock.mjs +21 -0
- package/dist/esm/translation-alignment/normalizeBlock.mjs.map +1 -0
- package/dist/esm/translation-alignment/pipeline.mjs +36 -0
- package/dist/esm/translation-alignment/pipeline.mjs.map +1 -0
- package/dist/esm/translation-alignment/planActions.mjs +47 -0
- package/dist/esm/translation-alignment/planActions.mjs.map +1 -0
- package/dist/esm/translation-alignment/rebuildDocument.mjs +47 -0
- package/dist/esm/translation-alignment/rebuildDocument.mjs.map +1 -0
- package/dist/esm/translation-alignment/segmentDocument.mjs +131 -0
- package/dist/esm/translation-alignment/segmentDocument.mjs.map +1 -0
- package/dist/esm/translation-alignment/types.mjs +0 -0
- package/dist/types/cli.d.ts.map +1 -1
- package/dist/types/index.d.ts +2 -2
- package/dist/types/reviewDoc.d.ts +3 -6
- package/dist/types/reviewDoc.d.ts.map +1 -1
- package/dist/types/reviewDocBlockAware.d.ts +19 -0
- package/dist/types/reviewDocBlockAware.d.ts.map +1 -0
- package/dist/types/translateDoc.d.ts +2 -0
- package/dist/types/translateDoc.d.ts.map +1 -1
- package/dist/types/translation-alignment/alignBlocks.d.ts +7 -0
- package/dist/types/translation-alignment/alignBlocks.d.ts.map +1 -0
- package/dist/types/translation-alignment/computeSimilarity.d.ts +6 -0
- package/dist/types/translation-alignment/computeSimilarity.d.ts.map +1 -0
- package/dist/types/translation-alignment/fingerprintBlock.d.ts +7 -0
- package/dist/types/translation-alignment/fingerprintBlock.d.ts.map +1 -0
- package/dist/types/translation-alignment/index.d.ts +11 -0
- package/dist/types/translation-alignment/mapChangedLinesToBlocks.d.ts +7 -0
- package/dist/types/translation-alignment/mapChangedLinesToBlocks.d.ts.map +1 -0
- package/dist/types/translation-alignment/normalizeBlock.d.ts +7 -0
- package/dist/types/translation-alignment/normalizeBlock.d.ts.map +1 -0
- package/dist/types/translation-alignment/pipeline.d.ts +25 -0
- package/dist/types/translation-alignment/pipeline.d.ts.map +1 -0
- package/dist/types/translation-alignment/planActions.d.ts +7 -0
- package/dist/types/translation-alignment/planActions.d.ts.map +1 -0
- package/dist/types/translation-alignment/rebuildDocument.d.ts +32 -0
- package/dist/types/translation-alignment/rebuildDocument.d.ts.map +1 -0
- package/dist/types/translation-alignment/segmentDocument.d.ts +7 -0
- package/dist/types/translation-alignment/segmentDocument.d.ts.map +1 -0
- package/dist/types/translation-alignment/types.d.ts +49 -0
- package/dist/types/translation-alignment/types.d.ts.map +1 -0
- package/package.json +23 -23
|
@@ -0,0 +1,494 @@
|
|
|
1
|
+
# Quick Start Guide
|
|
2
|
+
|
|
3
|
+
Get started with the block-aware translation alignment system in 5 minutes.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
The system is already integrated into the CLI package, so no additional dependencies are needed.
|
|
8
|
+
|
|
9
|
+
## Basic Usage
|
|
10
|
+
|
|
11
|
+
### 1. Simple Translation Review
|
|
12
|
+
|
|
13
|
+
```typescript
|
|
14
|
+
import { reviewFileBlockAware } from './reviewDocBlockAware';
|
|
15
|
+
import { Locales } from '@intlayer/types';
|
|
16
|
+
|
|
17
|
+
await reviewFileBlockAware(
|
|
18
|
+
'docs/en/getting-started.md', // Source file
|
|
19
|
+
'docs/fr/getting-started.md', // Target file
|
|
20
|
+
Locales.FRENCH, // Target locale
|
|
21
|
+
Locales.ENGLISH // Source locale
|
|
22
|
+
);
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### 2. With Git Integration
|
|
26
|
+
|
|
27
|
+
```typescript
|
|
28
|
+
import { listGitLines } from '@intlayer/chokidar';
|
|
29
|
+
|
|
30
|
+
const changedLines = await listGitLines(
|
|
31
|
+
'docs/en/getting-started.md',
|
|
32
|
+
{ sinceDays: 7 } // Changes in last 7 days
|
|
33
|
+
);
|
|
34
|
+
|
|
35
|
+
await reviewFileBlockAware(
|
|
36
|
+
'docs/en/getting-started.md',
|
|
37
|
+
'docs/fr/getting-started.md',
|
|
38
|
+
Locales.FRENCH,
|
|
39
|
+
Locales.ENGLISH,
|
|
40
|
+
aiOptions,
|
|
41
|
+
configOptions,
|
|
42
|
+
undefined,
|
|
43
|
+
changedLines // Only review changed blocks!
|
|
44
|
+
);
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### 3. With Custom AI Instructions
|
|
48
|
+
|
|
49
|
+
```typescript
|
|
50
|
+
const customInstructions = `
|
|
51
|
+
- Use formal French (vous, not tu)
|
|
52
|
+
- Preserve technical terminology in English
|
|
53
|
+
- Maintain consistent tone
|
|
54
|
+
`;
|
|
55
|
+
|
|
56
|
+
await reviewFileBlockAware(
|
|
57
|
+
'docs/en/api.md',
|
|
58
|
+
'docs/fr/api.md',
|
|
59
|
+
Locales.FRENCH,
|
|
60
|
+
Locales.ENGLISH,
|
|
61
|
+
aiOptions,
|
|
62
|
+
configOptions,
|
|
63
|
+
customInstructions
|
|
64
|
+
);
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Using the Pipeline Directly
|
|
68
|
+
|
|
69
|
+
For more control, use the pipeline API:
|
|
70
|
+
|
|
71
|
+
```typescript
|
|
72
|
+
import { buildAlignmentPlan, mergeReviewedSegments } from './translation-alignment/pipeline';
|
|
73
|
+
|
|
74
|
+
// 1. Build alignment plan
|
|
75
|
+
const { englishBlocks, frenchBlocks, plan, segmentsToReview } = buildAlignmentPlan({
|
|
76
|
+
englishText: await readFile('docs/en/guide.md', 'utf-8'),
|
|
77
|
+
frenchText: await readFile('docs/fr/guide.md', 'utf-8'),
|
|
78
|
+
changedLines: [10, 15, 20],
|
|
79
|
+
similarityOptions: {
|
|
80
|
+
minimumMatchForReuse: 0.92,
|
|
81
|
+
},
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
// 2. Log plan details
|
|
85
|
+
console.log(`Blocks to reuse: ${plan.actions.filter(a => a.kind === 'reuse').length}`);
|
|
86
|
+
console.log(`Blocks to review: ${segmentsToReview.length}`);
|
|
87
|
+
|
|
88
|
+
// 3. Translate segments (your AI logic here)
|
|
89
|
+
const reviewedMap = new Map();
|
|
90
|
+
for (const segment of segmentsToReview) {
|
|
91
|
+
const translation = await yourTranslationFunction(segment.englishBlock.content);
|
|
92
|
+
reviewedMap.set(segment.actionIndex, translation);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// 4. Merge and output
|
|
96
|
+
const finalOutput = mergeReviewedSegments(plan, frenchBlocks, reviewedMap);
|
|
97
|
+
await writeFile('docs/fr/guide.md', finalOutput);
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Common Scenarios
|
|
101
|
+
|
|
102
|
+
### Scenario 1: New Document (No Translation Exists)
|
|
103
|
+
|
|
104
|
+
```typescript
|
|
105
|
+
// French file doesn't exist yet
|
|
106
|
+
await reviewFileBlockAware(
|
|
107
|
+
'docs/en/new-feature.md',
|
|
108
|
+
'docs/fr/new-feature.md', // Will be created
|
|
109
|
+
Locales.FRENCH,
|
|
110
|
+
Locales.ENGLISH
|
|
111
|
+
);
|
|
112
|
+
|
|
113
|
+
// Result: All blocks translated from scratch
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Scenario 2: Minor Edit (2 paragraphs changed)
|
|
117
|
+
|
|
118
|
+
```typescript
|
|
119
|
+
// Git shows lines 45-60 changed
|
|
120
|
+
const changedLines = [45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60];
|
|
121
|
+
|
|
122
|
+
await reviewFileBlockAware(
|
|
123
|
+
'docs/en/guide.md',
|
|
124
|
+
'docs/fr/guide.md',
|
|
125
|
+
Locales.FRENCH,
|
|
126
|
+
Locales.ENGLISH,
|
|
127
|
+
aiOptions,
|
|
128
|
+
configOptions,
|
|
129
|
+
undefined,
|
|
130
|
+
changedLines
|
|
131
|
+
);
|
|
132
|
+
|
|
133
|
+
// Result: only the 2 changed blocks are sent to the AI; every other block is reused (e.g. 98% reuse for a 100-block document)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Scenario 3: Reordered Sections
|
|
137
|
+
|
|
138
|
+
```markdown
|
|
139
|
+
<!-- Before -->
|
|
140
|
+
## Installation
|
|
141
|
+
## Configuration
|
|
142
|
+
## Usage
|
|
143
|
+
|
|
144
|
+
<!-- After -->
|
|
145
|
+
## Installation
|
|
146
|
+
## Usage
|
|
147
|
+
## Configuration
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
```typescript
|
|
151
|
+
await reviewFileBlockAware(
|
|
152
|
+
'docs/en/guide.md',
|
|
153
|
+
'docs/fr/guide.md',
|
|
154
|
+
Locales.FRENCH,
|
|
155
|
+
Locales.ENGLISH
|
|
156
|
+
);
|
|
157
|
+
|
|
158
|
+
// Result: Sections automatically reordered, 0 AI calls needed
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Scenario 4: Added Section
|
|
162
|
+
|
|
163
|
+
```markdown
|
|
164
|
+
<!-- Before -->
|
|
165
|
+
## Installation
|
|
166
|
+
## Usage
|
|
167
|
+
|
|
168
|
+
<!-- After -->
|
|
169
|
+
## Installation
|
|
170
|
+
## Configuration ← NEW
|
|
171
|
+
## Usage
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
```typescript
|
|
175
|
+
await reviewFileBlockAware(
|
|
176
|
+
'docs/en/guide.md',
|
|
177
|
+
'docs/fr/guide.md',
|
|
178
|
+
Locales.FRENCH,
|
|
179
|
+
Locales.ENGLISH
|
|
180
|
+
);
|
|
181
|
+
|
|
182
|
+
// Result:
|
|
183
|
+
// - Installation: reused
|
|
184
|
+
// - Configuration: translated (1 AI call)
|
|
185
|
+
// - Usage: reused
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Scenario 5: Deleted Section
|
|
189
|
+
|
|
190
|
+
```markdown
|
|
191
|
+
<!-- Before -->
|
|
192
|
+
## Installation
|
|
193
|
+
## Deprecated ← DELETE
|
|
194
|
+
## Usage
|
|
195
|
+
|
|
196
|
+
<!-- After -->
|
|
197
|
+
## Installation
|
|
198
|
+
## Usage
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
```typescript
|
|
202
|
+
await reviewFileBlockAware(
|
|
203
|
+
'docs/en/guide.md',
|
|
204
|
+
'docs/fr/guide.md',
|
|
205
|
+
Locales.FRENCH,
|
|
206
|
+
Locales.ENGLISH
|
|
207
|
+
);
|
|
208
|
+
|
|
209
|
+
// Result:
|
|
210
|
+
// - Installation: reused
|
|
211
|
+
// - Deprecated: deleted from output
|
|
212
|
+
// - Usage: reused
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
## Batch Processing
|
|
216
|
+
|
|
217
|
+
Process multiple files:
|
|
218
|
+
|
|
219
|
+
```typescript
|
|
220
|
+
import fg from 'fast-glob';
|
|
221
|
+
|
|
222
|
+
const englishFiles = await fg('docs/en/**/*.md');
|
|
223
|
+
|
|
224
|
+
for (const englishFile of englishFiles) {
|
|
225
|
+
const frenchFile = englishFile.replace('/en/', '/fr/');
|
|
226
|
+
|
|
227
|
+
await reviewFileBlockAware(
|
|
228
|
+
englishFile,
|
|
229
|
+
frenchFile,
|
|
230
|
+
Locales.FRENCH,
|
|
231
|
+
Locales.ENGLISH,
|
|
232
|
+
aiOptions,
|
|
233
|
+
configOptions
|
|
234
|
+
);
|
|
235
|
+
}
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Monitoring & Debugging
|
|
239
|
+
|
|
240
|
+
### Enable Verbose Logging
|
|
241
|
+
|
|
242
|
+
```typescript
|
|
243
|
+
import { getConfiguration, getAppLogger } from '@intlayer/config';
|
|
244
|
+
|
|
245
|
+
const configuration = getConfiguration();
|
|
246
|
+
const logger = getAppLogger(configuration);
|
|
247
|
+
|
|
248
|
+
// Logger will output:
|
|
249
|
+
// - Block segmentation results
|
|
250
|
+
// - Alignment statistics
|
|
251
|
+
// - Action plan details
|
|
252
|
+
// - Token usage per block
|
|
253
|
+
// - Efficiency metrics
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
### Inspect Alignment Plan
|
|
257
|
+
|
|
258
|
+
```typescript
|
|
259
|
+
const { plan, segmentsToReview } = buildAlignmentPlan({
|
|
260
|
+
englishText,
|
|
261
|
+
frenchText,
|
|
262
|
+
changedLines,
|
|
263
|
+
});
|
|
264
|
+
|
|
265
|
+
// Analyze plan
|
|
266
|
+
plan.actions.forEach((action, index) => {
|
|
267
|
+
console.log(`Action ${index}: ${action.kind}`);
|
|
268
|
+
if (action.kind === 'reuse') {
|
|
269
|
+
console.log(` English block ${action.englishIndex} → French block ${action.frenchIndex}`);
|
|
270
|
+
} else if (action.kind === 'review') {
|
|
271
|
+
console.log(` Reviewing English block ${action.englishIndex}`);
|
|
272
|
+
}
|
|
273
|
+
});
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
### Debug Block Segmentation
|
|
277
|
+
|
|
278
|
+
```typescript
|
|
279
|
+
import { segmentDocument } from './translation-alignment/segmentDocument';
|
|
280
|
+
|
|
281
|
+
const blocks = segmentDocument(englishText);
|
|
282
|
+
|
|
283
|
+
blocks.forEach((block, index) => {
|
|
284
|
+
console.log(`Block ${index}:`);
|
|
285
|
+
console.log(` Type: ${block.type}`);
|
|
286
|
+
console.log(` Lines: ${block.lineStart}-${block.lineEnd}`);
|
|
287
|
+
console.log(` Content preview: ${block.content.slice(0, 50)}...`);
|
|
288
|
+
});
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
### Analyze Similarity Scores
|
|
292
|
+
|
|
293
|
+
```typescript
|
|
294
|
+
import { computeJaccardSimilarity } from './translation-alignment/computeSimilarity';
|
|
295
|
+
|
|
296
|
+
const similarity = computeJaccardSimilarity(
|
|
297
|
+
"Hello [World](https://example.com)",
|
|
298
|
+
"Bonjour [Monde](https://example.com)",
|
|
299
|
+
3
|
|
300
|
+
);
|
|
301
|
+
|
|
302
|
+
console.log(`Similarity: ${(similarity * 100).toFixed(1)}%`);
|
|
303
|
+
// Example output (the exact value depends on the inputs): Similarity: 75.3%
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
## Configuration Tuning
|
|
307
|
+
|
|
308
|
+
### Adjust Similarity Thresholds
|
|
309
|
+
|
|
310
|
+
```typescript
|
|
311
|
+
const { plan } = buildAlignmentPlan({
|
|
312
|
+
englishText,
|
|
313
|
+
frenchText,
|
|
314
|
+
changedLines,
|
|
315
|
+
similarityOptions: {
|
|
316
|
+
// Higher = more conservative (more blocks reviewed)
|
|
317
|
+
minimumMatchForReuse: 0.95,
|
|
318
|
+
|
|
319
|
+
// For future near-duplicate detection
|
|
320
|
+
minimumMatchForNearDuplicate: 0.85,
|
|
321
|
+
},
|
|
322
|
+
});
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
### Finding Optimal Thresholds
|
|
326
|
+
|
|
327
|
+
Run an experiment that sweeps several thresholds and compares how many blocks are reused versus reviewed:
|
|
328
|
+
|
|
329
|
+
```typescript
|
|
330
|
+
const thresholds = [0.85, 0.87, 0.90, 0.92, 0.95, 0.97];
|
|
331
|
+
const results = [];
|
|
332
|
+
|
|
333
|
+
for (const threshold of thresholds) {
|
|
334
|
+
const { plan } = buildAlignmentPlan({
|
|
335
|
+
englishText,
|
|
336
|
+
frenchText,
|
|
337
|
+
changedLines,
|
|
338
|
+
similarityOptions: { minimumMatchForReuse: threshold },
|
|
339
|
+
});
|
|
340
|
+
|
|
341
|
+
const reusedCount = plan.actions.filter(a => a.kind === 'reuse').length;
|
|
342
|
+
const reviewCount = plan.actions.filter(a => a.kind === 'review').length;
|
|
343
|
+
|
|
344
|
+
results.push({
|
|
345
|
+
threshold,
|
|
346
|
+
reused: reusedCount,
|
|
347
|
+
reviewed: reviewCount,
|
|
348
|
+
efficiency: (reusedCount / plan.actions.length * 100).toFixed(1) + '%',
|
|
349
|
+
});
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
console.table(results);
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
Example output:
|
|
356
|
+
|
|
357
|
+
```
|
|
358
|
+
┌─────────┬───────────┬────────┬──────────┬────────────┐
|
|
359
|
+
│ (index) │ threshold │ reused │ reviewed │ efficiency │
|
|
360
|
+
├─────────┼───────────┼────────┼──────────┼────────────┤
|
|
361
|
+
│ 0 │ 0.85 │ 92 │ 8 │ '92.0%' │
|
|
362
|
+
│ 1 │ 0.87 │ 90 │ 10 │ '90.0%' │
|
|
363
|
+
│ 2 │ 0.90 │ 88 │ 12 │ '88.0%' │
|
|
364
|
+
│ 3 │ 0.92 │ 85 │ 15 │ '85.0%' │
|
|
365
|
+
│ 4 │ 0.95 │ 80 │ 20 │ '80.0%' │
|
|
366
|
+
│ 5 │ 0.97 │ 72 │ 28 │ '72.0%' │
|
|
367
|
+
└─────────┴───────────┴────────┴──────────┴────────────┘
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
Choose based on your priorities:
|
|
371
|
+
- **High accuracy**: Use 0.95+ (more AI reviews)
|
|
372
|
+
- **High efficiency**: Use 0.85-0.90 (fewer AI reviews)
|
|
373
|
+
- **Balanced**: Use 0.90-0.92 (default)
|
|
374
|
+
|
|
375
|
+
## Testing
|
|
376
|
+
|
|
377
|
+
### Unit Test Example
|
|
378
|
+
|
|
379
|
+
```typescript
|
|
380
|
+
import { segmentDocument } from './translation-alignment/segmentDocument';
|
|
381
|
+
|
|
382
|
+
describe('Block Segmentation', () => {
|
|
383
|
+
it('should segment markdown correctly', () => {
|
|
384
|
+
const input = '# Title\n\nParagraph 1\n\n## Subtitle\n\nParagraph 2\n';
|
|
385
|
+
const blocks = segmentDocument(input);
|
|
386
|
+
|
|
387
|
+
expect(blocks).toHaveLength(4);
|
|
388
|
+
expect(blocks[0].type).toBe('heading');
|
|
389
|
+
expect(blocks[0].content).toBe('# Title\n');
|
|
390
|
+
expect(blocks[1].type).toBe('paragraph');
|
|
391
|
+
expect(blocks[2].type).toBe('heading');
|
|
392
|
+
expect(blocks[3].type).toBe('paragraph');
|
|
393
|
+
});
|
|
394
|
+
});
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
### Integration Test Example
|
|
398
|
+
|
|
399
|
+
```typescript
|
|
400
|
+
import { buildAlignmentPlan } from './translation-alignment/pipeline';
|
|
401
|
+
|
|
402
|
+
describe('Reordering Detection', () => {
|
|
403
|
+
it('should detect reordered paragraphs', () => {
|
|
404
|
+
const englishV1 = 'Para A\n\nPara B\n\nPara C\n';
|
|
405
|
+
const englishV2 = 'Para B\n\nPara A\n\nPara C\n';
|
|
406
|
+
const frenchV1 = 'Para A (fr)\n\nPara B (fr)\n\nPara C (fr)\n';
|
|
407
|
+
|
|
408
|
+
const { plan, segmentsToReview } = buildAlignmentPlan({
|
|
409
|
+
englishText: englishV2,
|
|
410
|
+
frenchText: frenchV1,
|
|
411
|
+
changedLines: undefined,
|
|
412
|
+
});
|
|
413
|
+
|
|
414
|
+
// All blocks should be reused (just reordered)
|
|
415
|
+
expect(segmentsToReview).toHaveLength(0);
|
|
416
|
+
const reusedCount = plan.actions.filter(a => a.kind === 'reuse').length;
|
|
417
|
+
expect(reusedCount).toBe(3);
|
|
418
|
+
});
|
|
419
|
+
});
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
## Troubleshooting
|
|
423
|
+
|
|
424
|
+
### Problem: Too Many Blocks Marked for Review
|
|
425
|
+
|
|
426
|
+
```typescript
|
|
427
|
+
// Check if threshold is too high
|
|
428
|
+
const { plan } = buildAlignmentPlan({
|
|
429
|
+
englishText,
|
|
430
|
+
frenchText,
|
|
431
|
+
changedLines,
|
|
432
|
+
similarityOptions: { minimumMatchForReuse: 0.85 }, // Lower threshold
|
|
433
|
+
});
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
### Problem: Blocks Not Aligning Correctly
|
|
437
|
+
|
|
438
|
+
```typescript
|
|
439
|
+
// Debug anchor text extraction
|
|
440
|
+
import { normalizeBlock } from './translation-alignment/normalizeBlock';
|
|
441
|
+
import { segmentDocument } from './translation-alignment/segmentDocument';
|
|
442
|
+
|
|
443
|
+
const blocks = segmentDocument(englishText);
|
|
444
|
+
const normalized = blocks.map(normalizeBlock);
|
|
445
|
+
|
|
446
|
+
normalized.forEach((block, index) => {
|
|
447
|
+
console.log(`Block ${index}:`);
|
|
448
|
+
console.log(` Semantic: ${block.semanticText.slice(0, 50)}`);
|
|
449
|
+
console.log(` Anchor: ${block.anchorText}`);
|
|
450
|
+
});
|
|
451
|
+
```
|
|
452
|
+
|
|
453
|
+
### Problem: Output Has Extra Blank Lines
|
|
454
|
+
|
|
455
|
+
```typescript
|
|
456
|
+
// Check block content preservation
|
|
457
|
+
const { englishBlocks } = buildAlignmentPlan({
|
|
458
|
+
englishText,
|
|
459
|
+
frenchText,
|
|
460
|
+
changedLines,
|
|
461
|
+
});
|
|
462
|
+
|
|
463
|
+
englishBlocks.forEach((block, index) => {
|
|
464
|
+
const hasTrailingNewline = block.content.endsWith('\n');
|
|
465
|
+
console.log(`Block ${index}: trailing newline = ${hasTrailingNewline}`);
|
|
466
|
+
});
|
|
467
|
+
```
|
|
468
|
+
|
|
469
|
+
## Performance Tips
|
|
470
|
+
|
|
471
|
+
1. **Always use Git integration** when available for maximum efficiency
|
|
472
|
+
2. **Batch process during off-peak hours** to reduce API rate limit issues
|
|
473
|
+
3. **Cache fingerprints** for documents that don't change often
|
|
474
|
+
4. **Use parallel processing** for multiple locales (built into `reviewDoc`)
|
|
475
|
+
5. **Monitor token usage** and adjust thresholds if costs are too high
|
|
476
|
+
|
|
477
|
+
## Next Steps
|
|
478
|
+
|
|
479
|
+
- Read [README.md](./README.md) for the detailed architecture
|
|
480
|
+
- Review [IMPROVEMENTS.md](./IMPROVEMENTS.md) for a comparison with the old system
|
|
481
|
+
- Follow [INTEGRATION_EXAMPLE.md](./INTEGRATION_EXAMPLE.md) for production deployment
|
|
482
|
+
- See the [Testing](#testing) section of this guide for unit and integration test examples
|
|
483
|
+
|
|
484
|
+
## Support
|
|
485
|
+
|
|
486
|
+
For issues or questions:
|
|
487
|
+
1. Check the logs for alignment details
|
|
488
|
+
2. Verify that the input Markdown is valid
|
|
489
|
+
3. Test with a simplified example
|
|
490
|
+
4. Review similarity scores
|
|
491
|
+
5. Open an issue with sample files
|
|
492
|
+
|
|
493
|
+
Happy translating! 🌍
|
|
494
|
+
|