@intlayer/cli 7.0.6 → 7.0.8-canary.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/translation-alignment/ARCHITECTURE.md +518 -0
- package/dist/assets/translation-alignment/IMPROVEMENTS.md +550 -0
- package/dist/assets/translation-alignment/INTEGRATION_EXAMPLE.md +682 -0
- package/dist/assets/translation-alignment/QUICK_START.md +494 -0
- package/dist/assets/translation-alignment/README.md +485 -0
- package/dist/assets/translation-alignment/SUMMARY.md +440 -0
- package/dist/cjs/IntlayerEventListener.cjs +0 -3
- package/dist/cjs/IntlayerEventListener.cjs.map +1 -1
- package/dist/cjs/_virtual/_utils_asset.cjs +0 -3
- package/dist/cjs/build.cjs +0 -2
- package/dist/cjs/build.cjs.map +1 -1
- package/dist/cjs/cli.cjs +6 -7
- package/dist/cjs/cli.cjs.map +1 -1
- package/dist/cjs/config.cjs +0 -1
- package/dist/cjs/config.cjs.map +1 -1
- package/dist/cjs/editor.cjs +0 -4
- package/dist/cjs/editor.cjs.map +1 -1
- package/dist/cjs/fill/fill.cjs +0 -3
- package/dist/cjs/fill/fill.cjs.map +1 -1
- package/dist/cjs/fill/formatAutoFilledFilePath.cjs +0 -1
- package/dist/cjs/fill/formatAutoFilledFilePath.cjs.map +1 -1
- package/dist/cjs/fill/listTranslationsTasks.cjs +0 -6
- package/dist/cjs/fill/listTranslationsTasks.cjs.map +1 -1
- package/dist/cjs/fill/translateDictionary.cjs +0 -6
- package/dist/cjs/fill/translateDictionary.cjs.map +1 -1
- package/dist/cjs/fill/writeFill.cjs +0 -4
- package/dist/cjs/fill/writeFill.cjs.map +1 -1
- package/dist/cjs/getTargetDictionary.cjs +0 -4
- package/dist/cjs/getTargetDictionary.cjs.map +1 -1
- package/dist/cjs/index.cjs +0 -1
- package/dist/cjs/listContentDeclaration.cjs +0 -4
- package/dist/cjs/listContentDeclaration.cjs.map +1 -1
- package/dist/cjs/liveSync.cjs +0 -6
- package/dist/cjs/liveSync.cjs.map +1 -1
- package/dist/cjs/pull.cjs +0 -5
- package/dist/cjs/pull.cjs.map +1 -1
- package/dist/cjs/push/pullLog.cjs +0 -1
- package/dist/cjs/push/pullLog.cjs.map +1 -1
- package/dist/cjs/push/push.cjs +0 -5
- package/dist/cjs/push/push.cjs.map +1 -1
- package/dist/cjs/pushConfig.cjs +0 -2
- package/dist/cjs/pushConfig.cjs.map +1 -1
- package/dist/cjs/pushLog.cjs +0 -1
- package/dist/cjs/pushLog.cjs.map +1 -1
- package/dist/cjs/reviewDoc.cjs +8 -131
- package/dist/cjs/reviewDoc.cjs.map +1 -1
- package/dist/cjs/reviewDocBlockAware.cjs +90 -0
- package/dist/cjs/reviewDocBlockAware.cjs.map +1 -0
- package/dist/cjs/test/index.cjs +0 -2
- package/dist/cjs/test/index.cjs.map +1 -1
- package/dist/cjs/test/listMissingTranslations.cjs +0 -4
- package/dist/cjs/test/listMissingTranslations.cjs.map +1 -1
- package/dist/cjs/translateDoc.cjs +8 -8
- package/dist/cjs/translateDoc.cjs.map +1 -1
- package/dist/cjs/translation-alignment/alignBlocks.cjs +67 -0
- package/dist/cjs/translation-alignment/alignBlocks.cjs.map +1 -0
- package/dist/cjs/translation-alignment/computeSimilarity.cjs +25 -0
- package/dist/cjs/translation-alignment/computeSimilarity.cjs.map +1 -0
- package/dist/cjs/translation-alignment/fingerprintBlock.cjs +23 -0
- package/dist/cjs/translation-alignment/fingerprintBlock.cjs.map +1 -0
- package/dist/cjs/translation-alignment/index.cjs +21 -0
- package/dist/cjs/translation-alignment/mapChangedLinesToBlocks.cjs +18 -0
- package/dist/cjs/translation-alignment/mapChangedLinesToBlocks.cjs.map +1 -0
- package/dist/cjs/translation-alignment/normalizeBlock.cjs +22 -0
- package/dist/cjs/translation-alignment/normalizeBlock.cjs.map +1 -0
- package/dist/cjs/translation-alignment/pipeline.cjs +37 -0
- package/dist/cjs/translation-alignment/pipeline.cjs.map +1 -0
- package/dist/cjs/translation-alignment/planActions.cjs +48 -0
- package/dist/cjs/translation-alignment/planActions.cjs.map +1 -0
- package/dist/cjs/translation-alignment/rebuildDocument.cjs +49 -0
- package/dist/cjs/translation-alignment/rebuildDocument.cjs.map +1 -0
- package/dist/cjs/translation-alignment/segmentDocument.cjs +132 -0
- package/dist/cjs/translation-alignment/segmentDocument.cjs.map +1 -0
- package/dist/cjs/translation-alignment/types.cjs +0 -0
- package/dist/cjs/utils/calculateChunks.cjs +0 -1
- package/dist/cjs/utils/calculateChunks.cjs.map +1 -1
- package/dist/cjs/utils/checkAccess.cjs +0 -2
- package/dist/cjs/utils/checkAccess.cjs.map +1 -1
- package/dist/cjs/utils/checkLastUpdateTime.cjs +0 -1
- package/dist/cjs/utils/checkLastUpdateTime.cjs.map +1 -1
- package/dist/cjs/utils/chunkInference.cjs +0 -2
- package/dist/cjs/utils/chunkInference.cjs.map +1 -1
- package/dist/cjs/utils/getIsFileUpdatedRecently.cjs +0 -1
- package/dist/cjs/utils/getIsFileUpdatedRecently.cjs.map +1 -1
- package/dist/cjs/utils/getParentPackageJSON.cjs +0 -2
- package/dist/cjs/utils/getParentPackageJSON.cjs.map +1 -1
- package/dist/cjs/utils/mapChunksBetweenFiles.cjs +0 -1
- package/dist/cjs/utils/mapChunksBetweenFiles.cjs.map +1 -1
- package/dist/cjs/watch.cjs +0 -2
- package/dist/cjs/watch.cjs.map +1 -1
- package/dist/esm/cli.mjs +6 -3
- package/dist/esm/cli.mjs.map +1 -1
- package/dist/esm/index.mjs +2 -2
- package/dist/esm/reviewDoc.mjs +13 -128
- package/dist/esm/reviewDoc.mjs.map +1 -1
- package/dist/esm/reviewDocBlockAware.mjs +89 -0
- package/dist/esm/reviewDocBlockAware.mjs.map +1 -0
- package/dist/esm/translateDoc.mjs +8 -3
- package/dist/esm/translateDoc.mjs.map +1 -1
- package/dist/esm/translation-alignment/alignBlocks.mjs +67 -0
- package/dist/esm/translation-alignment/alignBlocks.mjs.map +1 -0
- package/dist/esm/translation-alignment/computeSimilarity.mjs +23 -0
- package/dist/esm/translation-alignment/computeSimilarity.mjs.map +1 -0
- package/dist/esm/translation-alignment/fingerprintBlock.mjs +21 -0
- package/dist/esm/translation-alignment/fingerprintBlock.mjs.map +1 -0
- package/dist/esm/translation-alignment/index.mjs +11 -0
- package/dist/esm/translation-alignment/mapChangedLinesToBlocks.mjs +17 -0
- package/dist/esm/translation-alignment/mapChangedLinesToBlocks.mjs.map +1 -0
- package/dist/esm/translation-alignment/normalizeBlock.mjs +21 -0
- package/dist/esm/translation-alignment/normalizeBlock.mjs.map +1 -0
- package/dist/esm/translation-alignment/pipeline.mjs +36 -0
- package/dist/esm/translation-alignment/pipeline.mjs.map +1 -0
- package/dist/esm/translation-alignment/planActions.mjs +47 -0
- package/dist/esm/translation-alignment/planActions.mjs.map +1 -0
- package/dist/esm/translation-alignment/rebuildDocument.mjs +47 -0
- package/dist/esm/translation-alignment/rebuildDocument.mjs.map +1 -0
- package/dist/esm/translation-alignment/segmentDocument.mjs +131 -0
- package/dist/esm/translation-alignment/segmentDocument.mjs.map +1 -0
- package/dist/esm/translation-alignment/types.mjs +0 -0
- package/dist/types/cli.d.ts.map +1 -1
- package/dist/types/index.d.ts +2 -2
- package/dist/types/reviewDoc.d.ts +3 -6
- package/dist/types/reviewDoc.d.ts.map +1 -1
- package/dist/types/reviewDocBlockAware.d.ts +19 -0
- package/dist/types/reviewDocBlockAware.d.ts.map +1 -0
- package/dist/types/translateDoc.d.ts +2 -0
- package/dist/types/translateDoc.d.ts.map +1 -1
- package/dist/types/translation-alignment/alignBlocks.d.ts +7 -0
- package/dist/types/translation-alignment/alignBlocks.d.ts.map +1 -0
- package/dist/types/translation-alignment/computeSimilarity.d.ts +6 -0
- package/dist/types/translation-alignment/computeSimilarity.d.ts.map +1 -0
- package/dist/types/translation-alignment/fingerprintBlock.d.ts +7 -0
- package/dist/types/translation-alignment/fingerprintBlock.d.ts.map +1 -0
- package/dist/types/translation-alignment/index.d.ts +11 -0
- package/dist/types/translation-alignment/mapChangedLinesToBlocks.d.ts +7 -0
- package/dist/types/translation-alignment/mapChangedLinesToBlocks.d.ts.map +1 -0
- package/dist/types/translation-alignment/normalizeBlock.d.ts +7 -0
- package/dist/types/translation-alignment/normalizeBlock.d.ts.map +1 -0
- package/dist/types/translation-alignment/pipeline.d.ts +25 -0
- package/dist/types/translation-alignment/pipeline.d.ts.map +1 -0
- package/dist/types/translation-alignment/planActions.d.ts +7 -0
- package/dist/types/translation-alignment/planActions.d.ts.map +1 -0
- package/dist/types/translation-alignment/rebuildDocument.d.ts +32 -0
- package/dist/types/translation-alignment/rebuildDocument.d.ts.map +1 -0
- package/dist/types/translation-alignment/segmentDocument.d.ts +7 -0
- package/dist/types/translation-alignment/segmentDocument.d.ts.map +1 -0
- package/dist/types/translation-alignment/types.d.ts +49 -0
- package/dist/types/translation-alignment/types.d.ts.map +1 -0
- package/package.json +23 -23
|
@@ -0,0 +1,485 @@
|
|
|
1
|
+
# Translation Alignment System
|
|
2
|
+
|
|
3
|
+
A sophisticated block-aware alignment system for maintaining translations of Markdown documents. This system intelligently detects changes, handles reordering, and minimizes unnecessary AI translation calls.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
When you have an English document and its French translation, and you make changes to the English version, this system:
|
|
8
|
+
|
|
9
|
+
1. **Segments** both documents into semantic blocks (headings, paragraphs, lists, code blocks, etc.)
|
|
10
|
+
2. **Fingerprints** each block using structural anchors (special characters, numbers, punctuation)
|
|
11
|
+
3. **Aligns** blocks between English and French using the Needleman-Wunsch algorithm
|
|
12
|
+
4. **Detects** which blocks changed, were added, deleted, or reordered
|
|
13
|
+
5. **Optimizes** by only sending changed/new blocks to AI for translation
|
|
14
|
+
6. **Reconstructs** the French document with proper ordering and spacing
|
|
15
|
+
|
|
16
|
+
## Key Features
|
|
17
|
+
|
|
18
|
+
### Language-Agnostic Alignment
|
|
19
|
+
|
|
20
|
+
Uses **anchor text** (special characters and digits such as `` []`1234567890-=!@#$%^&*()>< ``) rather than language-specific content. This makes alignment robust across different languages.
|
|
21
|
+
|
|
22
|
+
```typescript
|
|
23
|
+
// Example anchor text extraction:
|
|
24
|
+
"Hello [World](https://example.com) - 2024"
|
|
25
|
+
// Becomes: "[ ](://.)-2024"
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### Automatic Reordering Detection
|
|
29
|
+
|
|
30
|
+
If you swap two paragraphs in the English document and both remain unchanged in content, the system automatically reorders them in the French translation without re-translating.
|
|
31
|
+
|
|
32
|
+
### Smart Change Detection
|
|
33
|
+
|
|
34
|
+
Uses Git changed lines (if available) to precisely identify which blocks need review. Falls back to content comparison if Git info is unavailable.
|
|
35
|
+
|
|
36
|
+
### Block Types Supported
|
|
37
|
+
|
|
38
|
+
- `heading` - Markdown headings (# ## ###)
|
|
39
|
+
- `paragraph` - Regular text paragraphs
|
|
40
|
+
- `list_item` - Bullet and numbered lists
|
|
41
|
+
- `code_block` - Fenced code blocks
|
|
42
|
+
- `blockquote` - Quote blocks
|
|
43
|
+
- `table` - Markdown tables
|
|
44
|
+
- `horizontal_rule` - Horizontal rules (---)
|
|
45
|
+
- `html` - Embedded HTML
|
|
46
|
+
- `unknown` - Fallback for blank lines
|
|
47
|
+
|
|
48
|
+
## Architecture
|
|
49
|
+
|
|
50
|
+
### Core Components
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
translation-alignment/
|
|
54
|
+
├── types.ts # Type definitions
|
|
55
|
+
├── segmentDocument.ts # Document → Blocks
|
|
56
|
+
├── normalizeBlock.ts # Block → Normalized (semantic + anchor)
|
|
57
|
+
├── fingerprintBlock.ts # Normalized → Fingerprinted (with digests)
|
|
58
|
+
├── computeSimilarity.ts # Jaccard similarity calculation
|
|
59
|
+
├── alignBlocks.ts # Needleman-Wunsch alignment
|
|
60
|
+
├── mapChangedLinesToBlocks.ts # Git lines → Block indexes
|
|
61
|
+
├── planActions.ts # Alignment → Action plan
|
|
62
|
+
├── rebuildDocument.ts # Action plan → Segments + Merge
|
|
63
|
+
└── pipeline.ts # High-level orchestration
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Data Flow
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
English Text ─────────┐
|
|
70
|
+
│
|
|
71
|
+
├─→ Segment → Normalize → Fingerprint
|
|
72
|
+
│
|
|
73
|
+
French Text ──────────┘
|
|
74
|
+
│
|
|
75
|
+
↓
|
|
76
|
+
Align (Needleman-Wunsch)
|
|
77
|
+
│
|
|
78
|
+
↓
|
|
79
|
+
Map Changed Lines
|
|
80
|
+
│
|
|
81
|
+
↓
|
|
82
|
+
Plan Actions
|
|
83
|
+
│
|
|
84
|
+
↓
|
|
85
|
+
┌─────────────────┴─────────────────┐
|
|
86
|
+
│ │
|
|
87
|
+
↓ ↓
|
|
88
|
+
Segments to Review Reusable Segments
|
|
89
|
+
│ │
|
|
90
|
+
↓ │
|
|
91
|
+
AI Translation │
|
|
92
|
+
│ │
|
|
93
|
+
└─────────────────┬─────────────────┘
|
|
94
|
+
│
|
|
95
|
+
↓
|
|
96
|
+
Merge & Output
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## How It Works
|
|
100
|
+
|
|
101
|
+
### 1. Segmentation
|
|
102
|
+
|
|
103
|
+
```typescript
|
|
104
|
+
const blocks = segmentDocument(text);
|
|
105
|
+
// Input: "# Title\n\nParagraph 1\n\nParagraph 2"
|
|
106
|
+
// Output: [
|
|
107
|
+
// { type: "heading", content: "# Title\n", lineStart: 1, lineEnd: 1 },
|
|
108
|
+
// { type: "paragraph", content: "Paragraph 1\n", lineStart: 3, lineEnd: 3 },
|
|
109
|
+
// { type: "paragraph", content: "Paragraph 2\n", lineStart: 5, lineEnd: 5 }
|
|
110
|
+
// ]
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### 2. Normalization
|
|
114
|
+
|
|
115
|
+
```typescript
|
|
116
|
+
const normalized = normalizeBlock(block);
|
|
117
|
+
// Creates two representations:
|
|
118
|
+
// - semanticText: lowercase, no markdown formatting
|
|
119
|
+
// - anchorText: only special chars, numbers, symbols
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### 3. Fingerprinting
|
|
123
|
+
|
|
124
|
+
```typescript
|
|
125
|
+
const fingerprinted = fingerprintBlock(normalized, prev, next);
|
|
126
|
+
// Adds:
|
|
127
|
+
// - semanticDigest: hash of semantic content
|
|
128
|
+
// - anchorDigest: hash of structural anchors
|
|
129
|
+
// - compositeKey: combined unique identifier
|
|
130
|
+
// - contextKey: hash of surrounding blocks
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### 4. Alignment
|
|
134
|
+
|
|
135
|
+
Uses Needleman-Wunsch algorithm with custom scoring:
|
|
136
|
+
|
|
137
|
+
- **Match score**: Based on type similarity, anchor similarity, and length ratio
|
|
138
|
+
- **Gap penalty**: Penalizes insertions/deletions
|
|
139
|
+
- **Type bonus**: +2 if block types match
|
|
140
|
+
- **Length bonus**: +1 if the two blocks' lengths are within 75% of each other
|
|
141
|
+
- **Anchor similarity**: 0-1 Jaccard score × 8
|
|
142
|
+
|
|
143
|
+
### 5. Action Planning
|
|
144
|
+
|
|
145
|
+
Produces one of four actions for each block:
|
|
146
|
+
|
|
147
|
+
```typescript
|
|
148
|
+
type PlannedAction =
|
|
149
|
+
| { kind: "reuse" } // Unchanged, use existing translation
|
|
150
|
+
| { kind: "review" } // Changed, needs AI review
|
|
151
|
+
| { kind: "insert_new" } // New block, needs translation
|
|
152
|
+
| { kind: "delete" }; // Removed from source, skip
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### 6. Reconstruction
|
|
156
|
+
|
|
157
|
+
```typescript
|
|
158
|
+
// Only segments needing review are sent to AI
|
|
159
|
+
const reviewedMap = new Map<actionIndex, translatedText>();
|
|
160
|
+
|
|
161
|
+
// Final document merges reused + reviewed segments in correct order
|
|
162
|
+
const output = mergeReviewedSegments(plan, frenchBlocks, reviewedMap);
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## Integration
|
|
166
|
+
|
|
167
|
+
### Basic Usage
|
|
168
|
+
|
|
169
|
+
```typescript
|
|
170
|
+
import { reviewFileBlockAware } from "./reviewDocBlockAware";
|
|
171
|
+
|
|
172
|
+
await reviewFileBlockAware(
|
|
173
|
+
"docs/en/guide.md", // English source
|
|
174
|
+
"docs/fr/guide.md", // French translation
|
|
175
|
+
Locales.FRENCH, // Target locale
|
|
176
|
+
Locales.ENGLISH, // Base locale
|
|
177
|
+
aiOptions, // AI configuration
|
|
178
|
+
configOptions, // App configuration
|
|
179
|
+
customInstructions, // Optional AI instructions
|
|
180
|
+
[10, 15, 20] // Git changed lines (optional)
|
|
181
|
+
);
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Replace Existing Implementation
|
|
185
|
+
|
|
186
|
+
In your main review script, replace:
|
|
187
|
+
|
|
188
|
+
```typescript
|
|
189
|
+
// Old:
|
|
190
|
+
await reviewFile(
|
|
191
|
+
baseFilePath,
|
|
192
|
+
outputFilePath,
|
|
193
|
+
locale,
|
|
194
|
+
baseLocale,
|
|
195
|
+
aiOptions,
|
|
196
|
+
configOptions,
|
|
197
|
+
customInstructions,
|
|
198
|
+
changedLines
|
|
199
|
+
);
|
|
200
|
+
|
|
201
|
+
// New:
|
|
202
|
+
await reviewFileBlockAware(
|
|
203
|
+
baseFilePath,
|
|
204
|
+
outputFilePath,
|
|
205
|
+
locale,
|
|
206
|
+
baseLocale,
|
|
207
|
+
aiOptions,
|
|
208
|
+
configOptions,
|
|
209
|
+
customInstructions,
|
|
210
|
+
changedLines
|
|
211
|
+
);
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Configuration Options
|
|
215
|
+
|
|
216
|
+
```typescript
|
|
217
|
+
const { plan, segmentsToReview } = buildAlignmentPlan({
|
|
218
|
+
englishText,
|
|
219
|
+
frenchText,
|
|
220
|
+
changedLines: [10, 15, 20], // Optional Git changed lines
|
|
221
|
+
similarityOptions: {
|
|
222
|
+
minimumMatchForReuse: 0.9, // Default: 0.9 (90% similar)
|
|
223
|
+
minimumMatchForNearDuplicate: 0.8 // Default: 0.8 (80% similar)
|
|
224
|
+
}
|
|
225
|
+
});
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
## Examples
|
|
229
|
+
|
|
230
|
+
### Example 1: Simple Change
|
|
231
|
+
|
|
232
|
+
**English v1:**
|
|
233
|
+
```markdown
|
|
234
|
+
# Introduction
|
|
235
|
+
Hello world
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
**French v1:**
|
|
239
|
+
```markdown
|
|
240
|
+
# Introduction
|
|
241
|
+
Bonjour le monde
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
**English v2 (changed second line):**
|
|
245
|
+
```markdown
|
|
246
|
+
# Introduction
|
|
247
|
+
Hello universe
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
**Result:**
|
|
251
|
+
- Block 1 (heading): **reuse** existing translation
|
|
252
|
+
- Block 2 (paragraph): **review** with AI
|
|
253
|
+
- Final output preserves structure, only paragraph updated
|
|
254
|
+
|
|
255
|
+
### Example 2: Reordering
|
|
256
|
+
|
|
257
|
+
**English v1:**
|
|
258
|
+
```markdown
|
|
259
|
+
## Section A
|
|
260
|
+
Content A
|
|
261
|
+
|
|
262
|
+
## Section B
|
|
263
|
+
Content B
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
**English v2 (swapped order):**
|
|
267
|
+
```markdown
|
|
268
|
+
## Section B
|
|
269
|
+
Content B
|
|
270
|
+
|
|
271
|
+
## Section A
|
|
272
|
+
Content A
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
**Result:**
|
|
276
|
+
- All blocks identified as unchanged
|
|
277
|
+
- Automatically reordered in French
|
|
278
|
+
- No AI calls needed
|
|
279
|
+
|
|
280
|
+
### Example 3: Insertion
|
|
281
|
+
|
|
282
|
+
**English v1:**
|
|
283
|
+
```markdown
|
|
284
|
+
Para 1
|
|
285
|
+
|
|
286
|
+
Para 3
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
**English v2:**
|
|
290
|
+
```markdown
|
|
291
|
+
Para 1
|
|
292
|
+
|
|
293
|
+
Para 2
|
|
294
|
+
|
|
295
|
+
Para 3
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
**Result:**
|
|
299
|
+
- Para 1: **reuse**
|
|
300
|
+
- Para 2: **insert_new** (translate with AI)
|
|
301
|
+
- Para 3: **reuse**
|
|
302
|
+
|
|
303
|
+
### Example 4: Deletion
|
|
304
|
+
|
|
305
|
+
**English v1:**
|
|
306
|
+
```markdown
|
|
307
|
+
Para 1
|
|
308
|
+
|
|
309
|
+
Para 2
|
|
310
|
+
|
|
311
|
+
Para 3
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
**English v2:**
|
|
315
|
+
```markdown
|
|
316
|
+
Para 1
|
|
317
|
+
|
|
318
|
+
Para 3
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
**Result:**
|
|
322
|
+
- Para 1: **reuse**
|
|
323
|
+
- Para 2: **delete** (skip in output)
|
|
324
|
+
- Para 3: **reuse**
|
|
325
|
+
|
|
326
|
+
## Performance Optimization
|
|
327
|
+
|
|
328
|
+
### With Git Integration
|
|
329
|
+
|
|
330
|
+
When Git changed lines are available, the system:
|
|
331
|
+
1. Maps changed lines to block indexes
|
|
332
|
+
2. Only marks those blocks as needing review
|
|
333
|
+
3. Dramatically reduces AI calls for large documents
|
|
334
|
+
|
|
335
|
+
```typescript
|
|
336
|
+
// With git info: only reviews blocks 5, 6, 7 (3 AI calls)
|
|
337
|
+
changedLines: [45, 46, 47, 48, 49, 50]
|
|
338
|
+
|
|
339
|
+
// Without git info: reviews all changed blocks by content comparison
|
|
340
|
+
changedLines: undefined
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
### Reuse Rate
|
|
344
|
+
|
|
345
|
+
Typical reuse rates:
|
|
346
|
+
- **Minor edits**: 90-95% blocks reused
|
|
347
|
+
- **Moderate changes**: 70-80% blocks reused
|
|
348
|
+
- **Major refactor**: 40-60% blocks reused
|
|
349
|
+
- **New document**: 0% blocks reused (but alignment helps with structure)
|
|
350
|
+
|
|
351
|
+
## Best Practices
|
|
352
|
+
|
|
353
|
+
### 1. Preserve Structure
|
|
354
|
+
|
|
355
|
+
Keep consistent markdown structure between versions. The alignment algorithm works best when block types remain stable.
|
|
356
|
+
|
|
357
|
+
✅ Good:
|
|
358
|
+
```markdown
|
|
359
|
+
# Heading
|
|
360
|
+
Paragraph
|
|
361
|
+
|
|
362
|
+
# Heading
|
|
363
|
+
Paragraph (updated content)
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
❌ Avoid:
|
|
367
|
+
```markdown
|
|
368
|
+
# Heading
|
|
369
|
+
Paragraph
|
|
370
|
+
|
|
371
|
+
## Different heading level
|
|
372
|
+
List instead of paragraph
|
|
373
|
+
- Item 1
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
### 2. Use Git Integration
|
|
377
|
+
|
|
378
|
+
Always pass `changedLines` when available for maximum efficiency:
|
|
379
|
+
|
|
380
|
+
```typescript
|
|
381
|
+
const changedLines = await listGitLines(filePath, gitOptions);
|
|
382
|
+
await reviewFileBlockAware(..., changedLines);
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
### 3. Adjust Similarity Thresholds
|
|
386
|
+
|
|
387
|
+
For different document types:
|
|
388
|
+
|
|
389
|
+
- **Technical docs** (lots of code/numbers): Lower threshold (0.85)
|
|
390
|
+
- **Marketing content** (more text): Higher threshold (0.95)
|
|
391
|
+
- **Mixed content**: Default (0.90)
|
|
392
|
+
|
|
393
|
+
### 4. Monitor Action Distribution
|
|
394
|
+
|
|
395
|
+
Log action distribution to understand system behavior:
|
|
396
|
+
|
|
397
|
+
```typescript
|
|
398
|
+
const stats = {
|
|
399
|
+
reuse: plan.actions.filter(a => a.kind === "reuse").length,
|
|
400
|
+
review: plan.actions.filter(a => a.kind === "review").length,
|
|
401
|
+
insertNew: plan.actions.filter(a => a.kind === "insert_new").length,
|
|
402
|
+
delete: plan.actions.filter(a => a.kind === "delete").length,
|
|
403
|
+
};
|
|
404
|
+
console.log(`Efficiency: ${(stats.reuse / plan.actions.length * 100).toFixed(1)}% reused`);
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
## Troubleshooting
|
|
408
|
+
|
|
409
|
+
### Issue: Too Many Blocks Marked for Review
|
|
410
|
+
|
|
411
|
+
**Cause**: Similarity threshold too high\
|
|
412
|
+
**Solution**: Lower `minimumMatchForReuse` to 0.85 or 0.80
|
|
413
|
+
|
|
414
|
+
### Issue: Blocks Not Aligning Correctly
|
|
415
|
+
|
|
416
|
+
**Cause**: Document structure changed significantly\
|
|
417
|
+
**Solution**:
|
|
418
|
+
- Ensure block types are consistent
|
|
419
|
+
- Check that special characters/numbers are preserved
|
|
420
|
+
- Verify the anchor text is being extracted correctly
|
|
421
|
+
|
|
422
|
+
### Issue: Translations Not Merging
|
|
423
|
+
|
|
424
|
+
**Cause**: Action indexes not matching\
|
|
425
|
+
**Solution**: Ensure `reviewedSegmentsMap` uses `segment.actionIndex` as key
|
|
426
|
+
|
|
427
|
+
### Issue: Extra Blank Lines in Output
|
|
428
|
+
|
|
429
|
+
**Cause**: Block content doesn't preserve trailing newlines consistently\
|
|
430
|
+
**Solution**: Review `trimTrailingNewlines` in segmentation logic
|
|
431
|
+
|
|
432
|
+
## Testing
|
|
433
|
+
|
|
434
|
+
### Unit Tests
|
|
435
|
+
|
|
436
|
+
```typescript
|
|
437
|
+
import { segmentDocument } from "./segmentDocument";
|
|
438
|
+
import { alignEnglishAndFrenchBlocks } from "./alignBlocks";
|
|
439
|
+
|
|
440
|
+
describe("Block Alignment", () => {
|
|
441
|
+
it("should detect reordered paragraphs", () => {
|
|
442
|
+
const english = "A\n\nB\n\nC";
|
|
443
|
+
const french = "B\n\nA\n\nC";
|
|
444
|
+
// ... test alignment
|
|
445
|
+
});
|
|
446
|
+
});
|
|
447
|
+
```
|
|
448
|
+
|
|
449
|
+
### Integration Tests
|
|
450
|
+
|
|
451
|
+
```typescript
|
|
452
|
+
it("should handle complete document translation workflow", async () => {
|
|
453
|
+
const result = await reviewFileBlockAware(
|
|
454
|
+
testEnglishPath,
|
|
455
|
+
testFrenchPath,
|
|
456
|
+
Locales.FRENCH,
|
|
457
|
+
Locales.ENGLISH
|
|
458
|
+
);
|
|
459
|
+
// Verify output structure, reuse rate, etc.
|
|
460
|
+
});
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
## Future Enhancements
|
|
464
|
+
|
|
465
|
+
- [ ] Support for more block types (footnotes, definition lists)
|
|
466
|
+
- [ ] Fuzzy matching for minor structural variations
|
|
467
|
+
- [ ] Parallel AI translation for multiple segments
|
|
468
|
+
- [ ] Caching of fingerprints for repeated processing
|
|
469
|
+
- [ ] Visual diff tool for reviewing alignment decisions
|
|
470
|
+
- [ ] Machine learning for optimal threshold selection
|
|
471
|
+
|
|
472
|
+
## Contributing
|
|
473
|
+
|
|
474
|
+
When contributing to this system:
|
|
475
|
+
|
|
476
|
+
1. **Follow naming conventions**: No abbreviations, descriptive names
|
|
477
|
+
2. **Use arrow functions**: Consistent function syntax
|
|
478
|
+
3. **Add JSDoc comments**: Document complex logic
|
|
479
|
+
4. **Write tests**: Cover edge cases
|
|
480
|
+
5. **Maintain backwards compatibility**: Ensure existing integrations work
|
|
481
|
+
|
|
482
|
+
## License
|
|
483
|
+
|
|
484
|
+
See main project LICENSE file.
|
|
485
|
+
|