@intlayer/cli 7.0.7 → 7.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/translation-alignment/ARCHITECTURE.md +518 -0
- package/dist/assets/translation-alignment/IMPROVEMENTS.md +550 -0
- package/dist/assets/translation-alignment/INTEGRATION_EXAMPLE.md +682 -0
- package/dist/assets/translation-alignment/QUICK_START.md +494 -0
- package/dist/assets/translation-alignment/README.md +485 -0
- package/dist/assets/translation-alignment/SUMMARY.md +440 -0
- package/dist/cjs/IntlayerEventListener.cjs +0 -3
- package/dist/cjs/IntlayerEventListener.cjs.map +1 -1
- package/dist/cjs/_virtual/_utils_asset.cjs +0 -3
- package/dist/cjs/build.cjs +0 -2
- package/dist/cjs/build.cjs.map +1 -1
- package/dist/cjs/cli.cjs +6 -7
- package/dist/cjs/cli.cjs.map +1 -1
- package/dist/cjs/config.cjs +0 -1
- package/dist/cjs/config.cjs.map +1 -1
- package/dist/cjs/editor.cjs +0 -4
- package/dist/cjs/editor.cjs.map +1 -1
- package/dist/cjs/fill/fill.cjs +0 -3
- package/dist/cjs/fill/fill.cjs.map +1 -1
- package/dist/cjs/fill/formatAutoFilledFilePath.cjs +0 -1
- package/dist/cjs/fill/formatAutoFilledFilePath.cjs.map +1 -1
- package/dist/cjs/fill/listTranslationsTasks.cjs +0 -6
- package/dist/cjs/fill/listTranslationsTasks.cjs.map +1 -1
- package/dist/cjs/fill/translateDictionary.cjs +0 -6
- package/dist/cjs/fill/translateDictionary.cjs.map +1 -1
- package/dist/cjs/fill/writeFill.cjs +0 -4
- package/dist/cjs/fill/writeFill.cjs.map +1 -1
- package/dist/cjs/getTargetDictionary.cjs +0 -4
- package/dist/cjs/getTargetDictionary.cjs.map +1 -1
- package/dist/cjs/index.cjs +0 -1
- package/dist/cjs/listContentDeclaration.cjs +0 -4
- package/dist/cjs/listContentDeclaration.cjs.map +1 -1
- package/dist/cjs/liveSync.cjs +0 -6
- package/dist/cjs/liveSync.cjs.map +1 -1
- package/dist/cjs/pull.cjs +0 -5
- package/dist/cjs/pull.cjs.map +1 -1
- package/dist/cjs/push/pullLog.cjs +0 -1
- package/dist/cjs/push/pullLog.cjs.map +1 -1
- package/dist/cjs/push/push.cjs +0 -5
- package/dist/cjs/push/push.cjs.map +1 -1
- package/dist/cjs/pushConfig.cjs +0 -2
- package/dist/cjs/pushConfig.cjs.map +1 -1
- package/dist/cjs/pushLog.cjs +0 -1
- package/dist/cjs/pushLog.cjs.map +1 -1
- package/dist/cjs/reviewDoc.cjs +8 -131
- package/dist/cjs/reviewDoc.cjs.map +1 -1
- package/dist/cjs/reviewDocBlockAware.cjs +90 -0
- package/dist/cjs/reviewDocBlockAware.cjs.map +1 -0
- package/dist/cjs/test/index.cjs +0 -2
- package/dist/cjs/test/index.cjs.map +1 -1
- package/dist/cjs/test/listMissingTranslations.cjs +0 -4
- package/dist/cjs/test/listMissingTranslations.cjs.map +1 -1
- package/dist/cjs/translateDoc.cjs +8 -8
- package/dist/cjs/translateDoc.cjs.map +1 -1
- package/dist/cjs/translation-alignment/alignBlocks.cjs +67 -0
- package/dist/cjs/translation-alignment/alignBlocks.cjs.map +1 -0
- package/dist/cjs/translation-alignment/computeSimilarity.cjs +25 -0
- package/dist/cjs/translation-alignment/computeSimilarity.cjs.map +1 -0
- package/dist/cjs/translation-alignment/fingerprintBlock.cjs +23 -0
- package/dist/cjs/translation-alignment/fingerprintBlock.cjs.map +1 -0
- package/dist/cjs/translation-alignment/index.cjs +21 -0
- package/dist/cjs/translation-alignment/mapChangedLinesToBlocks.cjs +18 -0
- package/dist/cjs/translation-alignment/mapChangedLinesToBlocks.cjs.map +1 -0
- package/dist/cjs/translation-alignment/normalizeBlock.cjs +22 -0
- package/dist/cjs/translation-alignment/normalizeBlock.cjs.map +1 -0
- package/dist/cjs/translation-alignment/pipeline.cjs +37 -0
- package/dist/cjs/translation-alignment/pipeline.cjs.map +1 -0
- package/dist/cjs/translation-alignment/planActions.cjs +48 -0
- package/dist/cjs/translation-alignment/planActions.cjs.map +1 -0
- package/dist/cjs/translation-alignment/rebuildDocument.cjs +49 -0
- package/dist/cjs/translation-alignment/rebuildDocument.cjs.map +1 -0
- package/dist/cjs/translation-alignment/segmentDocument.cjs +132 -0
- package/dist/cjs/translation-alignment/segmentDocument.cjs.map +1 -0
- package/dist/cjs/translation-alignment/types.cjs +0 -0
- package/dist/cjs/utils/calculateChunks.cjs +0 -1
- package/dist/cjs/utils/calculateChunks.cjs.map +1 -1
- package/dist/cjs/utils/checkAccess.cjs +0 -2
- package/dist/cjs/utils/checkAccess.cjs.map +1 -1
- package/dist/cjs/utils/checkLastUpdateTime.cjs +0 -1
- package/dist/cjs/utils/checkLastUpdateTime.cjs.map +1 -1
- package/dist/cjs/utils/chunkInference.cjs +0 -2
- package/dist/cjs/utils/chunkInference.cjs.map +1 -1
- package/dist/cjs/utils/getIsFileUpdatedRecently.cjs +0 -1
- package/dist/cjs/utils/getIsFileUpdatedRecently.cjs.map +1 -1
- package/dist/cjs/utils/getParentPackageJSON.cjs +0 -2
- package/dist/cjs/utils/getParentPackageJSON.cjs.map +1 -1
- package/dist/cjs/utils/mapChunksBetweenFiles.cjs +0 -1
- package/dist/cjs/utils/mapChunksBetweenFiles.cjs.map +1 -1
- package/dist/cjs/watch.cjs +0 -2
- package/dist/cjs/watch.cjs.map +1 -1
- package/dist/esm/cli.mjs +6 -3
- package/dist/esm/cli.mjs.map +1 -1
- package/dist/esm/index.mjs +2 -2
- package/dist/esm/reviewDoc.mjs +13 -128
- package/dist/esm/reviewDoc.mjs.map +1 -1
- package/dist/esm/reviewDocBlockAware.mjs +89 -0
- package/dist/esm/reviewDocBlockAware.mjs.map +1 -0
- package/dist/esm/translateDoc.mjs +8 -3
- package/dist/esm/translateDoc.mjs.map +1 -1
- package/dist/esm/translation-alignment/alignBlocks.mjs +67 -0
- package/dist/esm/translation-alignment/alignBlocks.mjs.map +1 -0
- package/dist/esm/translation-alignment/computeSimilarity.mjs +23 -0
- package/dist/esm/translation-alignment/computeSimilarity.mjs.map +1 -0
- package/dist/esm/translation-alignment/fingerprintBlock.mjs +21 -0
- package/dist/esm/translation-alignment/fingerprintBlock.mjs.map +1 -0
- package/dist/esm/translation-alignment/index.mjs +11 -0
- package/dist/esm/translation-alignment/mapChangedLinesToBlocks.mjs +17 -0
- package/dist/esm/translation-alignment/mapChangedLinesToBlocks.mjs.map +1 -0
- package/dist/esm/translation-alignment/normalizeBlock.mjs +21 -0
- package/dist/esm/translation-alignment/normalizeBlock.mjs.map +1 -0
- package/dist/esm/translation-alignment/pipeline.mjs +36 -0
- package/dist/esm/translation-alignment/pipeline.mjs.map +1 -0
- package/dist/esm/translation-alignment/planActions.mjs +47 -0
- package/dist/esm/translation-alignment/planActions.mjs.map +1 -0
- package/dist/esm/translation-alignment/rebuildDocument.mjs +47 -0
- package/dist/esm/translation-alignment/rebuildDocument.mjs.map +1 -0
- package/dist/esm/translation-alignment/segmentDocument.mjs +131 -0
- package/dist/esm/translation-alignment/segmentDocument.mjs.map +1 -0
- package/dist/esm/translation-alignment/types.mjs +0 -0
- package/dist/types/cli.d.ts.map +1 -1
- package/dist/types/index.d.ts +2 -2
- package/dist/types/pull.d.ts.map +1 -1
- package/dist/types/reviewDoc.d.ts +3 -6
- package/dist/types/reviewDoc.d.ts.map +1 -1
- package/dist/types/reviewDocBlockAware.d.ts +19 -0
- package/dist/types/reviewDocBlockAware.d.ts.map +1 -0
- package/dist/types/translateDoc.d.ts +2 -0
- package/dist/types/translateDoc.d.ts.map +1 -1
- package/dist/types/translation-alignment/alignBlocks.d.ts +7 -0
- package/dist/types/translation-alignment/alignBlocks.d.ts.map +1 -0
- package/dist/types/translation-alignment/computeSimilarity.d.ts +6 -0
- package/dist/types/translation-alignment/computeSimilarity.d.ts.map +1 -0
- package/dist/types/translation-alignment/fingerprintBlock.d.ts +7 -0
- package/dist/types/translation-alignment/fingerprintBlock.d.ts.map +1 -0
- package/dist/types/translation-alignment/index.d.ts +11 -0
- package/dist/types/translation-alignment/mapChangedLinesToBlocks.d.ts +7 -0
- package/dist/types/translation-alignment/mapChangedLinesToBlocks.d.ts.map +1 -0
- package/dist/types/translation-alignment/normalizeBlock.d.ts +7 -0
- package/dist/types/translation-alignment/normalizeBlock.d.ts.map +1 -0
- package/dist/types/translation-alignment/pipeline.d.ts +25 -0
- package/dist/types/translation-alignment/pipeline.d.ts.map +1 -0
- package/dist/types/translation-alignment/planActions.d.ts +7 -0
- package/dist/types/translation-alignment/planActions.d.ts.map +1 -0
- package/dist/types/translation-alignment/rebuildDocument.d.ts +32 -0
- package/dist/types/translation-alignment/rebuildDocument.d.ts.map +1 -0
- package/dist/types/translation-alignment/segmentDocument.d.ts +7 -0
- package/dist/types/translation-alignment/segmentDocument.d.ts.map +1 -0
- package/dist/types/translation-alignment/types.d.ts +49 -0
- package/dist/types/translation-alignment/types.d.ts.map +1 -0
- package/package.json +23 -23
|
@@ -0,0 +1,518 @@
|
|
|
1
|
+
# Architecture Diagram
|
|
2
|
+
|
|
3
|
+
This document provides visual representations of the block-aware translation alignment system.
|
|
4
|
+
|
|
5
|
+
## System Overview
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
┌─────────────────────────────────────────────────────────────────────┐
|
|
9
|
+
│ Block-Aware Translation System │
|
|
10
|
+
└─────────────────────────────────────────────────────────────────────┘
|
|
11
|
+
│
|
|
12
|
+
┌───────────────────┴───────────────────┐
|
|
13
|
+
│ │
|
|
14
|
+
┌──────▼──────┐ ┌──────▼──────┐
|
|
15
|
+
│ English │ │ French │
|
|
16
|
+
│ Document │ │ Document │
|
|
17
|
+
│ (Base) │ │ (Translation)│
|
|
18
|
+
└──────┬──────┘ └──────┬──────┘
|
|
19
|
+
│ │
|
|
20
|
+
│ ┌─────────────────┐ │
|
|
21
|
+
└────────►│ Segmentation │◄─────────┘
|
|
22
|
+
└────────┬────────┘
|
|
23
|
+
│
|
|
24
|
+
┌────────▼────────┐
|
|
25
|
+
│ Normalization │
|
|
26
|
+
│ - Semantic │
|
|
27
|
+
│ - Anchor Text │
|
|
28
|
+
└────────┬────────┘
|
|
29
|
+
│
|
|
30
|
+
┌────────▼────────┐
|
|
31
|
+
│ Fingerprinting │
|
|
32
|
+
│ - SHA-256 │
|
|
33
|
+
│ - Context │
|
|
34
|
+
└────────┬────────┘
|
|
35
|
+
│
|
|
36
|
+
┌──────────────────┴──────────────────┐
|
|
37
|
+
│ │
|
|
38
|
+
┌──────▼──────┐ ┌──────▼──────┐
|
|
39
|
+
│ English │ │ French │
|
|
40
|
+
│ Blocks │ │ Blocks │
|
|
41
|
+
│ (Fingerpr.) │ │ (Fingerpr.) │
|
|
42
|
+
└──────┬──────┘ └──────┬──────┘
|
|
43
|
+
│ │
|
|
44
|
+
└──────────────┬──────────────────────┘
|
|
45
|
+
│
|
|
46
|
+
┌────────▼────────┐
|
|
47
|
+
│ Alignment │
|
|
48
|
+
│ (Needleman- │
|
|
49
|
+
│ Wunsch) │
|
|
50
|
+
└────────┬────────┘
|
|
51
|
+
│
|
|
52
|
+
┌────────▼────────┐
|
|
53
|
+
│ Alignment │
|
|
54
|
+
│ Pairs │
|
|
55
|
+
└────────┬────────┘
|
|
56
|
+
│
|
|
57
|
+
┌──────────────┴──────────────┐
|
|
58
|
+
│ │
|
|
59
|
+
┌──────▼──────┐ ┌─────────▼────────┐
|
|
60
|
+
│ Git │ │ Similarity │
|
|
61
|
+
│ Changed │ │ Options │
|
|
62
|
+
│ Lines │ │ │
|
|
63
|
+
└──────┬──────┘ └─────────┬────────┘
|
|
64
|
+
│ │
|
|
65
|
+
└──────────┬──────────────────┘
|
|
66
|
+
│
|
|
67
|
+
┌────────▼────────┐
|
|
68
|
+
│ Action │
|
|
69
|
+
│ Planning │
|
|
70
|
+
└────────┬────────┘
|
|
71
|
+
│
|
|
72
|
+
┌────────▼────────┐
|
|
73
|
+
│ Action Plan │
|
|
74
|
+
│ - reuse │
|
|
75
|
+
│ - review │
|
|
76
|
+
│ - insert_new │
|
|
77
|
+
│ - delete │
|
|
78
|
+
└────────┬────────┘
|
|
79
|
+
│
|
|
80
|
+
┌─────────────────┴─────────────────┐
|
|
81
|
+
│ │
|
|
82
|
+
│ ┌──────────────────┐ │
|
|
83
|
+
└───────►│ Identify │ │
|
|
84
|
+
│ Segments to │ │
|
|
85
|
+
│ Review │ │
|
|
86
|
+
└────────┬─────────┘ │
|
|
87
|
+
│ │
|
|
88
|
+
┌────────▼─────────┐ │
|
|
89
|
+
│ AI Translation │ │
|
|
90
|
+
│ (Your Logic) │ │
|
|
91
|
+
└────────┬─────────┘ │
|
|
92
|
+
│ │
|
|
93
|
+
┌────────▼─────────┐ │
|
|
94
|
+
│ Reviewed │ │
|
|
95
|
+
│ Segments │ │
|
|
96
|
+
│ (Map) │ │
|
|
97
|
+
└────────┬─────────┘ │
|
|
98
|
+
│ │
|
|
99
|
+
└────────┬────────┘
|
|
100
|
+
│
|
|
101
|
+
┌────────▼────────┐
|
|
102
|
+
│ Merge │
|
|
103
|
+
│ Reviewed │
|
|
104
|
+
│ Segments │
|
|
105
|
+
└────────┬────────┘
|
|
106
|
+
│
|
|
107
|
+
┌────────▼────────┐
|
|
108
|
+
│ Final French │
|
|
109
|
+
│ Document │
|
|
110
|
+
└─────────────────┘
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Data Flow
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
Input:
|
|
117
|
+
englishText: string
|
|
118
|
+
frenchText: string (existing translation or "")
|
|
119
|
+
changedLines: number[] (optional, from Git)
|
|
120
|
+
similarityOptions: { minimumMatchForReuse: 0.9 }
|
|
121
|
+
|
|
122
|
+
Output:
|
|
123
|
+
finalFrenchDocument: string (updated translation)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Module Dependencies
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
types.ts
|
|
130
|
+
↓
|
|
131
|
+
segmentDocument.ts ────────┐
|
|
132
|
+
↓ │
|
|
133
|
+
normalizeBlock.ts ─────────┤
|
|
134
|
+
↓ │
|
|
135
|
+
fingerprintBlock.ts ───────┤
|
|
136
|
+
├──→ pipeline.ts ──→ reviewDocBlockAware.ts
|
|
137
|
+
computeSimilarity.ts ──────┤ ↑
|
|
138
|
+
↓ │ │
|
|
139
|
+
alignBlocks.ts ────────────┤ │
|
|
140
|
+
│ │
|
|
141
|
+
mapChangedLinesToBlocks.ts ┤ │
|
|
142
|
+
↓ │ │
|
|
143
|
+
planActions.ts ────────────┤ │
|
|
144
|
+
↓ │ │
|
|
145
|
+
rebuildDocument.ts ────────┘ │
|
|
146
|
+
│
|
|
147
|
+
index.ts (exports)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Alignment Algorithm (Needleman-Wunsch)
|
|
151
|
+
|
|
152
|
+
```
|
|
153
|
+
English Blocks: [E1] [E2] [E3] [E4] [E5]
|
|
154
|
+
French Blocks: [F1] [F2] [F3] [F4]
|
|
155
|
+
|
|
156
|
+
Score Matrix (5x4):
|
|
157
|
+
F1 F2 F3 F4
|
|
158
|
+
E1 [ 8 ][ 2 ][ 1 ][ 0 ]
|
|
159
|
+
E2 [ 2 ][ 9 ][ 3 ][ 1 ]
|
|
160
|
+
E3 [ 1 ][ 3 ][ 8 ][ 2 ]
|
|
161
|
+
E4 [ 0 ][ 1 ][ 2 ][ 7 ]
|
|
162
|
+
E5 [ 0 ][ 0 ][ 1 ][ 2 ] ← E5 is new
|
|
163
|
+
|
|
164
|
+
Traceback (diagonal = match, up = English block with no French match → insert_new, left = French block with no English match → delete):
|
|
165
|
+
E1 ──→ F1 (match, score 8)
|
|
166
|
+
E2 ──→ F2 (match, score 9)
|
|
167
|
+
E3 ──→ F3 (match, score 8)
|
|
168
|
+
E4 ──→ F4 (match, score 7)
|
|
169
|
+
E5 ──→ NULL (new block, no match)
|
|
170
|
+
|
|
171
|
+
Alignment Pairs:
|
|
172
|
+
[ (E1→F1, sim=0.95), (E2→F2, sim=0.98), (E3→F3, sim=0.93), (E4→F4, sim=0.91), (E5→NULL, sim=0) ]
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
## Action Planning Logic
|
|
176
|
+
|
|
177
|
+
```
|
|
178
|
+
For each alignment pair:
|
|
179
|
+
|
|
180
|
+
IF englishIndex = -1 AND frenchIndex != null:
|
|
181
|
+
→ Action: DELETE (French block with no English match)
|
|
182
|
+
|
|
183
|
+
ELSE IF englishIndex >= 0 AND frenchIndex = null:
|
|
184
|
+
→ Action: INSERT_NEW (New English block)
|
|
185
|
+
|
|
186
|
+
ELSE IF englishIndex >= 0 AND frenchIndex != null:
|
|
187
|
+
IF block changed (from Git) OR similarity < threshold:
|
|
188
|
+
→ Action: REVIEW (Changed block, needs AI)
|
|
189
|
+
ELSE:
|
|
190
|
+
→ Action: REUSE (Unchanged block, copy existing)
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Similarity Calculation (Jaccard)
|
|
194
|
+
|
|
195
|
+
```
|
|
196
|
+
Block 1: "[Click here](https://example.com) - see section 2.1"
|
|
197
|
+
Block 2: "[Cliquez ici](https://example.com) - voir section 2.1"
|
|
198
|
+
|
|
199
|
+
Anchor Text Extraction:
|
|
200
|
+
Block 1 anchor: "[](://.)-2.1"
|
|
201
|
+
Block 2 anchor: "[](://.)-2.1"
|
|
202
|
+
|
|
203
|
+
Character Shingles (length=3):
|
|
204
|
+
Set A: {"[](", "](:", "(:/", "://", "//.", "/.)", ".)-", ")-2", "-2.", "2.1"}
|
|
205
|
+
Set B: {"[](", "](:", "(:/", "://", "//.", "/.)", ".)-", ")-2", "-2.", "2.1"}
|
|
206
|
+
|
|
207
|
+
Jaccard Similarity:
|
|
208
|
+
Intersection: 10 shingles
|
|
209
|
+
Union: 10 shingles
|
|
210
|
+
Similarity = 10/10 = 1.0 (100% match!)
|
|
211
|
+
|
|
212
|
+
Result: REUSE (despite different languages!)
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
## State Machine
|
|
216
|
+
|
|
217
|
+
```
|
|
218
|
+
┌─────────────┐
|
|
219
|
+
│ Start │
|
|
220
|
+
└──────┬──────┘
|
|
221
|
+
│
|
|
222
|
+
▼
|
|
223
|
+
┌─────────────────┐
|
|
224
|
+
│ Segment │
|
|
225
|
+
│ Documents │
|
|
226
|
+
└──────┬──────────┘
|
|
227
|
+
│
|
|
228
|
+
▼
|
|
229
|
+
┌─────────────────┐
|
|
230
|
+
│ Normalize │
|
|
231
|
+
│ Blocks │
|
|
232
|
+
└──────┬──────────┘
|
|
233
|
+
│
|
|
234
|
+
▼
|
|
235
|
+
┌─────────────────┐
|
|
236
|
+
│ Fingerprint │
|
|
237
|
+
│ Blocks │
|
|
238
|
+
└──────┬──────────┘
|
|
239
|
+
│
|
|
240
|
+
▼
|
|
241
|
+
┌─────────────────┐
|
|
242
|
+
│ Align │
|
|
243
|
+
│ Blocks │
|
|
244
|
+
└──────┬──────────┘
|
|
245
|
+
│
|
|
246
|
+
▼
|
|
247
|
+
┌─────────────────┐ YES ┌─────────────┐
|
|
248
|
+
│ Changed Lines? ├────────────►│ Map Lines │
|
|
249
|
+
└──────┬──────────┘ │ to Blocks │
|
|
250
|
+
│ NO └──────┬──────┘
|
|
251
|
+
│ │
|
|
252
|
+
└───────────────┬───────────────┘
|
|
253
|
+
│
|
|
254
|
+
▼
|
|
255
|
+
┌─────────────────┐
|
|
256
|
+
│ Plan Actions │
|
|
257
|
+
└────────┬─────────┘
|
|
258
|
+
│
|
|
259
|
+
▼
|
|
260
|
+
┌─────────────────┐
|
|
261
|
+
│ For Each Action │
|
|
262
|
+
└────────┬─────────┘
|
|
263
|
+
│
|
|
264
|
+
┌───────────────┼───────────────┐
|
|
265
|
+
│ │ │
|
|
266
|
+
▼ ▼ ▼
|
|
267
|
+
┌─────────┐ ┌─────────────┐ ┌─────────┐
|
|
268
|
+
│ REUSE │ │ REVIEW │ │ DELETE │
|
|
269
|
+
│ Copy │ │ Translate │ │ Skip │
|
|
270
|
+
│ Existing│ │ with AI │ │ │
|
|
271
|
+
└────┬────┘ └──────┬──────┘ └────┬────┘
|
|
272
|
+
│ │ │
|
|
273
|
+
└───────────────┼───────────────┘
|
|
274
|
+
│
|
|
275
|
+
▼
|
|
276
|
+
┌─────────────────┐
|
|
277
|
+
│ Merge Segments │
|
|
278
|
+
└────────┬─────────┘
|
|
279
|
+
│
|
|
280
|
+
▼
|
|
281
|
+
┌─────────────────┐
|
|
282
|
+
│ Write Output │
|
|
283
|
+
└────────┬─────────┘
|
|
284
|
+
│
|
|
285
|
+
▼
|
|
286
|
+
┌─────────────┐
|
|
287
|
+
│ Done │
|
|
288
|
+
└─────────────┘
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
## Block Types Hierarchy
|
|
292
|
+
|
|
293
|
+
```
|
|
294
|
+
Block (abstract)
|
|
295
|
+
│
|
|
296
|
+
├── StructuralBlock
|
|
297
|
+
│ ├── heading (# ## ###)
|
|
298
|
+
│ ├── horizontal_rule (---)
|
|
299
|
+
│ └── html (<div>...</div>)
|
|
300
|
+
│
|
|
301
|
+
├── ContentBlock
|
|
302
|
+
│ ├── paragraph
|
|
303
|
+
│ ├── blockquote (> text)
|
|
304
|
+
│ └── list_item (- item)
|
|
305
|
+
│
|
|
306
|
+
├── CodeBlock
|
|
307
|
+
│ └── code_block (```...```)
|
|
308
|
+
│
|
|
309
|
+
├── TableBlock
|
|
310
|
+
│ └── table (| col |)
|
|
311
|
+
│
|
|
312
|
+
└── UnknownBlock
|
|
313
|
+
└── unknown (blank lines, etc.)
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
## Fingerprinting Strategy
|
|
317
|
+
|
|
318
|
+
```
|
|
319
|
+
Block Content:
|
|
320
|
+
"# Introduction\n\nWelcome to [our guide](https://example.com)!"
|
|
321
|
+
|
|
322
|
+
Step 1: Normalize
|
|
323
|
+
semanticText: "introduction welcome to our guide"
|
|
324
|
+
anchorText: "#[](://.)!"
|
|
325
|
+
|
|
326
|
+
Step 2: Hash
|
|
327
|
+
semanticDigest: sha256(semanticText) = "a1b2c3..."
|
|
328
|
+
anchorDigest: sha256(anchorText) = "d4e5f6..."
|
|
329
|
+
|
|
330
|
+
Step 3: Composite Key
|
|
331
|
+
compositeKey: "a1b2c3...:d4e5f6..."
|
|
332
|
+
|
|
333
|
+
Step 4: Context (surrounding blocks)
|
|
334
|
+
previousDigest: sha256(previousBlock.semanticText)
|
|
335
|
+
nextDigest: sha256(nextBlock.semanticText)
|
|
336
|
+
contextKey: sha256(previousDigest + ":" + nextDigest)
|
|
337
|
+
|
|
338
|
+
Result: Unique fingerprint that:
|
|
339
|
+
- Identifies content semantically
|
|
340
|
+
- Preserves structure (anchors)
|
|
341
|
+
- Considers context (neighbors)
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
## Performance Characteristics
|
|
345
|
+
|
|
346
|
+
```
|
|
347
|
+
Time Complexity:
|
|
348
|
+
Segmentation: O(n) n = lines in document
|
|
349
|
+
Normalization: O(b*m) b = blocks, m = avg block length
|
|
350
|
+
Fingerprinting: O(b) b = blocks
|
|
351
|
+
Alignment: O(b_en * b_fr) Needleman-Wunsch
|
|
352
|
+
Planning: O(p) p = alignment pairs
|
|
353
|
+
Reconstruction: O(a) a = actions
|
|
354
|
+
|
|
355
|
+
Total: O(b_en * b_fr) dominant for large documents
|
|
356
|
+
|
|
357
|
+
Space Complexity:
|
|
358
|
+
Blocks storage: O(b*m)
|
|
359
|
+
Score matrix: O(b_en * b_fr)
|
|
360
|
+
Action plan: O(max(b_en, b_fr))
|
|
361
|
+
|
|
362
|
+
Total: O(b*m + b_en*b_fr)
|
|
363
|
+
|
|
364
|
+
Optimization:
|
|
365
|
+
Blocks << Lines (typically 10x fewer)
|
|
366
|
+
So O(blocks^2) << O(lines^2)
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
## Example Trace
|
|
370
|
+
|
|
371
|
+
```
|
|
372
|
+
Input:
|
|
373
|
+
English: "# Title\n\nPara 1\n\nPara 2\n"
|
|
374
|
+
French: "# Titre\n\nPara 1 (fr)\n\nPara 2 (fr)\n"
|
|
375
|
+
Changed: [3] (line 3 changed)
|
|
376
|
+
|
|
377
|
+
Segmentation:
|
|
378
|
+
English Blocks: [E1: heading, E2: paragraph, E3: paragraph]
|
|
379
|
+
French Blocks: [F1: heading, F2: paragraph, F3: paragraph]
|
|
380
|
+
|
|
381
|
+
Normalization:
|
|
382
|
+
E1: semantic="title", anchor="#"
|
|
383
|
+
E2: semantic="para 1", anchor=""
|
|
384
|
+
E3: semantic="para 2", anchor=""
|
|
385
|
+
F1: semantic="titre", anchor="#"
|
|
386
|
+
F2: semantic="para 1 (fr)", anchor="()"
|
|
387
|
+
F3: semantic="para 2 (fr)", anchor="()"
|
|
388
|
+
|
|
389
|
+
Fingerprinting:
|
|
390
|
+
E1: compositeKey="hash1:hash2"
|
|
391
|
+
E2: compositeKey="hash3:hash4"
|
|
392
|
+
E3: compositeKey="hash5:hash6"
|
|
393
|
+
|
|
394
|
+
Alignment:
|
|
395
|
+
Pairs: [(E1→F1, 0.92), (E2→F2, 0.95), (E3→F3, 0.94)]
|
|
396
|
+
|
|
397
|
+
Change Detection:
|
|
398
|
+
Line 3 is in block E2 (lines 3-3)
|
|
399
|
+
Changed blocks: {E2}
|
|
400
|
+
|
|
401
|
+
Action Planning:
|
|
402
|
+
E1→F1: similarity=0.92, not changed → REUSE
|
|
403
|
+
E2→F2: similarity=0.95, but CHANGED → REVIEW
|
|
404
|
+
E3→F3: similarity=0.94, not changed → REUSE
|
|
405
|
+
|
|
406
|
+
Reconstruction:
|
|
407
|
+
Output:
|
|
408
|
+
F1 content (reused)
|
|
409
|
+
F2 content (from AI)
|
|
410
|
+
F3 content (reused)
|
|
411
|
+
|
|
412
|
+
Final:
|
|
413
|
+
"# Titre\n\nPara 1 (translated by AI)\n\nPara 2 (fr)\n"
|
|
414
|
+
```
|
|
415
|
+
|
|
416
|
+
## Integration Points
|
|
417
|
+
|
|
418
|
+
```
|
|
419
|
+
┌───────────────────────────────────────────────────────┐
|
|
420
|
+
│ Your Application │
|
|
421
|
+
└───────────────────────┬───────────────────────────────┘
|
|
422
|
+
│
|
|
423
|
+
┌────────────┴────────────┐
|
|
424
|
+
│ │
|
|
425
|
+
┌──────▼──────┐ ┌──────▼──────┐
|
|
426
|
+
│ reviewDoc │ │ Direct │
|
|
427
|
+
│ (main) │ │ Pipeline │
|
|
428
|
+
└──────┬──────┘ │ API │
|
|
429
|
+
│ └──────┬──────┘
|
|
430
|
+
│ │
|
|
431
|
+
┌──────▼──────────────────────┬─┘
|
|
432
|
+
│ │
|
|
433
|
+
│ reviewFileBlockAware │
|
|
434
|
+
│ (integration layer) │
|
|
435
|
+
└──────┬──────────────────────┘
|
|
436
|
+
│
|
|
437
|
+
┌──────▼──────────────────────┐
|
|
438
|
+
│ buildAlignmentPlan │
|
|
439
|
+
│ (pipeline) │
|
|
440
|
+
└──────┬──────────────────────┘
|
|
441
|
+
│
|
|
442
|
+
┌──────▼──────────────────────┐
|
|
443
|
+
│ Core Translation │
|
|
444
|
+
│ Alignment Modules │
|
|
445
|
+
└─────────────────────────────┘
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
## Testing Strategy
|
|
449
|
+
|
|
450
|
+
```
|
|
451
|
+
Unit Tests:
|
|
452
|
+
├── segmentDocument.test.ts
|
|
453
|
+
│ ├── Should segment headings
|
|
454
|
+
│ ├── Should segment paragraphs
|
|
455
|
+
│ ├── Should segment code blocks
|
|
456
|
+
│ └── Should handle edge cases
|
|
457
|
+
│
|
|
458
|
+
├── normalizeBlock.test.ts
|
|
459
|
+
│ ├── Should extract semantic text
|
|
460
|
+
│ ├── Should extract anchor text
|
|
461
|
+
│ └── Should handle special chars
|
|
462
|
+
│
|
|
463
|
+
├── computeSimilarity.test.ts
|
|
464
|
+
│ ├── Should compute Jaccard similarity
|
|
465
|
+
│ └── Should handle edge cases
|
|
466
|
+
│
|
|
467
|
+
└── alignBlocks.test.ts
|
|
468
|
+
├── Should align matching blocks
|
|
469
|
+
├── Should detect insertions
|
|
470
|
+
├── Should detect deletions
|
|
471
|
+
└── Should detect reordering
|
|
472
|
+
|
|
473
|
+
Integration Tests:
|
|
474
|
+
├── Full pipeline test
|
|
475
|
+
├── Reordering scenario
|
|
476
|
+
├── Insertion scenario
|
|
477
|
+
├── Deletion scenario
|
|
478
|
+
└── Mixed changes scenario
|
|
479
|
+
|
|
480
|
+
E2E Tests:
|
|
481
|
+
├── Real markdown documents
|
|
482
|
+
├── Multiple locales
|
|
483
|
+
└── Git integration
|
|
484
|
+
```
|
|
485
|
+
|
|
486
|
+
## Configuration Flow
|
|
487
|
+
|
|
488
|
+
```
|
|
489
|
+
User Config
|
|
490
|
+
│
|
|
491
|
+
├── minimumMatchForReuse (default: 0.90)
|
|
492
|
+
├── minimumMatchForNearDuplicate (default: 0.80)
|
|
493
|
+
└── AI options (provider, model, etc.)
|
|
494
|
+
│
|
|
495
|
+
└──→ pipeline.ts
|
|
496
|
+
│
|
|
497
|
+
├──→ buildAlignmentPlan()
|
|
498
|
+
│ │
|
|
499
|
+
│ ├──→ planActions(similarityOptions)
|
|
500
|
+
│ │
|
|
501
|
+
│ └──→ Result: Plan + Segments
|
|
502
|
+
│
|
|
503
|
+
└──→ reviewFileBlockAware()
|
|
504
|
+
│
|
|
505
|
+
├──→ chunkInference(AI options)
|
|
506
|
+
│
|
|
507
|
+
└──→ Final output
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
---
|
|
511
|
+
|
|
512
|
+
This architecture ensures:
|
|
513
|
+
- ✅ Separation of concerns
|
|
514
|
+
- ✅ Testability
|
|
515
|
+
- ✅ Extensibility
|
|
516
|
+
- ✅ Performance
|
|
517
|
+
- ✅ Maintainability
|
|
518
|
+
|