@intlayer/chokidar 9.0.0-canary.0 → 9.0.0-canary.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/docReview/alignBlocks.cjs +79 -0
- package/dist/cjs/docReview/alignBlocks.cjs.map +1 -0
- package/dist/cjs/docReview/computeSimilarity.cjs +42 -0
- package/dist/cjs/docReview/computeSimilarity.cjs.map +1 -0
- package/dist/cjs/docReview/fingerprintBlock.cjs +35 -0
- package/dist/cjs/docReview/fingerprintBlock.cjs.map +1 -0
- package/dist/cjs/docReview/index.cjs +25 -0
- package/dist/cjs/docReview/mapChangedLinesToBlocks.cjs +27 -0
- package/dist/cjs/docReview/mapChangedLinesToBlocks.cjs.map +1 -0
- package/dist/cjs/docReview/normalizeBlock.cjs +34 -0
- package/dist/cjs/docReview/normalizeBlock.cjs.map +1 -0
- package/dist/cjs/docReview/pipeline.cjs +144 -0
- package/dist/cjs/docReview/pipeline.cjs.map +1 -0
- package/dist/cjs/docReview/planActions.cjs +58 -0
- package/dist/cjs/docReview/planActions.cjs.map +1 -0
- package/dist/cjs/docReview/rebuildDocument.cjs +65 -0
- package/dist/cjs/docReview/rebuildDocument.cjs.map +1 -0
- package/dist/cjs/docReview/reviewReport.cjs +200 -0
- package/dist/cjs/docReview/reviewReport.cjs.map +1 -0
- package/dist/cjs/docReview/segmentDocument.cjs +134 -0
- package/dist/cjs/docReview/segmentDocument.cjs.map +1 -0
- package/dist/cjs/docReview/types.cjs +0 -0
- package/dist/cjs/init/index.cjs +17 -0
- package/dist/cjs/init/index.cjs.map +1 -1
- package/dist/cjs/init/utils/githubActions.cjs +159 -0
- package/dist/cjs/init/utils/githubActions.cjs.map +1 -0
- package/dist/cjs/init/utils/index.cjs +4 -0
- package/dist/esm/docReview/alignBlocks.mjs +78 -0
- package/dist/esm/docReview/alignBlocks.mjs.map +1 -0
- package/dist/esm/docReview/computeSimilarity.mjs +39 -0
- package/dist/esm/docReview/computeSimilarity.mjs.map +1 -0
- package/dist/esm/docReview/fingerprintBlock.mjs +32 -0
- package/dist/esm/docReview/fingerprintBlock.mjs.map +1 -0
- package/dist/esm/docReview/index.mjs +12 -0
- package/dist/esm/docReview/mapChangedLinesToBlocks.mjs +25 -0
- package/dist/esm/docReview/mapChangedLinesToBlocks.mjs.map +1 -0
- package/dist/esm/docReview/normalizeBlock.mjs +32 -0
- package/dist/esm/docReview/normalizeBlock.mjs.map +1 -0
- package/dist/esm/docReview/pipeline.mjs +142 -0
- package/dist/esm/docReview/pipeline.mjs.map +1 -0
- package/dist/esm/docReview/planActions.mjs +56 -0
- package/dist/esm/docReview/planActions.mjs.map +1 -0
- package/dist/esm/docReview/rebuildDocument.mjs +62 -0
- package/dist/esm/docReview/rebuildDocument.mjs.map +1 -0
- package/dist/esm/docReview/reviewReport.mjs +196 -0
- package/dist/esm/docReview/reviewReport.mjs.map +1 -0
- package/dist/esm/docReview/segmentDocument.mjs +131 -0
- package/dist/esm/docReview/segmentDocument.mjs.map +1 -0
- package/dist/esm/docReview/types.mjs +0 -0
- package/dist/esm/init/index.mjs +17 -0
- package/dist/esm/init/index.mjs.map +1 -1
- package/dist/esm/init/utils/githubActions.mjs +155 -0
- package/dist/esm/init/utils/githubActions.mjs.map +1 -0
- package/dist/esm/init/utils/index.mjs +2 -1
- package/dist/types/docReview/alignBlocks.d.ts +18 -0
- package/dist/types/docReview/alignBlocks.d.ts.map +1 -0
- package/dist/types/docReview/computeSimilarity.d.ts +22 -0
- package/dist/types/docReview/computeSimilarity.d.ts.map +1 -0
- package/dist/types/docReview/fingerprintBlock.d.ts +18 -0
- package/dist/types/docReview/fingerprintBlock.d.ts.map +1 -0
- package/dist/types/docReview/index.d.ts +12 -0
- package/dist/types/docReview/mapChangedLinesToBlocks.d.ts +15 -0
- package/dist/types/docReview/mapChangedLinesToBlocks.d.ts.map +1 -0
- package/dist/types/docReview/normalizeBlock.d.ts +18 -0
- package/dist/types/docReview/normalizeBlock.d.ts.map +1 -0
- package/dist/types/docReview/pipeline.d.ts +46 -0
- package/dist/types/docReview/pipeline.d.ts.map +1 -0
- package/dist/types/docReview/planActions.d.ts +18 -0
- package/dist/types/docReview/planActions.d.ts.map +1 -0
- package/dist/types/docReview/rebuildDocument.d.ts +46 -0
- package/dist/types/docReview/rebuildDocument.d.ts.map +1 -0
- package/dist/types/docReview/reviewReport.d.ts +82 -0
- package/dist/types/docReview/reviewReport.d.ts.map +1 -0
- package/dist/types/docReview/segmentDocument.d.ts +40 -0
- package/dist/types/docReview/segmentDocument.d.ts.map +1 -0
- package/dist/types/docReview/types.d.ts +73 -0
- package/dist/types/docReview/types.d.ts.map +1 -0
- package/dist/types/formatDictionary.d.ts +2 -3
- package/dist/types/formatDictionary.d.ts.map +1 -1
- package/dist/types/init/index.d.ts +2 -1
- package/dist/types/init/index.d.ts.map +1 -1
- package/dist/types/init/utils/githubActions.d.ts +19 -0
- package/dist/types/init/utils/githubActions.d.ts.map +1 -0
- package/dist/types/init/utils/index.d.ts +2 -1
- package/package.json +17 -9
- package/dist/types/intlayer/dist/types/index.d.ts +0 -4
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
+
const require_docReview_computeSimilarity = require('./computeSimilarity.cjs');
|
|
3
|
+
|
|
4
|
+
//#region src/docReview/alignBlocks.ts
|
|
5
|
+
/**
 * Align the blocks of a base document with the blocks of its translation using a
 * Needleman–Wunsch global alignment over anchor similarity and block type.
 *
 * Because prose differs across languages, the score is weighted toward the
 * structural anchor (digits and symbols) rather than the words themselves.
 *
 * Each returned pair has the shape `{ baseIndex, targetIndex, similarityScore }`:
 * - matched blocks carry both indexes and their anchor Jaccard similarity;
 * - a base block without counterpart has `targetIndex: null` and score 0;
 * - a target block without counterpart has `baseIndex: -1` and score 0.
 *
 * @param baseBlocks - Blocks of the base (source) document.
 * @param targetBlocks - Blocks of the target (translated) document.
 * @returns The ordered list of alignment pairs, including insertions and deletions.
 */
const alignBaseAndTargetBlocks = (baseBlocks, targetBlocks) => {
	const baseLength = baseBlocks.length;
	const targetLength = targetBlocks.length;
	const gapPenalty = -2;
	const buildMatrix = (initialValue) => Array.from({ length: baseLength + 1 }, () => Array.from({ length: targetLength + 1 }, () => initialValue));
	const scoreMatrix = buildMatrix(0);
	const traceMatrix = buildMatrix("diagonal");
	// Anchor similarity of every (base, target) pair, stored during the fill so
	// the traceback can reuse it instead of shingling both texts a second time.
	const similarityMatrix = buildMatrix(0);
	const computeMatchScore = (baseBlock, targetBlock, anchorSimilarity) => {
		const typeBonus = baseBlock.type === targetBlock.type ? 2 : 0;
		const shorterLength = Math.min(baseBlock.content.length, targetBlock.content.length);
		const longerLength = Math.max(baseBlock.content.length, targetBlock.content.length);
		// Two empty contents have the same length; without this guard the ratio
		// would be 0 / 0 = NaN and the bonus would be silently dropped.
		const lengthRatio = longerLength === 0 ? 1 : shorterLength / longerLength;
		const lengthBonus = lengthRatio > .75 ? 1 : 0;
		// Weighted toward anchor similarity.
		return typeBonus + lengthBonus + anchorSimilarity * 8;
	};
	// First column / first row: only gaps are possible.
	for (let i = 1; i <= baseLength; i += 1) {
		scoreMatrix[i][0] = scoreMatrix[i - 1][0] + gapPenalty;
		traceMatrix[i][0] = "up";
	}
	for (let j = 1; j <= targetLength; j += 1) {
		scoreMatrix[0][j] = scoreMatrix[0][j - 1] + gapPenalty;
		traceMatrix[0][j] = "left";
	}
	// Fill. Ties prefer a match, then a base-side gap, then a target-side gap.
	for (let i = 1; i <= baseLength; i += 1) {
		for (let j = 1; j <= targetLength; j += 1) {
			const baseBlock = baseBlocks[i - 1];
			const targetBlock = targetBlocks[j - 1];
			const anchorSimilarity = require_docReview_computeSimilarity.computeJaccardSimilarity(baseBlock.anchorText, targetBlock.anchorText, 3);
			similarityMatrix[i][j] = anchorSimilarity;
			const match = scoreMatrix[i - 1][j - 1] + computeMatchScore(baseBlock, targetBlock, anchorSimilarity);
			const deleteGap = scoreMatrix[i - 1][j] + gapPenalty;
			const insertGap = scoreMatrix[i][j - 1] + gapPenalty;
			const best = Math.max(match, deleteGap, insertGap);
			scoreMatrix[i][j] = best;
			traceMatrix[i][j] = best === match ? "diagonal" : best === deleteGap ? "up" : "left";
		}
	}
	// Traceback from the bottom-right corner. Pairs are collected back-to-front
	// and reversed once at the end (push + reverse instead of repeated unshift).
	const reversedPairs = [];
	let i = baseLength;
	let j = targetLength;
	while (i > 0 || j > 0) {
		if (i > 0 && j > 0 && traceMatrix[i][j] === "diagonal") {
			reversedPairs.push({
				baseIndex: i - 1,
				targetIndex: j - 1,
				similarityScore: similarityMatrix[i][j]
			});
			i -= 1;
			j -= 1;
		} else if (i > 0 && (j === 0 || traceMatrix[i][j] === "up")) {
			// Base block with no corresponding target block.
			reversedPairs.push({
				baseIndex: i - 1,
				targetIndex: null,
				similarityScore: 0
			});
			i -= 1;
		} else if (j > 0 && (i === 0 || traceMatrix[i][j] === "left")) {
			// Target block with no corresponding base block.
			reversedPairs.push({
				baseIndex: -1,
				targetIndex: j - 1,
				similarityScore: 0
			});
			j -= 1;
		}
	}
	return reversedPairs.reverse();
};
|
|
76
|
+
|
|
77
|
+
//#endregion
|
|
78
|
+
exports.alignBaseAndTargetBlocks = alignBaseAndTargetBlocks;
|
|
79
|
+
//# sourceMappingURL=alignBlocks.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignBlocks.cjs","names":["computeJaccardSimilarity"],"sources":["../../../src/docReview/alignBlocks.ts"],"sourcesContent":["import { computeJaccardSimilarity } from './computeSimilarity';\nimport type { AlignmentPair, FingerprintedBlock } from './types';\n\n/**\n * Align the blocks of a base document with the blocks of its translation using a\n * Needleman–Wunsch global alignment over anchor similarity and block type.\n *\n * Because prose differs across languages, the score is weighted toward the\n * structural anchor (digits and symbols) rather than the words themselves.\n *\n * @param baseBlocks - Blocks of the base (source) document.\n * @param targetBlocks - Blocks of the target (translated) document.\n * @returns The ordered list of alignment pairs, including insertions and deletions.\n */\nexport const alignBaseAndTargetBlocks = (\n baseBlocks: FingerprintedBlock[],\n targetBlocks: FingerprintedBlock[]\n): AlignmentPair[] => {\n const baseLength = baseBlocks.length;\n const targetLength = targetBlocks.length;\n\n const scoreMatrix: number[][] = Array.from({ length: baseLength + 1 }, () =>\n Array.from({ length: targetLength + 1 }, () => 0)\n );\n const traceMatrix: ('diagonal' | 'up' | 'left')[][] = Array.from(\n { length: baseLength + 1 },\n () => Array.from({ length: targetLength + 1 }, () => 'diagonal')\n );\n\n const gapPenalty = -2;\n\n const computeMatchScore = (\n baseIndex: number,\n targetIndex: number\n ): number => {\n const baseBlock = baseBlocks[baseIndex];\n const targetBlock = targetBlocks[targetIndex];\n const typeBonus = baseBlock.type === targetBlock.type ? 2 : 0;\n const anchorSimilarity = computeJaccardSimilarity(\n baseBlock.anchorText,\n targetBlock.anchorText,\n 3\n );\n const lengthRatio =\n Math.min(baseBlock.content.length, targetBlock.content.length) /\n Math.max(baseBlock.content.length, targetBlock.content.length);\n const lengthBonus = lengthRatio > 0.75 ? 
1 : 0;\n return typeBonus + lengthBonus + anchorSimilarity * 8; // weighted toward anchor similarity\n };\n\n // initialize first row and column\n for (let i = 1; i <= baseLength; i += 1) {\n scoreMatrix[i][0] = scoreMatrix[i - 1][0] + gapPenalty;\n traceMatrix[i][0] = 'up';\n }\n for (let j = 1; j <= targetLength; j += 1) {\n scoreMatrix[0][j] = scoreMatrix[0][j - 1] + gapPenalty;\n traceMatrix[0][j] = 'left';\n }\n\n // fill\n for (let i = 1; i <= baseLength; i += 1) {\n for (let j = 1; j <= targetLength; j += 1) {\n const match = scoreMatrix[i - 1][j - 1] + computeMatchScore(i - 1, j - 1);\n const deleteGap = scoreMatrix[i - 1][j] + gapPenalty;\n const insertGap = scoreMatrix[i][j - 1] + gapPenalty;\n\n const best = Math.max(match, deleteGap, insertGap);\n scoreMatrix[i][j] = best;\n traceMatrix[i][j] =\n best === match ? 'diagonal' : best === deleteGap ? 'up' : 'left';\n }\n }\n\n // traceback\n const result: AlignmentPair[] = [];\n let i = baseLength;\n let j = targetLength;\n while (i > 0 || j > 0) {\n if (i > 0 && j > 0 && traceMatrix[i][j] === 'diagonal') {\n const baseIndex = i - 1;\n const targetIndex = j - 1;\n const similarityScore = computeJaccardSimilarity(\n baseBlocks[baseIndex].anchorText,\n targetBlocks[targetIndex].anchorText,\n 3\n );\n result.unshift({ baseIndex, targetIndex, similarityScore });\n i -= 1;\n j -= 1;\n } else if (i > 0 && (j === 0 || traceMatrix[i][j] === 'up')) {\n result.unshift({\n baseIndex: i - 1,\n targetIndex: null,\n similarityScore: 0,\n });\n i -= 1;\n } else if (j > 0 && (i === 0 || traceMatrix[i][j] === 'left')) {\n // target block has no corresponding base block (deleted)\n result.unshift({\n baseIndex: -1,\n targetIndex: j - 1,\n similarityScore: 0,\n });\n j -= 1;\n }\n }\n return 
result;\n};\n"],"mappings":";;;;;;;;;;;;;;;AAcA,MAAa,4BACX,YACA,iBACoB;CACpB,MAAM,aAAa,WAAW;CAC9B,MAAM,eAAe,aAAa;CAElC,MAAM,cAA0B,MAAM,KAAK,EAAE,QAAQ,aAAa,GAAG,QACnE,MAAM,KAAK,EAAE,QAAQ,eAAe,GAAG,QAAQ,EAAE,CAClD;CACD,MAAM,cAAgD,MAAM,KAC1D,EAAE,QAAQ,aAAa,GAAG,QACpB,MAAM,KAAK,EAAE,QAAQ,eAAe,GAAG,QAAQ,WAAW,CACjE;CAED,MAAM,aAAa;CAEnB,MAAM,qBACJ,WACA,gBACW;EACX,MAAM,YAAY,WAAW;EAC7B,MAAM,cAAc,aAAa;EACjC,MAAM,YAAY,UAAU,SAAS,YAAY,OAAO,IAAI;EAC5D,MAAM,mBAAmBA,6DACvB,UAAU,YACV,YAAY,YACZ,EACD;AAKD,SAAO,aAHL,KAAK,IAAI,UAAU,QAAQ,QAAQ,YAAY,QAAQ,OAAO,GAC9D,KAAK,IAAI,UAAU,QAAQ,QAAQ,YAAY,QAAQ,OAAO,GAC9B,MAAO,IAAI,KACZ,mBAAmB;;AAItD,MAAK,IAAI,IAAI,GAAG,KAAK,YAAY,KAAK,GAAG;AACvC,cAAY,GAAG,KAAK,YAAY,IAAI,GAAG,KAAK;AAC5C,cAAY,GAAG,KAAK;;AAEtB,MAAK,IAAI,IAAI,GAAG,KAAK,cAAc,KAAK,GAAG;AACzC,cAAY,GAAG,KAAK,YAAY,GAAG,IAAI,KAAK;AAC5C,cAAY,GAAG,KAAK;;AAItB,MAAK,IAAI,IAAI,GAAG,KAAK,YAAY,KAAK,EACpC,MAAK,IAAI,IAAI,GAAG,KAAK,cAAc,KAAK,GAAG;EACzC,MAAM,QAAQ,YAAY,IAAI,GAAG,IAAI,KAAK,kBAAkB,IAAI,GAAG,IAAI,EAAE;EACzE,MAAM,YAAY,YAAY,IAAI,GAAG,KAAK;EAC1C,MAAM,YAAY,YAAY,GAAG,IAAI,KAAK;EAE1C,MAAM,OAAO,KAAK,IAAI,OAAO,WAAW,UAAU;AAClD,cAAY,GAAG,KAAK;AACpB,cAAY,GAAG,KACb,SAAS,QAAQ,aAAa,SAAS,YAAY,OAAO;;CAKhE,MAAM,SAA0B,EAAE;CAClC,IAAI,IAAI;CACR,IAAI,IAAI;AACR,QAAO,IAAI,KAAK,IAAI,EAClB,KAAI,IAAI,KAAK,IAAI,KAAK,YAAY,GAAG,OAAO,YAAY;EACtD,MAAM,YAAY,IAAI;EACtB,MAAM,cAAc,IAAI;EACxB,MAAM,kBAAkBA,6DACtB,WAAW,WAAW,YACtB,aAAa,aAAa,YAC1B,EACD;AACD,SAAO,QAAQ;GAAE;GAAW;GAAa;GAAiB,CAAC;AAC3D,OAAK;AACL,OAAK;YACI,IAAI,MAAM,MAAM,KAAK,YAAY,GAAG,OAAO,OAAO;AAC3D,SAAO,QAAQ;GACb,WAAW,IAAI;GACf,aAAa;GACb,iBAAiB;GAClB,CAAC;AACF,OAAK;YACI,IAAI,MAAM,MAAM,KAAK,YAAY,GAAG,OAAO,SAAS;AAE7D,SAAO,QAAQ;GACb,WAAW;GACX,aAAa,IAAI;GACjB,iBAAiB;GAClB,CAAC;AACF,OAAK;;AAGT,QAAO"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
+
|
|
3
|
+
//#region src/docReview/computeSimilarity.ts
|
|
4
|
+
/**
 * Generate the set of character shingles (substrings of a fixed length) for a
 * piece of text. Runs of whitespace are collapsed to a single space and the
 * text is trimmed before shingling.
 *
 * A non-empty text shorter than `shingleLength` yields a single shingle holding
 * the whole normalized text; an empty (or whitespace-only) text yields an empty
 * set.
 *
 * @param text - The text to shingle.
 * @param shingleLength - The length of each shingle; must be a positive integer.
 * @returns The set of unique shingles found in the text.
 * @throws {RangeError} If `shingleLength` is not a positive integer.
 */
const generateCharacterShingles = (text, shingleLength) => {
	// A zero or negative size would produce the empty string as a shingle for
	// every text (making all texts look identical), and a fractional size would
	// skip trailing shingles, so reject both explicitly.
	if (!Number.isInteger(shingleLength) || shingleLength < 1) {
		throw new RangeError(`shingleLength must be a positive integer, received ${shingleLength}`);
	}
	const normalized = text.replace(/\s+/g, " ").trim();
	const shingles = /* @__PURE__ */ new Set();
	if (normalized.length < shingleLength) {
		if (normalized.length > 0) shingles.add(normalized);
		return shingles;
	}
	const lastStart = normalized.length - shingleLength;
	for (let start = 0; start <= lastStart; start += 1) {
		shingles.add(normalized.slice(start, start + shingleLength));
	}
	return shingles;
};
|
|
22
|
+
/**
 * Compute the Jaccard similarity between two strings using character shingles:
 * the number of shingles shared by both strings divided by the number of
 * distinct shingles found in either.
 *
 * @param a - First string.
 * @param b - Second string.
 * @param shingleLength - The shingle length (defaults to 3).
 * @returns A score between 0 (disjoint) and 1 (identical). Two strings that both
 * produce no shingles (empty or whitespace-only) are treated as identical.
 */
const computeJaccardSimilarity = (a, b, shingleLength = 3) => {
	const setA = generateCharacterShingles(a, shingleLength);
	const setB = generateCharacterShingles(b, shingleLength);
	if (setA.size === 0 && setB.size === 0) return 1;
	let intersectionSize = 0;
	for (const token of setA) {
		if (setB.has(token)) intersectionSize += 1;
	}
	// |A ∪ B| = |A| + |B| - |A ∩ B|; at least one set is non-empty here, so the
	// union is never zero and no division guard is needed.
	const unionSize = setA.size + setB.size - intersectionSize;
	return intersectionSize / unionSize;
};
|
|
38
|
+
|
|
39
|
+
//#endregion
|
|
40
|
+
exports.computeJaccardSimilarity = computeJaccardSimilarity;
|
|
41
|
+
exports.generateCharacterShingles = generateCharacterShingles;
|
|
42
|
+
//# sourceMappingURL=computeSimilarity.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"computeSimilarity.cjs","names":[],"sources":["../../../src/docReview/computeSimilarity.ts"],"sourcesContent":["/**\n * Generate the set of character shingles (substrings of a fixed length) for a\n * piece of text. Whitespace is collapsed so the result is language agnostic.\n *\n * @param text - The text to shingle.\n * @param shingleLength - The length of each shingle.\n * @returns The set of unique shingles found in the text.\n */\nexport const generateCharacterShingles = (\n text: string,\n shingleLength: number\n): Set<string> => {\n const normalized = text.replace(/\\s+/g, ' ').trim();\n const set = new Set<string>();\n if (normalized.length < shingleLength) {\n if (normalized.length > 0) {\n set.add(normalized);\n }\n return set;\n }\n for (let index = 0; index <= normalized.length - shingleLength; index += 1) {\n set.add(normalized.slice(index, index + shingleLength));\n }\n return set;\n};\n\n/**\n * Compute the Jaccard similarity between two strings using character shingles.\n *\n * @param a - First string.\n * @param b - Second string.\n * @param shingleLength - The shingle length (defaults to 3).\n * @returns A score between 0 (disjoint) and 1 (identical).\n */\nexport const computeJaccardSimilarity = (\n a: string,\n b: string,\n shingleLength: number = 3\n): number => {\n const setA = generateCharacterShingles(a, shingleLength);\n const setB = generateCharacterShingles(b, shingleLength);\n if (setA.size === 0 && setB.size === 0) return 1;\n const intersectionSize = Array.from(setA).filter((token) =>\n setB.has(token)\n ).length;\n const unionSize = new Set([...Array.from(setA), ...Array.from(setB)]).size;\n return unionSize === 0 ? 
0 : intersectionSize / unionSize;\n};\n"],"mappings":";;;;;;;;;;;AAQA,MAAa,6BACX,MACA,kBACgB;CAChB,MAAM,aAAa,KAAK,QAAQ,QAAQ,IAAI,CAAC,MAAM;CACnD,MAAM,sBAAM,IAAI,KAAa;AAC7B,KAAI,WAAW,SAAS,eAAe;AACrC,MAAI,WAAW,SAAS,EACtB,KAAI,IAAI,WAAW;AAErB,SAAO;;AAET,MAAK,IAAI,QAAQ,GAAG,SAAS,WAAW,SAAS,eAAe,SAAS,EACvE,KAAI,IAAI,WAAW,MAAM,OAAO,QAAQ,cAAc,CAAC;AAEzD,QAAO;;;;;;;;;;AAWT,MAAa,4BACX,GACA,GACA,gBAAwB,MACb;CACX,MAAM,OAAO,0BAA0B,GAAG,cAAc;CACxD,MAAM,OAAO,0BAA0B,GAAG,cAAc;AACxD,KAAI,KAAK,SAAS,KAAK,KAAK,SAAS,EAAG,QAAO;CAC/C,MAAM,mBAAmB,MAAM,KAAK,KAAK,CAAC,QAAQ,UAChD,KAAK,IAAI,MAAM,CAChB,CAAC;CACF,MAAM,YAAY,IAAI,IAAI,CAAC,GAAG,MAAM,KAAK,KAAK,EAAE,GAAG,MAAM,KAAK,KAAK,CAAC,CAAC,CAAC;AACtE,QAAO,cAAc,IAAI,IAAI,mBAAmB"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
+
const require_runtime = require('../_virtual/_rolldown/runtime.cjs');
|
|
3
|
+
let node_crypto = require("node:crypto");
|
|
4
|
+
node_crypto = require_runtime.__toESM(node_crypto);
|
|
5
|
+
|
|
6
|
+
//#region src/docReview/fingerprintBlock.ts
|
|
7
|
+
/** Hash a string with SHA-256 and return the digest encoded as hex. */
const computeStringDigest = (text) => {
	const hasher = node_crypto.default.createHash("sha256");
	hasher.update(text);
	return hasher.digest("hex");
};
|
|
8
|
+
/**
 * Attach content and context digests to a normalized block.
 *
 * - `semanticDigest` / `anchorDigest`: digests of the block's `semanticText`
 *   and `anchorText`.
 * - `compositeKey`: both digests joined as `semantic:anchor`.
 * - `contextKey`: digest derived from the neighbours' `semanticText`, which
 *   lets the planner tell apart two blocks that share the same content but
 *   live in different sections. A missing neighbour is hashed as the empty
 *   string.
 *
 * @param block - The block to fingerprint.
 * @param previousBlock - The block immediately before, or `null` at the start.
 * @param nextBlock - The block immediately after, or `null` at the end.
 * @returns A copy of the block enriched with its digests.
 */
const fingerprintBlock = (block, previousBlock, nextBlock) => {
	const semanticDigest = computeStringDigest(block.semanticText);
	const anchorDigest = computeStringDigest(block.anchorText);
	const previousDigest = computeStringDigest(previousBlock?.semanticText ?? "");
	const nextDigest = computeStringDigest(nextBlock?.semanticText ?? "");
	const fingerprint = {
		semanticDigest,
		anchorDigest,
		compositeKey: `${semanticDigest}:${anchorDigest}`,
		contextKey: computeStringDigest(`${previousDigest}:${nextDigest}`)
	};
	return {
		...block,
		...fingerprint
	};
};
|
|
32
|
+
|
|
33
|
+
//#endregion
|
|
34
|
+
exports.fingerprintBlock = fingerprintBlock;
|
|
35
|
+
//# sourceMappingURL=fingerprintBlock.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fingerprintBlock.cjs","names":["crypto"],"sources":["../../../src/docReview/fingerprintBlock.ts"],"sourcesContent":["import crypto from 'node:crypto';\nimport type { FingerprintedBlock, NormalizedBlock } from './types';\n\nconst computeStringDigest = (text: string): string =>\n crypto.createHash('sha256').update(text).digest('hex');\n\n/**\n * Compute content and context digests for a normalized block.\n *\n * The context key (digest of the surrounding blocks) lets the planner tell apart\n * two blocks that share the same content but live in different sections.\n *\n * @param block - The block to fingerprint.\n * @param previousBlock - The block immediately before, or `null` at the start.\n * @param nextBlock - The block immediately after, or `null` at the end.\n * @returns The block enriched with its digests.\n */\nexport const fingerprintBlock = (\n block: NormalizedBlock,\n previousBlock: NormalizedBlock | null,\n nextBlock: NormalizedBlock | null\n): FingerprintedBlock => {\n const semanticDigest = computeStringDigest(block.semanticText);\n const anchorDigest = computeStringDigest(block.anchorText);\n const compositeKey = `${semanticDigest}:${anchorDigest}`;\n\n const previousDigest = computeStringDigest(previousBlock?.semanticText ?? '');\n const nextDigest = computeStringDigest(nextBlock?.semanticText ?? '');\n const contextKey = computeStringDigest(`${previousDigest}:${nextDigest}`);\n\n return {\n ...block,\n semanticDigest,\n anchorDigest,\n compositeKey,\n contextKey,\n };\n};\n"],"mappings":";;;;;;AAGA,MAAM,uBAAuB,SAC3BA,oBAAO,WAAW,SAAS,CAAC,OAAO,KAAK,CAAC,OAAO,MAAM;;;;;;;;;;;;AAaxD,MAAa,oBACX,OACA,eACA,cACuB;CACvB,MAAM,iBAAiB,oBAAoB,MAAM,aAAa;CAC9D,MAAM,eAAe,oBAAoB,MAAM,WAAW;CAC1D,MAAM,eAAe,GAAG,eAAe,GAAG;CAI1C,MAAM,aAAa,oBAAoB,GAFhB,oBAAoB,eAAe,gBAAgB,GAElB,CAAC,GADtC,oBAAoB,WAAW,gBAAgB,GACI,GAAG;AAEzE,QAAO;EACL,GAAG;EACH;EACA;EACA;EACA;EACD"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
+
const require_docReview_planActions = require('./planActions.cjs');
|
|
3
|
+
const require_docReview_normalizeBlock = require('./normalizeBlock.cjs');
|
|
4
|
+
const require_docReview_fingerprintBlock = require('./fingerprintBlock.cjs');
|
|
5
|
+
const require_docReview_computeSimilarity = require('./computeSimilarity.cjs');
|
|
6
|
+
const require_docReview_mapChangedLinesToBlocks = require('./mapChangedLinesToBlocks.cjs');
|
|
7
|
+
const require_docReview_alignBlocks = require('./alignBlocks.cjs');
|
|
8
|
+
const require_docReview_rebuildDocument = require('./rebuildDocument.cjs');
|
|
9
|
+
const require_docReview_segmentDocument = require('./segmentDocument.cjs');
|
|
10
|
+
const require_docReview_pipeline = require('./pipeline.cjs');
|
|
11
|
+
const require_docReview_reviewReport = require('./reviewReport.cjs');
|
|
12
|
+
|
|
13
|
+
exports.alignBaseAndTargetBlocks = require_docReview_alignBlocks.alignBaseAndTargetBlocks;
|
|
14
|
+
exports.buildAlignmentPlan = require_docReview_pipeline.buildAlignmentPlan;
|
|
15
|
+
exports.buildReviewReport = require_docReview_reviewReport.buildReviewReport;
|
|
16
|
+
exports.computeJaccardSimilarity = require_docReview_computeSimilarity.computeJaccardSimilarity;
|
|
17
|
+
exports.fingerprintBlock = require_docReview_fingerprintBlock.fingerprintBlock;
|
|
18
|
+
exports.formatReviewReport = require_docReview_reviewReport.formatReviewReport;
|
|
19
|
+
exports.generateCharacterShingles = require_docReview_computeSimilarity.generateCharacterShingles;
|
|
20
|
+
exports.identifySegmentsToReview = require_docReview_rebuildDocument.identifySegmentsToReview;
|
|
21
|
+
exports.mapChangedLinesToBlocks = require_docReview_mapChangedLinesToBlocks.mapChangedLinesToBlocks;
|
|
22
|
+
exports.mergeReviewedSegments = require_docReview_rebuildDocument.mergeReviewedSegments;
|
|
23
|
+
exports.normalizeBlock = require_docReview_normalizeBlock.normalizeBlock;
|
|
24
|
+
exports.planAlignmentActions = require_docReview_planActions.planAlignmentActions;
|
|
25
|
+
exports.segmentDocument = require_docReview_segmentDocument.segmentDocument;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
+
|
|
3
|
+
//#region src/docReview/mapChangedLinesToBlocks.ts
|
|
4
|
+
/**
 * Find which blocks are touched by a list of changed lines.
 *
 * A block is touched when at least one line in its inclusive
 * `lineStart..lineEnd` range appears in `changedLines`.
 *
 * @param blocks - The ordered blocks of the base document.
 * @param changedLines - 1-based line numbers that changed in the base document.
 * @returns The set of block indexes touched by at least one changed line
 * (empty when `changedLines` is missing or empty).
 */
const mapChangedLinesToBlocks = (blocks, changedLines) => {
	const touchedIndexes = /* @__PURE__ */ new Set();
	if (!changedLines || changedLines.length === 0) return touchedIndexes;
	const changedLookup = new Set(changedLines);
	// Walk the block's line range and stop at the first changed line.
	const containsChangedLine = (block) => {
		let line = block.lineStart;
		while (line <= block.lineEnd) {
			if (changedLookup.has(line)) return true;
			line += 1;
		}
		return false;
	};
	blocks.forEach((block, index) => {
		if (containsChangedLine(block)) touchedIndexes.add(index);
	});
	return touchedIndexes;
};
|
|
24
|
+
|
|
25
|
+
//#endregion
|
|
26
|
+
exports.mapChangedLinesToBlocks = mapChangedLinesToBlocks;
|
|
27
|
+
//# sourceMappingURL=mapChangedLinesToBlocks.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mapChangedLinesToBlocks.cjs","names":[],"sources":["../../../src/docReview/mapChangedLinesToBlocks.ts"],"sourcesContent":["import type { Block, LineChange } from './types';\n\n/**\n * Map a set of changed line numbers onto the indexes of the blocks that contain\n * them.\n *\n * @param blocks - The ordered blocks of the base document.\n * @param changedLines - 1-based line numbers that changed in the base document.\n * @returns The set of block indexes touched by at least one changed line.\n */\nexport const mapChangedLinesToBlocks = (\n blocks: Block[],\n changedLines: LineChange[]\n): Set<number> => {\n const changedSet = new Set<number>();\n if (!changedLines || changedLines.length === 0) return changedSet;\n\n const changedLookup = new Set<number>(changedLines);\n\n blocks.forEach((block, index) => {\n for (let line = block.lineStart; line <= block.lineEnd; line += 1) {\n if (changedLookup.has(line)) {\n changedSet.add(index);\n break;\n }\n }\n });\n\n return changedSet;\n};\n"],"mappings":";;;;;;;;;;;AAUA,MAAa,2BACX,QACA,iBACgB;CAChB,MAAM,6BAAa,IAAI,KAAa;AACpC,KAAI,CAAC,gBAAgB,aAAa,WAAW,EAAG,QAAO;CAEvD,MAAM,gBAAgB,IAAI,IAAY,aAAa;AAEnD,QAAO,SAAS,OAAO,UAAU;AAC/B,OAAK,IAAI,OAAO,MAAM,WAAW,QAAQ,MAAM,SAAS,QAAQ,EAC9D,KAAI,cAAc,IAAI,KAAK,EAAE;AAC3B,cAAW,IAAI,MAAM;AACrB;;GAGJ;AAEF,QAAO"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
+
|
|
3
|
+
//#region src/docReview/normalizeBlock.ts
|
|
4
|
+
/**
 * Strip common Markdown syntax from a text using an ordered list of regex
 * rewrites. Order matters: bold (`**`) must be handled before italic (`*`),
 * and inline emphasis before the line-start list markers.
 */
const removeMarkdownFormatting = (text) => {
	const rewriteRules = [
		// code spans / fences are dropped entirely
		[/`{1,3}[^`]*`{1,3}/g, " "],
		// emphasis markers are removed, their inner text kept
		[/\*\*([^*]+)\*\*/g, "$1"],
		[/\*([^*]+)\*/g, "$1"],
		[/_([^_]+)_/g, "$1"],
		[/~~([^~]+)~~/g, "$1"],
		// links and images are dropped entirely
		[/!?\[[^\]]*\]\([^)]*\)/g, " "],
		// line-start markers: headings, block quotes, bullet and numbered lists
		[/^\s*#{1,6}\s+/gm, ""],
		[/^\s*>\s?/gm, ""],
		[/^\s*[-*+]\s+/gm, ""],
		[/^\s*\d+\.\s+/gm, ""]
	];
	let stripped = text;
	for (const [pattern, replacement] of rewriteRules) {
		stripped = stripped.replace(pattern, replacement);
	}
	return stripped;
};
|
|
7
|
+
/** Reduce every run of whitespace to one space and drop leading/trailing whitespace. */
const collapseWhitespace = (text) => {
	const words = text.split(/\s+/).filter((word) => word.length > 0);
	return words.join(" ");
};
|
|
8
|
+
/**
 * Remove every letter from a text, keeping digits, punctuation, symbols and
 * whitespace.
 *
 * Combining marks (`\p{M}`) are removed together with letters (`\p{L}`):
 * decomposed accents (e.g. "e" + U+0301) and the vowel signs of scripts such as
 * Devanagari or Thai are not letters in Unicode terms, and would otherwise leak
 * language-specific characters into the result.
 *
 * @param text - The text to strip.
 * @returns The text without letters and combining marks.
 */
const stripLettersKeepDigitsAndSymbols = (text) => {
	return text.replace(/[\p{L}\p{M}]+/gu, "");
};
|
|
11
|
+
/**
 * Compute the two normalized texts of a block that matching relies on.
 *
 * - `semanticText`: the content with markdown removed, lower-cased and
 *   whitespace-collapsed (used to detect identical content across versions of
 *   the same language).
 * - `anchorText`: the raw content with letters removed and whitespace collapsed,
 *   i.e. only digits/symbols (used to align blocks across languages, as prose
 *   differs but structure such as numbers and punctuation is preserved).
 *
 * @param block - The block to normalize.
 * @returns A copy of the block enriched with its normalized texts.
 */
const normalizeBlock = (block) => {
	const plainText = removeMarkdownFormatting(block.content);
	const semanticText = collapseWhitespace(plainText.toLowerCase());
	const lettersRemoved = stripLettersKeepDigitsAndSymbols(block.content);
	const anchorText = collapseWhitespace(lettersRemoved);
	return {
		...block,
		semanticText,
		anchorText
	};
};
|
|
31
|
+
|
|
32
|
+
//#endregion
|
|
33
|
+
exports.normalizeBlock = normalizeBlock;
|
|
34
|
+
//# sourceMappingURL=normalizeBlock.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalizeBlock.cjs","names":[],"sources":["../../../src/docReview/normalizeBlock.ts"],"sourcesContent":["import type { Block, NormalizedBlock } from './types';\n\nconst removeMarkdownFormatting = (text: string): string => {\n return text\n .replace(/`{1,3}[^`]*`{1,3}/g, ' ')\n .replace(/\\*\\*([^*]+)\\*\\*/g, '$1')\n .replace(/\\*([^*]+)\\*/g, '$1')\n .replace(/_([^_]+)_/g, '$1')\n .replace(/~~([^~]+)~~/g, '$1')\n .replace(/!?\\[[^\\]]*\\]\\([^)]*\\)/g, ' ')\n .replace(/^\\s*#{1,6}\\s+/gm, '')\n .replace(/^\\s*>\\s?/gm, '')\n .replace(/^\\s*[-*+]\\s+/gm, '')\n .replace(/^\\s*\\d+\\.\\s+/gm, '');\n};\n\nconst collapseWhitespace = (text: string): string =>\n text.replace(/\\s+/g, ' ').trim();\n\nconst stripLettersKeepDigitsAndSymbols = (text: string): string => {\n // Keep digits and non-letter characters, remove all letters (including accents)\n return text.replace(/\\p{L}+/gu, '');\n};\n\n/**\n * Derive the normalized representations of a block used for matching.\n *\n * - `semanticText`: markdown-stripped, lower-cased text (used to detect identical\n * content across versions of the same language).\n * - `anchorText`: only digits/symbols (used to align blocks across languages, as\n * prose differs but structure such as numbers and punctuation is preserved).\n *\n * @param block - The block to normalize.\n * @returns The block enriched with its normalized texts.\n */\nexport const normalizeBlock = (block: Block): NormalizedBlock => {\n const contentWithoutMarkdown = removeMarkdownFormatting(block.content);\n const semanticLowercased = contentWithoutMarkdown.toLowerCase();\n const semanticCollapsed = collapseWhitespace(semanticLowercased);\n\n const anchorOnlySymbols = stripLettersKeepDigitsAndSymbols(block.content);\n const anchorCollapsed = collapseWhitespace(anchorOnlySymbols);\n\n return {\n ...block,\n semanticText: semanticCollapsed,\n anchorText: anchorCollapsed,\n 
};\n};\n"],"mappings":";;;AAEA,MAAM,4BAA4B,SAAyB;AACzD,QAAO,KACJ,QAAQ,sBAAsB,IAAI,CAClC,QAAQ,oBAAoB,KAAK,CACjC,QAAQ,gBAAgB,KAAK,CAC7B,QAAQ,cAAc,KAAK,CAC3B,QAAQ,gBAAgB,KAAK,CAC7B,QAAQ,0BAA0B,IAAI,CACtC,QAAQ,mBAAmB,GAAG,CAC9B,QAAQ,cAAc,GAAG,CACzB,QAAQ,kBAAkB,GAAG,CAC7B,QAAQ,kBAAkB,GAAG;;AAGlC,MAAM,sBAAsB,SAC1B,KAAK,QAAQ,QAAQ,IAAI,CAAC,MAAM;AAElC,MAAM,oCAAoC,SAAyB;AAEjE,QAAO,KAAK,QAAQ,YAAY,GAAG;;;;;;;;;;;;;AAcrC,MAAa,kBAAkB,UAAkC;CAG/D,MAAM,oBAAoB,mBAFK,yBAAyB,MAAM,QACb,CAAC,aACa,CAAC;CAGhE,MAAM,kBAAkB,mBADE,iCAAiC,MAAM,QACL,CAAC;AAE7D,QAAO;EACL,GAAG;EACH,cAAc;EACd,YAAY;EACb"}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
+
const require_docReview_normalizeBlock = require('./normalizeBlock.cjs');
|
|
3
|
+
const require_docReview_fingerprintBlock = require('./fingerprintBlock.cjs');
|
|
4
|
+
const require_docReview_mapChangedLinesToBlocks = require('./mapChangedLinesToBlocks.cjs');
|
|
5
|
+
const require_docReview_alignBlocks = require('./alignBlocks.cjs');
|
|
6
|
+
const require_docReview_rebuildDocument = require('./rebuildDocument.cjs');
|
|
7
|
+
const require_docReview_segmentDocument = require('./segmentDocument.cjs');
|
|
8
|
+
|
|
9
|
+
//#region src/docReview/pipeline.ts
|
|
10
|
+
/**
 * Normalize every block, then fingerprint each one together with its
 * immediate neighbours in the normalized list.
 *
 * @param blocks - Already-segmented blocks, in document order.
 * @returns One fingerprinted block per input block, in the same order.
 */
const fingerprintBlockList = (blocks) => {
	const normalized = blocks.map(require_docReview_normalizeBlock.normalizeBlock);
	return normalized.map((current, position, all) => {
		// The first block has no predecessor and the last has no successor: pass null.
		const previous = all[position - 1] ?? null;
		const following = all[position + 1] ?? null;
		return require_docReview_fingerprintBlock.fingerprintBlock(current, previous, following);
	});
};
|
|
14
|
+
/**
 * Segment a section's own content into finer blocks and fingerprint them.
 *
 * The segmenter runs on the section text alone, so the line numbers it returns
 * are relative to the section. They are shifted by `section.lineStart - 1` so
 * the resulting blocks carry positions expressed in the same coordinates as
 * the section itself.
 *
 * @param section - The section block to re-segment (reads `content` and `lineStart`).
 * @returns The fingerprinted fine blocks with shifted `lineStart` / `lineEnd`.
 */
const fingerprintSectionFineBlocks = (section) => {
	const lineOffset = section.lineStart - 1;
	// Copy the block, overriding only its two line coordinates.
	const toAbsoluteLines = (block) => ({
		...block,
		lineStart: block.lineStart + lineOffset,
		lineEnd: block.lineEnd + lineOffset
	});
	const relativeBlocks = require_docReview_segmentDocument.segmentDocument(section.content);
	const absoluteBlocks = relativeBlocks.map((block) => toAbsoluteLines(block));
	return fingerprintBlockList(absoluteBlocks);
};
|
|
27
|
+
/**
 * Build a two-level alignment plan between a base document and its translation.
 *
 * Level 1 - sections: both texts are segmented into sections, fingerprinted and
 * aligned. Each aligned pair becomes exactly one of:
 * - `delete`     - the section exists only in the target. It is only reported
 *                  here; {@link mergeReviewedSegments} keeps such a block
 *                  verbatim unless the caller resolves it explicitly.
 * - `insert_new` - the section exists only in the base.
 * - `reuse`      - the section exists on both sides and no changed line maps to it.
 *
 * Level 2 - fine blocks: a section that exists on both sides AND is touched by
 * a changed line is re-segmented into fine blocks, which are aligned again:
 * - a target-only fine block is emitted as `reuse` with `baseIndex: -1`
 *   (kept rather than deleted),
 * - a base-only fine block is emitted as `insert_new`,
 * - a fine block present on both sides is `review` when a changed line maps to
 *   it, `reuse` otherwise.
 *
 * The returned `baseBlocks` / `targetBlocks` are flattened lists built while
 * walking the alignment; every index stored in an action refers to those
 * lists, not to the original section lists.
 *
 * @param input - `baseText`, `targetText` and the optional `changedLines`
 *   (anything that is not an array is treated as "no changed lines").
 * @returns The flattened blocks, the plan and the segments that need review.
 */
const buildAlignmentPlan = ({ baseText, targetText, changedLines }) => {
	const changedLineNumbers = Array.isArray(changedLines) ? changedLines : [];
	const baseSections = fingerprintBlockList(require_docReview_segmentDocument.segmentSections(baseText));
	const targetSections = fingerprintBlockList(require_docReview_segmentDocument.segmentSections(targetText));
	const sectionAlignment = require_docReview_alignBlocks.alignBaseAndTargetBlocks(baseSections, targetSections);
	const changedSectionIndexes = require_docReview_mapChangedLinesToBlocks.mapChangedLinesToBlocks(baseSections, changedLineNumbers);

	const baseBlocks = [];
	const targetBlocks = [];
	const actions = [];

	// Append a block to a flattened list and return the index it now occupies.
	const addBase = (block) => {
		baseBlocks.push(block);
		return baseBlocks.length - 1;
	};
	const addTarget = (block) => {
		targetBlocks.push(block);
		return targetBlocks.length - 1;
	};

	for (const { baseIndex: sectionBaseIndex, targetIndex: sectionTargetIndex } of sectionAlignment) {
		// No target counterpart: a real base section is brand new, anything else is ignored.
		if (sectionTargetIndex === null) {
			if (sectionBaseIndex >= 0) {
				actions.push({
					kind: "insert_new",
					baseIndex: addBase(baseSections[sectionBaseIndex])
				});
			}
			continue;
		}

		// Target-only section: reported as `delete`.
		if (sectionBaseIndex === -1) {
			actions.push({
				kind: "delete",
				targetIndex: addTarget(targetSections[sectionTargetIndex])
			});
			continue;
		}

		// Any other negative base index is not a usable pair.
		if (sectionBaseIndex < 0) continue;

		const baseSection = baseSections[sectionBaseIndex];
		const targetSection = targetSections[sectionTargetIndex];

		// Untouched section: keep the existing translation as a single block.
		if (!changedSectionIndexes.has(sectionBaseIndex)) {
			actions.push({
				kind: "reuse",
				baseIndex: addBase(baseSection),
				targetIndex: addTarget(targetSection)
			});
			continue;
		}

		// Touched section: drop down to fine blocks and repeat the alignment.
		const baseFineBlocks = fingerprintSectionFineBlocks(baseSection);
		const targetFineBlocks = fingerprintSectionFineBlocks(targetSection);
		const fineAlignment = require_docReview_alignBlocks.alignBaseAndTargetBlocks(baseFineBlocks, targetFineBlocks);
		const changedFineIndexes = require_docReview_mapChangedLinesToBlocks.mapChangedLinesToBlocks(baseFineBlocks, changedLineNumbers);

		for (const { baseIndex: fineBaseIndex, targetIndex: fineTargetIndex } of fineAlignment) {
			// Base-only fine block: new content inside the section.
			if (fineTargetIndex === null) {
				if (fineBaseIndex >= 0) {
					actions.push({
						kind: "insert_new",
						baseIndex: addBase(baseFineBlocks[fineBaseIndex])
					});
				}
				continue;
			}

			// Target-only fine block: kept as-is (reuse), never deleted at this level.
			if (fineBaseIndex === -1) {
				actions.push({
					kind: "reuse",
					baseIndex: -1,
					targetIndex: addTarget(targetFineBlocks[fineTargetIndex])
				});
				continue;
			}

			if (fineBaseIndex < 0) continue;

			// Present on both sides: review only when a changed line maps to the base block.
			const baseIndex = addBase(baseFineBlocks[fineBaseIndex]);
			const targetIndex = addTarget(targetFineBlocks[fineTargetIndex]);
			const kind = changedFineIndexes.has(fineBaseIndex) ? "review" : "reuse";
			actions.push({
				kind,
				baseIndex,
				targetIndex
			});
		}
	}

	const plan = { actions };
	const { segmentsToReview } = require_docReview_rebuildDocument.identifySegmentsToReview({
		baseBlocks,
		targetBlocks,
		plan
	});
	return {
		baseBlocks,
		targetBlocks,
		plan,
		segmentsToReview
	};
};
|
|
140
|
+
|
|
141
|
+
//#endregion
|
|
142
|
+
exports.buildAlignmentPlan = buildAlignmentPlan;
|
|
143
|
+
exports.mergeReviewedSegments = require_docReview_rebuildDocument.mergeReviewedSegments;
|
|
144
|
+
//# sourceMappingURL=pipeline.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.cjs","names":["normalizeBlock","fingerprintBlock","segmentDocument","segmentSections","alignBaseAndTargetBlocks","mapChangedLinesToBlocks","identifySegmentsToReview"],"sources":["../../../src/docReview/pipeline.ts"],"sourcesContent":["import { alignBaseAndTargetBlocks } from './alignBlocks';\nimport { fingerprintBlock } from './fingerprintBlock';\nimport { mapChangedLinesToBlocks } from './mapChangedLinesToBlocks';\nimport { normalizeBlock } from './normalizeBlock';\nimport {\n identifySegmentsToReview,\n mergeReviewedSegments,\n type SegmentToReview,\n} from './rebuildDocument';\nimport { segmentDocument, segmentSections } from './segmentDocument';\nimport type { Block, FingerprintedBlock, PlannedAction } from './types';\n\nexport type BuildAlignmentPlanInput = {\n /** The base (source) document, used as the translation reference. */\n baseText: string;\n /** The existing target (translated) document, possibly empty. */\n targetText: string;\n /** 1-based line numbers that changed in the base document, when known. */\n changedLines: number[] | undefined;\n};\n\nexport type BuildAlignmentPlanOutput = {\n baseBlocks: FingerprintedBlock[];\n targetBlocks: FingerprintedBlock[];\n plan: AlignmentPlan;\n segmentsToReview: SegmentToReview[];\n};\n\n/** Fingerprint a list of already-segmented blocks (context = neighbours). */\nconst fingerprintBlockList = (blocks: Block[]): FingerprintedBlock[] => {\n const normalized = blocks.map(normalizeBlock);\n\n return normalized.map((block, index, array) =>\n fingerprintBlock(block, array[index - 1] ?? null, array[index + 1] ?? 
null)\n );\n};\n\n/**\n * Re-segment a section's own text into fine blocks, shifting their relative line\n * numbers back to absolute document positions so changed-line mapping and the\n * review report keep reporting real file coordinates.\n */\nconst fingerprintSectionFineBlocks = (section: Block): FingerprintedBlock[] => {\n const lineOffset = section.lineStart - 1;\n\n const fineBlocks = segmentDocument(section.content).map(\n (block): Block => ({\n ...block,\n lineStart: block.lineStart + lineOffset,\n lineEnd: block.lineEnd + lineOffset,\n })\n );\n\n return fingerprintBlockList(fineBlocks);\n};\n\n/**\n * Build the block-aware alignment plan between a base document and its\n * translation, in two levels.\n *\n * 1. **Sections** (heading-anchored) are aligned first. Because a document and\n * its translation share the same heading structure, this alignment is robust\n * and never drops a section just because the prose was split into a different\n * number of paragraphs.\n * 2. Only the sections **touched by a changed line** are then re-segmented into\n * fine blocks (paragraphs, code fences) and aligned within the section, so a\n * small edit re-translates only the affected paragraph(s) instead of the\n * whole section. 
Within a changed section a target block with no base\n * counterpart is **kept as-is** (reused) rather than deleted, so a translation\n * that has extra paragraphs never loses content.\n *\n * Section-level insertions and deletions stay whole: a brand-new section is\n * translated as one unit, and a target section with no base counterpart is\n * reported as `delete` for visibility but never dropped by the merge (see\n * {@link mergeReviewedSegments}), so a review can never lose translated content.\n *\n * @param input - The base/target texts and optional changed lines.\n * @returns The (flattened) blocks, the plan, and the segments that need translation.\n */\nexport const buildAlignmentPlan = ({\n baseText,\n targetText,\n changedLines,\n}: BuildAlignmentPlanInput): BuildAlignmentPlanOutput => {\n const changedLineNumbers = Array.isArray(changedLines) ? changedLines : [];\n\n const baseSections = fingerprintBlockList(segmentSections(baseText));\n const targetSections = fingerprintBlockList(segmentSections(targetText));\n\n const sectionAlignment = alignBaseAndTargetBlocks(\n baseSections,\n targetSections\n );\n const changedSectionIndexes = mapChangedLinesToBlocks(\n baseSections,\n changedLineNumbers\n );\n\n // Flattened blocks the plan refers to by index. 
Reused/deleted/inserted\n // sections contribute their whole-section block; changed sections contribute\n // their fine sub-blocks.\n const baseBlocks: FingerprintedBlock[] = [];\n const targetBlocks: FingerprintedBlock[] = [];\n const actions: PlannedAction[] = [];\n\n const pushBaseBlock = (block: FingerprintedBlock): number =>\n baseBlocks.push(block) - 1;\n const pushTargetBlock = (block: FingerprintedBlock): number =>\n targetBlocks.push(block) - 1;\n\n for (const pair of sectionAlignment) {\n // Section present only in the target → reported as `delete` for visibility,\n // but kept verbatim by the merge (the aligner may simply have failed to\n // follow a reordered section), never silently dropped.\n if (pair.baseIndex === -1 && pair.targetIndex !== null) {\n const targetIndex = pushTargetBlock(targetSections[pair.targetIndex]!);\n actions.push({ kind: 'delete', targetIndex });\n continue;\n }\n\n // Section present only in the base → brand new, translate as one unit.\n if (pair.baseIndex >= 0 && pair.targetIndex === null) {\n const baseIndex = pushBaseBlock(baseSections[pair.baseIndex]!);\n actions.push({ kind: 'insert_new', baseIndex });\n continue;\n }\n\n if (pair.baseIndex < 0 || pair.targetIndex === null) continue;\n\n const baseSection = baseSections[pair.baseIndex]!;\n const targetSection = targetSections[pair.targetIndex]!;\n\n // Unchanged section → reuse the existing translation verbatim.\n if (!changedSectionIndexes.has(pair.baseIndex)) {\n const baseIndex = pushBaseBlock(baseSection);\n const targetIndex = pushTargetBlock(targetSection);\n actions.push({ kind: 'reuse', baseIndex, targetIndex });\n continue;\n }\n\n // Changed section → align its fine blocks and review only what changed.\n const baseFineBlocks = fingerprintSectionFineBlocks(baseSection);\n const targetFineBlocks = fingerprintSectionFineBlocks(targetSection);\n const fineAlignment = alignBaseAndTargetBlocks(\n baseFineBlocks,\n targetFineBlocks\n );\n const changedFineIndexes = 
mapChangedLinesToBlocks(\n baseFineBlocks,\n changedLineNumbers\n );\n\n for (const finePair of fineAlignment) {\n // Target-only fine block: keep it (no data loss), do not delete.\n if (finePair.baseIndex === -1 && finePair.targetIndex !== null) {\n const targetIndex = pushTargetBlock(\n targetFineBlocks[finePair.targetIndex]!\n );\n actions.push({ kind: 'reuse', baseIndex: -1, targetIndex });\n continue;\n }\n\n // Base-only fine block: a new paragraph inside the section, translate it.\n if (finePair.baseIndex >= 0 && finePair.targetIndex === null) {\n const baseIndex = pushBaseBlock(baseFineBlocks[finePair.baseIndex]!);\n actions.push({ kind: 'insert_new', baseIndex });\n continue;\n }\n\n if (finePair.baseIndex < 0 || finePair.targetIndex === null) continue;\n\n const baseIndex = pushBaseBlock(baseFineBlocks[finePair.baseIndex]!);\n const targetIndex = pushTargetBlock(\n targetFineBlocks[finePair.targetIndex]!\n );\n\n actions.push(\n changedFineIndexes.has(finePair.baseIndex)\n ? { kind: 'review', baseIndex, targetIndex }\n : { kind: 'reuse', baseIndex, targetIndex }\n );\n }\n }\n\n const plan = { actions };\n\n const { segmentsToReview } = identifySegmentsToReview({\n baseBlocks,\n targetBlocks,\n plan,\n });\n\n return { baseBlocks, targetBlocks, plan, segmentsToReview };\n};\n\nexport type { SegmentToReview };\nexport { mergeReviewedSegments 
};\n"],"mappings":";;;;;;;;;;AA6BA,MAAM,wBAAwB,WAA0C;AAGtE,QAFmB,OAAO,IAAIA,gDAEb,CAAC,KAAK,OAAO,OAAO,UACnCC,oDAAiB,OAAO,MAAM,QAAQ,MAAM,MAAM,MAAM,QAAQ,MAAM,KAAK,CAC5E;;;;;;;AAQH,MAAM,gCAAgC,YAAyC;CAC7E,MAAM,aAAa,QAAQ,YAAY;AAUvC,QAAO,qBARYC,kDAAgB,QAAQ,QAAQ,CAAC,KACjD,WAAkB;EACjB,GAAG;EACH,WAAW,MAAM,YAAY;EAC7B,SAAS,MAAM,UAAU;EAC1B,EAGmC,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;AA0BzC,MAAa,sBAAsB,EACjC,UACA,YACA,mBACuD;CACvD,MAAM,qBAAqB,MAAM,QAAQ,aAAa,GAAG,eAAe,EAAE;CAE1E,MAAM,eAAe,qBAAqBC,kDAAgB,SAAS,CAAC;CACpE,MAAM,iBAAiB,qBAAqBA,kDAAgB,WAAW,CAAC;CAExE,MAAM,mBAAmBC,uDACvB,cACA,eACD;CACD,MAAM,wBAAwBC,kEAC5B,cACA,mBACD;CAKD,MAAM,aAAmC,EAAE;CAC3C,MAAM,eAAqC,EAAE;CAC7C,MAAM,UAA2B,EAAE;CAEnC,MAAM,iBAAiB,UACrB,WAAW,KAAK,MAAM,GAAG;CAC3B,MAAM,mBAAmB,UACvB,aAAa,KAAK,MAAM,GAAG;AAE7B,MAAK,MAAM,QAAQ,kBAAkB;AAInC,MAAI,KAAK,cAAc,MAAM,KAAK,gBAAgB,MAAM;GACtD,MAAM,cAAc,gBAAgB,eAAe,KAAK,aAAc;AACtE,WAAQ,KAAK;IAAE,MAAM;IAAU;IAAa,CAAC;AAC7C;;AAIF,MAAI,KAAK,aAAa,KAAK,KAAK,gBAAgB,MAAM;GACpD,MAAM,YAAY,cAAc,aAAa,KAAK,WAAY;AAC9D,WAAQ,KAAK;IAAE,MAAM;IAAc;IAAW,CAAC;AAC/C;;AAGF,MAAI,KAAK,YAAY,KAAK,KAAK,gBAAgB,KAAM;EAErD,MAAM,cAAc,aAAa,KAAK;EACtC,MAAM,gBAAgB,eAAe,KAAK;AAG1C,MAAI,CAAC,sBAAsB,IAAI,KAAK,UAAU,EAAE;GAC9C,MAAM,YAAY,cAAc,YAAY;GAC5C,MAAM,cAAc,gBAAgB,cAAc;AAClD,WAAQ,KAAK;IAAE,MAAM;IAAS;IAAW;IAAa,CAAC;AACvD;;EAIF,MAAM,iBAAiB,6BAA6B,YAAY;EAChE,MAAM,mBAAmB,6BAA6B,cAAc;EACpE,MAAM,gBAAgBD,uDACpB,gBACA,iBACD;EACD,MAAM,qBAAqBC,kEACzB,gBACA,mBACD;AAED,OAAK,MAAM,YAAY,eAAe;AAEpC,OAAI,SAAS,cAAc,MAAM,SAAS,gBAAgB,MAAM;IAC9D,MAAM,cAAc,gBAClB,iBAAiB,SAAS,aAC3B;AACD,YAAQ,KAAK;KAAE,MAAM;KAAS,WAAW;KAAI;KAAa,CAAC;AAC3D;;AAIF,OAAI,SAAS,aAAa,KAAK,SAAS,gBAAgB,MAAM;IAC5D,MAAM,YAAY,cAAc,eAAe,SAAS,WAAY;AACpE,YAAQ,KAAK;KAAE,MAAM;KAAc;KAAW,CAAC;AAC/C;;AAGF,OAAI,SAAS,YAAY,KAAK,SAAS,gBAAgB,KAAM;GAE7D,MAAM,YAAY,cAAc,eAAe,SAAS,WAAY;GACpE,MAAM,cAAc,gBAClB,iBAAiB,SAAS,aAC3B;AAED,WAAQ,KACN,mBAAmB,IAAI,SAAS,UAAU,GACtC;IAAE,MAAM;IAAU;IAAW;IAAa,GAC1C;IAAE,MAAM;IAAS;IAAW;IAAa,CAC9C;;;CAIL,MAAM,OAAO,EAAE,SAAS;CAExB,MAAM,EAAE,qBAAqBC,2DAA
yB;EACpD;EACA;EACA;EACD,CAAC;AAEF,QAAO;EAAE;EAAY;EAAc;EAAM;EAAkB"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
+
|
|
3
|
+
//#region src/docReview/planActions.ts
|
|
4
|
+
/**
 * Convert alignment pairs into an ordered list of merge actions.
 *
 * - Target-only pair (`baseIndex === -1`, non-null target): `delete`, emitted
 *   at most once per target index and only if that target index was not
 *   already consumed by an earlier pair.
 * - Base-only pair (`baseIndex >= 0`, null target): `insert_new`.
 * - Pair present on both sides: `review` when the base index is in
 *   `changedBaseBlockIndexes`, `reuse` otherwise.
 * - Anything else produces no action.
 *
 * @param alignment - The alignment pairs, in document order.
 * @param changedBaseBlockIndexes - Set of base block indexes reported as changed.
 * @returns The plan, as `{ actions }`.
 */
const planAlignmentActions = (alignment, changedBaseBlockIndexes) => {
	const actions = [];
	// Target indexes that already produced an action; guards duplicate deletes.
	const handledTargets = /* @__PURE__ */ new Set();

	for (const { baseIndex, targetIndex } of alignment) {
		const hasTarget = targetIndex !== null;

		if (baseIndex === -1 && hasTarget) {
			if (!handledTargets.has(targetIndex)) {
				actions.push({
					kind: "delete",
					targetIndex
				});
				handledTargets.add(targetIndex);
			}
			continue;
		}

		// Every remaining case needs a real base block.
		if (!(baseIndex >= 0)) continue;

		if (!hasTarget) {
			actions.push({
				kind: "insert_new",
				baseIndex
			});
			continue;
		}

		const kind = changedBaseBlockIndexes.has(baseIndex) ? "review" : "reuse";
		actions.push({
			kind,
			baseIndex,
			targetIndex
		});
		handledTargets.add(targetIndex);
	}

	return { actions };
};
|
|
55
|
+
|
|
56
|
+
//#endregion
|
|
57
|
+
exports.planAlignmentActions = planAlignmentActions;
|
|
58
|
+
//# sourceMappingURL=planActions.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"planActions.cjs","names":[],"sources":["../../../src/docReview/planActions.ts"],"sourcesContent":["import type { AlignmentPair, AlignmentPlan, PlannedAction } from './types';\n\n/**\n * Turn raw alignment pairs into an ordered list of merge actions.\n *\n * A block aligned in both documents is reused when its base counterpart is not\n * marked as changed, and reviewed otherwise. Blocks present only in the base are\n * inserted, blocks present only in the target are deleted.\n *\n * @param alignment - The alignment pairs produced by the aligner.\n * @param changedBaseBlockIndexes - Indexes of base blocks reported as changed.\n * @returns The plan of actions to merge the documents.\n */\nexport const planAlignmentActions = (\n alignment: AlignmentPair[],\n changedBaseBlockIndexes: Set<number>\n): AlignmentPlan => {\n const actions: PlannedAction[] = [];\n const seenTarget = new Set<number>();\n\n alignment.forEach((pair) => {\n const baseIndex = pair.baseIndex;\n const targetIndex = pair.targetIndex;\n\n // Case 1: Deletion (Exists in target, not in base)\n if (baseIndex === -1 && targetIndex !== null) {\n if (!seenTarget.has(targetIndex)) {\n actions.push({ kind: 'delete', targetIndex });\n seenTarget.add(targetIndex);\n }\n return;\n }\n\n // Case 2: New Insertion (Exists in base, not in target)\n if (baseIndex >= 0 && targetIndex === null) {\n actions.push({ kind: 'insert_new', baseIndex });\n return;\n }\n\n // Case 3: Alignment (Exists in both)\n if (baseIndex >= 0 && targetIndex !== null) {\n const isChanged = changedBaseBlockIndexes.has(baseIndex);\n\n // If the block is NOT marked as changed, we REUSE it. We assume the existing\n // translation is correct because the source hasn't been touched. 
We ignore\n // 'similarityScore' here because base vs target text will always have low\n // similarity.\n if (!isChanged) {\n actions.push({ kind: 'reuse', baseIndex, targetIndex });\n } else {\n // If the block IS changed, we Review it.\n actions.push({ kind: 'review', baseIndex, targetIndex });\n }\n\n seenTarget.add(targetIndex);\n return;\n }\n });\n\n return { actions };\n};\n"],"mappings":";;;;;;;;;;;;;;AAaA,MAAa,wBACX,WACA,4BACkB;CAClB,MAAM,UAA2B,EAAE;CACnC,MAAM,6BAAa,IAAI,KAAa;AAEpC,WAAU,SAAS,SAAS;EAC1B,MAAM,YAAY,KAAK;EACvB,MAAM,cAAc,KAAK;AAGzB,MAAI,cAAc,MAAM,gBAAgB,MAAM;AAC5C,OAAI,CAAC,WAAW,IAAI,YAAY,EAAE;AAChC,YAAQ,KAAK;KAAE,MAAM;KAAU;KAAa,CAAC;AAC7C,eAAW,IAAI,YAAY;;AAE7B;;AAIF,MAAI,aAAa,KAAK,gBAAgB,MAAM;AAC1C,WAAQ,KAAK;IAAE,MAAM;IAAc;IAAW,CAAC;AAC/C;;AAIF,MAAI,aAAa,KAAK,gBAAgB,MAAM;AAO1C,OAAI,CANc,wBAAwB,IAAI,UAMhC,CACZ,SAAQ,KAAK;IAAE,MAAM;IAAS;IAAW;IAAa,CAAC;OAGvD,SAAQ,KAAK;IAAE,MAAM;IAAU;IAAW;IAAa,CAAC;AAG1D,cAAW,IAAI,YAAY;AAC3B;;GAEF;AAEF,QAAO,EAAE,SAAS"}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
+
|
|
3
|
+
//#region src/docReview/rebuildDocument.ts
|
|
4
|
+
/**
 * Collect the segments of a plan that need to be translated or re-translated.
 *
 * Only `review` and `insert_new` actions yield a segment; every other action
 * kind is skipped. No output text is produced here - see
 * {@link mergeReviewedSegments} for the merge step.
 *
 * @param input - The flattened base/target blocks and the alignment plan.
 * @returns `{ segmentsToReview }`, each entry carrying the base block, the
 *   existing target text (or `null`) and the index of its action in the plan.
 */
const identifySegmentsToReview = ({ baseBlocks, targetBlocks, plan }) => {
	const segmentsToReview = plan.actions.flatMap((action, actionIndex) => {
		switch (action.kind) {
			case "review": {
				const baseBlock = baseBlocks[action.baseIndex];
				// A review may lack an existing translation; report that as null.
				const targetBlockText = action.targetIndex === null ? null : targetBlocks[action.targetIndex].content;
				return [{
					baseBlock,
					targetBlockText,
					actionIndex
				}];
			}
			case "insert_new":
				return [{
					baseBlock: baseBlocks[action.baseIndex],
					targetBlockText: null,
					actionIndex
				}];
			default:
				return [];
		}
	});
	return { segmentsToReview };
};
|
|
34
|
+
/**
 * Rebuild the target document by walking the plan and concatenating one piece
 * of text per action.
 *
 * - `reuse`: the existing target block content.
 * - `review` / `insert_new`: the reviewed text stored under the action index;
 *   when none is stored, a `review` with a target falls back to the existing
 *   target content and anything else falls back to a single newline.
 * - `delete`: when the caller stored a value for the action index, a non-empty
 *   value replaces the block and an empty one removes it; when nothing is
 *   stored, the existing target content is kept verbatim.
 * - any other kind contributes nothing.
 *
 * @param plan - The alignment plan.
 * @param targetBlocks - The flattened target blocks the plan indexes into.
 * @param reviewedSegments - Map of action index to reviewed text.
 * @returns The rebuilt document (pieces joined without a separator).
 */
const mergeReviewedSegments = (plan, targetBlocks, reviewedSegments) => {
	const existingTranslation = (action) => targetBlocks[action.targetIndex].content;

	// Text contributed by a single action; "" stands for "contributes nothing".
	const renderAction = (action, actionIndex) => {
		switch (action.kind) {
			case "reuse":
				return existingTranslation(action);
			case "review":
			case "insert_new": {
				const reviewed = reviewedSegments.get(actionIndex);
				if (reviewed !== void 0) return reviewed;
				const hasExistingTranslation = action.kind === "review" && action.targetIndex !== null;
				return hasExistingTranslation ? existingTranslation(action) : "\n";
			}
			case "delete": {
				const reviewed = reviewedSegments.get(actionIndex);
				// Unresolved delete: keep the block rather than lose content.
				if (reviewed === void 0) return existingTranslation(action);
				return reviewed ? reviewed : "";
			}
			default:
				return "";
		}
	};

	return plan.actions.map((action, actionIndex) => renderAction(action, actionIndex)).join("");
};
|
|
61
|
+
|
|
62
|
+
//#endregion
|
|
63
|
+
exports.identifySegmentsToReview = identifySegmentsToReview;
|
|
64
|
+
exports.mergeReviewedSegments = mergeReviewedSegments;
|
|
65
|
+
//# sourceMappingURL=rebuildDocument.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rebuildDocument.cjs","names":[],"sources":["../../../src/docReview/rebuildDocument.ts"],"sourcesContent":["import type { AlignmentPlan, FingerprintedBlock } from './types';\n\n/**\n * A block that needs to be translated or re-translated by an external consumer\n * (an AI client, a human, or an agent).\n */\nexport type SegmentToReview = {\n /** The base block to translate. */\n baseBlock: FingerprintedBlock;\n /** Existing target translation, or `null` when the block is new. */\n targetBlockText: string | null;\n /** Index of the originating action within {@link AlignmentPlan.actions}. */\n actionIndex: number;\n};\n\nexport type RebuildInput = {\n baseBlocks: FingerprintedBlock[];\n targetBlocks: FingerprintedBlock[];\n plan: AlignmentPlan;\n};\n\nexport type RebuildResult = {\n segmentsToReview: SegmentToReview[];\n};\n\n/**\n * Analyze the alignment plan and return only the segments that need\n * review/translation. Does not generate output text - that is done by\n * {@link mergeReviewedSegments} once the translations are available.\n *\n * @param input - The base/target blocks and the alignment plan.\n * @returns The list of segments that require translation.\n */\nexport const identifySegmentsToReview = ({\n baseBlocks,\n targetBlocks,\n plan,\n}: RebuildInput): RebuildResult => {\n const segmentsToReview: SegmentToReview[] = [];\n\n plan.actions.forEach((action, actionIndex) => {\n if (action.kind === 'review') {\n const baseBlock = baseBlocks[action.baseIndex];\n const targetBlockText =\n action.targetIndex !== null\n ? 
targetBlocks[action.targetIndex].content\n : null;\n\n segmentsToReview.push({ baseBlock, targetBlockText, actionIndex });\n } else if (action.kind === 'insert_new') {\n const baseBlock = baseBlocks[action.baseIndex];\n\n segmentsToReview.push({\n baseBlock,\n targetBlockText: null,\n actionIndex,\n });\n }\n });\n\n return { segmentsToReview };\n};\n\n/**\n * Merge reviewed translations back into the final document following the\n * alignment plan, reusing untouched target blocks as-is.\n *\n * @param plan - The alignment plan.\n * @param targetBlocks - Blocks of the existing target document.\n * @param reviewedSegments - Map of action index to its reviewed translation.\n * @returns The rebuilt target document.\n */\nexport const mergeReviewedSegments = (\n plan: AlignmentPlan,\n targetBlocks: FingerprintedBlock[],\n reviewedSegments: Map<number, string>\n): string => {\n const outputParts: string[] = [];\n\n plan.actions.forEach((action, actionIndex) => {\n if (action.kind === 'reuse') {\n outputParts.push(targetBlocks[action.targetIndex].content);\n } else if (action.kind === 'review' || action.kind === 'insert_new') {\n const reviewedContent = reviewedSegments.get(actionIndex);\n\n if (reviewedContent !== undefined) {\n outputParts.push(reviewedContent);\n } else {\n // Fallback: if review failed, use existing or blank\n if (action.kind === 'review' && action.targetIndex !== null) {\n outputParts.push(targetBlocks[action.targetIndex].content);\n } else {\n outputParts.push('\\n');\n }\n }\n } else if (action.kind === 'delete') {\n const reviewedContent = reviewedSegments.get(actionIndex);\n if (reviewedContent !== undefined) {\n // Caller explicitly resolved this block: empty string = actually delete,\n // non-empty string = replacement content.\n if (reviewedContent) outputParts.push(reviewedContent);\n } else {\n // Default: keep verbatim. 
A target block with no base counterpart may\n // just be a section the aligner could not follow (reordering, split\n // prose) — keeping it prevents accidental data loss in log/read-only mode.\n outputParts.push(targetBlocks[action.targetIndex].content);\n }\n }\n });\n\n return outputParts.join('');\n};\n"],"mappings":";;;;;;;;;;;AAiCA,MAAa,4BAA4B,EACvC,YACA,cACA,WACiC;CACjC,MAAM,mBAAsC,EAAE;AAE9C,MAAK,QAAQ,SAAS,QAAQ,gBAAgB;AAC5C,MAAI,OAAO,SAAS,UAAU;GAC5B,MAAM,YAAY,WAAW,OAAO;GACpC,MAAM,kBACJ,OAAO,gBAAgB,OACnB,aAAa,OAAO,aAAa,UACjC;AAEN,oBAAiB,KAAK;IAAE;IAAW;IAAiB;IAAa,CAAC;aACzD,OAAO,SAAS,cAAc;GACvC,MAAM,YAAY,WAAW,OAAO;AAEpC,oBAAiB,KAAK;IACpB;IACA,iBAAiB;IACjB;IACD,CAAC;;GAEJ;AAEF,QAAO,EAAE,kBAAkB;;;;;;;;;;;AAY7B,MAAa,yBACX,MACA,cACA,qBACW;CACX,MAAM,cAAwB,EAAE;AAEhC,MAAK,QAAQ,SAAS,QAAQ,gBAAgB;AAC5C,MAAI,OAAO,SAAS,QAClB,aAAY,KAAK,aAAa,OAAO,aAAa,QAAQ;WACjD,OAAO,SAAS,YAAY,OAAO,SAAS,cAAc;GACnE,MAAM,kBAAkB,iBAAiB,IAAI,YAAY;AAEzD,OAAI,oBAAoB,OACtB,aAAY,KAAK,gBAAgB;YAG7B,OAAO,SAAS,YAAY,OAAO,gBAAgB,KACrD,aAAY,KAAK,aAAa,OAAO,aAAa,QAAQ;OAE1D,aAAY,KAAK,KAAK;aAGjB,OAAO,SAAS,UAAU;GACnC,MAAM,kBAAkB,iBAAiB,IAAI,YAAY;AACzD,OAAI,oBAAoB,QAGtB;QAAI,gBAAiB,aAAY,KAAK,gBAAgB;SAKtD,aAAY,KAAK,aAAa,OAAO,aAAa,QAAQ;;GAG9D;AAEF,QAAO,YAAY,KAAK,GAAG"}
|