@intlayer/cli 9.0.0-canary.0 → 9.0.0-canary.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/cli.cjs +7 -3
- package/dist/cjs/cli.cjs.map +1 -1
- package/dist/cjs/reviewDoc/reviewDoc.cjs +38 -11
- package/dist/cjs/reviewDoc/reviewDoc.cjs.map +1 -1
- package/dist/cjs/reviewDoc/reviewDocBlockAware.cjs +54 -37
- package/dist/cjs/reviewDoc/reviewDocBlockAware.cjs.map +1 -1
- package/dist/cjs/reviewDoc/reviewDocLog.cjs +48 -0
- package/dist/cjs/reviewDoc/reviewDocLog.cjs.map +1 -0
- package/dist/cjs/utils/formatLineRanges.cjs +44 -0
- package/dist/cjs/utils/formatLineRanges.cjs.map +1 -0
- package/dist/esm/cli.mjs +7 -3
- package/dist/esm/cli.mjs.map +1 -1
- package/dist/esm/reviewDoc/reviewDoc.mjs +38 -11
- package/dist/esm/reviewDoc/reviewDoc.mjs.map +1 -1
- package/dist/esm/reviewDoc/reviewDocBlockAware.mjs +54 -37
- package/dist/esm/reviewDoc/reviewDocBlockAware.mjs.map +1 -1
- package/dist/esm/reviewDoc/reviewDocLog.mjs +46 -0
- package/dist/esm/reviewDoc/reviewDocLog.mjs.map +1 -0
- package/dist/esm/utils/formatLineRanges.mjs +42 -0
- package/dist/esm/utils/formatLineRanges.mjs.map +1 -0
- package/dist/types/reviewDoc/reviewDoc.d.ts +8 -1
- package/dist/types/reviewDoc/reviewDoc.d.ts.map +1 -1
- package/dist/types/reviewDoc/reviewDocBlockAware.d.ts +8 -6
- package/dist/types/reviewDoc/reviewDocBlockAware.d.ts.map +1 -1
- package/dist/types/reviewDoc/reviewDocLog.d.ts +25 -0
- package/dist/types/reviewDoc/reviewDocLog.d.ts.map +1 -0
- package/dist/types/utils/formatLineRanges.d.ts +21 -0
- package/dist/types/utils/formatLineRanges.d.ts.map +1 -0
- package/package.json +13 -13
- package/dist/cjs/translation-alignment/alignBlocks.cjs +0 -68
- package/dist/cjs/translation-alignment/alignBlocks.cjs.map +0 -1
- package/dist/cjs/translation-alignment/computeSimilarity.cjs +0 -26
- package/dist/cjs/translation-alignment/computeSimilarity.cjs.map +0 -1
- package/dist/cjs/translation-alignment/fingerprintBlock.cjs +0 -24
- package/dist/cjs/translation-alignment/fingerprintBlock.cjs.map +0 -1
- package/dist/cjs/translation-alignment/index.cjs +0 -22
- package/dist/cjs/translation-alignment/mapChangedLinesToBlocks.cjs +0 -19
- package/dist/cjs/translation-alignment/mapChangedLinesToBlocks.cjs.map +0 -1
- package/dist/cjs/translation-alignment/normalizeBlock.cjs +0 -23
- package/dist/cjs/translation-alignment/normalizeBlock.cjs.map +0 -1
- package/dist/cjs/translation-alignment/pipeline.cjs +0 -38
- package/dist/cjs/translation-alignment/pipeline.cjs.map +0 -1
- package/dist/cjs/translation-alignment/planActions.cjs +0 -47
- package/dist/cjs/translation-alignment/planActions.cjs.map +0 -1
- package/dist/cjs/translation-alignment/rebuildDocument.cjs +0 -50
- package/dist/cjs/translation-alignment/rebuildDocument.cjs.map +0 -1
- package/dist/cjs/translation-alignment/segmentDocument.cjs +0 -67
- package/dist/cjs/translation-alignment/segmentDocument.cjs.map +0 -1
- package/dist/cjs/translation-alignment/types.cjs +0 -0
- package/dist/esm/translation-alignment/alignBlocks.mjs +0 -67
- package/dist/esm/translation-alignment/alignBlocks.mjs.map +0 -1
- package/dist/esm/translation-alignment/computeSimilarity.mjs +0 -23
- package/dist/esm/translation-alignment/computeSimilarity.mjs.map +0 -1
- package/dist/esm/translation-alignment/fingerprintBlock.mjs +0 -21
- package/dist/esm/translation-alignment/fingerprintBlock.mjs.map +0 -1
- package/dist/esm/translation-alignment/index.mjs +0 -11
- package/dist/esm/translation-alignment/mapChangedLinesToBlocks.mjs +0 -17
- package/dist/esm/translation-alignment/mapChangedLinesToBlocks.mjs.map +0 -1
- package/dist/esm/translation-alignment/normalizeBlock.mjs +0 -21
- package/dist/esm/translation-alignment/normalizeBlock.mjs.map +0 -1
- package/dist/esm/translation-alignment/pipeline.mjs +0 -36
- package/dist/esm/translation-alignment/pipeline.mjs.map +0 -1
- package/dist/esm/translation-alignment/planActions.mjs +0 -45
- package/dist/esm/translation-alignment/planActions.mjs.map +0 -1
- package/dist/esm/translation-alignment/rebuildDocument.mjs +0 -47
- package/dist/esm/translation-alignment/rebuildDocument.mjs.map +0 -1
- package/dist/esm/translation-alignment/segmentDocument.mjs +0 -65
- package/dist/esm/translation-alignment/segmentDocument.mjs.map +0 -1
- package/dist/esm/translation-alignment/types.mjs +0 -0
- package/dist/types/translation-alignment/alignBlocks.d.ts +0 -7
- package/dist/types/translation-alignment/alignBlocks.d.ts.map +0 -1
- package/dist/types/translation-alignment/computeSimilarity.d.ts +0 -6
- package/dist/types/translation-alignment/computeSimilarity.d.ts.map +0 -1
- package/dist/types/translation-alignment/fingerprintBlock.d.ts +0 -7
- package/dist/types/translation-alignment/fingerprintBlock.d.ts.map +0 -1
- package/dist/types/translation-alignment/index.d.ts +0 -11
- package/dist/types/translation-alignment/mapChangedLinesToBlocks.d.ts +0 -7
- package/dist/types/translation-alignment/mapChangedLinesToBlocks.d.ts.map +0 -1
- package/dist/types/translation-alignment/normalizeBlock.d.ts +0 -7
- package/dist/types/translation-alignment/normalizeBlock.d.ts.map +0 -1
- package/dist/types/translation-alignment/pipeline.d.ts +0 -25
- package/dist/types/translation-alignment/pipeline.d.ts.map +0 -1
- package/dist/types/translation-alignment/planActions.d.ts +0 -7
- package/dist/types/translation-alignment/planActions.d.ts.map +0 -1
- package/dist/types/translation-alignment/rebuildDocument.d.ts +0 -32
- package/dist/types/translation-alignment/rebuildDocument.d.ts.map +0 -1
- package/dist/types/translation-alignment/segmentDocument.d.ts +0 -7
- package/dist/types/translation-alignment/segmentDocument.d.ts.map +0 -1
- package/dist/types/translation-alignment/types.d.ts +0 -49
- package/dist/types/translation-alignment/types.d.ts.map +0 -1
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
import { readAsset } from "../_virtual/_utils_asset.mjs";
|
|
2
2
|
import { sanitizeChunk, validateTranslation } from "../translateDoc/validation.mjs";
|
|
3
|
-
import { mergeReviewedSegments } from "../translation-alignment/rebuildDocument.mjs";
|
|
4
|
-
import { buildAlignmentPlan } from "../translation-alignment/pipeline.mjs";
|
|
5
3
|
import { chunkInference } from "../utils/chunkInference.mjs";
|
|
6
4
|
import { fixChunkStartEndChars } from "../utils/fixChunkStartEndChars.mjs";
|
|
7
5
|
import { mkdirSync, writeFileSync } from "node:fs";
|
|
@@ -12,48 +10,69 @@ import { colon, colorize, colorizeNumber, getAppLogger } from "@intlayer/config/
|
|
|
12
10
|
import { getConfiguration } from "@intlayer/config/node";
|
|
13
11
|
import { retryManager } from "@intlayer/config/utils";
|
|
14
12
|
import { readFile } from "node:fs/promises";
|
|
13
|
+
import { buildAlignmentPlan, mergeReviewedSegments } from "@intlayer/chokidar/docReview";
|
|
15
14
|
import { getLocaleName } from "@intlayer/core/localization";
|
|
16
15
|
import { ENGLISH } from "@intlayer/types/locales";
|
|
17
16
|
|
|
18
17
|
//#region src/reviewDoc/reviewDocBlockAware.ts
|
|
19
18
|
/**
|
|
20
19
|
* Review a file using block-aware alignment.
|
|
21
|
-
* This approach:
|
|
22
|
-
* 1. Segments both English and French documents into semantic blocks
|
|
23
|
-
* 2. Aligns blocks using structure (special chars, numbers) and context
|
|
24
|
-
* 3. Detects which blocks changed, were added, or deleted
|
|
25
|
-
* 4. Only sends changed/new blocks to AI for translation
|
|
26
|
-
* 5. Handles reordering automatically
|
|
20
|
+
*
|
|
21
|
+
* 1. Segments both base and target documents into semantic blocks.
|
|
22
|
+
* 2. Aligns blocks using structure (special chars, numbers) and context.
|
|
23
|
+
* 3. Detects which blocks changed, were added, or deleted.
|
|
24
|
+
* 4. Applies deletions immediately without AI.
|
|
25
|
+
* 5. Sends changed/new blocks to AI in bottom-up order (last block first), so
|
|
26
|
+
* line numbers of earlier blocks are not shifted by edits below them.
|
|
27
|
+
* 6. Rewrites the file after each block so progress is persisted incrementally.
|
|
27
28
|
*/
|
|
28
29
|
const reviewFileBlockAware = async (baseFilePath, outputFilePath, locale, baseLocale, aiOptions, configOptions, customInstructions, changedLines, aiClient, aiConfig) => {
|
|
29
30
|
const configuration = getConfiguration(configOptions);
|
|
30
|
-
const applicationLogger = getAppLogger(
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
const applicationLogger = getAppLogger({ log: {
|
|
32
|
+
...configuration.log,
|
|
33
|
+
prefix: ""
|
|
34
|
+
} });
|
|
35
|
+
const baseText = await readFile(baseFilePath, "utf-8");
|
|
36
|
+
const targetText = await readFile(outputFilePath, "utf-8").catch(() => "");
|
|
33
37
|
const basePrompt = readAsset("./prompts/REVIEW_PROMPT.md", "utf-8").replaceAll("{{localeName}}", `${formatLocale(locale, false)}`).replaceAll("{{baseLocaleName}}", `${formatLocale(baseLocale, false)}`).replace("{{applicationContext}}", aiOptions?.applicationContext ?? "-").replace("{{customInstructions}}", customInstructions ?? "-");
|
|
34
38
|
const filePrefix = [colon(`${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}] `, { colSize: 40 }), `→ ${ANSIColors.RESET}`].join("");
|
|
35
39
|
const prefix = [colon(`${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}][${formatLocale(locale)}${ANSIColors.GREY_DARK}] `, { colSize: 40 }), `→ ${ANSIColors.RESET}`].join("");
|
|
36
|
-
const {
|
|
37
|
-
|
|
38
|
-
|
|
40
|
+
const { baseBlocks, targetBlocks, plan, segmentsToReview } = buildAlignmentPlan({
|
|
41
|
+
baseText,
|
|
42
|
+
targetText,
|
|
39
43
|
changedLines
|
|
40
44
|
});
|
|
41
|
-
|
|
42
|
-
applicationLogger(`${filePrefix}
|
|
43
|
-
|
|
44
|
-
|
|
45
|
+
const deleteCount = plan.actions.filter((a) => a.kind === "delete").length;
|
|
46
|
+
applicationLogger(`${filePrefix}Block-aware alignment complete. Total blocks: base=${colorizeNumber(baseBlocks.length)}, target=${colorizeNumber(targetBlocks.length)}`);
|
|
47
|
+
applicationLogger(`${filePrefix}Actions: reuse=${colorizeNumber(plan.actions.filter((a) => a.kind === "reuse").length)}, review=${colorizeNumber(plan.actions.filter((a) => a.kind === "review").length)}, new=${colorizeNumber(plan.actions.filter((a) => a.kind === "insert_new").length)}, delete=${colorizeNumber(deleteCount)}`);
|
|
48
|
+
const reviewedSegmentsMap = /* @__PURE__ */ new Map();
|
|
49
|
+
for (const [actionIndex, action] of plan.actions.entries()) if (action.kind === "delete") reviewedSegmentsMap.set(actionIndex, "");
|
|
50
|
+
const writeCurrentState = () => {
|
|
51
|
+
const output = mergeReviewedSegments(plan, targetBlocks, reviewedSegmentsMap);
|
|
45
52
|
mkdirSync(dirname(outputFilePath), { recursive: true });
|
|
46
|
-
writeFileSync(outputFilePath,
|
|
47
|
-
|
|
53
|
+
writeFileSync(outputFilePath, output);
|
|
54
|
+
};
|
|
55
|
+
if (deleteCount > 0) {
|
|
56
|
+
writeCurrentState();
|
|
57
|
+
applicationLogger(`${filePrefix}${colorizeNumber(deleteCount)} block(s) deleted without AI.`);
|
|
58
|
+
}
|
|
59
|
+
if (segmentsToReview.length === 0) {
|
|
60
|
+
if (deleteCount === 0) {
|
|
61
|
+
applicationLogger(`${filePrefix}No segments need review, reusing existing translation`);
|
|
62
|
+
writeCurrentState();
|
|
63
|
+
}
|
|
64
|
+
applicationLogger(`${colorize("✔", ANSIColors.GREEN)} File ${formatPath(outputFilePath)} updated successfully (no AI changes needed).`);
|
|
48
65
|
return;
|
|
49
66
|
}
|
|
50
|
-
applicationLogger(`${filePrefix}Segments to review: ${colorizeNumber(segmentsToReview.length)}`);
|
|
51
|
-
const
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
const
|
|
67
|
+
applicationLogger(`${filePrefix}Segments to review: ${colorizeNumber(segmentsToReview.length)} (processing bottom-up)`);
|
|
68
|
+
const segmentsBottomUp = segmentsToReview.map((segment, originalIndex) => ({
|
|
69
|
+
segment,
|
|
70
|
+
displayNumber: originalIndex + 1
|
|
71
|
+
})).reverse();
|
|
72
|
+
for (const { segment, displayNumber } of segmentsBottomUp) {
|
|
73
|
+
const baseBlock = segment.baseBlock;
|
|
74
|
+
const getBaseChunkContextPrompt = () => `**BLOCK ${displayNumber} of ${segmentsToReview.length}** is the base block in ${formatLocale(baseLocale, false)} as reference.\n///chunksStart///\n` + baseBlock.content + `///chunksEnd///`;
|
|
75
|
+
const getTargetChunkPrompt = () => `**BLOCK ${displayNumber} of ${segmentsToReview.length}** is the current block to review in ${formatLocale(locale, false)}.\n///chunksStart///\n` + (segment.targetBlockText ?? "") + `///chunksEnd///`;
|
|
57
76
|
const reviewedChunkResult = await retryManager(async () => {
|
|
58
77
|
const result = await chunkInference([
|
|
59
78
|
{
|
|
@@ -66,27 +85,25 @@ const reviewFileBlockAware = async (baseFilePath, outputFilePath, locale, baseLo
|
|
|
66
85
|
},
|
|
67
86
|
{
|
|
68
87
|
role: "system",
|
|
69
|
-
content:
|
|
88
|
+
content: getTargetChunkPrompt()
|
|
70
89
|
},
|
|
71
90
|
{
|
|
72
91
|
role: "system",
|
|
73
|
-
content: `The next user message will be the **BLOCK ${colorizeNumber(
|
|
92
|
+
content: `The next user message will be the **BLOCK ${colorizeNumber(displayNumber)} of ${colorizeNumber(segmentsToReview.length)}** that should be translated in ${getLocaleName(locale, ENGLISH)} (${locale}).`
|
|
74
93
|
}
|
|
75
94
|
], [{
|
|
76
95
|
role: "user",
|
|
77
|
-
content:
|
|
96
|
+
content: baseBlock.content
|
|
78
97
|
}], aiOptions, configuration, aiClient, aiConfig);
|
|
79
|
-
applicationLogger(`${prefix}${colorizeNumber(result.tokenUsed)} tokens used - Block ${colorizeNumber(
|
|
80
|
-
let processedChunk = sanitizeChunk(result?.fileContent,
|
|
81
|
-
processedChunk = fixChunkStartEndChars(processedChunk,
|
|
82
|
-
if (!validateTranslation(
|
|
98
|
+
applicationLogger(`${prefix}${colorizeNumber(result.tokenUsed)} tokens used - Block ${colorizeNumber(displayNumber)} of ${colorizeNumber(segmentsToReview.length)}`);
|
|
99
|
+
let processedChunk = sanitizeChunk(result?.fileContent, baseBlock.content);
|
|
100
|
+
processedChunk = fixChunkStartEndChars(processedChunk, baseBlock.content);
|
|
101
|
+
if (!validateTranslation(baseBlock.content, processedChunk, applicationLogger)) throw new Error("Validation failed for chunk (structure or length mismatch). Retrying...");
|
|
83
102
|
return processedChunk;
|
|
84
103
|
})();
|
|
85
104
|
reviewedSegmentsMap.set(segment.actionIndex, reviewedChunkResult);
|
|
105
|
+
writeCurrentState();
|
|
86
106
|
}
|
|
87
|
-
const finalFrenchOutput = mergeReviewedSegments(plan, frenchBlocks, reviewedSegmentsMap);
|
|
88
|
-
mkdirSync(dirname(outputFilePath), { recursive: true });
|
|
89
|
-
writeFileSync(outputFilePath, finalFrenchOutput);
|
|
90
107
|
applicationLogger(`${colorize("✔", ANSIColors.GREEN)} File ${formatPath(outputFilePath)} created/updated successfully.`);
|
|
91
108
|
};
|
|
92
109
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reviewDocBlockAware.mjs","names":[],"sources":["../../../src/reviewDoc/reviewDocBlockAware.ts"],"sourcesContent":["import { mkdirSync, writeFileSync } from 'node:fs';\nimport { readFile } from 'node:fs/promises';\nimport { dirname } from 'node:path';\nimport { readAsset } from 'utils:asset';\nimport type { AIConfig } from '@intlayer/ai';\nimport type { AIOptions } from '@intlayer/api';\nimport { formatLocale, formatPath } from '@intlayer/chokidar/utils';\nimport * as ANSIColors from '@intlayer/config/colors';\nimport {\n colon,\n colorize,\n colorizeNumber,\n getAppLogger,\n} from '@intlayer/config/logger';\nimport {\n type GetConfigurationOptions,\n getConfiguration,\n} from '@intlayer/config/node';\nimport { retryManager } from '@intlayer/config/utils';\nimport { getLocaleName } from '@intlayer/core/localization';\nimport type { Locale } from '@intlayer/types/allLocales';\nimport { ENGLISH } from '@intlayer/types/locales';\nimport { sanitizeChunk, validateTranslation } from '../translateDoc/validation';\nimport {\n buildAlignmentPlan,\n mergeReviewedSegments,\n} from '../translation-alignment/pipeline';\nimport { chunkInference } from '../utils/chunkInference';\nimport { fixChunkStartEndChars } from '../utils/fixChunkStartEndChars';\nimport type { AIClient } from '../utils/setupAI';\n\n/**\n * Review a file using block-aware alignment.\n * This approach:\n * 1. Segments both English and French documents into semantic blocks\n * 2. Aligns blocks using structure (special chars, numbers) and context\n * 3. Detects which blocks changed, were added, or deleted\n * 4. Only sends changed/new blocks to AI for translation\n * 5. 
Handles reordering automatically\n */\nexport const reviewFileBlockAware = async (\n baseFilePath: string,\n outputFilePath: string,\n locale: Locale,\n baseLocale: Locale,\n aiOptions?: AIOptions,\n configOptions?: GetConfigurationOptions,\n customInstructions?: string,\n changedLines?: number[],\n aiClient?: AIClient,\n aiConfig?: AIConfig\n) => {\n const configuration = getConfiguration(configOptions);\n const applicationLogger = getAppLogger(configuration);\n\n const englishText = await readFile(baseFilePath, 'utf-8');\n const frenchText = await readFile(outputFilePath, 'utf-8').catch(() => '');\n\n const basePrompt = readAsset('./prompts/REVIEW_PROMPT.md', 'utf-8')\n .replaceAll('{{localeName}}', `${formatLocale(locale, false)}`)\n .replaceAll('{{baseLocaleName}}', `${formatLocale(baseLocale, false)}`)\n .replace('{{applicationContext}}', aiOptions?.applicationContext ?? '-')\n .replace('{{customInstructions}}', customInstructions ?? '-');\n\n const filePrefixText = `${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}] `;\n const filePrefix = [\n colon(filePrefixText, { colSize: 40 }),\n `→ ${ANSIColors.RESET}`,\n ].join('');\n const prefixText = `${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}][${formatLocale(locale)}${ANSIColors.GREY_DARK}] `;\n const prefix = [\n colon(prefixText, { colSize: 40 }),\n `→ ${ANSIColors.RESET}`,\n ].join('');\n\n // Build block-aware alignment and plan\n const { englishBlocks, frenchBlocks, plan, segmentsToReview } =\n buildAlignmentPlan({\n englishText,\n frenchText,\n changedLines,\n });\n\n applicationLogger(\n `${filePrefix}Block-aware alignment complete. 
Total blocks: EN=${colorizeNumber(englishBlocks.length)}, FR=${colorizeNumber(frenchBlocks.length)}`\n );\n applicationLogger(\n `${filePrefix}Actions: reuse=${colorizeNumber(plan.actions.filter((a) => a.kind === 'reuse').length)}, review=${colorizeNumber(plan.actions.filter((a) => a.kind === 'review').length)}, new=${colorizeNumber(plan.actions.filter((a) => a.kind === 'insert_new').length)}, delete=${colorizeNumber(plan.actions.filter((a) => a.kind === 'delete').length)}`\n );\n\n if (segmentsToReview.length === 0) {\n applicationLogger(\n `${filePrefix}No segments need review, reusing existing translation`\n );\n mkdirSync(dirname(outputFilePath), { recursive: true });\n writeFileSync(\n outputFilePath,\n mergeReviewedSegments(plan, frenchBlocks, new Map())\n );\n applicationLogger(\n `${colorize('✔', ANSIColors.GREEN)} File ${formatPath(outputFilePath)} updated successfully (no changes needed).`\n );\n return;\n }\n\n applicationLogger(\n `${filePrefix}Segments to review: ${colorizeNumber(segmentsToReview.length)}`\n );\n\n // Review segments that need AI translation\n const reviewedSegmentsMap = new Map<number, string>();\n\n for (const segment of segmentsToReview) {\n const segmentNumber = segmentsToReview.indexOf(segment) + 1;\n const englishBlock = segment.englishBlock;\n\n const getBaseChunkContextPrompt = () =>\n `**BLOCK ${segmentNumber} of ${segmentsToReview.length}** is the base block in ${formatLocale(baseLocale, false)} as reference.\\n` +\n `///chunksStart///\\n` +\n englishBlock.content +\n `///chunksEnd///`;\n\n const getFrenchChunkPrompt = () =>\n `**BLOCK ${segmentNumber} of ${segmentsToReview.length}** is the current block to review in ${formatLocale(locale, false)}.\\n` +\n `///chunksStart///\\n` +\n (segment.frenchBlockText ?? 
'') +\n `///chunksEnd///`;\n\n const reviewedChunkResult = await retryManager(async () => {\n const result = await chunkInference(\n [\n { role: 'system', content: basePrompt },\n { role: 'system', content: getBaseChunkContextPrompt() },\n { role: 'system', content: getFrenchChunkPrompt() },\n {\n role: 'system',\n content: `The next user message will be the **BLOCK ${colorizeNumber(segmentNumber)} of ${colorizeNumber(segmentsToReview.length)}** that should be translated in ${getLocaleName(locale, ENGLISH)} (${locale}).`,\n },\n ],\n [{ role: 'user', content: englishBlock.content }],\n aiOptions,\n configuration,\n aiClient,\n aiConfig\n );\n\n applicationLogger(\n `${prefix}${colorizeNumber(result.tokenUsed)} tokens used - Block ${colorizeNumber(segmentNumber)} of ${colorizeNumber(segmentsToReview.length)}`\n );\n\n // Sanitize artifacts (e.g. Markdown code block wrappers)\n let processedChunk = sanitizeChunk(\n result?.fileContent,\n englishBlock.content\n );\n\n // Fix start/end characters\n processedChunk = fixChunkStartEndChars(\n processedChunk,\n englishBlock.content\n );\n\n // Validate Translation (YAML, Code fences, Length ratio)\n const isValid = validateTranslation(\n englishBlock.content,\n processedChunk,\n applicationLogger\n );\n\n if (!isValid) {\n throw new Error(\n 'Validation failed for chunk (structure or length mismatch). 
Retrying...'\n );\n }\n\n return processedChunk;\n })();\n\n reviewedSegmentsMap.set(segment.actionIndex, reviewedChunkResult);\n }\n\n // Merge reviewed segments back into final document\n const finalFrenchOutput = mergeReviewedSegments(\n plan,\n frenchBlocks,\n reviewedSegmentsMap\n );\n\n mkdirSync(dirname(outputFilePath), { recursive: true });\n writeFileSync(outputFilePath, finalFrenchOutput);\n\n applicationLogger(\n `${colorize('✔', ANSIColors.GREEN)} File ${formatPath(outputFilePath)} created/updated successfully.`\n );\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;AAwCA,MAAa,uBAAuB,OAClC,cACA,gBACA,QACA,YACA,WACA,eACA,oBACA,cACA,UACA,aACG;CACH,MAAM,gBAAgB,iBAAiB,cAAc;CACrD,MAAM,oBAAoB,aAAa,cAAc;CAErD,MAAM,cAAc,MAAM,SAAS,cAAc,QAAQ;CACzD,MAAM,aAAa,MAAM,SAAS,gBAAgB,QAAQ,CAAC,YAAY,GAAG;CAE1E,MAAM,aAAa,UAAU,8BAA8B,QAAQ,CAChE,WAAW,kBAAkB,GAAG,aAAa,QAAQ,MAAM,GAAG,CAC9D,WAAW,sBAAsB,GAAG,aAAa,YAAY,MAAM,GAAG,CACtE,QAAQ,0BAA0B,WAAW,sBAAsB,IAAI,CACvE,QAAQ,0BAA0B,sBAAsB,IAAI;CAG/D,MAAM,aAAa,CACjB,MAAM,GAFkB,WAAW,UAAU,GAAG,WAAW,aAAa,GAAG,WAAW,UAAU,KAE1E,EAAE,SAAS,IAAI,CAAC,EACtC,KAAK,WAAW,QACjB,CAAC,KAAK,GAAG;CAEV,MAAM,SAAS,CACb,MAAM,GAFc,WAAW,UAAU,GAAG,WAAW,aAAa,GAAG,WAAW,UAAU,IAAI,aAAa,OAAO,GAAG,WAAW,UAAU,KAE1H,EAAE,SAAS,IAAI,CAAC,EAClC,KAAK,WAAW,QACjB,CAAC,KAAK,GAAG;CAGV,MAAM,EAAE,eAAe,cAAc,MAAM,qBACzC,mBAAmB;EACjB;EACA;EACA;EACD,CAAC;AAEJ,mBACE,GAAG,WAAW,mDAAmD,eAAe,cAAc,OAAO,CAAC,OAAO,eAAe,aAAa,OAAO,GACjJ;AACD,mBACE,GAAG,WAAW,iBAAiB,eAAe,KAAK,QAAQ,QAAQ,MAAM,EAAE,SAAS,QAAQ,CAAC,OAAO,CAAC,WAAW,eAAe,KAAK,QAAQ,QAAQ,MAAM,EAAE,SAAS,SAAS,CAAC,OAAO,CAAC,QAAQ,eAAe,KAAK,QAAQ,QAAQ,MAAM,EAAE,SAAS,aAAa,CAAC,OAAO,CAAC,WAAW,eAAe,KAAK,QAAQ,QAAQ,MAAM,EAAE,SAAS,SAAS,CAAC,OAAO,GAC5V;AAED,KAAI,iBAAiB,WAAW,GAAG;AACjC,oBACE,GAAG,WAAW,uDACf;AACD,YAAU,QAAQ,eAAe,EAAE,EAAE,WAAW,MAAM,CAAC;AACvD,gBACE,gBACA,sBAAsB,MAAM,8BAAc,IAAI,KAAK,CAAC,CACrD;AACD,oBACE,GAAG,SAAS,KAAK,WAAW,MAAM,CAAC,QAAQ,WAAW,eAAe,CAAC,4CACvE;AACD;;AAGF,mBACE,GAAG,WAAW,sBAAsB,eAAe,iBAAiB,OAAO,GAC5E;CAGD,MAAM,sCAAsB,IAAI,K
AAqB;AAErD,MAAK,MAAM,WAAW,kBAAkB;EACtC,MAAM,gBAAgB,iBAAiB,QAAQ,QAAQ,GAAG;EAC1D,MAAM,eAAe,QAAQ;EAE7B,MAAM,kCACJ,WAAW,cAAc,MAAM,iBAAiB,OAAO,0BAA0B,aAAa,YAAY,MAAM,CAAC,uCAEjH,aAAa,UACb;EAEF,MAAM,6BACJ,WAAW,cAAc,MAAM,iBAAiB,OAAO,uCAAuC,aAAa,QAAQ,MAAM,CAAC,2BAEzH,QAAQ,mBAAmB,MAC5B;EAEF,MAAM,sBAAsB,MAAM,aAAa,YAAY;GACzD,MAAM,SAAS,MAAM,eACnB;IACE;KAAE,MAAM;KAAU,SAAS;KAAY;IACvC;KAAE,MAAM;KAAU,SAAS,2BAA2B;KAAE;IACxD;KAAE,MAAM;KAAU,SAAS,sBAAsB;KAAE;IACnD;KACE,MAAM;KACN,SAAS,6CAA6C,eAAe,cAAc,CAAC,MAAM,eAAe,iBAAiB,OAAO,CAAC,kCAAkC,cAAc,QAAQ,QAAQ,CAAC,IAAI,OAAO;KAC/M;IACF,EACD,CAAC;IAAE,MAAM;IAAQ,SAAS,aAAa;IAAS,CAAC,EACjD,WACA,eACA,UACA,SACD;AAED,qBACE,GAAG,SAAS,eAAe,OAAO,UAAU,CAAC,uBAAuB,eAAe,cAAc,CAAC,MAAM,eAAe,iBAAiB,OAAO,GAChJ;GAGD,IAAI,iBAAiB,cACnB,QAAQ,aACR,aAAa,QACd;AAGD,oBAAiB,sBACf,gBACA,aAAa,QACd;AASD,OAAI,CANY,oBACd,aAAa,SACb,gBACA,kBAGU,CACV,OAAM,IAAI,MACR,0EACD;AAGH,UAAO;IACP,EAAE;AAEJ,sBAAoB,IAAI,QAAQ,aAAa,oBAAoB;;CAInE,MAAM,oBAAoB,sBACxB,MACA,cACA,oBACD;AAED,WAAU,QAAQ,eAAe,EAAE,EAAE,WAAW,MAAM,CAAC;AACvD,eAAc,gBAAgB,kBAAkB;AAEhD,mBACE,GAAG,SAAS,KAAK,WAAW,MAAM,CAAC,QAAQ,WAAW,eAAe,CAAC,gCACvE"}
|
|
1
|
+
{"version":3,"file":"reviewDocBlockAware.mjs","names":[],"sources":["../../../src/reviewDoc/reviewDocBlockAware.ts"],"sourcesContent":["import { mkdirSync, writeFileSync } from 'node:fs';\nimport { readFile } from 'node:fs/promises';\nimport { dirname } from 'node:path';\nimport { readAsset } from 'utils:asset';\nimport type { AIConfig } from '@intlayer/ai';\nimport type { AIOptions } from '@intlayer/api';\nimport {\n buildAlignmentPlan,\n mergeReviewedSegments,\n} from '@intlayer/chokidar/docReview';\nimport { formatLocale, formatPath } from '@intlayer/chokidar/utils';\nimport * as ANSIColors from '@intlayer/config/colors';\nimport {\n colon,\n colorize,\n colorizeNumber,\n getAppLogger,\n} from '@intlayer/config/logger';\nimport {\n type GetConfigurationOptions,\n getConfiguration,\n} from '@intlayer/config/node';\nimport { retryManager } from '@intlayer/config/utils';\nimport { getLocaleName } from '@intlayer/core/localization';\nimport type { Locale } from '@intlayer/types/allLocales';\nimport { ENGLISH } from '@intlayer/types/locales';\nimport { sanitizeChunk, validateTranslation } from '../translateDoc/validation';\nimport { chunkInference } from '../utils/chunkInference';\nimport { fixChunkStartEndChars } from '../utils/fixChunkStartEndChars';\nimport type { AIClient } from '../utils/setupAI';\n\n/**\n * Review a file using block-aware alignment.\n *\n * 1. Segments both base and target documents into semantic blocks.\n * 2. Aligns blocks using structure (special chars, numbers) and context.\n * 3. Detects which blocks changed, were added, or deleted.\n * 4. Applies deletions immediately without AI.\n * 5. Sends changed/new blocks to AI in bottom-up order (last block first), so\n * line numbers of earlier blocks are not shifted by edits below them.\n * 6. 
Rewrites the file after each block so progress is persisted incrementally.\n */\nexport const reviewFileBlockAware = async (\n baseFilePath: string,\n outputFilePath: string,\n locale: Locale,\n baseLocale: Locale,\n aiOptions?: AIOptions,\n configOptions?: GetConfigurationOptions,\n customInstructions?: string,\n changedLines?: number[],\n aiClient?: AIClient,\n aiConfig?: AIConfig\n) => {\n const configuration = getConfiguration(configOptions);\n const applicationLogger = getAppLogger({\n log: { ...configuration.log, prefix: '' },\n });\n\n const baseText = await readFile(baseFilePath, 'utf-8');\n const targetText = await readFile(outputFilePath, 'utf-8').catch(() => '');\n\n const basePrompt = readAsset('./prompts/REVIEW_PROMPT.md', 'utf-8')\n .replaceAll('{{localeName}}', `${formatLocale(locale, false)}`)\n .replaceAll('{{baseLocaleName}}', `${formatLocale(baseLocale, false)}`)\n .replace('{{applicationContext}}', aiOptions?.applicationContext ?? '-')\n .replace('{{customInstructions}}', customInstructions ?? '-');\n\n const filePrefixText = `${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}] `;\n const filePrefix = [\n colon(filePrefixText, { colSize: 40 }),\n `→ ${ANSIColors.RESET}`,\n ].join('');\n const prefixText = `${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}][${formatLocale(locale)}${ANSIColors.GREY_DARK}] `;\n const prefix = [\n colon(prefixText, { colSize: 40 }),\n `→ ${ANSIColors.RESET}`,\n ].join('');\n\n // Build block-aware alignment and plan\n const { baseBlocks, targetBlocks, plan, segmentsToReview } =\n buildAlignmentPlan({\n baseText,\n targetText,\n changedLines,\n });\n\n const deleteCount = plan.actions.filter((a) => a.kind === 'delete').length;\n\n applicationLogger(\n `${filePrefix}Block-aware alignment complete. 
Total blocks: base=${colorizeNumber(baseBlocks.length)}, target=${colorizeNumber(targetBlocks.length)}`\n );\n applicationLogger(\n `${filePrefix}Actions: reuse=${colorizeNumber(plan.actions.filter((a) => a.kind === 'reuse').length)}, review=${colorizeNumber(plan.actions.filter((a) => a.kind === 'review').length)}, new=${colorizeNumber(plan.actions.filter((a) => a.kind === 'insert_new').length)}, delete=${colorizeNumber(deleteCount)}`\n );\n\n // Map shared across the entire run: each entry overrides the default behavior\n // of mergeReviewedSegments for that action index.\n const reviewedSegmentsMap = new Map<number, string>();\n\n // --- Step 1: apply deletions immediately (no AI needed) ---\n for (const [actionIndex, action] of plan.actions.entries()) {\n if (action.kind === 'delete') {\n reviewedSegmentsMap.set(actionIndex, '');\n }\n }\n\n const writeCurrentState = (): void => {\n const output = mergeReviewedSegments(\n plan,\n targetBlocks,\n reviewedSegmentsMap\n );\n mkdirSync(dirname(outputFilePath), { recursive: true });\n writeFileSync(outputFilePath, output);\n };\n\n if (deleteCount > 0) {\n writeCurrentState();\n applicationLogger(\n `${filePrefix}${colorizeNumber(deleteCount)} block(s) deleted without AI.`\n );\n }\n\n if (segmentsToReview.length === 0) {\n if (deleteCount === 0) {\n applicationLogger(\n `${filePrefix}No segments need review, reusing existing translation`\n );\n writeCurrentState();\n }\n applicationLogger(\n `${colorize('✔', ANSIColors.GREEN)} File ${formatPath(outputFilePath)} updated successfully (no AI changes needed).`\n );\n return;\n }\n\n applicationLogger(\n `${filePrefix}Segments to review: ${colorizeNumber(segmentsToReview.length)} (processing bottom-up)`\n );\n\n // --- Step 2: process AI segments in bottom-up order ---\n // Reversing ensures edits near the end of the file don't shift line numbers\n // that matter for blocks higher up, and each intermediate file write is valid.\n const segmentsBottomUp = 
segmentsToReview\n .map((segment, originalIndex) => ({\n segment,\n displayNumber: originalIndex + 1,\n }))\n .reverse();\n\n for (const { segment, displayNumber } of segmentsBottomUp) {\n const baseBlock = segment.baseBlock;\n\n const getBaseChunkContextPrompt = () =>\n `**BLOCK ${displayNumber} of ${segmentsToReview.length}** is the base block in ${formatLocale(baseLocale, false)} as reference.\\n` +\n `///chunksStart///\\n` +\n baseBlock.content +\n `///chunksEnd///`;\n\n const getTargetChunkPrompt = () =>\n `**BLOCK ${displayNumber} of ${segmentsToReview.length}** is the current block to review in ${formatLocale(locale, false)}.\\n` +\n `///chunksStart///\\n` +\n (segment.targetBlockText ?? '') +\n `///chunksEnd///`;\n\n const reviewedChunkResult = await retryManager(async () => {\n const result = await chunkInference(\n [\n { role: 'system', content: basePrompt },\n { role: 'system', content: getBaseChunkContextPrompt() },\n { role: 'system', content: getTargetChunkPrompt() },\n {\n role: 'system',\n content: `The next user message will be the **BLOCK ${colorizeNumber(displayNumber)} of ${colorizeNumber(segmentsToReview.length)}** that should be translated in ${getLocaleName(locale, ENGLISH)} (${locale}).`,\n },\n ],\n [{ role: 'user', content: baseBlock.content }],\n aiOptions,\n configuration,\n aiClient,\n aiConfig\n );\n\n applicationLogger(\n `${prefix}${colorizeNumber(result.tokenUsed)} tokens used - Block ${colorizeNumber(displayNumber)} of ${colorizeNumber(segmentsToReview.length)}`\n );\n\n let processedChunk = sanitizeChunk(\n result?.fileContent,\n baseBlock.content\n );\n processedChunk = fixChunkStartEndChars(processedChunk, baseBlock.content);\n\n const isValid = validateTranslation(\n baseBlock.content,\n processedChunk,\n applicationLogger\n );\n\n if (!isValid) {\n throw new Error(\n 'Validation failed for chunk (structure or length mismatch). 
Retrying...'\n );\n }\n\n return processedChunk;\n })();\n\n reviewedSegmentsMap.set(segment.actionIndex, reviewedChunkResult);\n\n // Rewrite the file after every block so progress is never lost.\n writeCurrentState();\n }\n\n applicationLogger(\n `${colorize('✔', ANSIColors.GREEN)} File ${formatPath(outputFilePath)} created/updated successfully.`\n );\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AA0CA,MAAa,uBAAuB,OAClC,cACA,gBACA,QACA,YACA,WACA,eACA,oBACA,cACA,UACA,aACG;CACH,MAAM,gBAAgB,iBAAiB,cAAc;CACrD,MAAM,oBAAoB,aAAa,EACrC,KAAK;EAAE,GAAG,cAAc;EAAK,QAAQ;EAAI,EAC1C,CAAC;CAEF,MAAM,WAAW,MAAM,SAAS,cAAc,QAAQ;CACtD,MAAM,aAAa,MAAM,SAAS,gBAAgB,QAAQ,CAAC,YAAY,GAAG;CAE1E,MAAM,aAAa,UAAU,8BAA8B,QAAQ,CAChE,WAAW,kBAAkB,GAAG,aAAa,QAAQ,MAAM,GAAG,CAC9D,WAAW,sBAAsB,GAAG,aAAa,YAAY,MAAM,GAAG,CACtE,QAAQ,0BAA0B,WAAW,sBAAsB,IAAI,CACvE,QAAQ,0BAA0B,sBAAsB,IAAI;CAG/D,MAAM,aAAa,CACjB,MAAM,GAFkB,WAAW,UAAU,GAAG,WAAW,aAAa,GAAG,WAAW,UAAU,KAE1E,EAAE,SAAS,IAAI,CAAC,EACtC,KAAK,WAAW,QACjB,CAAC,KAAK,GAAG;CAEV,MAAM,SAAS,CACb,MAAM,GAFc,WAAW,UAAU,GAAG,WAAW,aAAa,GAAG,WAAW,UAAU,IAAI,aAAa,OAAO,GAAG,WAAW,UAAU,KAE1H,EAAE,SAAS,IAAI,CAAC,EAClC,KAAK,WAAW,QACjB,CAAC,KAAK,GAAG;CAGV,MAAM,EAAE,YAAY,cAAc,MAAM,qBACtC,mBAAmB;EACjB;EACA;EACA;EACD,CAAC;CAEJ,MAAM,cAAc,KAAK,QAAQ,QAAQ,MAAM,EAAE,SAAS,SAAS,CAAC;AAEpE,mBACE,GAAG,WAAW,qDAAqD,eAAe,WAAW,OAAO,CAAC,WAAW,eAAe,aAAa,OAAO,GACpJ;AACD,mBACE,GAAG,WAAW,iBAAiB,eAAe,KAAK,QAAQ,QAAQ,MAAM,EAAE,SAAS,QAAQ,CAAC,OAAO,CAAC,WAAW,eAAe,KAAK,QAAQ,QAAQ,MAAM,EAAE,SAAS,SAAS,CAAC,OAAO,CAAC,QAAQ,eAAe,KAAK,QAAQ,QAAQ,MAAM,EAAE,SAAS,aAAa,CAAC,OAAO,CAAC,WAAW,eAAe,YAAY,GACjT;CAID,MAAM,sCAAsB,IAAI,KAAqB;AAGrD,MAAK,MAAM,CAAC,aAAa,WAAW,KAAK,QAAQ,SAAS,CACxD,KAAI,OAAO,SAAS,SAClB,qBAAoB,IAAI,aAAa,GAAG;CAI5C,MAAM,0BAAgC;EACpC,MAAM,SAAS,sBACb,MACA,cACA,oBACD;AACD,YAAU,QAAQ,eAAe,EAAE,EAAE,WAAW,MAAM,CAAC;AACvD,gBAAc,gBAAgB,OAAO;;AAGvC,KAAI,cAAc,GAAG;AACnB,qBAAmB;AACnB,oBACE,GAAG,aAAa,eAAe,YAAY,CAAC,+BAC7C;;AAGH,KAAI,iBAAiB,WAAW,GAAG;AACjC,MAAI,gBAAgB,GAAG;AACrB,qBACE,GAAG,WAAW,uDACf;AACD,sBAAmB;;AA
ErB,oBACE,GAAG,SAAS,KAAK,WAAW,MAAM,CAAC,QAAQ,WAAW,eAAe,CAAC,+CACvE;AACD;;AAGF,mBACE,GAAG,WAAW,sBAAsB,eAAe,iBAAiB,OAAO,CAAC,yBAC7E;CAKD,MAAM,mBAAmB,iBACtB,KAAK,SAAS,mBAAmB;EAChC;EACA,eAAe,gBAAgB;EAChC,EAAE,CACF,SAAS;AAEZ,MAAK,MAAM,EAAE,SAAS,mBAAmB,kBAAkB;EACzD,MAAM,YAAY,QAAQ;EAE1B,MAAM,kCACJ,WAAW,cAAc,MAAM,iBAAiB,OAAO,0BAA0B,aAAa,YAAY,MAAM,CAAC,uCAEjH,UAAU,UACV;EAEF,MAAM,6BACJ,WAAW,cAAc,MAAM,iBAAiB,OAAO,uCAAuC,aAAa,QAAQ,MAAM,CAAC,2BAEzH,QAAQ,mBAAmB,MAC5B;EAEF,MAAM,sBAAsB,MAAM,aAAa,YAAY;GACzD,MAAM,SAAS,MAAM,eACnB;IACE;KAAE,MAAM;KAAU,SAAS;KAAY;IACvC;KAAE,MAAM;KAAU,SAAS,2BAA2B;KAAE;IACxD;KAAE,MAAM;KAAU,SAAS,sBAAsB;KAAE;IACnD;KACE,MAAM;KACN,SAAS,6CAA6C,eAAe,cAAc,CAAC,MAAM,eAAe,iBAAiB,OAAO,CAAC,kCAAkC,cAAc,QAAQ,QAAQ,CAAC,IAAI,OAAO;KAC/M;IACF,EACD,CAAC;IAAE,MAAM;IAAQ,SAAS,UAAU;IAAS,CAAC,EAC9C,WACA,eACA,UACA,SACD;AAED,qBACE,GAAG,SAAS,eAAe,OAAO,UAAU,CAAC,uBAAuB,eAAe,cAAc,CAAC,MAAM,eAAe,iBAAiB,OAAO,GAChJ;GAED,IAAI,iBAAiB,cACnB,QAAQ,aACR,UAAU,QACX;AACD,oBAAiB,sBAAsB,gBAAgB,UAAU,QAAQ;AAQzE,OAAI,CANY,oBACd,UAAU,SACV,gBACA,kBAGU,CACV,OAAM,IAAI,MACR,0EACD;AAGH,UAAO;IACP,EAAE;AAEJ,sBAAoB,IAAI,QAAQ,aAAa,oBAAoB;AAGjE,qBAAmB;;AAGrB,mBACE,GAAG,SAAS,KAAK,WAAW,MAAM,CAAC,QAAQ,WAAW,eAAe,CAAC,gCACvE"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { existsSync } from "node:fs";
|
|
2
|
+
import { formatLocale, formatPath } from "@intlayer/chokidar/utils";
|
|
3
|
+
import { getAppLogger } from "@intlayer/config/logger";
|
|
4
|
+
import { getConfiguration } from "@intlayer/config/node";
|
|
5
|
+
import { readFile } from "node:fs/promises";
|
|
6
|
+
import { buildReviewReport, formatReviewReport } from "@intlayer/chokidar/docReview";
|
|
7
|
+
|
|
8
|
+
//#region src/reviewDoc/reviewDocLog.ts
|
|
9
|
+
/**
|
|
10
|
+
* Log-only review of a single file/locale pair.
|
|
11
|
+
*
|
|
12
|
+
* Instead of calling an AI to translate the changed blocks, this compares the
|
|
13
|
+
* base document with its translation and logs the blocks that need attention
|
|
14
|
+
* (with their line ranges and content) so another agent or a human can generate
|
|
15
|
+
* the missing translations.
|
|
16
|
+
*
|
|
17
|
+
* @param baseFilePath - Absolute path of the base (source) document.
|
|
18
|
+
* @param outputFilePath - Absolute path of the target (translated) document.
|
|
19
|
+
* @param locale - The target locale being reviewed.
|
|
20
|
+
* @param baseLocale - The base locale used as reference.
|
|
21
|
+
* @param configOptions - Optional Intlayer configuration overrides.
|
|
22
|
+
* @param changedLines - 1-based base line numbers that changed (from git), if any.
|
|
23
|
+
* @returns The structured review report.
|
|
24
|
+
*/
|
|
25
|
+
const logReviewFileBlocks = async (baseFilePath, outputFilePath, locale, baseLocale, configOptions, changedLines) => {
|
|
26
|
+
const appLogger = getAppLogger({ log: {
|
|
27
|
+
...getConfiguration(configOptions).log,
|
|
28
|
+
prefix: ""
|
|
29
|
+
} });
|
|
30
|
+
const report = buildReviewReport({
|
|
31
|
+
baseText: await readFile(baseFilePath, "utf-8"),
|
|
32
|
+
targetText: existsSync(outputFilePath) ? await readFile(outputFilePath, "utf-8").catch(() => "") : "",
|
|
33
|
+
changedLines
|
|
34
|
+
});
|
|
35
|
+
const formatted = formatReviewReport(report, {
|
|
36
|
+
baseLabel: formatLocale(baseLocale),
|
|
37
|
+
targetLabel: formatLocale(locale)
|
|
38
|
+
});
|
|
39
|
+
appLogger(`${formatPath(baseFilePath)} → ${formatLocale(locale)}`);
|
|
40
|
+
for (const line of formatted.split("\n")) appLogger(line);
|
|
41
|
+
return report;
|
|
42
|
+
};
|
|
43
|
+
|
|
44
|
+
//#endregion
|
|
45
|
+
export { logReviewFileBlocks };
|
|
46
|
+
//# sourceMappingURL=reviewDocLog.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reviewDocLog.mjs","names":[],"sources":["../../../src/reviewDoc/reviewDocLog.ts"],"sourcesContent":["import { existsSync } from 'node:fs';\nimport { readFile } from 'node:fs/promises';\nimport {\n buildReviewReport,\n formatReviewReport,\n type ReviewReport,\n} from '@intlayer/chokidar/docReview';\nimport { formatLocale, formatPath } from '@intlayer/chokidar/utils';\nimport { getAppLogger } from '@intlayer/config/logger';\nimport {\n type GetConfigurationOptions,\n getConfiguration,\n} from '@intlayer/config/node';\nimport type { Locale } from '@intlayer/types/allLocales';\n\n/**\n * Log-only review of a single file/locale pair.\n *\n * Instead of calling an AI to translate the changed blocks, this compares the\n * base document with its translation and logs the blocks that need attention\n * (with their line ranges and content) so another agent or a human can generate\n * the missing translations.\n *\n * @param baseFilePath - Absolute path of the base (source) document.\n * @param outputFilePath - Absolute path of the target (translated) document.\n * @param locale - The target locale being reviewed.\n * @param baseLocale - The base locale used as reference.\n * @param configOptions - Optional Intlayer configuration overrides.\n * @param changedLines - 1-based base line numbers that changed (from git), if any.\n * @returns The structured review report.\n */\nexport const logReviewFileBlocks = async (\n baseFilePath: string,\n outputFilePath: string,\n locale: Locale,\n baseLocale: Locale,\n configOptions?: GetConfigurationOptions,\n changedLines?: number[]\n): Promise<ReviewReport> => {\n const configuration = getConfiguration(configOptions);\n const appLogger = getAppLogger({ log: { ...configuration.log, prefix: '' } });\n\n const baseText = await readFile(baseFilePath, 'utf-8');\n const targetText = existsSync(outputFilePath)\n ? 
await readFile(outputFilePath, 'utf-8').catch(() => '')\n : '';\n\n const report = buildReviewReport({ baseText, targetText, changedLines });\n\n const formatted = formatReviewReport(report, {\n baseLabel: formatLocale(baseLocale),\n targetLabel: formatLocale(locale),\n });\n\n appLogger(`${formatPath(baseFilePath)} → ${formatLocale(locale)}`);\n for (const line of formatted.split('\\n')) {\n appLogger(line);\n }\n\n return report;\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;AA+BA,MAAa,sBAAsB,OACjC,cACA,gBACA,QACA,YACA,eACA,iBAC0B;CAE1B,MAAM,YAAY,aAAa,EAAE,KAAK;EAAE,GADlB,iBAAiB,cACiB,CAAC;EAAK,QAAQ;EAAI,EAAE,CAAC;CAO7E,MAAM,SAAS,kBAAkB;EAAE,gBALZ,SAAS,cAAc,QAAQ;EAKT,YAJ1B,WAAW,eAAe,GACzC,MAAM,SAAS,gBAAgB,QAAQ,CAAC,YAAY,GAAG,GACvD;EAEqD;EAAc,CAAC;CAExE,MAAM,YAAY,mBAAmB,QAAQ;EAC3C,WAAW,aAAa,WAAW;EACnC,aAAa,aAAa,OAAO;EAClC,CAAC;AAEF,WAAU,GAAG,WAAW,aAAa,CAAC,KAAK,aAAa,OAAO,GAAG;AAClE,MAAK,MAAM,QAAQ,UAAU,MAAM,KAAK,CACtC,WAAU,KAAK;AAGjB,QAAO"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
//#region src/utils/formatLineRanges.ts
|
|
2
|
+
/**
|
|
3
|
+
* Formats a list of line numbers into a compact, human-readable string where
|
|
4
|
+
* runs of consecutive lines are collapsed into ranges.
|
|
5
|
+
*
|
|
6
|
+
* The input is sorted and de-duplicated first, so callers don't need to
|
|
7
|
+
* pre-process it. A run of a single line is printed as the bare number; a run
|
|
8
|
+
* of two or more consecutive lines is printed as `start-end`.
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* formatLineRanges([2, 3, 4, 5, 333, 412, 413, 414]);
|
|
12
|
+
* // → '2-5, 333, 412-414'
|
|
13
|
+
*
|
|
14
|
+
* @param lineNumbers - The (possibly unsorted, possibly duplicated) line numbers.
|
|
15
|
+
* @param separator - String inserted between groups. Defaults to `', '`.
|
|
16
|
+
* @returns The grouped string, or an empty string when no lines are provided.
|
|
17
|
+
*/
|
|
18
|
+
const formatLineRanges = (lineNumbers, separator = ", ") => {
|
|
19
|
+
const sortedUniqueLines = [...new Set(lineNumbers)].sort((a, b) => a - b);
|
|
20
|
+
if (sortedUniqueLines.length === 0) return "";
|
|
21
|
+
const groups = [];
|
|
22
|
+
let rangeStart = sortedUniqueLines[0];
|
|
23
|
+
let rangeEnd = rangeStart;
|
|
24
|
+
const pushGroup = () => {
|
|
25
|
+
groups.push(rangeStart === rangeEnd ? `${rangeStart}` : `${rangeStart}-${rangeEnd}`);
|
|
26
|
+
};
|
|
27
|
+
for (const lineNumber of sortedUniqueLines.slice(1)) {
|
|
28
|
+
if (lineNumber === rangeEnd + 1) {
|
|
29
|
+
rangeEnd = lineNumber;
|
|
30
|
+
continue;
|
|
31
|
+
}
|
|
32
|
+
pushGroup();
|
|
33
|
+
rangeStart = lineNumber;
|
|
34
|
+
rangeEnd = lineNumber;
|
|
35
|
+
}
|
|
36
|
+
pushGroup();
|
|
37
|
+
return groups.join(separator);
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
//#endregion
|
|
41
|
+
export { formatLineRanges };
|
|
42
|
+
//# sourceMappingURL=formatLineRanges.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"formatLineRanges.mjs","names":[],"sources":["../../../src/utils/formatLineRanges.ts"],"sourcesContent":["/**\n * Formats a list of line numbers into a compact, human-readable string where\n * runs of consecutive lines are collapsed into ranges.\n *\n * The input is sorted and de-duplicated first, so callers don't need to\n * pre-process it. A run of a single line is printed as the bare number; a run\n * of two or more consecutive lines is printed as `start-end`.\n *\n * @example\n * formatLineRanges([2, 3, 4, 5, 333, 412, 413, 414]);\n * // → '2-5, 333, 412-414'\n *\n * @param lineNumbers - The (possibly unsorted, possibly duplicated) line numbers.\n * @param separator - String inserted between groups. Defaults to `', '`.\n * @returns The grouped string, or an empty string when no lines are provided.\n */\nexport const formatLineRanges = (\n lineNumbers: number[],\n separator = ', '\n): string => {\n const sortedUniqueLines = [...new Set(lineNumbers)].sort((a, b) => a - b);\n\n if (sortedUniqueLines.length === 0) return '';\n\n const groups: string[] = [];\n let rangeStart = sortedUniqueLines[0]!;\n let rangeEnd = rangeStart;\n\n const pushGroup = (): void => {\n groups.push(\n rangeStart === rangeEnd ? 
`${rangeStart}` : `${rangeStart}-${rangeEnd}`\n );\n };\n\n for (const lineNumber of sortedUniqueLines.slice(1)) {\n if (lineNumber === rangeEnd + 1) {\n // Still inside the current consecutive run.\n rangeEnd = lineNumber;\n continue;\n }\n\n // Gap detected: close the current run and start a new one.\n pushGroup();\n rangeStart = lineNumber;\n rangeEnd = lineNumber;\n }\n\n pushGroup();\n\n return groups.join(separator);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;AAgBA,MAAa,oBACX,aACA,YAAY,SACD;CACX,MAAM,oBAAoB,CAAC,GAAG,IAAI,IAAI,YAAY,CAAC,CAAC,MAAM,GAAG,MAAM,IAAI,EAAE;AAEzE,KAAI,kBAAkB,WAAW,EAAG,QAAO;CAE3C,MAAM,SAAmB,EAAE;CAC3B,IAAI,aAAa,kBAAkB;CACnC,IAAI,WAAW;CAEf,MAAM,kBAAwB;AAC5B,SAAO,KACL,eAAe,WAAW,GAAG,eAAe,GAAG,WAAW,GAAG,WAC9D;;AAGH,MAAK,MAAM,cAAc,kBAAkB,MAAM,EAAE,EAAE;AACnD,MAAI,eAAe,WAAW,GAAG;AAE/B,cAAW;AACX;;AAIF,aAAW;AACX,eAAa;AACb,aAAW;;AAGb,YAAW;AAEX,QAAO,OAAO,KAAK,UAAU"}
|
|
@@ -17,6 +17,12 @@ type ReviewDocOptions = {
|
|
|
17
17
|
skipIfModifiedAfter?: number | string | Date;
|
|
18
18
|
skipIfExists?: boolean;
|
|
19
19
|
gitOptions?: ListGitFilesOptions;
|
|
20
|
+
/**
|
|
21
|
+
* Log-only mode. Instead of translating the changed blocks with AI, log the
|
|
22
|
+
* blocks that need attention (with line numbers and content) for the base and
|
|
23
|
+
* target locales, so another agent can generate the translations.
|
|
24
|
+
*/
|
|
25
|
+
log?: boolean;
|
|
20
26
|
};
|
|
21
27
|
/**
|
|
22
28
|
* Main audit function: scans all .md files in "en/" (unless you specified DOC_LIST),
|
|
@@ -34,7 +40,8 @@ declare const reviewDoc: ({
|
|
|
34
40
|
skipIfModifiedBefore,
|
|
35
41
|
skipIfModifiedAfter,
|
|
36
42
|
skipIfExists,
|
|
37
|
-
gitOptions
|
|
43
|
+
gitOptions,
|
|
44
|
+
log
|
|
38
45
|
}: ReviewDocOptions) => Promise<void>;
|
|
39
46
|
//#endregion
|
|
40
47
|
export { reviewDoc };
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reviewDoc.d.ts","names":[],"sources":["../../../src/reviewDoc/reviewDoc.ts"],"mappings":";;;;;;
|
|
1
|
+
{"version":3,"file":"reviewDoc.d.ts","names":[],"sources":["../../../src/reviewDoc/reviewDoc.ts"],"mappings":";;;;;;KAqCK,gBAAA;EACH,UAAA;EACA,OAAA,EAAS,MAAA;EACT,mBAAA;EACA,UAAA,EAAY,MAAA;EACZ,SAAA,GAAY,SAAA;EACZ,2BAAA;EACA,aAAA,GAAgB,uBAAA;EAChB,kBAAA;EACA,oBAAA,qBAAyC,IAAA;EACzC,mBAAA,qBAAwC,IAAA;EACxC,YAAA;EACA,UAAA,GAAa,mBAAA;EAAmB;;;;;EAMhC,GAAA;AAAA;;;;;cAOW,SAAA;EAAmB,UAAA;EAAA,OAAA;EAAA,mBAAA;EAAA,UAAA;EAAA,SAAA;EAAA,2BAAA;EAAA,aAAA;EAAA,kBAAA;EAAA,oBAAA;EAAA,mBAAA;EAAA,YAAA;EAAA,UAAA;EAAA;AAAA,GAc7B,gBAAA,KAAgB,OAAA"}
|
|
@@ -7,12 +7,14 @@ import { AIConfig } from "@intlayer/ai";
|
|
|
7
7
|
//#region src/reviewDoc/reviewDocBlockAware.d.ts
|
|
8
8
|
/**
|
|
9
9
|
* Review a file using block-aware alignment.
|
|
10
|
-
*
|
|
11
|
-
* 1. Segments both
|
|
12
|
-
* 2. Aligns blocks using structure (special chars, numbers) and context
|
|
13
|
-
* 3. Detects which blocks changed, were added, or deleted
|
|
14
|
-
* 4.
|
|
15
|
-
* 5.
|
|
10
|
+
*
|
|
11
|
+
* 1. Segments both base and target documents into semantic blocks.
|
|
12
|
+
* 2. Aligns blocks using structure (special chars, numbers) and context.
|
|
13
|
+
* 3. Detects which blocks changed, were added, or deleted.
|
|
14
|
+
* 4. Applies deletions immediately without AI.
|
|
15
|
+
* 5. Sends changed/new blocks to AI in bottom-up order (last block first), so
|
|
16
|
+
* line numbers of earlier blocks are not shifted by edits below them.
|
|
17
|
+
* 6. Rewrites the file after each block so progress is persisted incrementally.
|
|
16
18
|
*/
|
|
17
19
|
declare const reviewFileBlockAware: (baseFilePath: string, outputFilePath: string, locale: Locale, baseLocale: Locale, aiOptions?: AIOptions, configOptions?: GetConfigurationOptions, customInstructions?: string, changedLines?: number[], aiClient?: AIClient, aiConfig?: AIConfig) => Promise<void>;
|
|
18
20
|
//#endregion
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"reviewDocBlockAware.d.ts","names":[],"sources":["../../../src/reviewDoc/reviewDocBlockAware.ts"],"mappings":";;;;;;;;;
|
|
1
|
+
{"version":3,"file":"reviewDocBlockAware.d.ts","names":[],"sources":["../../../src/reviewDoc/reviewDocBlockAware.ts"],"mappings":";;;;;;;;;AA0CA;;;;;;;;;cAAa,oBAAA,GACX,YAAA,UACA,cAAA,UACA,MAAA,EAAQ,MAAA,EACR,UAAA,EAAY,MAAA,EACZ,SAAA,GAAY,SAAA,EACZ,aAAA,GAAgB,uBAAA,EAChB,kBAAA,WACA,YAAA,aACA,QAAA,GAAW,QAAA,EACX,QAAA,GAAW,QAAA,KAAQ,OAAA"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { GetConfigurationOptions } from "@intlayer/config/node";
|
|
2
|
+
import { Locale } from "@intlayer/types/allLocales";
|
|
3
|
+
import { ReviewReport } from "@intlayer/chokidar/docReview";
|
|
4
|
+
|
|
5
|
+
//#region src/reviewDoc/reviewDocLog.d.ts
|
|
6
|
+
/**
|
|
7
|
+
* Log-only review of a single file/locale pair.
|
|
8
|
+
*
|
|
9
|
+
* Instead of calling an AI to translate the changed blocks, this compares the
|
|
10
|
+
* base document with its translation and logs the blocks that need attention
|
|
11
|
+
* (with their line ranges and content) so another agent or a human can generate
|
|
12
|
+
* the missing translations.
|
|
13
|
+
*
|
|
14
|
+
* @param baseFilePath - Absolute path of the base (source) document.
|
|
15
|
+
* @param outputFilePath - Absolute path of the target (translated) document.
|
|
16
|
+
* @param locale - The target locale being reviewed.
|
|
17
|
+
* @param baseLocale - The base locale used as reference.
|
|
18
|
+
* @param configOptions - Optional Intlayer configuration overrides.
|
|
19
|
+
* @param changedLines - 1-based base line numbers that changed (from git), if any.
|
|
20
|
+
* @returns The structured review report.
|
|
21
|
+
*/
|
|
22
|
+
declare const logReviewFileBlocks: (baseFilePath: string, outputFilePath: string, locale: Locale, baseLocale: Locale, configOptions?: GetConfigurationOptions, changedLines?: number[]) => Promise<ReviewReport>;
|
|
23
|
+
//#endregion
|
|
24
|
+
export { logReviewFileBlocks };
|
|
25
|
+
//# sourceMappingURL=reviewDocLog.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"reviewDocLog.d.ts","names":[],"sources":["../../../src/reviewDoc/reviewDocLog.ts"],"mappings":";;;;;;;AA+BA;;;;;;;;;;;;;;cAAa,mBAAA,GACX,YAAA,UACA,cAAA,UACA,MAAA,EAAQ,MAAA,EACR,UAAA,EAAY,MAAA,EACZ,aAAA,GAAgB,uBAAA,EAChB,YAAA,gBACC,OAAA,CAAQ,YAAA"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
//#region src/utils/formatLineRanges.d.ts
|
|
2
|
+
/**
|
|
3
|
+
* Formats a list of line numbers into a compact, human-readable string where
|
|
4
|
+
* runs of consecutive lines are collapsed into ranges.
|
|
5
|
+
*
|
|
6
|
+
* The input is sorted and de-duplicated first, so callers don't need to
|
|
7
|
+
* pre-process it. A run of a single line is printed as the bare number; a run
|
|
8
|
+
* of two or more consecutive lines is printed as `start-end`.
|
|
9
|
+
*
|
|
10
|
+
* @example
|
|
11
|
+
* formatLineRanges([2, 3, 4, 5, 333, 412, 413, 414]);
|
|
12
|
+
* // → '2-5, 333, 412-414'
|
|
13
|
+
*
|
|
14
|
+
* @param lineNumbers - The (possibly unsorted, possibly duplicated) line numbers.
|
|
15
|
+
* @param separator - String inserted between groups. Defaults to `', '`.
|
|
16
|
+
* @returns The grouped string, or an empty string when no lines are provided.
|
|
17
|
+
*/
|
|
18
|
+
declare const formatLineRanges: (lineNumbers: number[], separator?: string) => string;
|
|
19
|
+
//#endregion
|
|
20
|
+
export { formatLineRanges };
|
|
21
|
+
//# sourceMappingURL=formatLineRanges.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"formatLineRanges.d.ts","names":[],"sources":["../../../src/utils/formatLineRanges.ts"],"mappings":";;AAgBA;;;;;;;;;;;;;;;cAAa,gBAAA,GACX,WAAA,YACA,SAAA"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@intlayer/cli",
|
|
3
|
-
"version": "9.0.0-canary.
|
|
3
|
+
"version": "9.0.0-canary.1",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Provides uniform command-line interface scripts for Intlayer, used in packages like intlayer-cli and intlayer.",
|
|
6
6
|
"keywords": [
|
|
@@ -67,22 +67,22 @@
|
|
|
67
67
|
},
|
|
68
68
|
"dependencies": {
|
|
69
69
|
"@clack/prompts": "0.11.0",
|
|
70
|
-
"@intlayer/api": "9.0.0-canary.
|
|
71
|
-
"@intlayer/babel": "9.0.0-canary.
|
|
72
|
-
"@intlayer/chokidar": "9.0.0-canary.
|
|
73
|
-
"@intlayer/config": "9.0.0-canary.
|
|
74
|
-
"@intlayer/core": "9.0.0-canary.
|
|
75
|
-
"@intlayer/dictionaries-entry": "9.0.0-canary.
|
|
76
|
-
"@intlayer/remote-dictionaries-entry": "9.0.0-canary.
|
|
77
|
-
"@intlayer/types": "9.0.0-canary.
|
|
78
|
-
"@intlayer/unmerged-dictionaries-entry": "9.0.0-canary.
|
|
70
|
+
"@intlayer/api": "9.0.0-canary.1",
|
|
71
|
+
"@intlayer/babel": "9.0.0-canary.1",
|
|
72
|
+
"@intlayer/chokidar": "9.0.0-canary.1",
|
|
73
|
+
"@intlayer/config": "9.0.0-canary.1",
|
|
74
|
+
"@intlayer/core": "9.0.0-canary.1",
|
|
75
|
+
"@intlayer/dictionaries-entry": "9.0.0-canary.1",
|
|
76
|
+
"@intlayer/remote-dictionaries-entry": "9.0.0-canary.1",
|
|
77
|
+
"@intlayer/types": "9.0.0-canary.1",
|
|
78
|
+
"@intlayer/unmerged-dictionaries-entry": "9.0.0-canary.1",
|
|
79
79
|
"commander": "14.0.3",
|
|
80
80
|
"enquirer": "2.4.1",
|
|
81
81
|
"eventsource": "4.1.0",
|
|
82
82
|
"fast-glob": "3.3.3"
|
|
83
83
|
},
|
|
84
84
|
"devDependencies": {
|
|
85
|
-
"@intlayer/ai": "9.0.0-canary.
|
|
85
|
+
"@intlayer/ai": "9.0.0-canary.1",
|
|
86
86
|
"@types/node": "25.9.3",
|
|
87
87
|
"@utils/ts-config": "1.0.4",
|
|
88
88
|
"@utils/ts-config-types": "1.0.4",
|
|
@@ -90,10 +90,10 @@
|
|
|
90
90
|
"rimraf": "6.1.3",
|
|
91
91
|
"tsdown": "0.21.10",
|
|
92
92
|
"typescript": "6.0.3",
|
|
93
|
-
"vitest": "4.1.
|
|
93
|
+
"vitest": "4.1.9"
|
|
94
94
|
},
|
|
95
95
|
"peerDependencies": {
|
|
96
|
-
"@intlayer/ai": "9.0.0-canary.
|
|
96
|
+
"@intlayer/ai": "9.0.0-canary.1"
|
|
97
97
|
},
|
|
98
98
|
"peerDependenciesMeta": {
|
|
99
99
|
"@intlayer/ai": {
|
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
-
const require_translation_alignment_computeSimilarity = require('./computeSimilarity.cjs');
|
|
3
|
-
|
|
4
|
-
//#region src/translation-alignment/alignBlocks.ts
|
|
5
|
-
const alignEnglishAndFrenchBlocks = (defaultBlocks, secondaryBlocks) => {
|
|
6
|
-
const defaultLength = defaultBlocks.length;
|
|
7
|
-
const secondaryLength = secondaryBlocks.length;
|
|
8
|
-
const scoreMatrix = Array.from({ length: defaultLength + 1 }, () => Array.from({ length: secondaryLength + 1 }, () => 0));
|
|
9
|
-
const traceMatrix = Array.from({ length: defaultLength + 1 }, () => Array.from({ length: secondaryLength + 1 }, () => "diagonal"));
|
|
10
|
-
const gapPenalty = -2;
|
|
11
|
-
const computeMatchScore = (defaultIndex, secondaryIndex) => {
|
|
12
|
-
const defaultBlock = defaultBlocks[defaultIndex];
|
|
13
|
-
const secondaryBlock = secondaryBlocks[secondaryIndex];
|
|
14
|
-
const typeBonus = defaultBlock.type === secondaryBlock.type ? 2 : 0;
|
|
15
|
-
const anchorSimilarity = require_translation_alignment_computeSimilarity.computeJaccardSimilarity(defaultBlock.anchorText, secondaryBlock.anchorText, 3);
|
|
16
|
-
return typeBonus + (Math.min(defaultBlock.content.length, secondaryBlock.content.length) / Math.max(defaultBlock.content.length, secondaryBlock.content.length) > .75 ? 1 : 0) + anchorSimilarity * 8;
|
|
17
|
-
};
|
|
18
|
-
for (let i = 1; i <= defaultLength; i += 1) {
|
|
19
|
-
scoreMatrix[i][0] = scoreMatrix[i - 1][0] + gapPenalty;
|
|
20
|
-
traceMatrix[i][0] = "up";
|
|
21
|
-
}
|
|
22
|
-
for (let j = 1; j <= secondaryLength; j += 1) {
|
|
23
|
-
scoreMatrix[0][j] = scoreMatrix[0][j - 1] + gapPenalty;
|
|
24
|
-
traceMatrix[0][j] = "left";
|
|
25
|
-
}
|
|
26
|
-
for (let i = 1; i <= defaultLength; i += 1) for (let j = 1; j <= secondaryLength; j += 1) {
|
|
27
|
-
const match = scoreMatrix[i - 1][j - 1] + computeMatchScore(i - 1, j - 1);
|
|
28
|
-
const deleteGap = scoreMatrix[i - 1][j] + gapPenalty;
|
|
29
|
-
const insertGap = scoreMatrix[i][j - 1] + gapPenalty;
|
|
30
|
-
const best = Math.max(match, deleteGap, insertGap);
|
|
31
|
-
scoreMatrix[i][j] = best;
|
|
32
|
-
traceMatrix[i][j] = best === match ? "diagonal" : best === deleteGap ? "up" : "left";
|
|
33
|
-
}
|
|
34
|
-
const result = [];
|
|
35
|
-
let i = defaultLength;
|
|
36
|
-
let j = secondaryLength;
|
|
37
|
-
while (i > 0 || j > 0) if (i > 0 && j > 0 && traceMatrix[i][j] === "diagonal") {
|
|
38
|
-
const englishIndex = i - 1;
|
|
39
|
-
const frenchIndex = j - 1;
|
|
40
|
-
const similarityScore = require_translation_alignment_computeSimilarity.computeJaccardSimilarity(defaultBlocks[englishIndex].anchorText, secondaryBlocks[frenchIndex].anchorText, 3);
|
|
41
|
-
result.unshift({
|
|
42
|
-
englishIndex,
|
|
43
|
-
frenchIndex,
|
|
44
|
-
similarityScore
|
|
45
|
-
});
|
|
46
|
-
i -= 1;
|
|
47
|
-
j -= 1;
|
|
48
|
-
} else if (i > 0 && (j === 0 || traceMatrix[i][j] === "up")) {
|
|
49
|
-
result.unshift({
|
|
50
|
-
englishIndex: i - 1,
|
|
51
|
-
frenchIndex: null,
|
|
52
|
-
similarityScore: 0
|
|
53
|
-
});
|
|
54
|
-
i -= 1;
|
|
55
|
-
} else if (j > 0 && (i === 0 || traceMatrix[i][j] === "left")) {
|
|
56
|
-
result.unshift({
|
|
57
|
-
englishIndex: -1,
|
|
58
|
-
frenchIndex: j - 1,
|
|
59
|
-
similarityScore: 0
|
|
60
|
-
});
|
|
61
|
-
j -= 1;
|
|
62
|
-
}
|
|
63
|
-
return result;
|
|
64
|
-
};
|
|
65
|
-
|
|
66
|
-
//#endregion
|
|
67
|
-
exports.alignEnglishAndFrenchBlocks = alignEnglishAndFrenchBlocks;
|
|
68
|
-
//# sourceMappingURL=alignBlocks.cjs.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"alignBlocks.cjs","names":["computeJaccardSimilarity"],"sources":["../../../src/translation-alignment/alignBlocks.ts"],"sourcesContent":["import { computeJaccardSimilarity } from './computeSimilarity';\nimport type { AlignmentPair, FingerprintedBlock } from './types';\n\nexport const alignEnglishAndFrenchBlocks = (\n defaultBlocks: FingerprintedBlock[],\n secondaryBlocks: FingerprintedBlock[]\n): AlignmentPair[] => {\n // Needleman–Wunsch style global alignment using anchor similarity and type equality\n const defaultLength = defaultBlocks.length;\n const secondaryLength = secondaryBlocks.length;\n\n const scoreMatrix: number[][] = Array.from(\n { length: defaultLength + 1 },\n () => Array.from({ length: secondaryLength + 1 }, () => 0)\n );\n const traceMatrix: ('diagonal' | 'up' | 'left')[][] = Array.from(\n { length: defaultLength + 1 },\n () => Array.from({ length: secondaryLength + 1 }, () => 'diagonal')\n );\n\n const gapPenalty = -2;\n\n const computeMatchScore = (\n defaultIndex: number,\n secondaryIndex: number\n ): number => {\n const defaultBlock = defaultBlocks[defaultIndex];\n const secondaryBlock = secondaryBlocks[secondaryIndex];\n const typeBonus = defaultBlock.type === secondaryBlock.type ? 2 : 0;\n const anchorSimilarity = computeJaccardSimilarity(\n defaultBlock.anchorText,\n secondaryBlock.anchorText,\n 3\n );\n const lengthRatio =\n Math.min(defaultBlock.content.length, secondaryBlock.content.length) /\n Math.max(defaultBlock.content.length, secondaryBlock.content.length);\n const lengthBonus = lengthRatio > 0.75 ? 
1 : 0;\n return typeBonus + lengthBonus + anchorSimilarity * 8; // weighted toward anchor similarity\n };\n\n // initialize first row and column\n for (let i = 1; i <= defaultLength; i += 1) {\n scoreMatrix[i][0] = scoreMatrix[i - 1][0] + gapPenalty;\n traceMatrix[i][0] = 'up';\n }\n for (let j = 1; j <= secondaryLength; j += 1) {\n scoreMatrix[0][j] = scoreMatrix[0][j - 1] + gapPenalty;\n traceMatrix[0][j] = 'left';\n }\n\n // fill\n for (let i = 1; i <= defaultLength; i += 1) {\n for (let j = 1; j <= secondaryLength; j += 1) {\n const match = scoreMatrix[i - 1][j - 1] + computeMatchScore(i - 1, j - 1);\n const deleteGap = scoreMatrix[i - 1][j] + gapPenalty;\n const insertGap = scoreMatrix[i][j - 1] + gapPenalty;\n\n const best = Math.max(match, deleteGap, insertGap);\n scoreMatrix[i][j] = best;\n traceMatrix[i][j] =\n best === match ? 'diagonal' : best === deleteGap ? 'up' : 'left';\n }\n }\n\n // traceback\n const result: AlignmentPair[] = [];\n let i = defaultLength;\n let j = secondaryLength;\n while (i > 0 || j > 0) {\n if (i > 0 && j > 0 && traceMatrix[i][j] === 'diagonal') {\n const englishIndex = i - 1;\n const frenchIndex = j - 1;\n const similarityScore = computeJaccardSimilarity(\n defaultBlocks[englishIndex].anchorText,\n secondaryBlocks[frenchIndex].anchorText,\n 3\n );\n result.unshift({ englishIndex, frenchIndex, similarityScore });\n i -= 1;\n j -= 1;\n } else if (i > 0 && (j === 0 || traceMatrix[i][j] === 'up')) {\n result.unshift({\n englishIndex: i - 1,\n frenchIndex: null,\n similarityScore: 0,\n });\n i -= 1;\n } else if (j > 0 && (i === 0 || traceMatrix[i][j] === 'left')) {\n // french block has no corresponding english block (deleted)\n result.unshift({\n englishIndex: -1,\n frenchIndex: j - 1,\n similarityScore: 0,\n });\n j -= 1;\n }\n }\n return 
result;\n};\n"],"mappings":";;;;AAGA,MAAa,+BACX,eACA,oBACoB;CAEpB,MAAM,gBAAgB,cAAc;CACpC,MAAM,kBAAkB,gBAAgB;CAExC,MAAM,cAA0B,MAAM,KACpC,EAAE,QAAQ,gBAAgB,GAAG,QACvB,MAAM,KAAK,EAAE,QAAQ,kBAAkB,GAAG,QAAQ,EAAE,CAC3D;CACD,MAAM,cAAgD,MAAM,KAC1D,EAAE,QAAQ,gBAAgB,GAAG,QACvB,MAAM,KAAK,EAAE,QAAQ,kBAAkB,GAAG,QAAQ,WAAW,CACpE;CAED,MAAM,aAAa;CAEnB,MAAM,qBACJ,cACA,mBACW;EACX,MAAM,eAAe,cAAc;EACnC,MAAM,iBAAiB,gBAAgB;EACvC,MAAM,YAAY,aAAa,SAAS,eAAe,OAAO,IAAI;EAClE,MAAM,mBAAmBA,yEACvB,aAAa,YACb,eAAe,YACf,EACD;AAKD,SAAO,aAHL,KAAK,IAAI,aAAa,QAAQ,QAAQ,eAAe,QAAQ,OAAO,GACpE,KAAK,IAAI,aAAa,QAAQ,QAAQ,eAAe,QAAQ,OAAO,GACpC,MAAO,IAAI,KACZ,mBAAmB;;AAItD,MAAK,IAAI,IAAI,GAAG,KAAK,eAAe,KAAK,GAAG;AAC1C,cAAY,GAAG,KAAK,YAAY,IAAI,GAAG,KAAK;AAC5C,cAAY,GAAG,KAAK;;AAEtB,MAAK,IAAI,IAAI,GAAG,KAAK,iBAAiB,KAAK,GAAG;AAC5C,cAAY,GAAG,KAAK,YAAY,GAAG,IAAI,KAAK;AAC5C,cAAY,GAAG,KAAK;;AAItB,MAAK,IAAI,IAAI,GAAG,KAAK,eAAe,KAAK,EACvC,MAAK,IAAI,IAAI,GAAG,KAAK,iBAAiB,KAAK,GAAG;EAC5C,MAAM,QAAQ,YAAY,IAAI,GAAG,IAAI,KAAK,kBAAkB,IAAI,GAAG,IAAI,EAAE;EACzE,MAAM,YAAY,YAAY,IAAI,GAAG,KAAK;EAC1C,MAAM,YAAY,YAAY,GAAG,IAAI,KAAK;EAE1C,MAAM,OAAO,KAAK,IAAI,OAAO,WAAW,UAAU;AAClD,cAAY,GAAG,KAAK;AACpB,cAAY,GAAG,KACb,SAAS,QAAQ,aAAa,SAAS,YAAY,OAAO;;CAKhE,MAAM,SAA0B,EAAE;CAClC,IAAI,IAAI;CACR,IAAI,IAAI;AACR,QAAO,IAAI,KAAK,IAAI,EAClB,KAAI,IAAI,KAAK,IAAI,KAAK,YAAY,GAAG,OAAO,YAAY;EACtD,MAAM,eAAe,IAAI;EACzB,MAAM,cAAc,IAAI;EACxB,MAAM,kBAAkBA,yEACtB,cAAc,cAAc,YAC5B,gBAAgB,aAAa,YAC7B,EACD;AACD,SAAO,QAAQ;GAAE;GAAc;GAAa;GAAiB,CAAC;AAC9D,OAAK;AACL,OAAK;YACI,IAAI,MAAM,MAAM,KAAK,YAAY,GAAG,OAAO,OAAO;AAC3D,SAAO,QAAQ;GACb,cAAc,IAAI;GAClB,aAAa;GACb,iBAAiB;GAClB,CAAC;AACF,OAAK;YACI,IAAI,MAAM,MAAM,KAAK,YAAY,GAAG,OAAO,SAAS;AAE7D,SAAO,QAAQ;GACb,cAAc;GACd,aAAa,IAAI;GACjB,iBAAiB;GAClB,CAAC;AACF,OAAK;;AAGT,QAAO"}
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
-
|
|
3
|
-
//#region src/translation-alignment/computeSimilarity.ts
|
|
4
|
-
const generateCharacterShingles = (text, shingleLength) => {
|
|
5
|
-
const normalized = text.replace(/\s+/g, " ").trim();
|
|
6
|
-
const set = /* @__PURE__ */ new Set();
|
|
7
|
-
if (normalized.length < shingleLength) {
|
|
8
|
-
if (normalized.length > 0) set.add(normalized);
|
|
9
|
-
return set;
|
|
10
|
-
}
|
|
11
|
-
for (let index = 0; index <= normalized.length - shingleLength; index += 1) set.add(normalized.slice(index, index + shingleLength));
|
|
12
|
-
return set;
|
|
13
|
-
};
|
|
14
|
-
const computeJaccardSimilarity = (a, b, shingleLength = 3) => {
|
|
15
|
-
const setA = generateCharacterShingles(a, shingleLength);
|
|
16
|
-
const setB = generateCharacterShingles(b, shingleLength);
|
|
17
|
-
if (setA.size === 0 && setB.size === 0) return 1;
|
|
18
|
-
const intersectionSize = Array.from(setA).filter((token) => setB.has(token)).length;
|
|
19
|
-
const unionSize = new Set([...Array.from(setA), ...Array.from(setB)]).size;
|
|
20
|
-
return unionSize === 0 ? 0 : intersectionSize / unionSize;
|
|
21
|
-
};
|
|
22
|
-
|
|
23
|
-
//#endregion
|
|
24
|
-
exports.computeJaccardSimilarity = computeJaccardSimilarity;
|
|
25
|
-
exports.generateCharacterShingles = generateCharacterShingles;
|
|
26
|
-
//# sourceMappingURL=computeSimilarity.cjs.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"computeSimilarity.cjs","names":[],"sources":["../../../src/translation-alignment/computeSimilarity.ts"],"sourcesContent":["// Character shingle Jaccard similarity (language agnostic)\nexport const generateCharacterShingles = (\n text: string,\n shingleLength: number\n): Set<string> => {\n const normalized = text.replace(/\\s+/g, ' ').trim();\n const set = new Set<string>();\n if (normalized.length < shingleLength) {\n if (normalized.length > 0) {\n set.add(normalized);\n }\n return set;\n }\n for (let index = 0; index <= normalized.length - shingleLength; index += 1) {\n set.add(normalized.slice(index, index + shingleLength));\n }\n return set;\n};\n\nexport const computeJaccardSimilarity = (\n a: string,\n b: string,\n shingleLength: number = 3\n): number => {\n const setA = generateCharacterShingles(a, shingleLength);\n const setB = generateCharacterShingles(b, shingleLength);\n if (setA.size === 0 && setB.size === 0) return 1;\n const intersectionSize = Array.from(setA).filter((token) =>\n setB.has(token)\n ).length;\n const unionSize = new Set([...Array.from(setA), ...Array.from(setB)]).size;\n return unionSize === 0 ? 0 : intersectionSize / unionSize;\n};\n"],"mappings":";;;AACA,MAAa,6BACX,MACA,kBACgB;CAChB,MAAM,aAAa,KAAK,QAAQ,QAAQ,IAAI,CAAC,MAAM;CACnD,MAAM,sBAAM,IAAI,KAAa;AAC7B,KAAI,WAAW,SAAS,eAAe;AACrC,MAAI,WAAW,SAAS,EACtB,KAAI,IAAI,WAAW;AAErB,SAAO;;AAET,MAAK,IAAI,QAAQ,GAAG,SAAS,WAAW,SAAS,eAAe,SAAS,EACvE,KAAI,IAAI,WAAW,MAAM,OAAO,QAAQ,cAAc,CAAC;AAEzD,QAAO;;AAGT,MAAa,4BACX,GACA,GACA,gBAAwB,MACb;CACX,MAAM,OAAO,0BAA0B,GAAG,cAAc;CACxD,MAAM,OAAO,0BAA0B,GAAG,cAAc;AACxD,KAAI,KAAK,SAAS,KAAK,KAAK,SAAS,EAAG,QAAO;CAC/C,MAAM,mBAAmB,MAAM,KAAK,KAAK,CAAC,QAAQ,UAChD,KAAK,IAAI,MAAM,CAChB,CAAC;CACF,MAAM,YAAY,IAAI,IAAI,CAAC,GAAG,MAAM,KAAK,KAAK,EAAE,GAAG,MAAM,KAAK,KAAK,CAAC,CAAC,CAAC;AACtE,QAAO,cAAc,IAAI,IAAI,mBAAmB"}
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
|
2
|
-
const require_runtime = require('../_virtual/_rolldown/runtime.cjs');
|
|
3
|
-
let node_crypto = require("node:crypto");
|
|
4
|
-
node_crypto = require_runtime.__toESM(node_crypto);
|
|
5
|
-
|
|
6
|
-
//#region src/translation-alignment/fingerprintBlock.ts
|
|
7
|
-
const computeStringDigest = (text) => node_crypto.default.createHash("sha256").update(text).digest("hex");
|
|
8
|
-
const fingerprintBlock = (block, previousBlock, nextBlock) => {
|
|
9
|
-
const semanticDigest = computeStringDigest(block.semanticText);
|
|
10
|
-
const anchorDigest = computeStringDigest(block.anchorText);
|
|
11
|
-
const compositeKey = `${semanticDigest}:${anchorDigest}`;
|
|
12
|
-
const contextKey = computeStringDigest(`${computeStringDigest(previousBlock?.semanticText ?? "")}:${computeStringDigest(nextBlock?.semanticText ?? "")}`);
|
|
13
|
-
return {
|
|
14
|
-
...block,
|
|
15
|
-
semanticDigest,
|
|
16
|
-
anchorDigest,
|
|
17
|
-
compositeKey,
|
|
18
|
-
contextKey
|
|
19
|
-
};
|
|
20
|
-
};
|
|
21
|
-
|
|
22
|
-
//#endregion
|
|
23
|
-
exports.fingerprintBlock = fingerprintBlock;
|
|
24
|
-
//# sourceMappingURL=fingerprintBlock.cjs.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"fingerprintBlock.cjs","names":["crypto"],"sources":["../../../src/translation-alignment/fingerprintBlock.ts"],"sourcesContent":["import crypto from 'node:crypto';\nimport type { FingerprintedBlock, NormalizedBlock } from './types';\n\nconst computeStringDigest = (text: string): string =>\n crypto.createHash('sha256').update(text).digest('hex');\n\nexport const fingerprintBlock = (\n block: NormalizedBlock,\n previousBlock: NormalizedBlock | null,\n nextBlock: NormalizedBlock | null\n): FingerprintedBlock => {\n const semanticDigest = computeStringDigest(block.semanticText);\n const anchorDigest = computeStringDigest(block.anchorText);\n const compositeKey = `${semanticDigest}:${anchorDigest}`;\n\n const previousDigest = computeStringDigest(previousBlock?.semanticText ?? '');\n const nextDigest = computeStringDigest(nextBlock?.semanticText ?? '');\n const contextKey = computeStringDigest(`${previousDigest}:${nextDigest}`);\n\n return {\n ...block,\n semanticDigest,\n anchorDigest,\n compositeKey,\n contextKey,\n };\n};\n"],"mappings":";;;;;;AAGA,MAAM,uBAAuB,SAC3BA,oBAAO,WAAW,SAAS,CAAC,OAAO,KAAK,CAAC,OAAO,MAAM;AAExD,MAAa,oBACX,OACA,eACA,cACuB;CACvB,MAAM,iBAAiB,oBAAoB,MAAM,aAAa;CAC9D,MAAM,eAAe,oBAAoB,MAAM,WAAW;CAC1D,MAAM,eAAe,GAAG,eAAe,GAAG;CAI1C,MAAM,aAAa,oBAAoB,GAFhB,oBAAoB,eAAe,gBAAgB,GAElB,CAAC,GADtC,oBAAoB,WAAW,gBAAgB,GACI,GAAG;AAEzE,QAAO;EACL,GAAG;EACH;EACA;EACA;EACA;EACD"}
|