@intlayer/cli 8.1.1 → 8.1.3-canary.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/IntlayerEventListener.cjs +1 -186
- package/dist/cjs/IntlayerEventListener.cjs.map +1 -1
- package/dist/cjs/_virtual/_rolldown/runtime.cjs +1 -29
- package/dist/cjs/_virtual/_utils_asset.cjs +2 -98
- package/dist/cjs/auth/login.cjs +2 -85
- package/dist/cjs/auth/login.cjs.map +1 -1
- package/dist/cjs/build.cjs +1 -27
- package/dist/cjs/build.cjs.map +1 -1
- package/dist/cjs/ci.cjs +1 -73
- package/dist/cjs/ci.cjs.map +1 -1
- package/dist/cjs/cli.cjs +1 -476
- package/dist/cjs/cli.cjs.map +1 -1
- package/dist/cjs/config.cjs +1 -12
- package/dist/cjs/config.cjs.map +1 -1
- package/dist/cjs/editor.cjs +1 -50
- package/dist/cjs/editor.cjs.map +1 -1
- package/dist/cjs/extract.cjs +1 -96
- package/dist/cjs/extract.cjs.map +1 -1
- package/dist/cjs/fill/deepMergeContent.cjs +1 -27
- package/dist/cjs/fill/deepMergeContent.cjs.map +1 -1
- package/dist/cjs/fill/fill.cjs +1 -78
- package/dist/cjs/fill/fill.cjs.map +1 -1
- package/dist/cjs/fill/formatAutoFilledFilePath.cjs +1 -29
- package/dist/cjs/fill/formatAutoFilledFilePath.cjs.map +1 -1
- package/dist/cjs/fill/formatFillData.cjs +1 -50
- package/dist/cjs/fill/formatFillData.cjs.map +1 -1
- package/dist/cjs/fill/getAvailableLocalesInDictionary.cjs +1 -26
- package/dist/cjs/fill/getAvailableLocalesInDictionary.cjs.map +1 -1
- package/dist/cjs/fill/getFilterMissingContentPerLocale.cjs +1 -50
- package/dist/cjs/fill/getFilterMissingContentPerLocale.cjs.map +1 -1
- package/dist/cjs/fill/index.cjs +1 -6
- package/dist/cjs/fill/listTranslationsTasks.cjs +1 -70
- package/dist/cjs/fill/listTranslationsTasks.cjs.map +1 -1
- package/dist/cjs/fill/mergeChunks.cjs +1 -28
- package/dist/cjs/fill/mergeChunks.cjs.map +1 -1
- package/dist/cjs/fill/translateDictionary.cjs +1 -205
- package/dist/cjs/fill/translateDictionary.cjs.map +1 -1
- package/dist/cjs/fill/writeFill.cjs +1 -54
- package/dist/cjs/fill/writeFill.cjs.map +1 -1
- package/dist/cjs/getTargetDictionary.cjs +1 -36
- package/dist/cjs/getTargetDictionary.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -39
- package/dist/cjs/init.cjs +1 -322
- package/dist/cjs/init.cjs.map +1 -1
- package/dist/cjs/listContentDeclaration.cjs +1 -41
- package/dist/cjs/listContentDeclaration.cjs.map +1 -1
- package/dist/cjs/listProjects.cjs +1 -28
- package/dist/cjs/listProjects.cjs.map +1 -1
- package/dist/cjs/liveSync.cjs +8 -151
- package/dist/cjs/liveSync.cjs.map +1 -1
- package/dist/cjs/pull.cjs +1 -146
- package/dist/cjs/pull.cjs.map +1 -1
- package/dist/cjs/push/pullLog.cjs +3 -102
- package/dist/cjs/push/pullLog.cjs.map +1 -1
- package/dist/cjs/push/push.cjs +1 -206
- package/dist/cjs/push/push.cjs.map +1 -1
- package/dist/cjs/pushConfig.cjs +1 -19
- package/dist/cjs/pushConfig.cjs.map +1 -1
- package/dist/cjs/pushLog.cjs +3 -84
- package/dist/cjs/pushLog.cjs.map +1 -1
- package/dist/cjs/reviewDoc/reviewDoc.cjs +1 -68
- package/dist/cjs/reviewDoc/reviewDoc.cjs.map +1 -1
- package/dist/cjs/reviewDoc/reviewDocBlockAware.cjs +1 -94
- package/dist/cjs/reviewDoc/reviewDocBlockAware.cjs.map +1 -1
- package/dist/cjs/searchDoc.cjs +1 -38
- package/dist/cjs/searchDoc.cjs.map +1 -1
- package/dist/cjs/test/index.cjs +1 -7
- package/dist/cjs/test/listMissingTranslations.cjs +1 -49
- package/dist/cjs/test/listMissingTranslations.cjs.map +1 -1
- package/dist/cjs/test/test.cjs +1 -51
- package/dist/cjs/test/test.cjs.map +1 -1
- package/dist/cjs/translateDoc/index.cjs +1 -9
- package/dist/cjs/translateDoc/translateDoc.cjs +1 -74
- package/dist/cjs/translateDoc/translateDoc.cjs.map +1 -1
- package/dist/cjs/translateDoc/translateFile.cjs +2 -103
- package/dist/cjs/translateDoc/translateFile.cjs.map +1 -1
- package/dist/cjs/translateDoc/validation.cjs +5 -49
- package/dist/cjs/translateDoc/validation.cjs.map +1 -1
- package/dist/cjs/translation-alignment/alignBlocks.cjs +1 -67
- package/dist/cjs/translation-alignment/alignBlocks.cjs.map +1 -1
- package/dist/cjs/translation-alignment/computeSimilarity.cjs +1 -25
- package/dist/cjs/translation-alignment/computeSimilarity.cjs.map +1 -1
- package/dist/cjs/translation-alignment/fingerprintBlock.cjs +1 -23
- package/dist/cjs/translation-alignment/fingerprintBlock.cjs.map +1 -1
- package/dist/cjs/translation-alignment/index.cjs +1 -22
- package/dist/cjs/translation-alignment/mapChangedLinesToBlocks.cjs +1 -18
- package/dist/cjs/translation-alignment/mapChangedLinesToBlocks.cjs.map +1 -1
- package/dist/cjs/translation-alignment/normalizeBlock.cjs +1 -22
- package/dist/cjs/translation-alignment/normalizeBlock.cjs.map +1 -1
- package/dist/cjs/translation-alignment/pipeline.cjs +1 -37
- package/dist/cjs/translation-alignment/pipeline.cjs.map +1 -1
- package/dist/cjs/translation-alignment/planActions.cjs +1 -46
- package/dist/cjs/translation-alignment/planActions.cjs.map +1 -1
- package/dist/cjs/translation-alignment/rebuildDocument.cjs +2 -49
- package/dist/cjs/translation-alignment/rebuildDocument.cjs.map +1 -1
- package/dist/cjs/translation-alignment/segmentDocument.cjs +5 -66
- package/dist/cjs/translation-alignment/segmentDocument.cjs.map +1 -1
- package/dist/cjs/utils/calculateChunks.cjs +2 -89
- package/dist/cjs/utils/calculateChunks.cjs.map +1 -1
- package/dist/cjs/utils/checkAccess.cjs +1 -81
- package/dist/cjs/utils/checkAccess.cjs.map +1 -1
- package/dist/cjs/utils/checkConfigConsistency.cjs +1 -16
- package/dist/cjs/utils/checkConfigConsistency.cjs.map +1 -1
- package/dist/cjs/utils/checkFileModifiedRange.cjs +1 -81
- package/dist/cjs/utils/checkFileModifiedRange.cjs.map +1 -1
- package/dist/cjs/utils/checkLastUpdateTime.cjs +1 -19
- package/dist/cjs/utils/checkLastUpdateTime.cjs.map +1 -1
- package/dist/cjs/utils/chunkInference.cjs +1 -45
- package/dist/cjs/utils/chunkInference.cjs.map +1 -1
- package/dist/cjs/utils/fixChunkStartEndChars.cjs +3 -27
- package/dist/cjs/utils/fixChunkStartEndChars.cjs.map +1 -1
- package/dist/cjs/utils/formatTimeDiff.cjs +1 -20
- package/dist/cjs/utils/formatTimeDiff.cjs.map +1 -1
- package/dist/cjs/utils/getIsFileUpdatedRecently.cjs +1 -16
- package/dist/cjs/utils/getIsFileUpdatedRecently.cjs.map +1 -1
- package/dist/cjs/utils/getOutputFilePath.cjs +1 -74
- package/dist/cjs/utils/getOutputFilePath.cjs.map +1 -1
- package/dist/cjs/utils/getParentPackageJSON.cjs +1 -20
- package/dist/cjs/utils/getParentPackageJSON.cjs.map +1 -1
- package/dist/cjs/utils/listSpecialChars.cjs +2 -54
- package/dist/cjs/utils/listSpecialChars.cjs.map +1 -1
- package/dist/cjs/utils/mapChunksBetweenFiles.cjs +1 -102
- package/dist/cjs/utils/mapChunksBetweenFiles.cjs.map +1 -1
- package/dist/cjs/utils/openBrowser.cjs +1 -19
- package/dist/cjs/utils/openBrowser.cjs.map +1 -1
- package/dist/cjs/utils/reorderParagraphs.cjs +3 -91
- package/dist/cjs/utils/reorderParagraphs.cjs.map +1 -1
- package/dist/cjs/utils/setupAI.cjs +1 -64
- package/dist/cjs/utils/setupAI.cjs.map +1 -1
- package/dist/cjs/watch.cjs +1 -43
- package/dist/cjs/watch.cjs.map +1 -1
- package/dist/esm/IntlayerEventListener.mjs +1 -183
- package/dist/esm/IntlayerEventListener.mjs.map +1 -1
- package/dist/esm/_virtual/_rolldown/runtime.mjs +1 -8
- package/dist/esm/_virtual/_utils_asset.mjs +2 -97
- package/dist/esm/auth/login.mjs +2 -82
- package/dist/esm/auth/login.mjs.map +1 -1
- package/dist/esm/build.mjs +1 -25
- package/dist/esm/build.mjs.map +1 -1
- package/dist/esm/ci.mjs +1 -71
- package/dist/esm/ci.mjs.map +1 -1
- package/dist/esm/cli.mjs +1 -473
- package/dist/esm/cli.mjs.map +1 -1
- package/dist/esm/config.mjs +1 -10
- package/dist/esm/config.mjs.map +1 -1
- package/dist/esm/editor.mjs +1 -49
- package/dist/esm/editor.mjs.map +1 -1
- package/dist/esm/extract.mjs +1 -93
- package/dist/esm/extract.mjs.map +1 -1
- package/dist/esm/fill/deepMergeContent.mjs +1 -25
- package/dist/esm/fill/deepMergeContent.mjs.map +1 -1
- package/dist/esm/fill/fill.mjs +1 -76
- package/dist/esm/fill/fill.mjs.map +1 -1
- package/dist/esm/fill/formatAutoFilledFilePath.mjs +1 -27
- package/dist/esm/fill/formatAutoFilledFilePath.mjs.map +1 -1
- package/dist/esm/fill/formatFillData.mjs +1 -49
- package/dist/esm/fill/formatFillData.mjs.map +1 -1
- package/dist/esm/fill/getAvailableLocalesInDictionary.mjs +1 -24
- package/dist/esm/fill/getAvailableLocalesInDictionary.mjs.map +1 -1
- package/dist/esm/fill/getFilterMissingContentPerLocale.mjs +1 -48
- package/dist/esm/fill/getFilterMissingContentPerLocale.mjs.map +1 -1
- package/dist/esm/fill/index.mjs +1 -4
- package/dist/esm/fill/listTranslationsTasks.mjs +1 -68
- package/dist/esm/fill/listTranslationsTasks.mjs.map +1 -1
- package/dist/esm/fill/mergeChunks.mjs +1 -26
- package/dist/esm/fill/mergeChunks.mjs.map +1 -1
- package/dist/esm/fill/translateDictionary.mjs +1 -203
- package/dist/esm/fill/translateDictionary.mjs.map +1 -1
- package/dist/esm/fill/writeFill.mjs +1 -52
- package/dist/esm/fill/writeFill.mjs.map +1 -1
- package/dist/esm/getTargetDictionary.mjs +1 -33
- package/dist/esm/getTargetDictionary.mjs.map +1 -1
- package/dist/esm/index.mjs +1 -18
- package/dist/esm/init.mjs +1 -317
- package/dist/esm/init.mjs.map +1 -1
- package/dist/esm/listContentDeclaration.mjs +1 -38
- package/dist/esm/listContentDeclaration.mjs.map +1 -1
- package/dist/esm/listProjects.mjs +1 -26
- package/dist/esm/listProjects.mjs.map +1 -1
- package/dist/esm/liveSync.mjs +8 -148
- package/dist/esm/liveSync.mjs.map +1 -1
- package/dist/esm/pull.mjs +1 -144
- package/dist/esm/pull.mjs.map +1 -1
- package/dist/esm/push/pullLog.mjs +3 -100
- package/dist/esm/push/pullLog.mjs.map +1 -1
- package/dist/esm/push/push.mjs +1 -203
- package/dist/esm/push/push.mjs.map +1 -1
- package/dist/esm/pushConfig.mjs +1 -17
- package/dist/esm/pushConfig.mjs.map +1 -1
- package/dist/esm/pushLog.mjs +3 -82
- package/dist/esm/pushLog.mjs.map +1 -1
- package/dist/esm/reviewDoc/reviewDoc.mjs +1 -65
- package/dist/esm/reviewDoc/reviewDoc.mjs.map +1 -1
- package/dist/esm/reviewDoc/reviewDocBlockAware.mjs +1 -92
- package/dist/esm/reviewDoc/reviewDocBlockAware.mjs.map +1 -1
- package/dist/esm/searchDoc.mjs +1 -36
- package/dist/esm/searchDoc.mjs.map +1 -1
- package/dist/esm/test/index.mjs +1 -4
- package/dist/esm/test/listMissingTranslations.mjs +1 -46
- package/dist/esm/test/listMissingTranslations.mjs.map +1 -1
- package/dist/esm/test/test.mjs +1 -49
- package/dist/esm/test/test.mjs.map +1 -1
- package/dist/esm/translateDoc/index.mjs +1 -5
- package/dist/esm/translateDoc/translateDoc.mjs +1 -71
- package/dist/esm/translateDoc/translateDoc.mjs.map +1 -1
- package/dist/esm/translateDoc/translateFile.mjs +2 -101
- package/dist/esm/translateDoc/translateFile.mjs.map +1 -1
- package/dist/esm/translateDoc/validation.mjs +5 -46
- package/dist/esm/translateDoc/validation.mjs.map +1 -1
- package/dist/esm/translation-alignment/alignBlocks.mjs +1 -66
- package/dist/esm/translation-alignment/alignBlocks.mjs.map +1 -1
- package/dist/esm/translation-alignment/computeSimilarity.mjs +1 -22
- package/dist/esm/translation-alignment/computeSimilarity.mjs.map +1 -1
- package/dist/esm/translation-alignment/fingerprintBlock.mjs +1 -20
- package/dist/esm/translation-alignment/fingerprintBlock.mjs.map +1 -1
- package/dist/esm/translation-alignment/index.mjs +1 -11
- package/dist/esm/translation-alignment/mapChangedLinesToBlocks.mjs +1 -16
- package/dist/esm/translation-alignment/mapChangedLinesToBlocks.mjs.map +1 -1
- package/dist/esm/translation-alignment/normalizeBlock.mjs +1 -20
- package/dist/esm/translation-alignment/normalizeBlock.mjs.map +1 -1
- package/dist/esm/translation-alignment/pipeline.mjs +1 -35
- package/dist/esm/translation-alignment/pipeline.mjs.map +1 -1
- package/dist/esm/translation-alignment/planActions.mjs +1 -44
- package/dist/esm/translation-alignment/planActions.mjs.map +1 -1
- package/dist/esm/translation-alignment/rebuildDocument.mjs +2 -46
- package/dist/esm/translation-alignment/rebuildDocument.mjs.map +1 -1
- package/dist/esm/translation-alignment/segmentDocument.mjs +5 -64
- package/dist/esm/translation-alignment/segmentDocument.mjs.map +1 -1
- package/dist/esm/utils/calculateChunks.mjs +2 -87
- package/dist/esm/utils/calculateChunks.mjs.map +1 -1
- package/dist/esm/utils/checkAccess.mjs +1 -78
- package/dist/esm/utils/checkAccess.mjs.map +1 -1
- package/dist/esm/utils/checkConfigConsistency.mjs +1 -14
- package/dist/esm/utils/checkConfigConsistency.mjs.map +1 -1
- package/dist/esm/utils/checkFileModifiedRange.mjs +1 -80
- package/dist/esm/utils/checkFileModifiedRange.mjs.map +1 -1
- package/dist/esm/utils/checkLastUpdateTime.mjs +1 -17
- package/dist/esm/utils/checkLastUpdateTime.mjs.map +1 -1
- package/dist/esm/utils/chunkInference.mjs +1 -43
- package/dist/esm/utils/chunkInference.mjs.map +1 -1
- package/dist/esm/utils/fixChunkStartEndChars.mjs +3 -25
- package/dist/esm/utils/fixChunkStartEndChars.mjs.map +1 -1
- package/dist/esm/utils/formatTimeDiff.mjs +1 -18
- package/dist/esm/utils/formatTimeDiff.mjs.map +1 -1
- package/dist/esm/utils/getIsFileUpdatedRecently.mjs +1 -14
- package/dist/esm/utils/getIsFileUpdatedRecently.mjs.map +1 -1
- package/dist/esm/utils/getOutputFilePath.mjs +1 -72
- package/dist/esm/utils/getOutputFilePath.mjs.map +1 -1
- package/dist/esm/utils/getParentPackageJSON.mjs +1 -18
- package/dist/esm/utils/getParentPackageJSON.mjs.map +1 -1
- package/dist/esm/utils/listSpecialChars.mjs +2 -52
- package/dist/esm/utils/listSpecialChars.mjs.map +1 -1
- package/dist/esm/utils/mapChunksBetweenFiles.mjs +1 -100
- package/dist/esm/utils/mapChunksBetweenFiles.mjs.map +1 -1
- package/dist/esm/utils/openBrowser.mjs +1 -17
- package/dist/esm/utils/openBrowser.mjs.map +1 -1
- package/dist/esm/utils/reorderParagraphs.mjs +3 -90
- package/dist/esm/utils/reorderParagraphs.mjs.map +1 -1
- package/dist/esm/utils/setupAI.mjs +1 -62
- package/dist/esm/utils/setupAI.mjs.map +1 -1
- package/dist/esm/watch.mjs +1 -41
- package/dist/esm/watch.mjs.map +1 -1
- package/dist/types/auth/login.d.ts +1 -1
- package/dist/types/auth/login.d.ts.map +1 -1
- package/dist/types/build.d.ts +1 -1
- package/dist/types/build.d.ts.map +1 -1
- package/dist/types/config.d.ts +1 -1
- package/dist/types/extract.d.ts +1 -1
- package/dist/types/extract.d.ts.map +1 -1
- package/dist/types/fill/fill.d.ts +1 -1
- package/dist/types/fill/fill.d.ts.map +1 -1
- package/dist/types/fill/translateDictionary.d.ts +2 -2
- package/dist/types/fill/translateDictionary.d.ts.map +1 -1
- package/dist/types/fill/writeFill.d.ts.map +1 -1
- package/dist/types/getTargetDictionary.d.ts +2 -2
- package/dist/types/index.d.ts +4 -1
- package/dist/types/init.d.ts +1 -1
- package/dist/types/listContentDeclaration.d.ts +1 -1
- package/dist/types/listContentDeclaration.d.ts.map +1 -1
- package/dist/types/listProjects.d.ts +1 -1
- package/dist/types/listProjects.d.ts.map +1 -1
- package/dist/types/liveSync.d.ts +1 -1
- package/dist/types/pull.d.ts +1 -1
- package/dist/types/pull.d.ts.map +1 -1
- package/dist/types/push/pullLog.d.ts +1 -1
- package/dist/types/push/pullLog.d.ts.map +1 -1
- package/dist/types/push/push.d.ts +2 -2
- package/dist/types/pushConfig.d.ts +1 -1
- package/dist/types/reviewDoc/reviewDoc.d.ts +2 -2
- package/dist/types/reviewDoc/reviewDoc.d.ts.map +1 -1
- package/dist/types/reviewDoc/reviewDocBlockAware.d.ts +1 -1
- package/dist/types/reviewDoc/reviewDocBlockAware.d.ts.map +1 -1
- package/dist/types/searchDoc.d.ts +1 -1
- package/dist/types/searchDoc.d.ts.map +1 -1
- package/dist/types/test/listMissingTranslations.d.ts +1 -1
- package/dist/types/test/test.d.ts +1 -1
- package/dist/types/test/test.d.ts.map +1 -1
- package/dist/types/translateDoc/translateDoc.d.ts.map +1 -1
- package/dist/types/translateDoc/types.d.ts +2 -2
- package/dist/types/translateDoc/validation.d.ts +1 -1
- package/dist/types/utils/checkAccess.d.ts.map +1 -1
- package/dist/types/watch.d.ts +1 -1
- package/dist/types/watch.d.ts.map +1 -1
- package/package.json +13 -13
|
@@ -1,102 +1,3 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
import { chunkInference } from "../utils/chunkInference.mjs";
|
|
4
|
-
import { fixChunkStartEndChars } from "../utils/fixChunkStartEndChars.mjs";
|
|
5
|
-
import { chunkText } from "../utils/calculateChunks.mjs";
|
|
6
|
-
import { formatLocale, formatPath } from "@intlayer/chokidar";
|
|
7
|
-
import { ANSIColors, colon, colorize, colorizeNumber, getAppLogger, retryManager } from "@intlayer/config";
|
|
8
|
-
import { dirname, relative } from "node:path";
|
|
9
|
-
import { mkdirSync, writeFileSync } from "node:fs";
|
|
10
|
-
import { readFile } from "node:fs/promises";
|
|
11
|
-
import { performance } from "node:perf_hooks";
|
|
12
|
-
|
|
13
|
-
//#region src/translateDoc/translateFile.ts
|
|
14
|
-
const translateFile = async ({ baseFilePath, outputFilePath, locale, baseLocale, configuration, errorState, aiOptions, customInstructions, aiClient, aiConfig, flushStrategy = "incremental", onChunkReceive, limit }) => {
|
|
15
|
-
if (errorState.shouldStop) return null;
|
|
16
|
-
const appLogger = getAppLogger(configuration, { config: { prefix: "" } });
|
|
17
|
-
const fileStartTime = performance.now();
|
|
18
|
-
try {
|
|
19
|
-
const chunks = chunkText(await readFile(baseFilePath, "utf-8"));
|
|
20
|
-
const totalChunks = chunks.length;
|
|
21
|
-
const filePrefix = `${colon(`${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}] `, { colSize: 40 })}${ANSIColors.RESET}`;
|
|
22
|
-
const prefix = `${colon(`${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}][${formatLocale(locale)}${ANSIColors.GREY_DARK}] `, { colSize: 40 })}${ANSIColors.RESET}`;
|
|
23
|
-
appLogger(`${filePrefix}Split into ${colorizeNumber(totalChunks)} chunks. Queuing...`);
|
|
24
|
-
const basePrompt = readAsset("./prompts/TRANSLATE_PROMPT.md", "utf-8").replaceAll("{{localeName}}", `${formatLocale(locale, false)}`).replaceAll("{{baseLocaleName}}", `${formatLocale(baseLocale, false)}`).replace("{{applicationContext}}", aiOptions?.applicationContext ?? "-").replace("{{customInstructions}}", customInstructions ?? "-");
|
|
25
|
-
const translatedParts = new Array(totalChunks).fill("");
|
|
26
|
-
const runTask = limit ?? ((fn) => fn());
|
|
27
|
-
const tasks = chunks.map((chunk, i) => runTask(async () => {
|
|
28
|
-
if (errorState.shouldStop) return null;
|
|
29
|
-
const chunkLogger = getAppLogger(configuration, { config: { prefix: `${prefix} ${ANSIColors.GREY_DARK}[${i + 1}/${totalChunks}] ${ANSIColors.RESET}` } });
|
|
30
|
-
const chunkStartTime = performance.now();
|
|
31
|
-
const isFirstChunk = i === 0;
|
|
32
|
-
const fileToTranslateCurrentChunk = chunk.content;
|
|
33
|
-
const getPrevChunkPrompt = () => `>>> CONTEXT: PREVIOUS SOURCE CONTENT <<<\n\`\`\`\n` + (chunks[i - 1]?.content ?? "") + `\n\`\`\`\n>>> END PREVIOUS CONTEXT <<<`;
|
|
34
|
-
const getBaseChunkContextPrompt = () => `>>> CONTEXT: NEXT CONTENT <<<\n\`\`\`\n` + (chunks[i + 1]?.content ?? "") + `\n\`\`\`\n>>> END NEXT CONTEXT <<<`;
|
|
35
|
-
chunkLogger("Process started");
|
|
36
|
-
const { content: translatedChunk, tokens } = await retryManager(async () => {
|
|
37
|
-
const result = await chunkInference([
|
|
38
|
-
{
|
|
39
|
-
role: "system",
|
|
40
|
-
content: basePrompt
|
|
41
|
-
},
|
|
42
|
-
...chunks[i + 1] ? [{
|
|
43
|
-
role: "system",
|
|
44
|
-
content: getBaseChunkContextPrompt()
|
|
45
|
-
}] : [],
|
|
46
|
-
...isFirstChunk ? [] : [{
|
|
47
|
-
role: "system",
|
|
48
|
-
content: getPrevChunkPrompt()
|
|
49
|
-
}],
|
|
50
|
-
{
|
|
51
|
-
role: "system",
|
|
52
|
-
content: [`You are translating TARGET CHUNK (${i + 1}/${totalChunks}).`, `Translate ONLY the target chunk. Preserve frontmatter/code exactly.`].join("\n")
|
|
53
|
-
},
|
|
54
|
-
{
|
|
55
|
-
role: "user",
|
|
56
|
-
content: `>>> TARGET CHUNK START <<<\n${fileToTranslateCurrentChunk}\n>>> TARGET CHUNK END <<<`
|
|
57
|
-
}
|
|
58
|
-
], aiOptions, configuration, aiClient, aiConfig);
|
|
59
|
-
let processedChunk = sanitizeChunk(result?.fileContent, fileToTranslateCurrentChunk);
|
|
60
|
-
processedChunk = fixChunkStartEndChars(processedChunk, fileToTranslateCurrentChunk);
|
|
61
|
-
if (!validateTranslation(fileToTranslateCurrentChunk, processedChunk, chunkLogger)) throw new Error(`Validation failed for chunk ${i + 1}/${totalChunks}`);
|
|
62
|
-
return {
|
|
63
|
-
content: processedChunk,
|
|
64
|
-
tokens: result.tokenUsed
|
|
65
|
-
};
|
|
66
|
-
})();
|
|
67
|
-
const chunkDuration = (performance.now() - chunkStartTime).toFixed(0);
|
|
68
|
-
translatedParts[i] = translatedChunk;
|
|
69
|
-
if (onChunkReceive) onChunkReceive(translatedChunk, i, totalChunks);
|
|
70
|
-
if (flushStrategy === "incremental") {
|
|
71
|
-
if (translatedParts.slice(0, i + 1).every((p) => p && p !== "")) {
|
|
72
|
-
let endIdx = 0;
|
|
73
|
-
while (endIdx < totalChunks && translatedParts[endIdx] && translatedParts[endIdx] !== "") endIdx++;
|
|
74
|
-
const currentContent = translatedParts.slice(0, endIdx).join("");
|
|
75
|
-
mkdirSync(dirname(outputFilePath), { recursive: true });
|
|
76
|
-
writeFileSync(outputFilePath, currentContent);
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
chunkLogger([`${colorizeNumber(tokens)} tokens used `, `${ANSIColors.GREY_DARK}in ${colorizeNumber(chunkDuration)}ms${ANSIColors.RESET}`].join(""));
|
|
80
|
-
}));
|
|
81
|
-
await Promise.all(tasks);
|
|
82
|
-
const fullContent = translatedParts.join("");
|
|
83
|
-
if (flushStrategy === "end" || flushStrategy === "incremental") {
|
|
84
|
-
mkdirSync(dirname(outputFilePath), { recursive: true });
|
|
85
|
-
writeFileSync(outputFilePath, fullContent);
|
|
86
|
-
}
|
|
87
|
-
const totalDuration = ((performance.now() - fileStartTime) / 1e3).toFixed(2);
|
|
88
|
-
const relativePath = relative(configuration.content.baseDir, outputFilePath);
|
|
89
|
-
appLogger(`${colorize("✔", ANSIColors.GREEN)} File ${formatPath(relativePath)} completed in ${colorizeNumber(totalDuration)}s.`);
|
|
90
|
-
return fullContent;
|
|
91
|
-
} catch (error) {
|
|
92
|
-
errorState.count++;
|
|
93
|
-
const errorMessage = error?.message ?? JSON.stringify(error);
|
|
94
|
-
appLogger(`${colorize("✖", ANSIColors.RED)} Error: ${errorMessage}`);
|
|
95
|
-
if (errorState.count >= errorState.maxErrors) errorState.shouldStop = true;
|
|
96
|
-
return null;
|
|
97
|
-
}
|
|
98
|
-
};
|
|
99
|
-
|
|
100
|
-
//#endregion
|
|
101
|
-
export { translateFile };
|
|
1
|
+
import{readAsset as e}from"../_virtual/_utils_asset.mjs";import{sanitizeChunk as t,validateTranslation as n}from"./validation.mjs";import{chunkInference as r}from"../utils/chunkInference.mjs";import{fixChunkStartEndChars as i}from"../utils/fixChunkStartEndChars.mjs";import{chunkText as a}from"../utils/calculateChunks.mjs";import{formatLocale as o,formatPath as s}from"@intlayer/chokidar/utils";import{dirname as c,relative as l}from"node:path";import{ANSIColors as u,colon as d,colorize as f,colorizeNumber as p,getAppLogger as m}from"@intlayer/config/logger";import{retryManager as h}from"@intlayer/config/utils";import{mkdirSync as g,writeFileSync as _}from"node:fs";import{readFile as v}from"node:fs/promises";import{performance as y}from"node:perf_hooks";const b=async({baseFilePath:b,outputFilePath:x,locale:S,baseLocale:C,configuration:w,errorState:T,aiOptions:E,customInstructions:D,aiClient:O,aiConfig:k,flushStrategy:A=`incremental`,onChunkReceive:j,limit:M})=>{if(T.shouldStop)return null;let N=m(w,{config:{prefix:``}}),P=y.now();try{let F=a(await v(b,`utf-8`)),I=F.length,L=`${d(`${u.GREY_DARK}[${s(b)}${u.GREY_DARK}] `,{colSize:40})}${u.RESET}`,R=`${d(`${u.GREY_DARK}[${s(b)}${u.GREY_DARK}][${o(S)}${u.GREY_DARK}] `,{colSize:40})}${u.RESET}`;N(`${L}Split into ${p(I)} chunks. 
Queuing...`);let z=e(`./prompts/TRANSLATE_PROMPT.md`,`utf-8`).replaceAll(`{{localeName}}`,`${o(S,!1)}`).replaceAll(`{{baseLocaleName}}`,`${o(C,!1)}`).replace(`{{applicationContext}}`,E?.applicationContext??`-`).replace(`{{customInstructions}}`,D??`-`),B=Array(I).fill(``),V=M??(e=>e()),H=F.map((e,a)=>V(async()=>{if(T.shouldStop)return null;let o=m(w,{config:{prefix:`${R} ${u.GREY_DARK}[${a+1}/${I}] ${u.RESET}`}}),s=y.now(),l=a===0,d=e.content,f=()=>">>> CONTEXT: PREVIOUS SOURCE CONTENT <<<\n```\n"+(F[a-1]?.content??``)+"\n```\n>>> END PREVIOUS CONTEXT <<<",v=()=>">>> CONTEXT: NEXT CONTENT <<<\n```\n"+(F[a+1]?.content??``)+"\n```\n>>> END NEXT CONTEXT <<<";o(`Process started`);let{content:b,tokens:S}=await h(async()=>{let e=await r([{role:`system`,content:z},...F[a+1]?[{role:`system`,content:v()}]:[],...l?[]:[{role:`system`,content:f()}],{role:`system`,content:[`You are translating TARGET CHUNK (${a+1}/${I}).`,`Translate ONLY the target chunk. Preserve frontmatter/code exactly.`].join(`
|
|
2
|
+
`)},{role:`user`,content:`>>> TARGET CHUNK START <<<\n${d}\n>>> TARGET CHUNK END <<<`}],E,w,O,k),s=t(e?.fileContent,d);if(s=i(s,d),!n(d,s,o))throw Error(`Validation failed for chunk ${a+1}/${I}`);return{content:s,tokens:e.tokenUsed}})(),C=(y.now()-s).toFixed(0);if(B[a]=b,j&&j(b,a,I),A===`incremental`&&B.slice(0,a+1).every(e=>e&&e!==``)){let e=0;for(;e<I&&B[e]&&B[e]!==``;)e++;let t=B.slice(0,e).join(``);g(c(x),{recursive:!0}),_(x,t)}o([`${p(S)} tokens used `,`${u.GREY_DARK}in ${p(C)}ms${u.RESET}`].join(``))}));await Promise.all(H);let U=B.join(``);(A===`end`||A===`incremental`)&&(g(c(x),{recursive:!0}),_(x,U));let W=((y.now()-P)/1e3).toFixed(2),G=l(w.content.baseDir,x);return N(`${f(`✔`,u.GREEN)} File ${s(G)} completed in ${p(W)}s.`),U}catch(e){T.count++;let t=e?.message??JSON.stringify(e);return N(`${f(`✖`,u.RED)} Error: ${t}`),T.count>=T.maxErrors&&(T.shouldStop=!0),null}};export{b as translateFile};
|
|
102
3
|
//# sourceMappingURL=translateFile.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"translateFile.mjs","names":[],"sources":["../../../src/translateDoc/translateFile.ts"],"sourcesContent":["import { mkdirSync, writeFileSync } from 'node:fs';\nimport { readFile } from 'node:fs/promises';\nimport { dirname, relative } from 'node:path';\nimport { performance } from 'node:perf_hooks';\nimport { readAsset } from 'utils:asset';\nimport { formatLocale, formatPath } from '@intlayer/chokidar';\nimport {\n ANSIColors,\n colon,\n colorize,\n colorizeNumber,\n getAppLogger,\n retryManager,\n} from '@intlayer/config';\nimport { chunkText } from '../utils/calculateChunks';\nimport { chunkInference } from '../utils/chunkInference';\nimport { fixChunkStartEndChars } from '../utils/fixChunkStartEndChars';\nimport type { TranslateFileOptions } from './types';\nimport { sanitizeChunk, validateTranslation } from './validation';\n\nexport const translateFile = async ({\n baseFilePath,\n outputFilePath,\n locale,\n baseLocale,\n configuration,\n errorState,\n aiOptions,\n customInstructions,\n aiClient,\n aiConfig,\n flushStrategy = 'incremental',\n onChunkReceive,\n limit, // The Global Limiter\n}: TranslateFileOptions): Promise<string | null> => {\n if (errorState.shouldStop) return null;\n\n const appLogger = getAppLogger(configuration, { config: { prefix: '' } });\n const fileStartTime = performance.now();\n\n try {\n const fileContent = await readFile(baseFilePath, 'utf-8');\n const chunks = chunkText(fileContent);\n const totalChunks = chunks.length;\n\n const filePrefixText = `${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}] `;\n const filePrefix = `${colon(filePrefixText, { colSize: 40 })}${ANSIColors.RESET}`;\n const prefixText = `${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}][${formatLocale(locale)}${ANSIColors.GREY_DARK}] `;\n const prefix = `${colon(prefixText, { colSize: 40 })}${ANSIColors.RESET}`;\n\n appLogger(\n `${filePrefix}Split into ${colorizeNumber(totalChunks)} chunks. 
Queuing...`\n );\n\n const basePrompt = readAsset('./prompts/TRANSLATE_PROMPT.md', 'utf-8')\n .replaceAll('{{localeName}}', `${formatLocale(locale, false)}`)\n .replaceAll('{{baseLocaleName}}', `${formatLocale(baseLocale, false)}`)\n .replace('{{applicationContext}}', aiOptions?.applicationContext ?? '-')\n .replace('{{customInstructions}}', customInstructions ?? '-');\n\n const translatedParts: string[] = new Array(totalChunks).fill('');\n\n // Fallback if no limiter is provided (runs immediately)\n const runTask = limit ?? ((fn) => fn());\n\n // MAP CHUNKS TO GLOBAL TASKS\n // This pushes ALL chunks for this file into the Global Queue immediately.\n // They will execute whenever the global concurrency slots open up.\n const tasks = chunks.map((chunk, i) =>\n runTask(async () => {\n if (errorState.shouldStop) return null;\n\n const chunkLogger = getAppLogger(configuration, {\n config: {\n prefix: `${prefix} ${ANSIColors.GREY_DARK}[${i + 1}/${totalChunks}] ${ANSIColors.RESET}`,\n },\n });\n\n const chunkStartTime = performance.now();\n const isFirstChunk = i === 0;\n const fileToTranslateCurrentChunk = chunk.content;\n\n // Context Preparation\n const getPrevChunkPrompt = () =>\n `>>> CONTEXT: PREVIOUS SOURCE CONTENT <<<\\n\\`\\`\\`\\n` +\n (chunks[i - 1]?.content ?? '') +\n `\\n\\`\\`\\`\\n>>> END PREVIOUS CONTEXT <<<`;\n\n const getBaseChunkContextPrompt = () =>\n `>>> CONTEXT: NEXT CONTENT <<<\\n\\`\\`\\`\\n` +\n (chunks[i + 1]?.content ?? '') +\n `\\n\\`\\`\\`\\n>>> END NEXT CONTEXT <<<`;\n\n chunkLogger('Process started');\n\n const chunkTranslation = retryManager(async () => {\n const result = await chunkInference(\n [\n { role: 'system', content: basePrompt },\n ...(chunks[i + 1]\n ? [\n {\n role: 'system',\n content: getBaseChunkContextPrompt(),\n } as const,\n ]\n : []),\n ...(isFirstChunk\n ? 
[]\n : [{ role: 'system', content: getPrevChunkPrompt() } as const]),\n {\n role: 'system',\n content: [\n `You are translating TARGET CHUNK (${i + 1}/${totalChunks}).`,\n `Translate ONLY the target chunk. Preserve frontmatter/code exactly.`,\n ].join('\\n'),\n },\n {\n role: 'user',\n content: `>>> TARGET CHUNK START <<<\\n${fileToTranslateCurrentChunk}\\n>>> TARGET CHUNK END <<<`,\n },\n ],\n aiOptions,\n configuration,\n aiClient,\n aiConfig\n );\n\n let processedChunk = sanitizeChunk(\n result?.fileContent,\n fileToTranslateCurrentChunk\n );\n processedChunk = fixChunkStartEndChars(\n processedChunk,\n fileToTranslateCurrentChunk\n );\n\n const isValid = validateTranslation(\n fileToTranslateCurrentChunk,\n processedChunk,\n chunkLogger\n );\n\n if (!isValid) {\n // Throwing an error here signals retryManager to try again\n throw new Error(\n `Validation failed for chunk ${i + 1}/${totalChunks}`\n );\n }\n\n return { content: processedChunk, tokens: result.tokenUsed };\n });\n\n const { content: translatedChunk, tokens } = await chunkTranslation();\n const chunkEndTime = performance.now();\n const chunkDuration = (chunkEndTime - chunkStartTime).toFixed(0);\n\n // Store Result\n translatedParts[i] = translatedChunk;\n\n if (onChunkReceive) {\n onChunkReceive(translatedChunk, i, totalChunks);\n }\n\n // Incremental Flush Strategy\n if (flushStrategy === 'incremental') {\n const isContiguous = translatedParts\n .slice(0, i + 1)\n .every((p) => p && p !== '');\n\n if (isContiguous) {\n let endIdx = 0;\n while (\n endIdx < totalChunks &&\n translatedParts[endIdx] &&\n translatedParts[endIdx] !== ''\n ) {\n endIdx++;\n }\n const currentContent = translatedParts.slice(0, endIdx).join('');\n // Write asynchronously/sync is fine here as node handles file locks reasonably well for single process\n mkdirSync(dirname(outputFilePath), { recursive: true });\n writeFileSync(outputFilePath, currentContent);\n }\n }\n\n chunkLogger(\n [\n `${colorizeNumber(tokens)} tokens used 
`,\n `${ANSIColors.GREY_DARK}in ${colorizeNumber(chunkDuration)}ms${ANSIColors.RESET}`,\n ].join('')\n );\n })\n );\n\n // Wait for all chunks for this specific file/locale to finish\n await Promise.all(tasks);\n\n // Final Flush\n const fullContent = translatedParts.join('');\n if (flushStrategy === 'end' || flushStrategy === 'incremental') {\n mkdirSync(dirname(outputFilePath), { recursive: true });\n writeFileSync(outputFilePath, fullContent);\n }\n\n const fileEndTime = performance.now();\n const totalDuration = ((fileEndTime - fileStartTime) / 1000).toFixed(2);\n const relativePath = relative(\n configuration.content.baseDir,\n outputFilePath\n );\n\n appLogger(\n `${colorize('✔', ANSIColors.GREEN)} File ${formatPath(relativePath)} completed in ${colorizeNumber(totalDuration)}s.`\n );\n\n return fullContent;\n } catch (error: any) {\n errorState.count++;\n const errorMessage = error?.message ?? JSON.stringify(error);\n appLogger(`${colorize('✖', ANSIColors.RED)} Error: ${errorMessage}`);\n if (errorState.count >= errorState.maxErrors) errorState.shouldStop = true;\n return null;\n 
}\n};\n"],"mappings":";;;;;;;;;;;;;AAoBA,MAAa,gBAAgB,OAAO,EAClC,cACA,gBACA,QACA,YACA,eACA,YACA,WACA,oBACA,UACA,UACA,gBAAgB,eAChB,gBACA,YACkD;AAClD,KAAI,WAAW,WAAY,QAAO;CAElC,MAAM,YAAY,aAAa,eAAe,EAAE,QAAQ,EAAE,QAAQ,IAAI,EAAE,CAAC;CACzE,MAAM,gBAAgB,YAAY,KAAK;AAEvC,KAAI;EAEF,MAAM,SAAS,UADK,MAAM,SAAS,cAAc,QAAQ,CACpB;EACrC,MAAM,cAAc,OAAO;EAG3B,MAAM,aAAa,GAAG,MADC,GAAG,WAAW,UAAU,GAAG,WAAW,aAAa,GAAG,WAAW,UAAU,KACtD,EAAE,SAAS,IAAI,CAAC,GAAG,WAAW;EAE1E,MAAM,SAAS,GAAG,MADC,GAAG,WAAW,UAAU,GAAG,WAAW,aAAa,GAAG,WAAW,UAAU,IAAI,aAAa,OAAO,GAAG,WAAW,UAAU,KAC1G,EAAE,SAAS,IAAI,CAAC,GAAG,WAAW;AAElE,YACE,GAAG,WAAW,aAAa,eAAe,YAAY,CAAC,qBACxD;EAED,MAAM,aAAa,UAAU,iCAAiC,QAAQ,CACnE,WAAW,kBAAkB,GAAG,aAAa,QAAQ,MAAM,GAAG,CAC9D,WAAW,sBAAsB,GAAG,aAAa,YAAY,MAAM,GAAG,CACtE,QAAQ,0BAA0B,WAAW,sBAAsB,IAAI,CACvE,QAAQ,0BAA0B,sBAAsB,IAAI;EAE/D,MAAM,kBAA4B,IAAI,MAAM,YAAY,CAAC,KAAK,GAAG;EAGjE,MAAM,UAAU,WAAW,OAAO,IAAI;EAKtC,MAAM,QAAQ,OAAO,KAAK,OAAO,MAC/B,QAAQ,YAAY;AAClB,OAAI,WAAW,WAAY,QAAO;GAElC,MAAM,cAAc,aAAa,eAAe,EAC9C,QAAQ,EACN,QAAQ,GAAG,OAAO,IAAI,WAAW,UAAU,GAAG,IAAI,EAAE,GAAG,YAAY,IAAI,WAAW,SACnF,EACF,CAAC;GAEF,MAAM,iBAAiB,YAAY,KAAK;GACxC,MAAM,eAAe,MAAM;GAC3B,MAAM,8BAA8B,MAAM;GAG1C,MAAM,2BACJ,wDACC,OAAO,IAAI,IAAI,WAAW,MAC3B;GAEF,MAAM,kCACJ,6CACC,OAAO,IAAI,IAAI,WAAW,MAC3B;AAEF,eAAY,kBAAkB;GA4D9B,MAAM,EAAE,SAAS,iBAAiB,WAAW,MA1DpB,aAAa,YAAY;IAChD,MAAM,SAAS,MAAM,eACnB;KACE;MAAE,MAAM;MAAU,SAAS;MAAY;KACvC,GAAI,OAAO,IAAI,KACX,CACE;MACE,MAAM;MACN,SAAS,2BAA2B;MACrC,CACF,GACD,EAAE;KACN,GAAI,eACA,EAAE,GACF,CAAC;MAAE,MAAM;MAAU,SAAS,oBAAoB;MAAE,CAAU;KAChE;MACE,MAAM;MACN,SAAS,CACP,qCAAqC,IAAI,EAAE,GAAG,YAAY,KAC1D,sEACD,CAAC,KAAK,KAAK;MACb;KACD;MACE,MAAM;MACN,SAAS,+BAA+B,4BAA4B;MACrE;KACF,EACD,WACA,eACA,UACA,SACD;IAED,IAAI,iBAAiB,cACnB,QAAQ,aACR,4BACD;AACD,qBAAiB,sBACf,gBACA,4BACD;AAQD,QAAI,CANY,oBACd,6BACA,gBACA,YACD,CAIC,OAAM,IAAI,MACR,+BAA+B,IAAI,EAAE,GAAG,cACzC;AAGH,WAAO;KAAE,SAAS;KAAgB,QAAQ,OAAO;KAAW;KAC5D,EAEmE;GAErE,MAAM,iBADe,YAAY,KAAK,GACA,gBAAgB,QAAQ,EAAE;AAGhE,mBAAgB,KAAK;AAErB,OAAI,eACF,gBAAe,iBAAiB,GAAG,YAA
Y;AAIjD,OAAI,kBAAkB,eAKpB;QAJqB,gBAClB,MAAM,GAAG,IAAI,EAAE,CACf,OAAO,MAAM,KAAK,MAAM,GAAG,EAEZ;KAChB,IAAI,SAAS;AACb,YACE,SAAS,eACT,gBAAgB,WAChB,gBAAgB,YAAY,GAE5B;KAEF,MAAM,iBAAiB,gBAAgB,MAAM,GAAG,OAAO,CAAC,KAAK,GAAG;AAEhE,eAAU,QAAQ,eAAe,EAAE,EAAE,WAAW,MAAM,CAAC;AACvD,mBAAc,gBAAgB,eAAe;;;AAIjD,eACE,CACE,GAAG,eAAe,OAAO,CAAC,gBAC1B,GAAG,WAAW,UAAU,KAAK,eAAe,cAAc,CAAC,IAAI,WAAW,QAC3E,CAAC,KAAK,GAAG,CACX;IACD,CACH;AAGD,QAAM,QAAQ,IAAI,MAAM;EAGxB,MAAM,cAAc,gBAAgB,KAAK,GAAG;AAC5C,MAAI,kBAAkB,SAAS,kBAAkB,eAAe;AAC9D,aAAU,QAAQ,eAAe,EAAE,EAAE,WAAW,MAAM,CAAC;AACvD,iBAAc,gBAAgB,YAAY;;EAI5C,MAAM,kBADc,YAAY,KAAK,GACC,iBAAiB,KAAM,QAAQ,EAAE;EACvE,MAAM,eAAe,SACnB,cAAc,QAAQ,SACtB,eACD;AAED,YACE,GAAG,SAAS,KAAK,WAAW,MAAM,CAAC,QAAQ,WAAW,aAAa,CAAC,gBAAgB,eAAe,cAAc,CAAC,IACnH;AAED,SAAO;UACA,OAAY;AACnB,aAAW;EACX,MAAM,eAAe,OAAO,WAAW,KAAK,UAAU,MAAM;AAC5D,YAAU,GAAG,SAAS,KAAK,WAAW,IAAI,CAAC,UAAU,eAAe;AACpE,MAAI,WAAW,SAAS,WAAW,UAAW,YAAW,aAAa;AACtE,SAAO"}
|
|
1
|
+
{"version":3,"file":"translateFile.mjs","names":[],"sources":["../../../src/translateDoc/translateFile.ts"],"sourcesContent":["import { mkdirSync, writeFileSync } from 'node:fs';\nimport { readFile } from 'node:fs/promises';\nimport { dirname, relative } from 'node:path';\nimport { performance } from 'node:perf_hooks';\nimport { readAsset } from 'utils:asset';\nimport { formatLocale, formatPath } from '@intlayer/chokidar/utils';\nimport {\n ANSIColors,\n colon,\n colorize,\n colorizeNumber,\n getAppLogger,\n} from '@intlayer/config/logger';\nimport { retryManager } from '@intlayer/config/utils';\nimport { chunkText } from '../utils/calculateChunks';\nimport { chunkInference } from '../utils/chunkInference';\nimport { fixChunkStartEndChars } from '../utils/fixChunkStartEndChars';\nimport type { TranslateFileOptions } from './types';\nimport { sanitizeChunk, validateTranslation } from './validation';\n\nexport const translateFile = async ({\n baseFilePath,\n outputFilePath,\n locale,\n baseLocale,\n configuration,\n errorState,\n aiOptions,\n customInstructions,\n aiClient,\n aiConfig,\n flushStrategy = 'incremental',\n onChunkReceive,\n limit, // The Global Limiter\n}: TranslateFileOptions): Promise<string | null> => {\n if (errorState.shouldStop) return null;\n\n const appLogger = getAppLogger(configuration, { config: { prefix: '' } });\n const fileStartTime = performance.now();\n\n try {\n const fileContent = await readFile(baseFilePath, 'utf-8');\n const chunks = chunkText(fileContent);\n const totalChunks = chunks.length;\n\n const filePrefixText = `${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}] `;\n const filePrefix = `${colon(filePrefixText, { colSize: 40 })}${ANSIColors.RESET}`;\n const prefixText = `${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}][${formatLocale(locale)}${ANSIColors.GREY_DARK}] `;\n const prefix = `${colon(prefixText, { colSize: 40 })}${ANSIColors.RESET}`;\n\n appLogger(\n 
`${filePrefix}Split into ${colorizeNumber(totalChunks)} chunks. Queuing...`\n );\n\n const basePrompt = readAsset('./prompts/TRANSLATE_PROMPT.md', 'utf-8')\n .replaceAll('{{localeName}}', `${formatLocale(locale, false)}`)\n .replaceAll('{{baseLocaleName}}', `${formatLocale(baseLocale, false)}`)\n .replace('{{applicationContext}}', aiOptions?.applicationContext ?? '-')\n .replace('{{customInstructions}}', customInstructions ?? '-');\n\n const translatedParts: string[] = new Array(totalChunks).fill('');\n\n // Fallback if no limiter is provided (runs immediately)\n const runTask = limit ?? ((fn) => fn());\n\n // MAP CHUNKS TO GLOBAL TASKS\n // This pushes ALL chunks for this file into the Global Queue immediately.\n // They will execute whenever the global concurrency slots open up.\n const tasks = chunks.map((chunk, i) =>\n runTask(async () => {\n if (errorState.shouldStop) return null;\n\n const chunkLogger = getAppLogger(configuration, {\n config: {\n prefix: `${prefix} ${ANSIColors.GREY_DARK}[${i + 1}/${totalChunks}] ${ANSIColors.RESET}`,\n },\n });\n\n const chunkStartTime = performance.now();\n const isFirstChunk = i === 0;\n const fileToTranslateCurrentChunk = chunk.content;\n\n // Context Preparation\n const getPrevChunkPrompt = () =>\n `>>> CONTEXT: PREVIOUS SOURCE CONTENT <<<\\n\\`\\`\\`\\n` +\n (chunks[i - 1]?.content ?? '') +\n `\\n\\`\\`\\`\\n>>> END PREVIOUS CONTEXT <<<`;\n\n const getBaseChunkContextPrompt = () =>\n `>>> CONTEXT: NEXT CONTENT <<<\\n\\`\\`\\`\\n` +\n (chunks[i + 1]?.content ?? '') +\n `\\n\\`\\`\\`\\n>>> END NEXT CONTEXT <<<`;\n\n chunkLogger('Process started');\n\n const chunkTranslation = retryManager(async () => {\n const result = await chunkInference(\n [\n { role: 'system', content: basePrompt },\n ...(chunks[i + 1]\n ? [\n {\n role: 'system',\n content: getBaseChunkContextPrompt(),\n } as const,\n ]\n : []),\n ...(isFirstChunk\n ? 
[]\n : [{ role: 'system', content: getPrevChunkPrompt() } as const]),\n {\n role: 'system',\n content: [\n `You are translating TARGET CHUNK (${i + 1}/${totalChunks}).`,\n `Translate ONLY the target chunk. Preserve frontmatter/code exactly.`,\n ].join('\\n'),\n },\n {\n role: 'user',\n content: `>>> TARGET CHUNK START <<<\\n${fileToTranslateCurrentChunk}\\n>>> TARGET CHUNK END <<<`,\n },\n ],\n aiOptions,\n configuration,\n aiClient,\n aiConfig\n );\n\n let processedChunk = sanitizeChunk(\n result?.fileContent,\n fileToTranslateCurrentChunk\n );\n processedChunk = fixChunkStartEndChars(\n processedChunk,\n fileToTranslateCurrentChunk\n );\n\n const isValid = validateTranslation(\n fileToTranslateCurrentChunk,\n processedChunk,\n chunkLogger\n );\n\n if (!isValid) {\n // Throwing an error here signals retryManager to try again\n throw new Error(\n `Validation failed for chunk ${i + 1}/${totalChunks}`\n );\n }\n\n return { content: processedChunk, tokens: result.tokenUsed };\n });\n\n const { content: translatedChunk, tokens } = await chunkTranslation();\n const chunkEndTime = performance.now();\n const chunkDuration = (chunkEndTime - chunkStartTime).toFixed(0);\n\n // Store Result\n translatedParts[i] = translatedChunk;\n\n if (onChunkReceive) {\n onChunkReceive(translatedChunk, i, totalChunks);\n }\n\n // Incremental Flush Strategy\n if (flushStrategy === 'incremental') {\n const isContiguous = translatedParts\n .slice(0, i + 1)\n .every((p) => p && p !== '');\n\n if (isContiguous) {\n let endIdx = 0;\n while (\n endIdx < totalChunks &&\n translatedParts[endIdx] &&\n translatedParts[endIdx] !== ''\n ) {\n endIdx++;\n }\n const currentContent = translatedParts.slice(0, endIdx).join('');\n // Write asynchronously/sync is fine here as node handles file locks reasonably well for single process\n mkdirSync(dirname(outputFilePath), { recursive: true });\n writeFileSync(outputFilePath, currentContent);\n }\n }\n\n chunkLogger(\n [\n `${colorizeNumber(tokens)} tokens used 
`,\n `${ANSIColors.GREY_DARK}in ${colorizeNumber(chunkDuration)}ms${ANSIColors.RESET}`,\n ].join('')\n );\n })\n );\n\n // Wait for all chunks for this specific file/locale to finish\n await Promise.all(tasks);\n\n // Final Flush\n const fullContent = translatedParts.join('');\n if (flushStrategy === 'end' || flushStrategy === 'incremental') {\n mkdirSync(dirname(outputFilePath), { recursive: true });\n writeFileSync(outputFilePath, fullContent);\n }\n\n const fileEndTime = performance.now();\n const totalDuration = ((fileEndTime - fileStartTime) / 1000).toFixed(2);\n const relativePath = relative(\n configuration.content.baseDir,\n outputFilePath\n );\n\n appLogger(\n `${colorize('✔', ANSIColors.GREEN)} File ${formatPath(relativePath)} completed in ${colorizeNumber(totalDuration)}s.`\n );\n\n return fullContent;\n } catch (error: any) {\n errorState.count++;\n const errorMessage = error?.message ?? JSON.stringify(error);\n appLogger(`${colorize('✖', ANSIColors.RED)} Error: ${errorMessage}`);\n if (errorState.count >= errorState.maxErrors) errorState.shouldStop = true;\n return null;\n 
}\n};\n"],"mappings":"yvBAoBA,MAAa,EAAgB,MAAO,CAClC,eACA,iBACA,SACA,aACA,gBACA,aACA,YACA,qBACA,WACA,WACA,gBAAgB,cAChB,iBACA,WACkD,CAClD,GAAI,EAAW,WAAY,OAAO,KAElC,IAAM,EAAY,EAAa,EAAe,CAAE,OAAQ,CAAE,OAAQ,GAAI,CAAE,CAAC,CACnE,EAAgB,EAAY,KAAK,CAEvC,GAAI,CAEF,IAAM,EAAS,EADK,MAAM,EAAS,EAAc,QAAQ,CACpB,CAC/B,EAAc,EAAO,OAGrB,EAAa,GAAG,EADC,GAAG,EAAW,UAAU,GAAG,EAAW,EAAa,GAAG,EAAW,UAAU,IACtD,CAAE,QAAS,GAAI,CAAC,GAAG,EAAW,QAEpE,EAAS,GAAG,EADC,GAAG,EAAW,UAAU,GAAG,EAAW,EAAa,GAAG,EAAW,UAAU,IAAI,EAAa,EAAO,GAAG,EAAW,UAAU,IAC1G,CAAE,QAAS,GAAI,CAAC,GAAG,EAAW,QAElE,EACE,GAAG,EAAW,aAAa,EAAe,EAAY,CAAC,qBACxD,CAED,IAAM,EAAa,EAAU,gCAAiC,QAAQ,CACnE,WAAW,iBAAkB,GAAG,EAAa,EAAQ,GAAM,GAAG,CAC9D,WAAW,qBAAsB,GAAG,EAAa,EAAY,GAAM,GAAG,CACtE,QAAQ,yBAA0B,GAAW,oBAAsB,IAAI,CACvE,QAAQ,yBAA0B,GAAsB,IAAI,CAEzD,EAAgC,MAAM,EAAY,CAAC,KAAK,GAAG,CAG3D,EAAU,IAAW,GAAO,GAAI,EAKhC,EAAQ,EAAO,KAAK,EAAO,IAC/B,EAAQ,SAAY,CAClB,GAAI,EAAW,WAAY,OAAO,KAElC,IAAM,EAAc,EAAa,EAAe,CAC9C,OAAQ,CACN,OAAQ,GAAG,EAAO,IAAI,EAAW,UAAU,GAAG,EAAI,EAAE,GAAG,EAAY,IAAI,EAAW,QACnF,CACF,CAAC,CAEI,EAAiB,EAAY,KAAK,CAClC,EAAe,IAAM,EACrB,EAA8B,EAAM,QAGpC,MACJ,mDACC,EAAO,EAAI,IAAI,SAAW,IAC3B,sCAEI,MACJ,wCACC,EAAO,EAAI,IAAI,SAAW,IAC3B,kCAEF,EAAY,kBAAkB,CA4D9B,GAAM,CAAE,QAAS,EAAiB,UAAW,MA1DpB,EAAa,SAAY,CAChD,IAAM,EAAS,MAAM,EACnB,CACE,CAAE,KAAM,SAAU,QAAS,EAAY,CACvC,GAAI,EAAO,EAAI,GACX,CACE,CACE,KAAM,SACN,QAAS,GAA2B,CACrC,CACF,CACD,EAAE,CACN,GAAI,EACA,EAAE,CACF,CAAC,CAAE,KAAM,SAAU,QAAS,GAAoB,CAAE,CAAU,CAChE,CACE,KAAM,SACN,QAAS,CACP,qCAAqC,EAAI,EAAE,GAAG,EAAY,IAC1D,sEACD,CAAC,KAAK;EAAK,CACb,CACD,CACE,KAAM,OACN,QAAS,+BAA+B,EAA4B,4BACrE,CACF,CACD,EACA,EACA,EACA,EACD,CAEG,EAAiB,EACnB,GAAQ,YACR,EACD,CAYD,GAXA,EAAiB,EACf,EACA,EACD,CAQG,CANY,EACd,EACA,EACA,EACD,CAIC,MAAU,MACR,+BAA+B,EAAI,EAAE,GAAG,IACzC,CAGH,MAAO,CAAE,QAAS,EAAgB,OAAQ,EAAO,UAAW,EAC5D,EAEmE,CAE/D,GADe,EAAY,KAAK,CACA,GAAgB,QAAQ,EAAE,CAUhE,GAPA,EAAgB,GAAK,EAEjB,GACF,EAAe,EAAiB,EAAG,EAAY,CAI7C,IAAkB,eACC,EAClB,MAAM,EAAG,EAAI,EAAE,CACf,MAAO,GAAM,GAAK,IAAM,GAAG,CAEZ,CAChB,IAAI,EAAS,EACb,KACE,EAAS,GA
CT,EAAgB,IAChB,EAAgB,KAAY,IAE5B,IAEF,IAAM,EAAiB,EAAgB,MAAM,EAAG,EAAO,CAAC,KAAK,GAAG,CAEhE,EAAU,EAAQ,EAAe,CAAE,CAAE,UAAW,GAAM,CAAC,CACvD,EAAc,EAAgB,EAAe,CAIjD,EACE,CACE,GAAG,EAAe,EAAO,CAAC,eAC1B,GAAG,EAAW,UAAU,KAAK,EAAe,EAAc,CAAC,IAAI,EAAW,QAC3E,CAAC,KAAK,GAAG,CACX,EACD,CACH,CAGD,MAAM,QAAQ,IAAI,EAAM,CAGxB,IAAM,EAAc,EAAgB,KAAK,GAAG,EACxC,IAAkB,OAAS,IAAkB,iBAC/C,EAAU,EAAQ,EAAe,CAAE,CAAE,UAAW,GAAM,CAAC,CACvD,EAAc,EAAgB,EAAY,EAI5C,IAAM,IADc,EAAY,KAAK,CACC,GAAiB,KAAM,QAAQ,EAAE,CACjE,EAAe,EACnB,EAAc,QAAQ,QACtB,EACD,CAMD,OAJA,EACE,GAAG,EAAS,IAAK,EAAW,MAAM,CAAC,QAAQ,EAAW,EAAa,CAAC,gBAAgB,EAAe,EAAc,CAAC,IACnH,CAEM,QACA,EAAY,CACnB,EAAW,QACX,IAAM,EAAe,GAAO,SAAW,KAAK,UAAU,EAAM,CAG5D,OAFA,EAAU,GAAG,EAAS,IAAK,EAAW,IAAI,CAAC,UAAU,IAAe,CAChE,EAAW,OAAS,EAAW,YAAW,EAAW,WAAa,IAC/D"}
|
|
@@ -1,47 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
/**
 * Checks that a translated chunk keeps the structural markers of its source.
 *
 * Four heuristics are applied and every failure is collected:
 *  - when the source starts with `---`, the output must too, and a source
 *    with at least two `---` lines requires at least two in the output;
 *  - the number of lines opening with a code fence must be identical;
 *  - for sources longer than 50 characters the output may not be more than
 *    2.5 times as long;
 *  - for sources with more than 5 lines the output must keep at least 40% of
 *    the line count.
 *
 * @param original - Source chunk.
 * @param translated - Translated chunk to verify.
 * @param logger - Called once with a summary when at least one check fails.
 * @returns `true` when every check passes, otherwise `false`.
 */
const validateTranslation = (original, translated, logger) => {
  const errors = [];

  // YAML frontmatter integrity.
  if (original.trimStart().startsWith("---")) {
    if (!translated.trimStart().startsWith("---")) {
      errors.push("YAML Frontmatter missing: Input starts with \"---\", output does not.");
    }
    const originalDashes = (original.match(/^---$/gm) || []).length;
    const translatedDashes = (translated.match(/^---$/gm) || []).length;
    if (originalDashes >= 2 && translatedDashes < 2) {
      errors.push("YAML Frontmatter unclosed: Input has closing \"---\", output is missing it.");
    }
  }

  // Code fence count.
  const fenceRegex = /^\s*```/gm;
  const originalFences = (original.match(fenceRegex) || []).length;
  const translatedFences = (translated.match(fenceRegex) || []).length;
  if (originalFences !== translatedFences) {
    errors.push(`Code fence mismatch: Input has ${originalFences}, output has ${translatedFences}`);
  }

  // Length ratio; `|| 1` avoids dividing by zero for an empty source.
  const ratio = translated.length / (original.length || 1);
  const isTooLong = ratio > 2.5;
  const isSignificantLength = original.length > 50;
  if (isTooLong && isSignificantLength) {
    errors.push(`Length deviation: Output is ${translated.length} chars vs Input ${original.length} (${ratio.toFixed(1)}x). Likely included context.`);
  }

  // Line count heuristic.
  const originalLines = original.split("\n").length;
  const translatedLines = translated.split("\n").length;
  if (originalLines > 5 && translatedLines < originalLines * .4) {
    errors.push(`Line count deviation: Output has ${translatedLines} lines, Input has ${originalLines}. Likely content deletion.`);
  }

  if (errors.length > 0) {
    logger(`Validation Failed: ${errors.join(", ")}`);
    return false;
  }
  return true;
};
|
|
33
|
-
/**
 * Cleans common AI artifacts from a translated chunk.
 *
 *  - A ```` ```markdown ````/```` ```md ````/```` ```txt ````/bare fence that
 *    wraps the entire answer is removed. When the source chunk is itself such
 *    a fenced block, the fence belongs to the content and is only removed if
 *    the answer is wrapped a second time.
 *  - Leading newlines are dropped when the source does not start with one.
 *  - Leading whitespace is dropped when the source does not start with a space.
 *
 * @param translated - Raw translated text returned by the model.
 * @param original - Source chunk the translation was produced from.
 * @returns The cleaned translation.
 * @throws {TypeError} When `translated` is not a string.
 */
const sanitizeChunk = (translated, original) => {
  if (typeof translated !== "string") {
    throw new TypeError(`sanitizeChunk expected a string translation, received ${typeof translated}`);
  }

  const wrapRegex = /^```(?:markdown|md|txt)?\n([\s\S]*?)\n```$/i;
  let cleaned = translated;

  const match = cleaned.match(wrapRegex);
  if (match) {
    // Keep a fence that mirrors the source; strip one that was added on top.
    const sourceIsWrapped = wrapRegex.test(original);
    if (!sourceIsWrapped || wrapRegex.test(match[1])) cleaned = match[1];
  }

  if (!original.startsWith("\n") && cleaned.startsWith("\n")) {
    cleaned = cleaned.replace(/^\n+/, "");
  }
  if (!original.startsWith(" ") && cleaned.startsWith(" ")) {
    cleaned = cleaned.trimStart();
  }
  return cleaned;
};
|
|
44
|
-
|
|
45
|
-
//#endregion
|
|
46
|
-
export { sanitizeChunk, validateTranslation };
|
|
1
|
+
/**
 * Checks that a translated chunk keeps the structural markers of its source.
 *
 * Four heuristics are applied and every failure is collected:
 *  - when the source starts with `---`, the output must too, and a source
 *    with at least two `---` lines requires at least two in the output;
 *  - the number of lines opening with a code fence must be identical;
 *  - for sources longer than 50 characters the output may not be more than
 *    2.5 times as long;
 *  - for sources with more than 5 lines the output must keep at least 40% of
 *    the line count.
 *
 * @param original - Source chunk.
 * @param translated - Translated chunk to verify.
 * @param logger - Called once with a summary when at least one check fails.
 * @returns `true` when every check passes, otherwise `false`.
 */
const validateTranslation = (original, translated, logger) => {
  const errors = [];

  if (original.trimStart().startsWith(`---`)) {
    if (!translated.trimStart().startsWith(`---`)) {
      errors.push(`YAML Frontmatter missing: Input starts with "---", output does not.`);
    }
    const originalDashes = (original.match(/^---$/gm) || []).length;
    const translatedDashes = (translated.match(/^---$/gm) || []).length;
    if (originalDashes >= 2 && translatedDashes < 2) {
      errors.push(`YAML Frontmatter unclosed: Input has closing "---", output is missing it.`);
    }
  }

  const fenceRegex = /^\s*```/gm;
  const originalFences = (original.match(fenceRegex) || []).length;
  const translatedFences = (translated.match(fenceRegex) || []).length;
  if (originalFences !== translatedFences) {
    errors.push(`Code fence mismatch: Input has ${originalFences}, output has ${translatedFences}`);
  }

  // `|| 1` avoids dividing by zero for an empty source.
  const ratio = translated.length / (original.length || 1);
  const isTooLong = ratio > 2.5;
  const isSignificantLength = original.length > 50;
  if (isTooLong && isSignificantLength) {
    errors.push(`Length deviation: Output is ${translated.length} chars vs Input ${original.length} (${ratio.toFixed(1)}x). Likely included context.`);
  }

  // Lines are separated by a plain newline character.
  const originalLines = original.split("\n").length;
  const translatedLines = translated.split("\n").length;
  if (originalLines > 5 && translatedLines < originalLines * .4) {
    errors.push(`Line count deviation: Output has ${translatedLines} lines, Input has ${originalLines}. Likely content deletion.`);
  }

  if (errors.length > 0) {
    logger(`Validation Failed: ${errors.join(`, `)}`);
    return false;
  }
  return true;
};

/**
 * Cleans common AI artifacts from a translated chunk.
 *
 *  - A ```` ```markdown ````/```` ```md ````/```` ```txt ````/bare fence that
 *    wraps the entire answer is removed. When the source chunk is itself such
 *    a fenced block, the fence belongs to the content and is only removed if
 *    the answer is wrapped a second time.
 *  - Leading newlines are dropped when the source does not start with one.
 *  - Leading whitespace is dropped when the source does not start with a space.
 *
 * @param translated - Raw translated text returned by the model.
 * @param original - Source chunk the translation was produced from.
 * @returns The cleaned translation.
 * @throws {TypeError} When `translated` is not a string.
 */
const sanitizeChunk = (translated, original) => {
  if (typeof translated !== `string`) {
    throw new TypeError(`sanitizeChunk expected a string translation, received ${typeof translated}`);
  }

  const wrapRegex = /^```(?:markdown|md|txt)?\n([\s\S]*?)\n```$/i;
  let cleaned = translated;

  const match = cleaned.match(wrapRegex);
  if (match) {
    // Keep a fence that mirrors the source; strip one that was added on top.
    const sourceIsWrapped = wrapRegex.test(original);
    if (!sourceIsWrapped || wrapRegex.test(match[1])) cleaned = match[1];
  }

  if (!original.startsWith("\n") && cleaned.startsWith("\n")) {
    cleaned = cleaned.replace(/^\n+/, ``);
  }
  if (!original.startsWith(` `) && cleaned.startsWith(` `)) {
    cleaned = cleaned.trimStart();
  }
  return cleaned;
};

export { sanitizeChunk, validateTranslation };
|
|
47
6
|
//# sourceMappingURL=validation.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validation.mjs","names":[],"sources":["../../../src/translateDoc/validation.ts"],"sourcesContent":["import type { Logger } from '@intlayer/config';\n\n/**\n * Validates that the translated content matches the structure of the original.\n * Throws an error if a mismatch is found, triggering a retry.\n */\nexport const validateTranslation = (\n original: string,\n translated: string,\n logger: Logger\n): boolean => {\n const errors: string[] = [];\n\n // YAML Frontmatter Integrity (CRITICAL)\n if (original.trimStart().startsWith('---')) {\n if (!translated.trimStart().startsWith('---')) {\n errors.push(\n 'YAML Frontmatter missing: Input starts with \"---\", output does not.'\n );\n }\n const originalDashes = (original.match(/^---$/gm) || []).length;\n const translatedDashes = (translated.match(/^---$/gm) || []).length;\n if (originalDashes >= 2 && translatedDashes < 2) {\n errors.push(\n 'YAML Frontmatter unclosed: Input has closing \"---\", output is missing it.'\n );\n }\n }\n\n // Code Fence Check\n const fenceRegex = /^\\s*```/gm;\n const originalFences = (original.match(fenceRegex) || []).length;\n const translatedFences = (translated.match(fenceRegex) || []).length;\n\n if (originalFences !== translatedFences) {\n errors.push(\n `Code fence mismatch: Input has ${originalFences}, output has ${translatedFences}`\n );\n }\n\n // Length/Duplication Check\n const ratio = translated.length / (original.length || 1);\n const isTooLong = ratio > 2.5;\n const isSignificantLength = original.length > 50;\n\n if (isTooLong && isSignificantLength) {\n errors.push(\n `Length deviation: Output is ${translated.length} chars vs Input ${original.length} (${ratio.toFixed(1)}x). 
Likely included context.`\n );\n }\n\n // Line Count Heuristic\n const originalLines = original.split('\\n').length;\n const translatedLines = translated.split('\\n').length;\n\n if (originalLines > 5) {\n if (translatedLines < originalLines * 0.4) {\n errors.push(\n `Line count deviation: Output has ${translatedLines} lines, Input has ${originalLines}. Likely content deletion.`\n );\n }\n }\n\n if (errors.length > 0) {\n logger(`Validation Failed: ${errors.join(', ')}`);\n return false;\n }\n\n return true;\n};\n\n/**\n * Clean common AI artifacts\n */\nexport const sanitizeChunk = (translated: string, original: string): string => {\n let cleaned = translated;\n const wrapRegex = /^```(?:markdown|md|txt)?\\n([\\s\\S]*?)\\n```$/i;\n const match = cleaned.match(wrapRegex);\n if (match) cleaned = match[1];\n\n if (!original.startsWith('\\n') && cleaned.startsWith('\\n')) {\n cleaned = cleaned.replace(/^\\n+/, '');\n }\n if (!original.startsWith(' ') && cleaned.startsWith(' ')) {\n cleaned = cleaned.trimStart();\n }\n return cleaned;\n};\n"],"mappings":"
|
|
1
|
+
{"version":3,"file":"validation.mjs","names":[],"sources":["../../../src/translateDoc/validation.ts"],"sourcesContent":["import type { Logger } from '@intlayer/config/logger';\n\n/**\n * Validates that the translated content matches the structure of the original.\n * Throws an error if a mismatch is found, triggering a retry.\n */\nexport const validateTranslation = (\n original: string,\n translated: string,\n logger: Logger\n): boolean => {\n const errors: string[] = [];\n\n // YAML Frontmatter Integrity (CRITICAL)\n if (original.trimStart().startsWith('---')) {\n if (!translated.trimStart().startsWith('---')) {\n errors.push(\n 'YAML Frontmatter missing: Input starts with \"---\", output does not.'\n );\n }\n const originalDashes = (original.match(/^---$/gm) || []).length;\n const translatedDashes = (translated.match(/^---$/gm) || []).length;\n if (originalDashes >= 2 && translatedDashes < 2) {\n errors.push(\n 'YAML Frontmatter unclosed: Input has closing \"---\", output is missing it.'\n );\n }\n }\n\n // Code Fence Check\n const fenceRegex = /^\\s*```/gm;\n const originalFences = (original.match(fenceRegex) || []).length;\n const translatedFences = (translated.match(fenceRegex) || []).length;\n\n if (originalFences !== translatedFences) {\n errors.push(\n `Code fence mismatch: Input has ${originalFences}, output has ${translatedFences}`\n );\n }\n\n // Length/Duplication Check\n const ratio = translated.length / (original.length || 1);\n const isTooLong = ratio > 2.5;\n const isSignificantLength = original.length > 50;\n\n if (isTooLong && isSignificantLength) {\n errors.push(\n `Length deviation: Output is ${translated.length} chars vs Input ${original.length} (${ratio.toFixed(1)}x). 
Likely included context.`\n );\n }\n\n // Line Count Heuristic\n const originalLines = original.split('\\n').length;\n const translatedLines = translated.split('\\n').length;\n\n if (originalLines > 5) {\n if (translatedLines < originalLines * 0.4) {\n errors.push(\n `Line count deviation: Output has ${translatedLines} lines, Input has ${originalLines}. Likely content deletion.`\n );\n }\n }\n\n if (errors.length > 0) {\n logger(`Validation Failed: ${errors.join(', ')}`);\n return false;\n }\n\n return true;\n};\n\n/**\n * Clean common AI artifacts\n */\nexport const sanitizeChunk = (translated: string, original: string): string => {\n let cleaned = translated;\n const wrapRegex = /^```(?:markdown|md|txt)?\\n([\\s\\S]*?)\\n```$/i;\n const match = cleaned.match(wrapRegex);\n if (match) cleaned = match[1];\n\n if (!original.startsWith('\\n') && cleaned.startsWith('\\n')) {\n cleaned = cleaned.replace(/^\\n+/, '');\n }\n if (!original.startsWith(' ') && cleaned.startsWith(' ')) {\n cleaned = cleaned.trimStart();\n }\n return 
cleaned;\n};\n"],"mappings":"AAMA,MAAa,GACX,EACA,EACA,IACY,CACZ,IAAM,EAAmB,EAAE,CAG3B,GAAI,EAAS,WAAW,CAAC,WAAW,MAAM,CAAE,CACrC,EAAW,WAAW,CAAC,WAAW,MAAM,EAC3C,EAAO,KACL,sEACD,CAEH,IAAM,GAAkB,EAAS,MAAM,UAAU,EAAI,EAAE,EAAE,OACnD,GAAoB,EAAW,MAAM,UAAU,EAAI,EAAE,EAAE,OACzD,GAAkB,GAAK,EAAmB,GAC5C,EAAO,KACL,4EACD,CAKL,IAAM,EAAa,YACb,GAAkB,EAAS,MAAM,EAAW,EAAI,EAAE,EAAE,OACpD,GAAoB,EAAW,MAAM,EAAW,EAAI,EAAE,EAAE,OAE1D,IAAmB,GACrB,EAAO,KACL,kCAAkC,EAAe,eAAe,IACjE,CAIH,IAAM,EAAQ,EAAW,QAAU,EAAS,QAAU,GAChD,EAAY,EAAQ,IACpB,EAAsB,EAAS,OAAS,GAE1C,GAAa,GACf,EAAO,KACL,+BAA+B,EAAW,OAAO,kBAAkB,EAAS,OAAO,IAAI,EAAM,QAAQ,EAAE,CAAC,8BACzG,CAIH,IAAM,EAAgB,EAAS,MAAM;EAAK,CAAC,OACrC,EAAkB,EAAW,MAAM;EAAK,CAAC,OAe/C,OAbI,EAAgB,GACd,EAAkB,EAAgB,IACpC,EAAO,KACL,oCAAoC,EAAgB,oBAAoB,EAAc,4BACvF,CAID,EAAO,OAAS,GAClB,EAAO,sBAAsB,EAAO,KAAK,KAAK,GAAG,CAC1C,IAGF,IAMI,GAAiB,EAAoB,IAA6B,CAC7E,IAAI,EAAU,EAER,EAAQ,EAAQ,MADJ,8CACoB,CAStC,OARI,IAAO,EAAU,EAAM,IAEvB,CAAC,EAAS,WAAW;EAAK,EAAI,EAAQ,WAAW;EAAK,GACxD,EAAU,EAAQ,QAAQ,OAAQ,GAAG,EAEnC,CAAC,EAAS,WAAW,IAAI,EAAI,EAAQ,WAAW,IAAI,GACtD,EAAU,EAAQ,WAAW,EAExB"}
|
|
@@ -1,67 +1,2 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
//#region src/translation-alignment/alignBlocks.ts
|
|
4
|
-
/**
 * Aligns two ordered block lists with a global dynamic-programming alignment.
 *
 * A pair of blocks scores `2` for an equal `type`, `1` when the shorter
 * `content` is more than 75% of the longer one, plus eight times the
 * `computeJaccardSimilarity` of their `anchorText` values; skipping a block
 * on either side costs `-2`. Ties prefer pairing, then skipping a default
 * block, then skipping a secondary block.
 *
 * @param defaultBlocks - Blocks of the reference document; each needs
 *   `type`, `anchorText` and `content`.
 * @param secondaryBlocks - Blocks of the document aligned against it.
 * @returns Entries in document order: paired blocks carry both indices and
 *   their anchor similarity; a default block without partner has
 *   `frenchIndex: null`; a secondary block without partner has
 *   `englishIndex: -1`. Unpaired entries have `similarityScore: 0`.
 */
const alignEnglishAndFrenchBlocks = (defaultBlocks, secondaryBlocks) => {
  const defaultLength = defaultBlocks.length;
  const secondaryLength = secondaryBlocks.length;
  const gapPenalty = -2;

  const scoreMatrix = Array.from({ length: defaultLength + 1 }, () => Array.from({ length: secondaryLength + 1 }, () => 0));
  const traceMatrix = Array.from({ length: defaultLength + 1 }, () => Array.from({ length: secondaryLength + 1 }, () => "diagonal"));

  const computeMatchScore = (defaultIndex, secondaryIndex) => {
    const defaultBlock = defaultBlocks[defaultIndex];
    const secondaryBlock = secondaryBlocks[secondaryIndex];
    const typeBonus = defaultBlock.type === secondaryBlock.type ? 2 : 0;
    const anchorSimilarity = computeJaccardSimilarity(defaultBlock.anchorText, secondaryBlock.anchorText, 3);
    const lengthRatio =
      Math.min(defaultBlock.content.length, secondaryBlock.content.length) /
      Math.max(defaultBlock.content.length, secondaryBlock.content.length);
    return typeBonus + (lengthRatio > .75 ? 1 : 0) + anchorSimilarity * 8;
  };

  // Borders: aligning against nothing is a run of gaps.
  for (let i = 1; i <= defaultLength; i += 1) {
    scoreMatrix[i][0] = scoreMatrix[i - 1][0] + gapPenalty;
    traceMatrix[i][0] = "up";
  }
  for (let j = 1; j <= secondaryLength; j += 1) {
    scoreMatrix[0][j] = scoreMatrix[0][j - 1] + gapPenalty;
    traceMatrix[0][j] = "left";
  }

  for (let i = 1; i <= defaultLength; i += 1) {
    for (let j = 1; j <= secondaryLength; j += 1) {
      const match = scoreMatrix[i - 1][j - 1] + computeMatchScore(i - 1, j - 1);
      const deleteGap = scoreMatrix[i - 1][j] + gapPenalty;
      const insertGap = scoreMatrix[i][j - 1] + gapPenalty;
      const best = Math.max(match, deleteGap, insertGap);
      scoreMatrix[i][j] = best;
      traceMatrix[i][j] = best === match ? "diagonal" : best === deleteGap ? "up" : "left";
    }
  }

  // Walk back from the bottom-right corner. Entries are appended and the list
  // is reversed once at the end, which avoids shifting the array every step.
  const reversedPairs = [];
  let i = defaultLength;
  let j = secondaryLength;
  while (i > 0 || j > 0) {
    if (i > 0 && j > 0 && traceMatrix[i][j] === "diagonal") {
      const englishIndex = i - 1;
      const frenchIndex = j - 1;
      const similarityScore = computeJaccardSimilarity(defaultBlocks[englishIndex].anchorText, secondaryBlocks[frenchIndex].anchorText, 3);
      reversedPairs.push({ englishIndex, frenchIndex, similarityScore });
      i -= 1;
      j -= 1;
    } else if (i > 0 && (j === 0 || traceMatrix[i][j] === "up")) {
      reversedPairs.push({ englishIndex: i - 1, frenchIndex: null, similarityScore: 0 });
      i -= 1;
    } else if (j > 0 && (i === 0 || traceMatrix[i][j] === "left")) {
      reversedPairs.push({ englishIndex: -1, frenchIndex: j - 1, similarityScore: 0 });
      j -= 1;
    }
  }
  return reversedPairs.reverse();
};
|
|
64
|
-
|
|
65
|
-
//#endregion
|
|
66
|
-
export { alignEnglishAndFrenchBlocks };
|
|
1
|
+
import{computeJaccardSimilarity as e}from"./computeSimilarity.mjs";const t=(t,n)=>{let r=t.length,i=n.length,a=Array.from({length:r+1},()=>Array.from({length:i+1},()=>0)),o=Array.from({length:r+1},()=>Array.from({length:i+1},()=>`diagonal`)),s=(r,i)=>{let a=t[r],o=n[i],s=a.type===o.type?2:0,c=e(a.anchorText,o.anchorText,3);return s+(Math.min(a.content.length,o.content.length)/Math.max(a.content.length,o.content.length)>.75?1:0)+c*8};for(let e=1;e<=r;e+=1)a[e][0]=a[e-1][0]+-2,o[e][0]=`up`;for(let e=1;e<=i;e+=1)a[0][e]=a[0][e-1]+-2,o[0][e]=`left`;for(let e=1;e<=r;e+=1)for(let t=1;t<=i;t+=1){let n=a[e-1][t-1]+s(e-1,t-1),r=a[e-1][t]+-2,i=a[e][t-1]+-2,c=Math.max(n,r,i);a[e][t]=c,o[e][t]=c===n?`diagonal`:c===r?`up`:`left`}let c=[],l=r,u=i;for(;l>0||u>0;)if(l>0&&u>0&&o[l][u]===`diagonal`){let r=l-1,i=u-1,a=e(t[r].anchorText,n[i].anchorText,3);c.unshift({englishIndex:r,frenchIndex:i,similarityScore:a}),--l,--u}else l>0&&(u===0||o[l][u]===`up`)?(c.unshift({englishIndex:l-1,frenchIndex:null,similarityScore:0}),--l):u>0&&(l===0||o[l][u]===`left`)&&(c.unshift({englishIndex:-1,frenchIndex:u-1,similarityScore:0}),--u);return c};export{t as alignEnglishAndFrenchBlocks};
|
|
67
2
|
//# sourceMappingURL=alignBlocks.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"alignBlocks.mjs","names":[],"sources":["../../../src/translation-alignment/alignBlocks.ts"],"sourcesContent":["import { computeJaccardSimilarity } from './computeSimilarity';\nimport type { AlignmentPair, FingerprintedBlock } from './types';\n\nexport const alignEnglishAndFrenchBlocks = (\n defaultBlocks: FingerprintedBlock[],\n secondaryBlocks: FingerprintedBlock[]\n): AlignmentPair[] => {\n // Needleman–Wunsch style global alignment using anchor similarity and type equality\n const defaultLength = defaultBlocks.length;\n const secondaryLength = secondaryBlocks.length;\n\n const scoreMatrix: number[][] = Array.from(\n { length: defaultLength + 1 },\n () => Array.from({ length: secondaryLength + 1 }, () => 0)\n );\n const traceMatrix: ('diagonal' | 'up' | 'left')[][] = Array.from(\n { length: defaultLength + 1 },\n () => Array.from({ length: secondaryLength + 1 }, () => 'diagonal')\n );\n\n const gapPenalty = -2;\n\n const computeMatchScore = (\n defaultIndex: number,\n secondaryIndex: number\n ): number => {\n const defaultBlock = defaultBlocks[defaultIndex];\n const secondaryBlock = secondaryBlocks[secondaryIndex];\n const typeBonus = defaultBlock.type === secondaryBlock.type ? 2 : 0;\n const anchorSimilarity = computeJaccardSimilarity(\n defaultBlock.anchorText,\n secondaryBlock.anchorText,\n 3\n );\n const lengthRatio =\n Math.min(defaultBlock.content.length, secondaryBlock.content.length) /\n Math.max(defaultBlock.content.length, secondaryBlock.content.length);\n const lengthBonus = lengthRatio > 0.75 ? 
1 : 0;\n return typeBonus + lengthBonus + anchorSimilarity * 8; // weighted toward anchor similarity\n };\n\n // initialize first row and column\n for (let i = 1; i <= defaultLength; i += 1) {\n scoreMatrix[i][0] = scoreMatrix[i - 1][0] + gapPenalty;\n traceMatrix[i][0] = 'up';\n }\n for (let j = 1; j <= secondaryLength; j += 1) {\n scoreMatrix[0][j] = scoreMatrix[0][j - 1] + gapPenalty;\n traceMatrix[0][j] = 'left';\n }\n\n // fill\n for (let i = 1; i <= defaultLength; i += 1) {\n for (let j = 1; j <= secondaryLength; j += 1) {\n const match = scoreMatrix[i - 1][j - 1] + computeMatchScore(i - 1, j - 1);\n const deleteGap = scoreMatrix[i - 1][j] + gapPenalty;\n const insertGap = scoreMatrix[i][j - 1] + gapPenalty;\n\n const best = Math.max(match, deleteGap, insertGap);\n scoreMatrix[i][j] = best;\n traceMatrix[i][j] =\n best === match ? 'diagonal' : best === deleteGap ? 'up' : 'left';\n }\n }\n\n // traceback\n const result: AlignmentPair[] = [];\n let i = defaultLength;\n let j = secondaryLength;\n while (i > 0 || j > 0) {\n if (i > 0 && j > 0 && traceMatrix[i][j] === 'diagonal') {\n const englishIndex = i - 1;\n const frenchIndex = j - 1;\n const similarityScore = computeJaccardSimilarity(\n defaultBlocks[englishIndex].anchorText,\n secondaryBlocks[frenchIndex].anchorText,\n 3\n );\n result.unshift({ englishIndex, frenchIndex, similarityScore });\n i -= 1;\n j -= 1;\n } else if (i > 0 && (j === 0 || traceMatrix[i][j] === 'up')) {\n result.unshift({\n englishIndex: i - 1,\n frenchIndex: null,\n similarityScore: 0,\n });\n i -= 1;\n } else if (j > 0 && (i === 0 || traceMatrix[i][j] === 'left')) {\n // french block has no corresponding english block (deleted)\n result.unshift({\n englishIndex: -1,\n frenchIndex: j - 1,\n similarityScore: 0,\n });\n j -= 1;\n }\n }\n return result;\n};\n"],"mappings":"
|
|
1
|
+
{"version":3,"file":"alignBlocks.mjs","names":[],"sources":["../../../src/translation-alignment/alignBlocks.ts"],"sourcesContent":["import { computeJaccardSimilarity } from './computeSimilarity';\nimport type { AlignmentPair, FingerprintedBlock } from './types';\n\nexport const alignEnglishAndFrenchBlocks = (\n defaultBlocks: FingerprintedBlock[],\n secondaryBlocks: FingerprintedBlock[]\n): AlignmentPair[] => {\n // Needleman–Wunsch style global alignment using anchor similarity and type equality\n const defaultLength = defaultBlocks.length;\n const secondaryLength = secondaryBlocks.length;\n\n const scoreMatrix: number[][] = Array.from(\n { length: defaultLength + 1 },\n () => Array.from({ length: secondaryLength + 1 }, () => 0)\n );\n const traceMatrix: ('diagonal' | 'up' | 'left')[][] = Array.from(\n { length: defaultLength + 1 },\n () => Array.from({ length: secondaryLength + 1 }, () => 'diagonal')\n );\n\n const gapPenalty = -2;\n\n const computeMatchScore = (\n defaultIndex: number,\n secondaryIndex: number\n ): number => {\n const defaultBlock = defaultBlocks[defaultIndex];\n const secondaryBlock = secondaryBlocks[secondaryIndex];\n const typeBonus = defaultBlock.type === secondaryBlock.type ? 2 : 0;\n const anchorSimilarity = computeJaccardSimilarity(\n defaultBlock.anchorText,\n secondaryBlock.anchorText,\n 3\n );\n const lengthRatio =\n Math.min(defaultBlock.content.length, secondaryBlock.content.length) /\n Math.max(defaultBlock.content.length, secondaryBlock.content.length);\n const lengthBonus = lengthRatio > 0.75 ? 
1 : 0;\n return typeBonus + lengthBonus + anchorSimilarity * 8; // weighted toward anchor similarity\n };\n\n // initialize first row and column\n for (let i = 1; i <= defaultLength; i += 1) {\n scoreMatrix[i][0] = scoreMatrix[i - 1][0] + gapPenalty;\n traceMatrix[i][0] = 'up';\n }\n for (let j = 1; j <= secondaryLength; j += 1) {\n scoreMatrix[0][j] = scoreMatrix[0][j - 1] + gapPenalty;\n traceMatrix[0][j] = 'left';\n }\n\n // fill\n for (let i = 1; i <= defaultLength; i += 1) {\n for (let j = 1; j <= secondaryLength; j += 1) {\n const match = scoreMatrix[i - 1][j - 1] + computeMatchScore(i - 1, j - 1);\n const deleteGap = scoreMatrix[i - 1][j] + gapPenalty;\n const insertGap = scoreMatrix[i][j - 1] + gapPenalty;\n\n const best = Math.max(match, deleteGap, insertGap);\n scoreMatrix[i][j] = best;\n traceMatrix[i][j] =\n best === match ? 'diagonal' : best === deleteGap ? 'up' : 'left';\n }\n }\n\n // traceback\n const result: AlignmentPair[] = [];\n let i = defaultLength;\n let j = secondaryLength;\n while (i > 0 || j > 0) {\n if (i > 0 && j > 0 && traceMatrix[i][j] === 'diagonal') {\n const englishIndex = i - 1;\n const frenchIndex = j - 1;\n const similarityScore = computeJaccardSimilarity(\n defaultBlocks[englishIndex].anchorText,\n secondaryBlocks[frenchIndex].anchorText,\n 3\n );\n result.unshift({ englishIndex, frenchIndex, similarityScore });\n i -= 1;\n j -= 1;\n } else if (i > 0 && (j === 0 || traceMatrix[i][j] === 'up')) {\n result.unshift({\n englishIndex: i - 1,\n frenchIndex: null,\n similarityScore: 0,\n });\n i -= 1;\n } else if (j > 0 && (i === 0 || traceMatrix[i][j] === 'left')) {\n // french block has no corresponding english block (deleted)\n result.unshift({\n englishIndex: -1,\n frenchIndex: j - 1,\n similarityScore: 0,\n });\n j -= 1;\n }\n }\n return 
result;\n};\n"],"mappings":"mEAGA,MAAa,GACX,EACA,IACoB,CAEpB,IAAM,EAAgB,EAAc,OAC9B,EAAkB,EAAgB,OAElC,EAA0B,MAAM,KACpC,CAAE,OAAQ,EAAgB,EAAG,KACvB,MAAM,KAAK,CAAE,OAAQ,EAAkB,EAAG,KAAQ,EAAE,CAC3D,CACK,EAAgD,MAAM,KAC1D,CAAE,OAAQ,EAAgB,EAAG,KACvB,MAAM,KAAK,CAAE,OAAQ,EAAkB,EAAG,KAAQ,WAAW,CACpE,CAIK,GACJ,EACA,IACW,CACX,IAAM,EAAe,EAAc,GAC7B,EAAiB,EAAgB,GACjC,EAAY,EAAa,OAAS,EAAe,KAAO,EAAI,EAC5D,EAAmB,EACvB,EAAa,WACb,EAAe,WACf,EACD,CAKD,OAAO,GAHL,KAAK,IAAI,EAAa,QAAQ,OAAQ,EAAe,QAAQ,OAAO,CACpE,KAAK,IAAI,EAAa,QAAQ,OAAQ,EAAe,QAAQ,OAAO,CACpC,IAAO,EAAI,GACZ,EAAmB,GAItD,IAAK,IAAI,EAAI,EAAG,GAAK,EAAe,GAAK,EACvC,EAAY,GAAG,GAAK,EAAY,EAAI,GAAG,GAAK,GAC5C,EAAY,GAAG,GAAK,KAEtB,IAAK,IAAI,EAAI,EAAG,GAAK,EAAiB,GAAK,EACzC,EAAY,GAAG,GAAK,EAAY,GAAG,EAAI,GAAK,GAC5C,EAAY,GAAG,GAAK,OAItB,IAAK,IAAI,EAAI,EAAG,GAAK,EAAe,GAAK,EACvC,IAAK,IAAI,EAAI,EAAG,GAAK,EAAiB,GAAK,EAAG,CAC5C,IAAM,EAAQ,EAAY,EAAI,GAAG,EAAI,GAAK,EAAkB,EAAI,EAAG,EAAI,EAAE,CACnE,EAAY,EAAY,EAAI,GAAG,GAAK,GACpC,EAAY,EAAY,GAAG,EAAI,GAAK,GAEpC,EAAO,KAAK,IAAI,EAAO,EAAW,EAAU,CAClD,EAAY,GAAG,GAAK,EACpB,EAAY,GAAG,GACb,IAAS,EAAQ,WAAa,IAAS,EAAY,KAAO,OAKhE,IAAM,EAA0B,EAAE,CAC9B,EAAI,EACJ,EAAI,EACR,KAAO,EAAI,GAAK,EAAI,GAClB,GAAI,EAAI,GAAK,EAAI,GAAK,EAAY,GAAG,KAAO,WAAY,CACtD,IAAM,EAAe,EAAI,EACnB,EAAc,EAAI,EAClB,EAAkB,EACtB,EAAc,GAAc,WAC5B,EAAgB,GAAa,WAC7B,EACD,CACD,EAAO,QAAQ,CAAE,eAAc,cAAa,kBAAiB,CAAC,CAC9D,IACA,SACS,EAAI,IAAM,IAAM,GAAK,EAAY,GAAG,KAAO,OACpD,EAAO,QAAQ,CACb,aAAc,EAAI,EAClB,YAAa,KACb,gBAAiB,EAClB,CAAC,CACF,KACS,EAAI,IAAM,IAAM,GAAK,EAAY,GAAG,KAAO,UAEpD,EAAO,QAAQ,CACb,aAAc,GACd,YAAa,EAAI,EACjB,gBAAiB,EAClB,CAAC,CACF,KAGJ,OAAO"}
|
|
@@ -1,23 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
const generateCharacterShingles = (text, shingleLength) => {
|
|
3
|
-
const normalized = text.replace(/\s+/g, " ").trim();
|
|
4
|
-
const set = /* @__PURE__ */ new Set();
|
|
5
|
-
if (normalized.length < shingleLength) {
|
|
6
|
-
if (normalized.length > 0) set.add(normalized);
|
|
7
|
-
return set;
|
|
8
|
-
}
|
|
9
|
-
for (let index = 0; index <= normalized.length - shingleLength; index += 1) set.add(normalized.slice(index, index + shingleLength));
|
|
10
|
-
return set;
|
|
11
|
-
};
|
|
12
|
-
const computeJaccardSimilarity = (a, b, shingleLength = 3) => {
|
|
13
|
-
const setA = generateCharacterShingles(a, shingleLength);
|
|
14
|
-
const setB = generateCharacterShingles(b, shingleLength);
|
|
15
|
-
if (setA.size === 0 && setB.size === 0) return 1;
|
|
16
|
-
const intersectionSize = Array.from(setA).filter((token) => setB.has(token)).length;
|
|
17
|
-
const unionSize = new Set([...Array.from(setA), ...Array.from(setB)]).size;
|
|
18
|
-
return unionSize === 0 ? 0 : intersectionSize / unionSize;
|
|
19
|
-
};
|
|
20
|
-
|
|
21
|
-
//#endregion
|
|
22
|
-
export { computeJaccardSimilarity, generateCharacterShingles };
|
|
1
|
+
const e=(e,t)=>{let n=e.replace(/\s+/g,` `).trim(),r=new Set;if(n.length<t)return n.length>0&&r.add(n),r;for(let e=0;e<=n.length-t;e+=1)r.add(n.slice(e,e+t));return r},t=(t,n,r=3)=>{let i=e(t,r),a=e(n,r);if(i.size===0&&a.size===0)return 1;let o=Array.from(i).filter(e=>a.has(e)).length,s=new Set([...Array.from(i),...Array.from(a)]).size;return s===0?0:o/s};export{t as computeJaccardSimilarity,e as generateCharacterShingles};
|
|
23
2
|
//# sourceMappingURL=computeSimilarity.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"computeSimilarity.mjs","names":[],"sources":["../../../src/translation-alignment/computeSimilarity.ts"],"sourcesContent":["// Character shingle Jaccard similarity (language agnostic)\nexport const generateCharacterShingles = (\n text: string,\n shingleLength: number\n): Set<string> => {\n const normalized = text.replace(/\\s+/g, ' ').trim();\n const set = new Set<string>();\n if (normalized.length < shingleLength) {\n if (normalized.length > 0) {\n set.add(normalized);\n }\n return set;\n }\n for (let index = 0; index <= normalized.length - shingleLength; index += 1) {\n set.add(normalized.slice(index, index + shingleLength));\n }\n return set;\n};\n\nexport const computeJaccardSimilarity = (\n a: string,\n b: string,\n shingleLength: number = 3\n): number => {\n const setA = generateCharacterShingles(a, shingleLength);\n const setB = generateCharacterShingles(b, shingleLength);\n if (setA.size === 0 && setB.size === 0) return 1;\n const intersectionSize = Array.from(setA).filter((token) =>\n setB.has(token)\n ).length;\n const unionSize = new Set([...Array.from(setA), ...Array.from(setB)]).size;\n return unionSize === 0 ? 0 : intersectionSize / unionSize;\n};\n"],"mappings":"
|
|
1
|
+
{"version":3,"file":"computeSimilarity.mjs","names":[],"sources":["../../../src/translation-alignment/computeSimilarity.ts"],"sourcesContent":["// Character shingle Jaccard similarity (language agnostic)\nexport const generateCharacterShingles = (\n text: string,\n shingleLength: number\n): Set<string> => {\n const normalized = text.replace(/\\s+/g, ' ').trim();\n const set = new Set<string>();\n if (normalized.length < shingleLength) {\n if (normalized.length > 0) {\n set.add(normalized);\n }\n return set;\n }\n for (let index = 0; index <= normalized.length - shingleLength; index += 1) {\n set.add(normalized.slice(index, index + shingleLength));\n }\n return set;\n};\n\nexport const computeJaccardSimilarity = (\n a: string,\n b: string,\n shingleLength: number = 3\n): number => {\n const setA = generateCharacterShingles(a, shingleLength);\n const setB = generateCharacterShingles(b, shingleLength);\n if (setA.size === 0 && setB.size === 0) return 1;\n const intersectionSize = Array.from(setA).filter((token) =>\n setB.has(token)\n ).length;\n const unionSize = new Set([...Array.from(setA), ...Array.from(setB)]).size;\n return unionSize === 0 ? 0 : intersectionSize / unionSize;\n};\n"],"mappings":"AACA,MAAa,GACX,EACA,IACgB,CAChB,IAAM,EAAa,EAAK,QAAQ,OAAQ,IAAI,CAAC,MAAM,CAC7C,EAAM,IAAI,IAChB,GAAI,EAAW,OAAS,EAItB,OAHI,EAAW,OAAS,GACtB,EAAI,IAAI,EAAW,CAEd,EAET,IAAK,IAAI,EAAQ,EAAG,GAAS,EAAW,OAAS,EAAe,GAAS,EACvE,EAAI,IAAI,EAAW,MAAM,EAAO,EAAQ,EAAc,CAAC,CAEzD,OAAO,GAGI,GACX,EACA,EACA,EAAwB,IACb,CACX,IAAM,EAAO,EAA0B,EAAG,EAAc,CAClD,EAAO,EAA0B,EAAG,EAAc,CACxD,GAAI,EAAK,OAAS,GAAK,EAAK,OAAS,EAAG,MAAO,GAC/C,IAAM,EAAmB,MAAM,KAAK,EAAK,CAAC,OAAQ,GAChD,EAAK,IAAI,EAAM,CAChB,CAAC,OACI,EAAY,IAAI,IAAI,CAAC,GAAG,MAAM,KAAK,EAAK,CAAE,GAAG,MAAM,KAAK,EAAK,CAAC,CAAC,CAAC,KACtE,OAAO,IAAc,EAAI,EAAI,EAAmB"}
|
|
@@ -1,21 +1,2 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
3
|
-
//#region src/translation-alignment/fingerprintBlock.ts
|
|
4
|
-
const computeStringDigest = (text) => crypto.createHash("sha256").update(text).digest("hex");
|
|
5
|
-
const fingerprintBlock = (block, previousBlock, nextBlock) => {
|
|
6
|
-
const semanticDigest = computeStringDigest(block.semanticText);
|
|
7
|
-
const anchorDigest = computeStringDigest(block.anchorText);
|
|
8
|
-
const compositeKey = `${semanticDigest}:${anchorDigest}`;
|
|
9
|
-
const contextKey = computeStringDigest(`${computeStringDigest(previousBlock?.semanticText ?? "")}:${computeStringDigest(nextBlock?.semanticText ?? "")}`);
|
|
10
|
-
return {
|
|
11
|
-
...block,
|
|
12
|
-
semanticDigest,
|
|
13
|
-
anchorDigest,
|
|
14
|
-
compositeKey,
|
|
15
|
-
contextKey
|
|
16
|
-
};
|
|
17
|
-
};
|
|
18
|
-
|
|
19
|
-
//#endregion
|
|
20
|
-
export { fingerprintBlock };
|
|
1
|
+
import e from"node:crypto";const t=t=>e.createHash(`sha256`).update(t).digest(`hex`),n=(e,n,r)=>{let i=t(e.semanticText),a=t(e.anchorText),o=`${i}:${a}`,s=t(`${t(n?.semanticText??``)}:${t(r?.semanticText??``)}`);return{...e,semanticDigest:i,anchorDigest:a,compositeKey:o,contextKey:s}};export{n as fingerprintBlock};
|
|
21
2
|
//# sourceMappingURL=fingerprintBlock.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fingerprintBlock.mjs","names":[],"sources":["../../../src/translation-alignment/fingerprintBlock.ts"],"sourcesContent":["import crypto from 'node:crypto';\nimport type { FingerprintedBlock, NormalizedBlock } from './types';\n\nconst computeStringDigest = (text: string): string =>\n crypto.createHash('sha256').update(text).digest('hex');\n\nexport const fingerprintBlock = (\n block: NormalizedBlock,\n previousBlock: NormalizedBlock | null,\n nextBlock: NormalizedBlock | null\n): FingerprintedBlock => {\n const semanticDigest = computeStringDigest(block.semanticText);\n const anchorDigest = computeStringDigest(block.anchorText);\n const compositeKey = `${semanticDigest}:${anchorDigest}`;\n\n const previousDigest = computeStringDigest(previousBlock?.semanticText ?? '');\n const nextDigest = computeStringDigest(nextBlock?.semanticText ?? '');\n const contextKey = computeStringDigest(`${previousDigest}:${nextDigest}`);\n\n return {\n ...block,\n semanticDigest,\n anchorDigest,\n compositeKey,\n contextKey,\n };\n};\n"],"mappings":"
|
|
1
|
+
{"version":3,"file":"fingerprintBlock.mjs","names":[],"sources":["../../../src/translation-alignment/fingerprintBlock.ts"],"sourcesContent":["import crypto from 'node:crypto';\nimport type { FingerprintedBlock, NormalizedBlock } from './types';\n\nconst computeStringDigest = (text: string): string =>\n crypto.createHash('sha256').update(text).digest('hex');\n\nexport const fingerprintBlock = (\n block: NormalizedBlock,\n previousBlock: NormalizedBlock | null,\n nextBlock: NormalizedBlock | null\n): FingerprintedBlock => {\n const semanticDigest = computeStringDigest(block.semanticText);\n const anchorDigest = computeStringDigest(block.anchorText);\n const compositeKey = `${semanticDigest}:${anchorDigest}`;\n\n const previousDigest = computeStringDigest(previousBlock?.semanticText ?? '');\n const nextDigest = computeStringDigest(nextBlock?.semanticText ?? '');\n const contextKey = computeStringDigest(`${previousDigest}:${nextDigest}`);\n\n return {\n ...block,\n semanticDigest,\n anchorDigest,\n compositeKey,\n contextKey,\n };\n};\n"],"mappings":"2BAGA,MAAM,EAAuB,GAC3B,EAAO,WAAW,SAAS,CAAC,OAAO,EAAK,CAAC,OAAO,MAAM,CAE3C,GACX,EACA,EACA,IACuB,CACvB,IAAM,EAAiB,EAAoB,EAAM,aAAa,CACxD,EAAe,EAAoB,EAAM,WAAW,CACpD,EAAe,GAAG,EAAe,GAAG,IAIpC,EAAa,EAAoB,GAFhB,EAAoB,GAAe,cAAgB,GAAG,CAEpB,GADtC,EAAoB,GAAW,cAAgB,GAAG,GACI,CAEzE,MAAO,CACL,GAAG,EACH,iBACA,eACA,eACA,aACD"}
|
|
@@ -1,11 +1 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { alignEnglishAndFrenchBlocks } from "./alignBlocks.mjs";
|
|
3
|
-
import { fingerprintBlock } from "./fingerprintBlock.mjs";
|
|
4
|
-
import { mapChangedLinesToBlocks } from "./mapChangedLinesToBlocks.mjs";
|
|
5
|
-
import { normalizeBlock } from "./normalizeBlock.mjs";
|
|
6
|
-
import { planAlignmentActions } from "./planActions.mjs";
|
|
7
|
-
import { identifySegmentsToReview, mergeReviewedSegments } from "./rebuildDocument.mjs";
|
|
8
|
-
import { segmentDocument } from "./segmentDocument.mjs";
|
|
9
|
-
import { buildAlignmentPlan } from "./pipeline.mjs";
|
|
10
|
-
|
|
11
|
-
export { alignEnglishAndFrenchBlocks, buildAlignmentPlan, computeJaccardSimilarity, fingerprintBlock, generateCharacterShingles, identifySegmentsToReview, mapChangedLinesToBlocks, mergeReviewedSegments, normalizeBlock, planAlignmentActions, segmentDocument };
|
|
1
|
+
import{computeJaccardSimilarity as e,generateCharacterShingles as t}from"./computeSimilarity.mjs";import{alignEnglishAndFrenchBlocks as n}from"./alignBlocks.mjs";import{fingerprintBlock as r}from"./fingerprintBlock.mjs";import{mapChangedLinesToBlocks as i}from"./mapChangedLinesToBlocks.mjs";import{normalizeBlock as a}from"./normalizeBlock.mjs";import{planAlignmentActions as o}from"./planActions.mjs";import{identifySegmentsToReview as s,mergeReviewedSegments as c}from"./rebuildDocument.mjs";import{segmentDocument as l}from"./segmentDocument.mjs";import{buildAlignmentPlan as u}from"./pipeline.mjs";export{n as alignEnglishAndFrenchBlocks,u as buildAlignmentPlan,e as computeJaccardSimilarity,r as fingerprintBlock,t as generateCharacterShingles,s as identifySegmentsToReview,i as mapChangedLinesToBlocks,c as mergeReviewedSegments,a as normalizeBlock,o as planAlignmentActions,l as segmentDocument};
|
|
@@ -1,17 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
const mapChangedLinesToBlocks = (blocks, changedLines) => {
|
|
3
|
-
const changedSet = /* @__PURE__ */ new Set();
|
|
4
|
-
if (!changedLines || changedLines.length === 0) return changedSet;
|
|
5
|
-
const changedLookup = new Set(changedLines);
|
|
6
|
-
blocks.forEach((block, index) => {
|
|
7
|
-
for (let line = block.lineStart; line <= block.lineEnd; line += 1) if (changedLookup.has(line)) {
|
|
8
|
-
changedSet.add(index);
|
|
9
|
-
break;
|
|
10
|
-
}
|
|
11
|
-
});
|
|
12
|
-
return changedSet;
|
|
13
|
-
};
|
|
14
|
-
|
|
15
|
-
//#endregion
|
|
16
|
-
export { mapChangedLinesToBlocks };
|
|
1
|
+
const e=(e,t)=>{let n=new Set;if(!t||t.length===0)return n;let r=new Set(t);return e.forEach((e,t)=>{for(let i=e.lineStart;i<=e.lineEnd;i+=1)if(r.has(i)){n.add(t);break}}),n};export{e as mapChangedLinesToBlocks};
|
|
17
2
|
//# sourceMappingURL=mapChangedLinesToBlocks.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mapChangedLinesToBlocks.mjs","names":[],"sources":["../../../src/translation-alignment/mapChangedLinesToBlocks.ts"],"sourcesContent":["import type { Block, LineChange } from './types';\n\nexport const mapChangedLinesToBlocks = (\n blocks: Block[],\n changedLines: LineChange[]\n): Set<number> => {\n const changedSet = new Set<number>();\n if (!changedLines || changedLines.length === 0) return changedSet;\n\n const changedLookup = new Set<number>(changedLines);\n\n blocks.forEach((block, index) => {\n for (let line = block.lineStart; line <= block.lineEnd; line += 1) {\n if (changedLookup.has(line)) {\n changedSet.add(index);\n break;\n }\n }\n });\n\n return changedSet;\n};\n"],"mappings":"
|
|
1
|
+
{"version":3,"file":"mapChangedLinesToBlocks.mjs","names":[],"sources":["../../../src/translation-alignment/mapChangedLinesToBlocks.ts"],"sourcesContent":["import type { Block, LineChange } from './types';\n\nexport const mapChangedLinesToBlocks = (\n blocks: Block[],\n changedLines: LineChange[]\n): Set<number> => {\n const changedSet = new Set<number>();\n if (!changedLines || changedLines.length === 0) return changedSet;\n\n const changedLookup = new Set<number>(changedLines);\n\n blocks.forEach((block, index) => {\n for (let line = block.lineStart; line <= block.lineEnd; line += 1) {\n if (changedLookup.has(line)) {\n changedSet.add(index);\n break;\n }\n }\n });\n\n return changedSet;\n};\n"],"mappings":"AAEA,MAAa,GACX,EACA,IACgB,CAChB,IAAM,EAAa,IAAI,IACvB,GAAI,CAAC,GAAgB,EAAa,SAAW,EAAG,OAAO,EAEvD,IAAM,EAAgB,IAAI,IAAY,EAAa,CAWnD,OATA,EAAO,SAAS,EAAO,IAAU,CAC/B,IAAK,IAAI,EAAO,EAAM,UAAW,GAAQ,EAAM,QAAS,GAAQ,EAC9D,GAAI,EAAc,IAAI,EAAK,CAAE,CAC3B,EAAW,IAAI,EAAM,CACrB,QAGJ,CAEK"}
|
|
@@ -1,21 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
const removeMarkdownFormatting = (text) => {
|
|
3
|
-
return text.replace(/`{1,3}[^`]*`{1,3}/g, " ").replace(/\*\*([^*]+)\*\*/g, "$1").replace(/\*([^*]+)\*/g, "$1").replace(/_([^_]+)_/g, "$1").replace(/~~([^~]+)~~/g, "$1").replace(/!?\[[^\]]*\]\([^)]*\)/g, " ").replace(/^\s*#{1,6}\s+/gm, "").replace(/^\s*>\s?/gm, "").replace(/^\s*[-*+]\s+/gm, "").replace(/^\s*\d+\.\s+/gm, "");
|
|
4
|
-
};
|
|
5
|
-
const collapseWhitespace = (text) => text.replace(/\s+/g, " ").trim();
|
|
6
|
-
const stripLettersKeepDigitsAndSymbols = (text) => {
|
|
7
|
-
return text.replace(/\p{L}+/gu, "");
|
|
8
|
-
};
|
|
9
|
-
const normalizeBlock = (block) => {
|
|
10
|
-
const semanticCollapsed = collapseWhitespace(removeMarkdownFormatting(block.content).toLowerCase());
|
|
11
|
-
const anchorCollapsed = collapseWhitespace(stripLettersKeepDigitsAndSymbols(block.content));
|
|
12
|
-
return {
|
|
13
|
-
...block,
|
|
14
|
-
semanticText: semanticCollapsed,
|
|
15
|
-
anchorText: anchorCollapsed
|
|
16
|
-
};
|
|
17
|
-
};
|
|
18
|
-
|
|
19
|
-
//#endregion
|
|
20
|
-
export { normalizeBlock };
|
|
1
|
+
const e=e=>e.replace(/`{1,3}[^`]*`{1,3}/g,` `).replace(/\*\*([^*]+)\*\*/g,`$1`).replace(/\*([^*]+)\*/g,`$1`).replace(/_([^_]+)_/g,`$1`).replace(/~~([^~]+)~~/g,`$1`).replace(/!?\[[^\]]*\]\([^)]*\)/g,` `).replace(/^\s*#{1,6}\s+/gm,``).replace(/^\s*>\s?/gm,``).replace(/^\s*[-*+]\s+/gm,``).replace(/^\s*\d+\.\s+/gm,``),t=e=>e.replace(/\s+/g,` `).trim(),n=e=>e.replace(/\p{L}+/gu,``),r=r=>{let i=t(e(r.content).toLowerCase()),a=t(n(r.content));return{...r,semanticText:i,anchorText:a}};export{r as normalizeBlock};
|
|
21
2
|
//# sourceMappingURL=normalizeBlock.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"normalizeBlock.mjs","names":[],"sources":["../../../src/translation-alignment/normalizeBlock.ts"],"sourcesContent":["import type { Block, NormalizedBlock } from './types';\n\nconst removeMarkdownFormatting = (text: string): string => {\n return text\n .replace(/`{1,3}[^`]*`{1,3}/g, ' ')\n .replace(/\\*\\*([^*]+)\\*\\*/g, '$1')\n .replace(/\\*([^*]+)\\*/g, '$1')\n .replace(/_([^_]+)_/g, '$1')\n .replace(/~~([^~]+)~~/g, '$1')\n .replace(/!?\\[[^\\]]*\\]\\([^)]*\\)/g, ' ')\n .replace(/^\\s*#{1,6}\\s+/gm, '')\n .replace(/^\\s*>\\s?/gm, '')\n .replace(/^\\s*[-*+]\\s+/gm, '')\n .replace(/^\\s*\\d+\\.\\s+/gm, '');\n};\n\nconst collapseWhitespace = (text: string): string =>\n text.replace(/\\s+/g, ' ').trim();\n\nconst stripLettersKeepDigitsAndSymbols = (text: string): string => {\n // Keep digits and non-letter characters, remove all letters (including accents)\n return text.replace(/\\p{L}+/gu, '');\n};\n\nexport const normalizeBlock = (block: Block): NormalizedBlock => {\n const contentWithoutMarkdown = removeMarkdownFormatting(block.content);\n const semanticLowercased = contentWithoutMarkdown.toLowerCase();\n const semanticCollapsed = collapseWhitespace(semanticLowercased);\n\n const anchorOnlySymbols = stripLettersKeepDigitsAndSymbols(block.content);\n const anchorCollapsed = collapseWhitespace(anchorOnlySymbols);\n\n return {\n ...block,\n semanticText: semanticCollapsed,\n anchorText: anchorCollapsed,\n };\n};\n"],"mappings":"
|
|
1
|
+
{"version":3,"file":"normalizeBlock.mjs","names":[],"sources":["../../../src/translation-alignment/normalizeBlock.ts"],"sourcesContent":["import type { Block, NormalizedBlock } from './types';\n\nconst removeMarkdownFormatting = (text: string): string => {\n return text\n .replace(/`{1,3}[^`]*`{1,3}/g, ' ')\n .replace(/\\*\\*([^*]+)\\*\\*/g, '$1')\n .replace(/\\*([^*]+)\\*/g, '$1')\n .replace(/_([^_]+)_/g, '$1')\n .replace(/~~([^~]+)~~/g, '$1')\n .replace(/!?\\[[^\\]]*\\]\\([^)]*\\)/g, ' ')\n .replace(/^\\s*#{1,6}\\s+/gm, '')\n .replace(/^\\s*>\\s?/gm, '')\n .replace(/^\\s*[-*+]\\s+/gm, '')\n .replace(/^\\s*\\d+\\.\\s+/gm, '');\n};\n\nconst collapseWhitespace = (text: string): string =>\n text.replace(/\\s+/g, ' ').trim();\n\nconst stripLettersKeepDigitsAndSymbols = (text: string): string => {\n // Keep digits and non-letter characters, remove all letters (including accents)\n return text.replace(/\\p{L}+/gu, '');\n};\n\nexport const normalizeBlock = (block: Block): NormalizedBlock => {\n const contentWithoutMarkdown = removeMarkdownFormatting(block.content);\n const semanticLowercased = contentWithoutMarkdown.toLowerCase();\n const semanticCollapsed = collapseWhitespace(semanticLowercased);\n\n const anchorOnlySymbols = stripLettersKeepDigitsAndSymbols(block.content);\n const anchorCollapsed = collapseWhitespace(anchorOnlySymbols);\n\n return {\n ...block,\n semanticText: semanticCollapsed,\n anchorText: anchorCollapsed,\n };\n};\n"],"mappings":"AAEA,MAAM,EAA4B,GACzB,EACJ,QAAQ,qBAAsB,IAAI,CAClC,QAAQ,mBAAoB,KAAK,CACjC,QAAQ,eAAgB,KAAK,CAC7B,QAAQ,aAAc,KAAK,CAC3B,QAAQ,eAAgB,KAAK,CAC7B,QAAQ,yBAA0B,IAAI,CACtC,QAAQ,kBAAmB,GAAG,CAC9B,QAAQ,aAAc,GAAG,CACzB,QAAQ,iBAAkB,GAAG,CAC7B,QAAQ,iBAAkB,GAAG,CAG5B,EAAsB,GAC1B,EAAK,QAAQ,OAAQ,IAAI,CAAC,MAAM,CAE5B,EAAoC,GAEjC,EAAK,QAAQ,WAAY,GAAG,CAGxB,EAAkB,GAAkC,CAG/D,IAAM,EAAoB,EAFK,EAAyB,EAAM,QAAQ,CACpB,aAAa,CACC,CAG1D,EAAkB,EADE,EAAiC,EAAM,QAAQ,CACZ,CAE7D,MAAO,CACL,GAAG,EACH,aAAc,EACd,WAAY,EACb"}
|
|
@@ -1,36 +1,2 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { fingerprintBlock } from "./fingerprintBlock.mjs";
|
|
3
|
-
import { mapChangedLinesToBlocks } from "./mapChangedLinesToBlocks.mjs";
|
|
4
|
-
import { normalizeBlock } from "./normalizeBlock.mjs";
|
|
5
|
-
import { planAlignmentActions } from "./planActions.mjs";
|
|
6
|
-
import { identifySegmentsToReview, mergeReviewedSegments } from "./rebuildDocument.mjs";
|
|
7
|
-
import { segmentDocument } from "./segmentDocument.mjs";
|
|
8
|
-
|
|
9
|
-
//#region src/translation-alignment/pipeline.ts
|
|
10
|
-
const buildAlignmentPlan = ({ englishText, frenchText, changedLines, similarityOptions }) => {
|
|
11
|
-
const englishBlocksRaw = segmentDocument(englishText);
|
|
12
|
-
const frenchBlocksRaw = segmentDocument(frenchText);
|
|
13
|
-
const englishNormalized = englishBlocksRaw.map(normalizeBlock);
|
|
14
|
-
const frenchNormalized = frenchBlocksRaw.map(normalizeBlock);
|
|
15
|
-
const englishBlocks = englishNormalized.map((block, index, array) => fingerprintBlock(block, array[index - 1] ?? null, array[index + 1] ?? null));
|
|
16
|
-
const frenchBlocks = frenchNormalized.map((block, index, array) => fingerprintBlock(block, array[index - 1] ?? null, array[index + 1] ?? null));
|
|
17
|
-
const plan = planAlignmentActions(alignEnglishAndFrenchBlocks(englishBlocks, frenchBlocks), mapChangedLinesToBlocks(englishBlocks, Array.isArray(changedLines) ? changedLines : []), {
|
|
18
|
-
minimumMatchForReuse: similarityOptions?.minimumMatchForReuse ?? .9,
|
|
19
|
-
minimumMatchForNearDuplicate: similarityOptions?.minimumMatchForNearDuplicate ?? .8
|
|
20
|
-
});
|
|
21
|
-
const { segmentsToReview } = identifySegmentsToReview({
|
|
22
|
-
englishBlocks,
|
|
23
|
-
frenchBlocks,
|
|
24
|
-
plan
|
|
25
|
-
});
|
|
26
|
-
return {
|
|
27
|
-
englishBlocks,
|
|
28
|
-
frenchBlocks,
|
|
29
|
-
plan,
|
|
30
|
-
segmentsToReview
|
|
31
|
-
};
|
|
32
|
-
};
|
|
33
|
-
|
|
34
|
-
//#endregion
|
|
35
|
-
export { buildAlignmentPlan, mergeReviewedSegments };
|
|
1
|
+
import{alignEnglishAndFrenchBlocks as e}from"./alignBlocks.mjs";import{fingerprintBlock as t}from"./fingerprintBlock.mjs";import{mapChangedLinesToBlocks as n}from"./mapChangedLinesToBlocks.mjs";import{normalizeBlock as r}from"./normalizeBlock.mjs";import{planAlignmentActions as i}from"./planActions.mjs";import{identifySegmentsToReview as a,mergeReviewedSegments as o}from"./rebuildDocument.mjs";import{segmentDocument as s}from"./segmentDocument.mjs";const c=({englishText:o,frenchText:c,changedLines:l,similarityOptions:u})=>{let d=s(o),f=s(c),p=d.map(r),m=f.map(r),h=p.map((e,n,r)=>t(e,r[n-1]??null,r[n+1]??null)),g=m.map((e,n,r)=>t(e,r[n-1]??null,r[n+1]??null)),_=i(e(h,g),n(h,Array.isArray(l)?l:[]),{minimumMatchForReuse:u?.minimumMatchForReuse??.9,minimumMatchForNearDuplicate:u?.minimumMatchForNearDuplicate??.8}),{segmentsToReview:v}=a({englishBlocks:h,frenchBlocks:g,plan:_});return{englishBlocks:h,frenchBlocks:g,plan:_,segmentsToReview:v}};export{c as buildAlignmentPlan,o as mergeReviewedSegments};
|
|
36
2
|
//# sourceMappingURL=pipeline.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.mjs","names":[],"sources":["../../../src/translation-alignment/pipeline.ts"],"sourcesContent":["import { alignEnglishAndFrenchBlocks } from './alignBlocks';\nimport { fingerprintBlock } from './fingerprintBlock';\nimport { mapChangedLinesToBlocks } from './mapChangedLinesToBlocks';\nimport { normalizeBlock } from './normalizeBlock';\nimport { planAlignmentActions } from './planActions';\nimport {\n identifySegmentsToReview,\n mergeReviewedSegments,\n type SegmentToReview,\n} from './rebuildDocument';\nimport { segmentDocument } from './segmentDocument';\nimport type {\n AlignmentPlan,\n FingerprintedBlock,\n SimilarityOptions,\n} from './types';\n\nexport type BuildAlignmentPlanInput = {\n englishText: string;\n frenchText: string;\n changedLines: number[] | undefined;\n similarityOptions?: Partial<SimilarityOptions>;\n};\n\nexport type BuildAlignmentPlanOutput = {\n englishBlocks: FingerprintedBlock[];\n frenchBlocks: FingerprintedBlock[];\n plan: AlignmentPlan;\n segmentsToReview: SegmentToReview[];\n};\n\nexport const buildAlignmentPlan = ({\n englishText,\n frenchText,\n changedLines,\n similarityOptions,\n}: BuildAlignmentPlanInput): BuildAlignmentPlanOutput => {\n const englishBlocksRaw = segmentDocument(englishText);\n const frenchBlocksRaw = segmentDocument(frenchText);\n\n const englishNormalized = englishBlocksRaw.map(normalizeBlock);\n const frenchNormalized = frenchBlocksRaw.map(normalizeBlock);\n\n const englishBlocks: FingerprintedBlock[] = englishNormalized.map(\n (block, index, array) =>\n fingerprintBlock(\n block,\n array[index - 1] ?? null,\n array[index + 1] ?? null\n )\n );\n const frenchBlocks: FingerprintedBlock[] = frenchNormalized.map(\n (block, index, array) =>\n fingerprintBlock(\n block,\n array[index - 1] ?? null,\n array[index + 1] ?? 
null\n )\n );\n\n const alignment = alignEnglishAndFrenchBlocks(englishBlocks, frenchBlocks);\n\n const changedIndexes = mapChangedLinesToBlocks(\n englishBlocks,\n Array.isArray(changedLines) ? changedLines : []\n );\n\n const plan = planAlignmentActions(alignment, changedIndexes, {\n minimumMatchForReuse: similarityOptions?.minimumMatchForReuse ?? 0.9,\n minimumMatchForNearDuplicate:\n similarityOptions?.minimumMatchForNearDuplicate ?? 0.8,\n });\n\n const { segmentsToReview } = identifySegmentsToReview({\n englishBlocks,\n frenchBlocks,\n plan,\n });\n\n return { englishBlocks, frenchBlocks, plan, segmentsToReview };\n};\n\nexport { mergeReviewedSegments };\nexport type { SegmentToReview };\n"],"mappings":"
|
|
1
|
+
{"version":3,"file":"pipeline.mjs","names":[],"sources":["../../../src/translation-alignment/pipeline.ts"],"sourcesContent":["import { alignEnglishAndFrenchBlocks } from './alignBlocks';\nimport { fingerprintBlock } from './fingerprintBlock';\nimport { mapChangedLinesToBlocks } from './mapChangedLinesToBlocks';\nimport { normalizeBlock } from './normalizeBlock';\nimport { planAlignmentActions } from './planActions';\nimport {\n identifySegmentsToReview,\n mergeReviewedSegments,\n type SegmentToReview,\n} from './rebuildDocument';\nimport { segmentDocument } from './segmentDocument';\nimport type {\n AlignmentPlan,\n FingerprintedBlock,\n SimilarityOptions,\n} from './types';\n\nexport type BuildAlignmentPlanInput = {\n englishText: string;\n frenchText: string;\n changedLines: number[] | undefined;\n similarityOptions?: Partial<SimilarityOptions>;\n};\n\nexport type BuildAlignmentPlanOutput = {\n englishBlocks: FingerprintedBlock[];\n frenchBlocks: FingerprintedBlock[];\n plan: AlignmentPlan;\n segmentsToReview: SegmentToReview[];\n};\n\nexport const buildAlignmentPlan = ({\n englishText,\n frenchText,\n changedLines,\n similarityOptions,\n}: BuildAlignmentPlanInput): BuildAlignmentPlanOutput => {\n const englishBlocksRaw = segmentDocument(englishText);\n const frenchBlocksRaw = segmentDocument(frenchText);\n\n const englishNormalized = englishBlocksRaw.map(normalizeBlock);\n const frenchNormalized = frenchBlocksRaw.map(normalizeBlock);\n\n const englishBlocks: FingerprintedBlock[] = englishNormalized.map(\n (block, index, array) =>\n fingerprintBlock(\n block,\n array[index - 1] ?? null,\n array[index + 1] ?? null\n )\n );\n const frenchBlocks: FingerprintedBlock[] = frenchNormalized.map(\n (block, index, array) =>\n fingerprintBlock(\n block,\n array[index - 1] ?? null,\n array[index + 1] ?? 
null\n )\n );\n\n const alignment = alignEnglishAndFrenchBlocks(englishBlocks, frenchBlocks);\n\n const changedIndexes = mapChangedLinesToBlocks(\n englishBlocks,\n Array.isArray(changedLines) ? changedLines : []\n );\n\n const plan = planAlignmentActions(alignment, changedIndexes, {\n minimumMatchForReuse: similarityOptions?.minimumMatchForReuse ?? 0.9,\n minimumMatchForNearDuplicate:\n similarityOptions?.minimumMatchForNearDuplicate ?? 0.8,\n });\n\n const { segmentsToReview } = identifySegmentsToReview({\n englishBlocks,\n frenchBlocks,\n plan,\n });\n\n return { englishBlocks, frenchBlocks, plan, segmentsToReview };\n};\n\nexport { mergeReviewedSegments };\nexport type { SegmentToReview };\n"],"mappings":"qcA+BA,MAAa,GAAsB,CACjC,cACA,aACA,eACA,uBACuD,CACvD,IAAM,EAAmB,EAAgB,EAAY,CAC/C,EAAkB,EAAgB,EAAW,CAE7C,EAAoB,EAAiB,IAAI,EAAe,CACxD,EAAmB,EAAgB,IAAI,EAAe,CAEtD,EAAsC,EAAkB,KAC3D,EAAO,EAAO,IACb,EACE,EACA,EAAM,EAAQ,IAAM,KACpB,EAAM,EAAQ,IAAM,KACrB,CACJ,CACK,EAAqC,EAAiB,KACzD,EAAO,EAAO,IACb,EACE,EACA,EAAM,EAAQ,IAAM,KACpB,EAAM,EAAQ,IAAM,KACrB,CACJ,CASK,EAAO,EAPK,EAA4B,EAAe,EAAa,CAEnD,EACrB,EACA,MAAM,QAAQ,EAAa,CAAG,EAAe,EAAE,CAChD,CAE4D,CAC3D,qBAAsB,GAAmB,sBAAwB,GACjE,6BACE,GAAmB,8BAAgC,GACtD,CAAC,CAEI,CAAE,oBAAqB,EAAyB,CACpD,gBACA,eACA,OACD,CAAC,CAEF,MAAO,CAAE,gBAAe,eAAc,OAAM,mBAAkB"}
|