@intlayer/cli 7.0.7 → 7.0.8-canary.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/translation-alignment/ARCHITECTURE.md +518 -0
- package/dist/assets/translation-alignment/IMPROVEMENTS.md +550 -0
- package/dist/assets/translation-alignment/INTEGRATION_EXAMPLE.md +682 -0
- package/dist/assets/translation-alignment/QUICK_START.md +494 -0
- package/dist/assets/translation-alignment/README.md +485 -0
- package/dist/assets/translation-alignment/SUMMARY.md +440 -0
- package/dist/cjs/IntlayerEventListener.cjs +0 -3
- package/dist/cjs/IntlayerEventListener.cjs.map +1 -1
- package/dist/cjs/_virtual/_utils_asset.cjs +0 -3
- package/dist/cjs/build.cjs +0 -2
- package/dist/cjs/build.cjs.map +1 -1
- package/dist/cjs/cli.cjs +6 -7
- package/dist/cjs/cli.cjs.map +1 -1
- package/dist/cjs/config.cjs +0 -1
- package/dist/cjs/config.cjs.map +1 -1
- package/dist/cjs/editor.cjs +0 -4
- package/dist/cjs/editor.cjs.map +1 -1
- package/dist/cjs/fill/fill.cjs +0 -3
- package/dist/cjs/fill/fill.cjs.map +1 -1
- package/dist/cjs/fill/formatAutoFilledFilePath.cjs +0 -1
- package/dist/cjs/fill/formatAutoFilledFilePath.cjs.map +1 -1
- package/dist/cjs/fill/listTranslationsTasks.cjs +0 -6
- package/dist/cjs/fill/listTranslationsTasks.cjs.map +1 -1
- package/dist/cjs/fill/translateDictionary.cjs +0 -6
- package/dist/cjs/fill/translateDictionary.cjs.map +1 -1
- package/dist/cjs/fill/writeFill.cjs +0 -4
- package/dist/cjs/fill/writeFill.cjs.map +1 -1
- package/dist/cjs/getTargetDictionary.cjs +0 -4
- package/dist/cjs/getTargetDictionary.cjs.map +1 -1
- package/dist/cjs/index.cjs +0 -1
- package/dist/cjs/listContentDeclaration.cjs +0 -4
- package/dist/cjs/listContentDeclaration.cjs.map +1 -1
- package/dist/cjs/liveSync.cjs +0 -6
- package/dist/cjs/liveSync.cjs.map +1 -1
- package/dist/cjs/pull.cjs +0 -5
- package/dist/cjs/pull.cjs.map +1 -1
- package/dist/cjs/push/pullLog.cjs +0 -1
- package/dist/cjs/push/pullLog.cjs.map +1 -1
- package/dist/cjs/push/push.cjs +0 -5
- package/dist/cjs/push/push.cjs.map +1 -1
- package/dist/cjs/pushConfig.cjs +0 -2
- package/dist/cjs/pushConfig.cjs.map +1 -1
- package/dist/cjs/pushLog.cjs +0 -1
- package/dist/cjs/pushLog.cjs.map +1 -1
- package/dist/cjs/reviewDoc.cjs +8 -131
- package/dist/cjs/reviewDoc.cjs.map +1 -1
- package/dist/cjs/reviewDocBlockAware.cjs +90 -0
- package/dist/cjs/reviewDocBlockAware.cjs.map +1 -0
- package/dist/cjs/test/index.cjs +0 -2
- package/dist/cjs/test/index.cjs.map +1 -1
- package/dist/cjs/test/listMissingTranslations.cjs +0 -4
- package/dist/cjs/test/listMissingTranslations.cjs.map +1 -1
- package/dist/cjs/translateDoc.cjs +8 -8
- package/dist/cjs/translateDoc.cjs.map +1 -1
- package/dist/cjs/translation-alignment/alignBlocks.cjs +67 -0
- package/dist/cjs/translation-alignment/alignBlocks.cjs.map +1 -0
- package/dist/cjs/translation-alignment/computeSimilarity.cjs +25 -0
- package/dist/cjs/translation-alignment/computeSimilarity.cjs.map +1 -0
- package/dist/cjs/translation-alignment/fingerprintBlock.cjs +23 -0
- package/dist/cjs/translation-alignment/fingerprintBlock.cjs.map +1 -0
- package/dist/cjs/translation-alignment/index.cjs +21 -0
- package/dist/cjs/translation-alignment/mapChangedLinesToBlocks.cjs +18 -0
- package/dist/cjs/translation-alignment/mapChangedLinesToBlocks.cjs.map +1 -0
- package/dist/cjs/translation-alignment/normalizeBlock.cjs +22 -0
- package/dist/cjs/translation-alignment/normalizeBlock.cjs.map +1 -0
- package/dist/cjs/translation-alignment/pipeline.cjs +37 -0
- package/dist/cjs/translation-alignment/pipeline.cjs.map +1 -0
- package/dist/cjs/translation-alignment/planActions.cjs +48 -0
- package/dist/cjs/translation-alignment/planActions.cjs.map +1 -0
- package/dist/cjs/translation-alignment/rebuildDocument.cjs +49 -0
- package/dist/cjs/translation-alignment/rebuildDocument.cjs.map +1 -0
- package/dist/cjs/translation-alignment/segmentDocument.cjs +132 -0
- package/dist/cjs/translation-alignment/segmentDocument.cjs.map +1 -0
- package/dist/cjs/translation-alignment/types.cjs +0 -0
- package/dist/cjs/utils/calculateChunks.cjs +0 -1
- package/dist/cjs/utils/calculateChunks.cjs.map +1 -1
- package/dist/cjs/utils/checkAccess.cjs +0 -2
- package/dist/cjs/utils/checkAccess.cjs.map +1 -1
- package/dist/cjs/utils/checkLastUpdateTime.cjs +0 -1
- package/dist/cjs/utils/checkLastUpdateTime.cjs.map +1 -1
- package/dist/cjs/utils/chunkInference.cjs +0 -2
- package/dist/cjs/utils/chunkInference.cjs.map +1 -1
- package/dist/cjs/utils/getIsFileUpdatedRecently.cjs +0 -1
- package/dist/cjs/utils/getIsFileUpdatedRecently.cjs.map +1 -1
- package/dist/cjs/utils/getParentPackageJSON.cjs +0 -2
- package/dist/cjs/utils/getParentPackageJSON.cjs.map +1 -1
- package/dist/cjs/utils/mapChunksBetweenFiles.cjs +0 -1
- package/dist/cjs/utils/mapChunksBetweenFiles.cjs.map +1 -1
- package/dist/cjs/watch.cjs +0 -2
- package/dist/cjs/watch.cjs.map +1 -1
- package/dist/esm/cli.mjs +6 -3
- package/dist/esm/cli.mjs.map +1 -1
- package/dist/esm/index.mjs +2 -2
- package/dist/esm/reviewDoc.mjs +13 -128
- package/dist/esm/reviewDoc.mjs.map +1 -1
- package/dist/esm/reviewDocBlockAware.mjs +89 -0
- package/dist/esm/reviewDocBlockAware.mjs.map +1 -0
- package/dist/esm/translateDoc.mjs +8 -3
- package/dist/esm/translateDoc.mjs.map +1 -1
- package/dist/esm/translation-alignment/alignBlocks.mjs +67 -0
- package/dist/esm/translation-alignment/alignBlocks.mjs.map +1 -0
- package/dist/esm/translation-alignment/computeSimilarity.mjs +23 -0
- package/dist/esm/translation-alignment/computeSimilarity.mjs.map +1 -0
- package/dist/esm/translation-alignment/fingerprintBlock.mjs +21 -0
- package/dist/esm/translation-alignment/fingerprintBlock.mjs.map +1 -0
- package/dist/esm/translation-alignment/index.mjs +11 -0
- package/dist/esm/translation-alignment/mapChangedLinesToBlocks.mjs +17 -0
- package/dist/esm/translation-alignment/mapChangedLinesToBlocks.mjs.map +1 -0
- package/dist/esm/translation-alignment/normalizeBlock.mjs +21 -0
- package/dist/esm/translation-alignment/normalizeBlock.mjs.map +1 -0
- package/dist/esm/translation-alignment/pipeline.mjs +36 -0
- package/dist/esm/translation-alignment/pipeline.mjs.map +1 -0
- package/dist/esm/translation-alignment/planActions.mjs +47 -0
- package/dist/esm/translation-alignment/planActions.mjs.map +1 -0
- package/dist/esm/translation-alignment/rebuildDocument.mjs +47 -0
- package/dist/esm/translation-alignment/rebuildDocument.mjs.map +1 -0
- package/dist/esm/translation-alignment/segmentDocument.mjs +131 -0
- package/dist/esm/translation-alignment/segmentDocument.mjs.map +1 -0
- package/dist/esm/translation-alignment/types.mjs +0 -0
- package/dist/types/cli.d.ts.map +1 -1
- package/dist/types/index.d.ts +2 -2
- package/dist/types/reviewDoc.d.ts +3 -6
- package/dist/types/reviewDoc.d.ts.map +1 -1
- package/dist/types/reviewDocBlockAware.d.ts +19 -0
- package/dist/types/reviewDocBlockAware.d.ts.map +1 -0
- package/dist/types/translateDoc.d.ts +2 -0
- package/dist/types/translateDoc.d.ts.map +1 -1
- package/dist/types/translation-alignment/alignBlocks.d.ts +7 -0
- package/dist/types/translation-alignment/alignBlocks.d.ts.map +1 -0
- package/dist/types/translation-alignment/computeSimilarity.d.ts +6 -0
- package/dist/types/translation-alignment/computeSimilarity.d.ts.map +1 -0
- package/dist/types/translation-alignment/fingerprintBlock.d.ts +7 -0
- package/dist/types/translation-alignment/fingerprintBlock.d.ts.map +1 -0
- package/dist/types/translation-alignment/index.d.ts +11 -0
- package/dist/types/translation-alignment/mapChangedLinesToBlocks.d.ts +7 -0
- package/dist/types/translation-alignment/mapChangedLinesToBlocks.d.ts.map +1 -0
- package/dist/types/translation-alignment/normalizeBlock.d.ts +7 -0
- package/dist/types/translation-alignment/normalizeBlock.d.ts.map +1 -0
- package/dist/types/translation-alignment/pipeline.d.ts +25 -0
- package/dist/types/translation-alignment/pipeline.d.ts.map +1 -0
- package/dist/types/translation-alignment/planActions.d.ts +7 -0
- package/dist/types/translation-alignment/planActions.d.ts.map +1 -0
- package/dist/types/translation-alignment/rebuildDocument.d.ts +32 -0
- package/dist/types/translation-alignment/rebuildDocument.d.ts.map +1 -0
- package/dist/types/translation-alignment/segmentDocument.d.ts +7 -0
- package/dist/types/translation-alignment/segmentDocument.d.ts.map +1 -0
- package/dist/types/translation-alignment/types.d.ts +49 -0
- package/dist/types/translation-alignment/types.d.ts.map +1 -0
- package/package.json +23 -23
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"translateDoc.mjs","names":["docList: string[]"],"sources":["../../src/translateDoc.ts"],"sourcesContent":["import { existsSync, mkdirSync, writeFileSync } from 'node:fs';\nimport { readFile } from 'node:fs/promises';\nimport { dirname, join, relative } from 'node:path';\nimport { readAsset } from 'utils:asset';\nimport type { AIOptions } from '@intlayer/api';\nimport {\n formatLocale,\n formatPath,\n getChunk,\n type ListGitFilesOptions,\n listGitFiles,\n parallelize,\n} from '@intlayer/chokidar';\nimport {\n ANSIColors,\n colon,\n colorize,\n colorizeNumber,\n type GetConfigurationOptions,\n getAppLogger,\n getConfiguration,\n retryManager,\n} from '@intlayer/config';\nimport type { IntlayerConfig, Locale, Locales } from '@intlayer/types';\nimport fg from 'fast-glob';\nimport { chunkText } from './utils/calculateChunks';\nimport { checkAIAccess } from './utils/checkAccess';\nimport { checkFileModifiedRange } from './utils/checkFileModifiedRange';\nimport { chunkInference } from './utils/chunkInference';\nimport { fixChunkStartEndChars } from './utils/fixChunkStartEndChars';\nimport { getOutputFilePath } from './utils/getOutputFilePath';\n\n/**\n * Translate a single file for a given locale\n */\nexport const translateFile = async (\n baseFilePath: string,\n outputFilePath: string,\n locale: Locale,\n baseLocale: Locale,\n configuration: IntlayerConfig,\n aiOptions?: AIOptions,\n customInstructions?: string\n) => {\n try {\n const appLogger = getAppLogger(configuration, {\n config: {\n prefix: '',\n },\n });\n\n // Determine the target locale file path\n const fileContent = await readFile(baseFilePath, 'utf-8');\n\n let fileResultContent = fileContent;\n\n // Prepare the base prompt for ChatGPT\n const basePrompt = readAsset('./prompts/TRANSLATE_PROMPT.md', 'utf-8')\n .replaceAll('{{localeName}}', `${formatLocale(locale, false)}`)\n .replaceAll('{{baseLocaleName}}', `${formatLocale(baseLocale, false)}`)\n .replace('{{applicationContext}}', 
aiOptions?.applicationContext ?? '-')\n .replace('{{customInstructions}}', customInstructions ?? '-');\n\n const filePrefixText = `${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}] `;\n const filePrefix = [\n colon(filePrefixText, { colSize: 40 }),\n `→ ${ANSIColors.RESET}`,\n ].join('');\n\n const prefixText = `${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}][${formatLocale(locale)}${ANSIColors.GREY_DARK}] `;\n const prefix = [\n colon(prefixText, { colSize: 40 }),\n `→ ${ANSIColors.RESET}`,\n ].join('');\n\n // 1. Chunk the file by number of lines instead of characters\n const chunks = chunkText(fileContent);\n appLogger(\n `${filePrefix}Base file splitted into ${colorizeNumber(chunks.length)} chunks`\n );\n\n for await (const [i, chunk] of chunks.entries()) {\n const isFirstChunk = i === 0;\n\n // Build the chunk-specific prompt\n const getPrevChunkPrompt = () =>\n `**CHUNK ${i} of ${chunks.length}** that has been translated in ${formatLocale(locale)}:\\n` +\n `///chunkStart///` +\n getChunk(fileResultContent, chunks[i - 1]) +\n `///chunkEnd///`;\n\n const getBaseChunkContextPrompt = () =>\n `**CHUNK ${i + 1} to ${Math.min(i + 3, chunks.length)} of ${chunks.length}** is the base chunk in ${formatLocale(baseLocale, false)} as reference.\\n` +\n `///chunksStart///` +\n (chunks[i - 1]?.content ?? '') +\n chunks[i].content +\n (chunks[i + 1]?.content ?? '') +\n `///chunksEnd///`;\n\n const fileToTranslateCurrentChunk = chunk.content;\n\n // Make the actual translation call\n const chunkTranslation = await retryManager(async () => {\n const result = await chunkInference(\n [\n { role: 'system', content: basePrompt },\n\n { role: 'system', content: getBaseChunkContextPrompt() },\n ...(isFirstChunk\n ? 
[]\n : [{ role: 'system', content: getPrevChunkPrompt() } as const]),\n {\n role: 'system',\n content: `The next user message will be the **CHUNK ${colorizeNumber(i + 1)} of ${colorizeNumber(chunks.length)}** in ${formatLocale(baseLocale, false)} to translate in ${formatLocale(locale, false)}:`,\n },\n { role: 'user', content: fileToTranslateCurrentChunk },\n ],\n aiOptions,\n configuration\n );\n\n appLogger(\n [\n `${prefix}`,\n `${ANSIColors.GREY_DARK}[Chunk `,\n colorizeNumber(i + 1),\n `${ANSIColors.GREY_DARK} of `,\n colorizeNumber(chunks.length),\n `${ANSIColors.GREY_DARK}] →${ANSIColors.RESET} `,\n `${colorizeNumber(result.tokenUsed)} tokens used`,\n ].join('')\n );\n\n const fixedTranslatedChunkResult = fixChunkStartEndChars(\n result?.fileContent,\n fileToTranslateCurrentChunk\n );\n\n return fixedTranslatedChunkResult;\n })();\n\n // Replace the chunk in the file content\n fileResultContent = fileResultContent.replace(\n fileToTranslateCurrentChunk,\n chunkTranslation\n );\n }\n\n // 4. 
Write the final translation to the appropriate file path\n mkdirSync(dirname(outputFilePath), { recursive: true });\n writeFileSync(outputFilePath, fileResultContent);\n\n const relativePath = relative(\n configuration.content.baseDir,\n outputFilePath\n );\n\n appLogger(\n `${colorize('✔', ANSIColors.GREEN)} File ${formatPath(relativePath)} created/updated successfully.`\n );\n } catch (error) {\n console.error(error);\n }\n};\n\ntype TranslateDocOptions = {\n docPattern: string[];\n locales: Locale[];\n excludedGlobPattern: string[];\n baseLocale: Locale;\n aiOptions?: AIOptions;\n nbSimultaneousFileProcessed?: number;\n configOptions?: GetConfigurationOptions;\n customInstructions?: string;\n skipIfModifiedBefore?: number | string | Date;\n skipIfModifiedAfter?: number | string | Date;\n gitOptions?: ListGitFilesOptions;\n};\n\n/**\n * Main translate function: scans all .md files in \"en/\" (unless you specified DOC_LIST),\n * then translates them to each locale in LOCALE_LIST.\n */\nexport const translateDoc = async ({\n docPattern,\n locales,\n excludedGlobPattern,\n baseLocale,\n aiOptions,\n nbSimultaneousFileProcessed,\n configOptions,\n customInstructions,\n skipIfModifiedBefore,\n skipIfModifiedAfter,\n gitOptions,\n}: TranslateDocOptions) => {\n const configuration = getConfiguration(configOptions);\n const appLogger = getAppLogger(configuration);\n\n if (nbSimultaneousFileProcessed && nbSimultaneousFileProcessed > 10) {\n appLogger(\n `Warning: nbSimultaneousFileProcessed is set to ${nbSimultaneousFileProcessed}, which is greater than 10. 
Setting it to 10.`\n );\n nbSimultaneousFileProcessed = 10; // Limit the number of simultaneous file processed to 10\n }\n\n let docList: string[] = await fg(docPattern, {\n ignore: excludedGlobPattern,\n });\n\n const hasCMSAuth = await checkAIAccess(configuration, aiOptions);\n\n if (!hasCMSAuth) return;\n\n if (gitOptions) {\n const gitChangedFiles = await listGitFiles(gitOptions);\n\n if (gitChangedFiles) {\n // Convert dictionary file paths to be relative to git root for comparison\n\n // Filter dictionaries based on git changed files\n docList = docList.filter((path) =>\n gitChangedFiles.some((gitFile) => join(process.cwd(), path) === gitFile)\n );\n }\n }\n\n // OAuth handled by API proxy internally\n\n appLogger(`Base locale is ${formatLocale(baseLocale)}`);\n appLogger(\n `Translating ${colorizeNumber(locales.length)} locales: [ ${formatLocale(locales)} ]`\n );\n\n appLogger(`Translating ${colorizeNumber(docList.length)} files:`);\n appLogger(docList.map((path) => ` - ${formatPath(path)}\\n`));\n\n // Create all tasks to be processed\n const allTasks = docList.flatMap((docPath) =>\n locales.map((locale) => async () => {\n appLogger(\n `Translating file: ${formatPath(docPath)} to ${formatLocale(locale)}`\n );\n\n const absoluteBaseFilePath = join(configuration.content.baseDir, docPath);\n const outputFilePath = getOutputFilePath(\n absoluteBaseFilePath,\n locale,\n baseLocale\n );\n\n // check if the file exist, otherwise create it\n if (!existsSync(outputFilePath)) {\n appLogger(`File ${outputFilePath} does not exist, creating it...`);\n mkdirSync(dirname(outputFilePath), { recursive: true });\n writeFileSync(outputFilePath, '');\n }\n\n const fileModificationData = checkFileModifiedRange(outputFilePath, {\n skipIfModifiedBefore,\n skipIfModifiedAfter,\n });\n\n if (fileModificationData.isSkipped) {\n appLogger(fileModificationData.message);\n return;\n }\n\n await translateFile(\n absoluteBaseFilePath,\n outputFilePath,\n locale as Locale,\n baseLocale,\n 
configuration,\n aiOptions,\n customInstructions\n );\n })\n );\n\n await parallelize(\n allTasks,\n (task) => task(),\n nbSimultaneousFileProcessed ?? 3\n );\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAmCA,MAAa,gBAAgB,OAC3B,cACA,gBACA,QACA,YACA,eACA,WACA,uBACG;AACH,KAAI;EACF,MAAM,YAAY,aAAa,eAAe,EAC5C,QAAQ,EACN,QAAQ,IACT,EACF,CAAC;EAGF,MAAM,cAAc,MAAM,SAAS,cAAc,QAAQ;EAEzD,IAAI,oBAAoB;EAGxB,MAAM,aAAa,UAAU,iCAAiC,QAAQ,CACnE,WAAW,kBAAkB,GAAG,aAAa,QAAQ,MAAM,GAAG,CAC9D,WAAW,sBAAsB,GAAG,aAAa,YAAY,MAAM,GAAG,CACtE,QAAQ,0BAA0B,WAAW,sBAAsB,IAAI,CACvE,QAAQ,0BAA0B,sBAAsB,IAAI;EAG/D,MAAM,aAAa,CACjB,MAFqB,GAAG,WAAW,UAAU,GAAG,WAAW,aAAa,GAAG,WAAW,UAAU,KAE1E,EAAE,SAAS,IAAI,CAAC,EACtC,KAAK,WAAW,QACjB,CAAC,KAAK,GAAG;EAGV,MAAM,SAAS,CACb,MAFiB,GAAG,WAAW,UAAU,GAAG,WAAW,aAAa,GAAG,WAAW,UAAU,IAAI,aAAa,OAAO,GAAG,WAAW,UAAU,KAE1H,EAAE,SAAS,IAAI,CAAC,EAClC,KAAK,WAAW,QACjB,CAAC,KAAK,GAAG;EAGV,MAAM,SAAS,UAAU,YAAY;AACrC,YACE,GAAG,WAAW,0BAA0B,eAAe,OAAO,OAAO,CAAC,SACvE;AAED,aAAW,MAAM,CAAC,GAAG,UAAU,OAAO,SAAS,EAAE;GAC/C,MAAM,eAAe,MAAM;GAG3B,MAAM,2BACJ,WAAW,EAAE,MAAM,OAAO,OAAO,iCAAiC,aAAa,OAAO,CAAC,uBAEvF,SAAS,mBAAmB,OAAO,IAAI,GAAG,GAC1C;GAEF,MAAM,kCACJ,WAAW,IAAI,EAAE,MAAM,KAAK,IAAI,IAAI,GAAG,OAAO,OAAO,CAAC,MAAM,OAAO,OAAO,0BAA0B,aAAa,YAAY,MAAM,CAAC,sCAEnI,OAAO,IAAI,IAAI,WAAW,MAC3B,OAAO,GAAG,WACT,OAAO,IAAI,IAAI,WAAW,MAC3B;GAEF,MAAM,8BAA8B,MAAM;GAG1C,MAAM,mBAAmB,MAAM,aAAa,YAAY;IACtD,MAAM,SAAS,MAAM,eACnB;KACE;MAAE,MAAM;MAAU,SAAS;MAAY;KAEvC;MAAE,MAAM;MAAU,SAAS,2BAA2B;MAAE;KACxD,GAAI,eACA,EAAE,GACF,CAAC;MAAE,MAAM;MAAU,SAAS,oBAAoB;MAAE,CAAU;KAChE;MACE,MAAM;MACN,SAAS,6CAA6C,eAAe,IAAI,EAAE,CAAC,MAAM,eAAe,OAAO,OAAO,CAAC,QAAQ,aAAa,YAAY,MAAM,CAAC,mBAAmB,aAAa,QAAQ,MAAM,CAAC;MACxM;KACD;MAAE,MAAM;MAAQ,SAAS;MAA6B;KACvD,EACD,WACA,cACD;AAED,cACE;KACE,GAAG;KACH,GAAG,WAAW,UAAU;KACxB,eAAe,IAAI,EAAE;KACrB,GAAG,WAAW,UAAU;KACxB,eAAe,OAAO,OAAO;KAC7B,GAAG,WAAW,UAAU,KAAK,WAAW,MAAM;KAC9C,GAAG,eAAe,OAAO,UAAU,CAAC;KACrC,CAAC,KAAK,GAAG,CACX;AAOD,WALmC,sBACjC,QAAQ,aACR,4BACD;KAGD,EAAE;AAGJ,uBAAoB,kBAAkB,QACpC,6BACA,iBACD;;AAIH,YAAU,QAAQ
,eAAe,EAAE,EAAE,WAAW,MAAM,CAAC;AACvD,gBAAc,gBAAgB,kBAAkB;EAEhD,MAAM,eAAe,SACnB,cAAc,QAAQ,SACtB,eACD;AAED,YACE,GAAG,SAAS,KAAK,WAAW,MAAM,CAAC,QAAQ,WAAW,aAAa,CAAC,gCACrE;UACM,OAAO;AACd,UAAQ,MAAM,MAAM;;;;;;;AAsBxB,MAAa,eAAe,OAAO,EACjC,YACA,SACA,qBACA,YACA,WACA,6BACA,eACA,oBACA,sBACA,qBACA,iBACyB;CACzB,MAAM,gBAAgB,iBAAiB,cAAc;CACrD,MAAM,YAAY,aAAa,cAAc;AAE7C,KAAI,+BAA+B,8BAA8B,IAAI;AACnE,YACE,kDAAkD,4BAA4B,+CAC/E;AACD,gCAA8B;;CAGhC,IAAIA,UAAoB,MAAM,GAAG,YAAY,EAC3C,QAAQ,qBACT,CAAC;AAIF,KAAI,CAFe,MAAM,cAAc,eAAe,UAAU,CAE/C;AAEjB,KAAI,YAAY;EACd,MAAM,kBAAkB,MAAM,aAAa,WAAW;AAEtD,MAAI,gBAIF,WAAU,QAAQ,QAAQ,SACxB,gBAAgB,MAAM,YAAY,KAAK,QAAQ,KAAK,EAAE,KAAK,KAAK,QAAQ,CACzE;;AAML,WAAU,kBAAkB,aAAa,WAAW,GAAG;AACvD,WACE,eAAe,eAAe,QAAQ,OAAO,CAAC,cAAc,aAAa,QAAQ,CAAC,IACnF;AAED,WAAU,eAAe,eAAe,QAAQ,OAAO,CAAC,SAAS;AACjE,WAAU,QAAQ,KAAK,SAAS,MAAM,WAAW,KAAK,CAAC,IAAI,CAAC;AA6C5D,OAAM,YA1CW,QAAQ,SAAS,YAChC,QAAQ,KAAK,WAAW,YAAY;AAClC,YACE,qBAAqB,WAAW,QAAQ,CAAC,MAAM,aAAa,OAAO,GACpE;EAED,MAAM,uBAAuB,KAAK,cAAc,QAAQ,SAAS,QAAQ;EACzE,MAAM,iBAAiB,kBACrB,sBACA,QACA,WACD;AAGD,MAAI,CAAC,WAAW,eAAe,EAAE;AAC/B,aAAU,QAAQ,eAAe,iCAAiC;AAClE,aAAU,QAAQ,eAAe,EAAE,EAAE,WAAW,MAAM,CAAC;AACvD,iBAAc,gBAAgB,GAAG;;EAGnC,MAAM,uBAAuB,uBAAuB,gBAAgB;GAClE;GACA;GACD,CAAC;AAEF,MAAI,qBAAqB,WAAW;AAClC,aAAU,qBAAqB,QAAQ;AACvC;;AAGF,QAAM,cACJ,sBACA,gBACA,QACA,YACA,eACA,WACA,mBACD;GACD,CACH,GAIE,SAAS,MAAM,EAChB,+BAA+B,EAChC"}
|
|
1
|
+
{"version":3,"file":"translateDoc.mjs","names":["docList: string[]"],"sources":["../../src/translateDoc.ts"],"sourcesContent":["import { existsSync, mkdirSync, writeFileSync } from 'node:fs';\nimport { readFile } from 'node:fs/promises';\nimport { dirname, join, relative } from 'node:path';\nimport { readAsset } from 'utils:asset';\nimport type { AIOptions } from '@intlayer/api';\nimport {\n formatLocale,\n formatPath,\n getChunk,\n type ListGitFilesOptions,\n listGitFiles,\n parallelize,\n} from '@intlayer/chokidar';\nimport {\n ANSIColors,\n colon,\n colorize,\n colorizeNumber,\n type GetConfigurationOptions,\n getAppLogger,\n getConfiguration,\n retryManager,\n} from '@intlayer/config';\nimport type { IntlayerConfig, Locale } from '@intlayer/types';\nimport fg from 'fast-glob';\nimport { chunkText } from './utils/calculateChunks';\nimport { checkAIAccess } from './utils/checkAccess';\nimport { checkFileModifiedRange } from './utils/checkFileModifiedRange';\nimport { chunkInference } from './utils/chunkInference';\nimport { fixChunkStartEndChars } from './utils/fixChunkStartEndChars';\nimport { getOutputFilePath } from './utils/getOutputFilePath';\n\n/**\n * Translate a single file for a given locale\n */\nexport const translateFile = async (\n baseFilePath: string,\n outputFilePath: string,\n locale: Locale,\n baseLocale: Locale,\n configuration: IntlayerConfig,\n aiOptions?: AIOptions,\n customInstructions?: string\n) => {\n try {\n const appLogger = getAppLogger(configuration, {\n config: {\n prefix: '',\n },\n });\n\n // Determine the target locale file path\n const fileContent = await readFile(baseFilePath, 'utf-8');\n\n let fileResultContent = fileContent;\n\n // Prepare the base prompt for ChatGPT\n const basePrompt = readAsset('./prompts/TRANSLATE_PROMPT.md', 'utf-8')\n .replaceAll('{{localeName}}', `${formatLocale(locale, false)}`)\n .replaceAll('{{baseLocaleName}}', `${formatLocale(baseLocale, false)}`)\n .replace('{{applicationContext}}', 
aiOptions?.applicationContext ?? '-')\n .replace('{{customInstructions}}', customInstructions ?? '-');\n\n const filePrefixText = `${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}] `;\n const filePrefix = [\n colon(filePrefixText, { colSize: 40 }),\n `→ ${ANSIColors.RESET}`,\n ].join('');\n\n const prefixText = `${ANSIColors.GREY_DARK}[${formatPath(baseFilePath)}${ANSIColors.GREY_DARK}][${formatLocale(locale)}${ANSIColors.GREY_DARK}] `;\n const prefix = [\n colon(prefixText, { colSize: 40 }),\n `→ ${ANSIColors.RESET}`,\n ].join('');\n\n // 1. Chunk the file by number of lines instead of characters\n const chunks = chunkText(fileContent);\n appLogger(\n `${filePrefix}Base file splitted into ${colorizeNumber(chunks.length)} chunks`\n );\n\n for await (const [i, chunk] of chunks.entries()) {\n const isFirstChunk = i === 0;\n\n // Build the chunk-specific prompt\n const getPrevChunkPrompt = () =>\n `**CHUNK ${i} of ${chunks.length}** that has been translated in ${formatLocale(locale)}:\\n` +\n `///chunkStart///` +\n getChunk(fileResultContent, chunks[i - 1]) +\n `///chunkEnd///`;\n\n const getBaseChunkContextPrompt = () =>\n `**CHUNK ${i + 1} to ${Math.min(i + 3, chunks.length)} of ${chunks.length}** is the base chunk in ${formatLocale(baseLocale, false)} as reference.\\n` +\n `///chunksStart///` +\n (chunks[i - 1]?.content ?? '') +\n chunks[i].content +\n (chunks[i + 1]?.content ?? '') +\n `///chunksEnd///`;\n\n const fileToTranslateCurrentChunk = chunk.content;\n\n // Make the actual translation call\n const chunkTranslation = await retryManager(async () => {\n const result = await chunkInference(\n [\n { role: 'system', content: basePrompt },\n\n { role: 'system', content: getBaseChunkContextPrompt() },\n ...(isFirstChunk\n ? 
[]\n : [{ role: 'system', content: getPrevChunkPrompt() } as const]),\n {\n role: 'system',\n content: `The next user message will be the **CHUNK ${colorizeNumber(i + 1)} of ${colorizeNumber(chunks.length)}** in ${formatLocale(baseLocale, false)} to translate in ${formatLocale(locale, false)}:`,\n },\n { role: 'user', content: fileToTranslateCurrentChunk },\n ],\n aiOptions,\n configuration\n );\n\n appLogger(\n [\n `${prefix}`,\n `${ANSIColors.GREY_DARK}[Chunk `,\n colorizeNumber(i + 1),\n `${ANSIColors.GREY_DARK} of `,\n colorizeNumber(chunks.length),\n `${ANSIColors.GREY_DARK}] →${ANSIColors.RESET} `,\n `${colorizeNumber(result.tokenUsed)} tokens used`,\n ].join('')\n );\n\n const fixedTranslatedChunkResult = fixChunkStartEndChars(\n result?.fileContent,\n fileToTranslateCurrentChunk\n );\n\n return fixedTranslatedChunkResult;\n })();\n\n // Replace the chunk in the file content\n fileResultContent = fileResultContent.replace(\n fileToTranslateCurrentChunk,\n chunkTranslation\n );\n }\n\n // 4. 
Write the final translation to the appropriate file path\n mkdirSync(dirname(outputFilePath), { recursive: true });\n writeFileSync(outputFilePath, fileResultContent);\n\n const relativePath = relative(\n configuration.content.baseDir,\n outputFilePath\n );\n\n appLogger(\n `${colorize('✔', ANSIColors.GREEN)} File ${formatPath(relativePath)} created/updated successfully.`\n );\n } catch (error) {\n console.error(error);\n }\n};\n\ntype TranslateDocOptions = {\n docPattern: string[];\n locales: Locale[];\n excludedGlobPattern: string[];\n baseLocale: Locale;\n aiOptions?: AIOptions;\n nbSimultaneousFileProcessed?: number;\n configOptions?: GetConfigurationOptions;\n customInstructions?: string;\n skipIfModifiedBefore?: number | string | Date;\n skipIfModifiedAfter?: number | string | Date;\n skipIfExists?: boolean;\n gitOptions?: ListGitFilesOptions;\n};\n\n/**\n * Main translate function: scans all .md files in \"en/\" (unless you specified DOC_LIST),\n * then translates them to each locale in LOCALE_LIST.\n */\nexport const translateDoc = async ({\n docPattern,\n locales,\n excludedGlobPattern,\n baseLocale,\n aiOptions,\n nbSimultaneousFileProcessed,\n configOptions,\n customInstructions,\n skipIfModifiedBefore,\n skipIfModifiedAfter,\n skipIfExists,\n gitOptions,\n}: TranslateDocOptions) => {\n const configuration = getConfiguration(configOptions);\n const appLogger = getAppLogger(configuration);\n\n if (nbSimultaneousFileProcessed && nbSimultaneousFileProcessed > 10) {\n appLogger(\n `Warning: nbSimultaneousFileProcessed is set to ${nbSimultaneousFileProcessed}, which is greater than 10. 
Setting it to 10.`\n );\n nbSimultaneousFileProcessed = 10; // Limit the number of simultaneous file processed to 10\n }\n\n let docList: string[] = await fg(docPattern, {\n ignore: excludedGlobPattern,\n });\n\n const hasCMSAuth = await checkAIAccess(configuration, aiOptions);\n\n if (!hasCMSAuth) return;\n\n if (gitOptions) {\n const gitChangedFiles = await listGitFiles(gitOptions);\n\n if (gitChangedFiles) {\n // Convert dictionary file paths to be relative to git root for comparison\n\n // Filter dictionaries based on git changed files\n docList = docList.filter((path) =>\n gitChangedFiles.some((gitFile) => join(process.cwd(), path) === gitFile)\n );\n }\n }\n\n // OAuth handled by API proxy internally\n\n appLogger(`Base locale is ${formatLocale(baseLocale)}`);\n appLogger(\n `Translating ${colorizeNumber(locales.length)} locales: [ ${formatLocale(locales)} ]`\n );\n\n appLogger(`Translating ${colorizeNumber(docList.length)} files:`);\n appLogger(docList.map((path) => ` - ${formatPath(path)}\\n`));\n\n // Create all tasks to be processed\n const allTasks = docList.flatMap((docPath) =>\n locales.map((locale) => async () => {\n appLogger(\n `Translating file: ${formatPath(docPath)} to ${formatLocale(locale)}`\n );\n\n const absoluteBaseFilePath = join(configuration.content.baseDir, docPath);\n const outputFilePath = getOutputFilePath(\n absoluteBaseFilePath,\n locale,\n baseLocale\n );\n\n // Skip if file exists and skipIfExists option is enabled\n if (skipIfExists && existsSync(outputFilePath)) {\n const relativePath = relative(\n configuration.content.baseDir,\n outputFilePath\n );\n appLogger(\n `${colorize('⊘', ANSIColors.YELLOW)} File ${formatPath(relativePath)} already exists, skipping.`\n );\n return;\n }\n\n // check if the file exist, otherwise create it\n if (!existsSync(outputFilePath)) {\n appLogger(`File ${outputFilePath} does not exist, creating it...`);\n mkdirSync(dirname(outputFilePath), { recursive: true });\n writeFileSync(outputFilePath, 
'');\n }\n\n const fileModificationData = checkFileModifiedRange(outputFilePath, {\n skipIfModifiedBefore,\n skipIfModifiedAfter,\n });\n\n if (fileModificationData.isSkipped) {\n appLogger(fileModificationData.message);\n return;\n }\n\n await translateFile(\n absoluteBaseFilePath,\n outputFilePath,\n locale as Locale,\n baseLocale,\n configuration,\n aiOptions,\n customInstructions\n );\n })\n );\n\n await parallelize(\n allTasks,\n (task) => task(),\n nbSimultaneousFileProcessed ?? 3\n );\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAmCA,MAAa,gBAAgB,OAC3B,cACA,gBACA,QACA,YACA,eACA,WACA,uBACG;AACH,KAAI;EACF,MAAM,YAAY,aAAa,eAAe,EAC5C,QAAQ,EACN,QAAQ,IACT,EACF,CAAC;EAGF,MAAM,cAAc,MAAM,SAAS,cAAc,QAAQ;EAEzD,IAAI,oBAAoB;EAGxB,MAAM,aAAa,UAAU,iCAAiC,QAAQ,CACnE,WAAW,kBAAkB,GAAG,aAAa,QAAQ,MAAM,GAAG,CAC9D,WAAW,sBAAsB,GAAG,aAAa,YAAY,MAAM,GAAG,CACtE,QAAQ,0BAA0B,WAAW,sBAAsB,IAAI,CACvE,QAAQ,0BAA0B,sBAAsB,IAAI;EAG/D,MAAM,aAAa,CACjB,MAFqB,GAAG,WAAW,UAAU,GAAG,WAAW,aAAa,GAAG,WAAW,UAAU,KAE1E,EAAE,SAAS,IAAI,CAAC,EACtC,KAAK,WAAW,QACjB,CAAC,KAAK,GAAG;EAGV,MAAM,SAAS,CACb,MAFiB,GAAG,WAAW,UAAU,GAAG,WAAW,aAAa,GAAG,WAAW,UAAU,IAAI,aAAa,OAAO,GAAG,WAAW,UAAU,KAE1H,EAAE,SAAS,IAAI,CAAC,EAClC,KAAK,WAAW,QACjB,CAAC,KAAK,GAAG;EAGV,MAAM,SAAS,UAAU,YAAY;AACrC,YACE,GAAG,WAAW,0BAA0B,eAAe,OAAO,OAAO,CAAC,SACvE;AAED,aAAW,MAAM,CAAC,GAAG,UAAU,OAAO,SAAS,EAAE;GAC/C,MAAM,eAAe,MAAM;GAG3B,MAAM,2BACJ,WAAW,EAAE,MAAM,OAAO,OAAO,iCAAiC,aAAa,OAAO,CAAC,uBAEvF,SAAS,mBAAmB,OAAO,IAAI,GAAG,GAC1C;GAEF,MAAM,kCACJ,WAAW,IAAI,EAAE,MAAM,KAAK,IAAI,IAAI,GAAG,OAAO,OAAO,CAAC,MAAM,OAAO,OAAO,0BAA0B,aAAa,YAAY,MAAM,CAAC,sCAEnI,OAAO,IAAI,IAAI,WAAW,MAC3B,OAAO,GAAG,WACT,OAAO,IAAI,IAAI,WAAW,MAC3B;GAEF,MAAM,8BAA8B,MAAM;GAG1C,MAAM,mBAAmB,MAAM,aAAa,YAAY;IACtD,MAAM,SAAS,MAAM,eACnB;KACE;MAAE,MAAM;MAAU,SAAS;MAAY;KAEvC;MAAE,MAAM;MAAU,SAAS,2BAA2B;MAAE;KACxD,GAAI,eACA,EAAE,GACF,CAAC;MAAE,MAAM;MAAU,SAAS,oBAAoB;MAAE,CAAU;KAChE;MACE,MAAM;MACN,SAAS,6CAA6C,eAAe,IAAI,EAAE,CAAC,MAAM,eAAe,OAAO,OAAO,CAAC,QAAQ,aAAa,YAAY,MAAM,CAAC,mBAAmB,aAAa,QAAQ,MAAM,CAAC;MACxM;KACD;M
AAE,MAAM;MAAQ,SAAS;MAA6B;KACvD,EACD,WACA,cACD;AAED,cACE;KACE,GAAG;KACH,GAAG,WAAW,UAAU;KACxB,eAAe,IAAI,EAAE;KACrB,GAAG,WAAW,UAAU;KACxB,eAAe,OAAO,OAAO;KAC7B,GAAG,WAAW,UAAU,KAAK,WAAW,MAAM;KAC9C,GAAG,eAAe,OAAO,UAAU,CAAC;KACrC,CAAC,KAAK,GAAG,CACX;AAOD,WALmC,sBACjC,QAAQ,aACR,4BACD;KAGD,EAAE;AAGJ,uBAAoB,kBAAkB,QACpC,6BACA,iBACD;;AAIH,YAAU,QAAQ,eAAe,EAAE,EAAE,WAAW,MAAM,CAAC;AACvD,gBAAc,gBAAgB,kBAAkB;EAEhD,MAAM,eAAe,SACnB,cAAc,QAAQ,SACtB,eACD;AAED,YACE,GAAG,SAAS,KAAK,WAAW,MAAM,CAAC,QAAQ,WAAW,aAAa,CAAC,gCACrE;UACM,OAAO;AACd,UAAQ,MAAM,MAAM;;;;;;;AAuBxB,MAAa,eAAe,OAAO,EACjC,YACA,SACA,qBACA,YACA,WACA,6BACA,eACA,oBACA,sBACA,qBACA,cACA,iBACyB;CACzB,MAAM,gBAAgB,iBAAiB,cAAc;CACrD,MAAM,YAAY,aAAa,cAAc;AAE7C,KAAI,+BAA+B,8BAA8B,IAAI;AACnE,YACE,kDAAkD,4BAA4B,+CAC/E;AACD,gCAA8B;;CAGhC,IAAIA,UAAoB,MAAM,GAAG,YAAY,EAC3C,QAAQ,qBACT,CAAC;AAIF,KAAI,CAFe,MAAM,cAAc,eAAe,UAAU,CAE/C;AAEjB,KAAI,YAAY;EACd,MAAM,kBAAkB,MAAM,aAAa,WAAW;AAEtD,MAAI,gBAIF,WAAU,QAAQ,QAAQ,SACxB,gBAAgB,MAAM,YAAY,KAAK,QAAQ,KAAK,EAAE,KAAK,KAAK,QAAQ,CACzE;;AAML,WAAU,kBAAkB,aAAa,WAAW,GAAG;AACvD,WACE,eAAe,eAAe,QAAQ,OAAO,CAAC,cAAc,aAAa,QAAQ,CAAC,IACnF;AAED,WAAU,eAAe,eAAe,QAAQ,OAAO,CAAC,SAAS;AACjE,WAAU,QAAQ,KAAK,SAAS,MAAM,WAAW,KAAK,CAAC,IAAI,CAAC;AAyD5D,OAAM,YAtDW,QAAQ,SAAS,YAChC,QAAQ,KAAK,WAAW,YAAY;AAClC,YACE,qBAAqB,WAAW,QAAQ,CAAC,MAAM,aAAa,OAAO,GACpE;EAED,MAAM,uBAAuB,KAAK,cAAc,QAAQ,SAAS,QAAQ;EACzE,MAAM,iBAAiB,kBACrB,sBACA,QACA,WACD;AAGD,MAAI,gBAAgB,WAAW,eAAe,EAAE;GAC9C,MAAM,eAAe,SACnB,cAAc,QAAQ,SACtB,eACD;AACD,aACE,GAAG,SAAS,KAAK,WAAW,OAAO,CAAC,QAAQ,WAAW,aAAa,CAAC,4BACtE;AACD;;AAIF,MAAI,CAAC,WAAW,eAAe,EAAE;AAC/B,aAAU,QAAQ,eAAe,iCAAiC;AAClE,aAAU,QAAQ,eAAe,EAAE,EAAE,WAAW,MAAM,CAAC;AACvD,iBAAc,gBAAgB,GAAG;;EAGnC,MAAM,uBAAuB,uBAAuB,gBAAgB;GAClE;GACA;GACD,CAAC;AAEF,MAAI,qBAAqB,WAAW;AAClC,aAAU,qBAAqB,QAAQ;AACvC;;AAGF,QAAM,cACJ,sBACA,gBACA,QACA,YACA,eACA,WACA,mBACD;GACD,CACH,GAIE,SAAS,MAAM,EAChB,+BAA+B,EAChC"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { computeJaccardSimilarity } from "./computeSimilarity.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/translation-alignment/alignBlocks.ts
|
|
4
|
+
/**
 * Globally aligns two ordered block lists with a Needleman–Wunsch style
 * dynamic program.
 *
 * A candidate match is scored from type equality (+2), a content-length
 * ratio above 0.75 (+1) and the anchor-text Jaccard similarity (x8); every
 * gap costs -2.
 *
 * @param defaultBlocks - Blocks of the reference document; each needs `type`, `anchorText` and `content`.
 * @param secondaryBlocks - Blocks of the other document, same shape.
 * @returns Pairs in document order. A matched pair carries both indexes and
 *   the anchor similarity. A default-only block has `frenchIndex: null`, a
 *   secondary-only block has `englishIndex: -1`; both have `similarityScore: 0`.
 */
const alignEnglishAndFrenchBlocks = (defaultBlocks, secondaryBlocks) => {
	const defaultLength = defaultBlocks.length;
	const secondaryLength = secondaryBlocks.length;
	const gapPenalty = -2;
	const createMatrix = (rows, columns, initialValue) => Array.from({ length: rows }, () => Array.from({ length: columns }, () => initialValue));
	const scoreMatrix = createMatrix(defaultLength + 1, secondaryLength + 1, 0);
	const traceMatrix = createMatrix(defaultLength + 1, secondaryLength + 1, "diagonal");
	// Anchor similarity of every (default, secondary) pair, filled once while
	// scoring so the traceback can reuse it instead of recomputing shingles.
	const similarityMatrix = createMatrix(defaultLength, secondaryLength, 0);
	const computeMatchScore = (defaultIndex, secondaryIndex) => {
		const defaultBlock = defaultBlocks[defaultIndex];
		const secondaryBlock = secondaryBlocks[secondaryIndex];
		const typeBonus = defaultBlock.type === secondaryBlock.type ? 2 : 0;
		const anchorSimilarity = computeJaccardSimilarity(defaultBlock.anchorText, secondaryBlock.anchorText, 3);
		similarityMatrix[defaultIndex][secondaryIndex] = anchorSimilarity;
		const lengthRatio = Math.min(defaultBlock.content.length, secondaryBlock.content.length) / Math.max(defaultBlock.content.length, secondaryBlock.content.length);
		const lengthBonus = lengthRatio > .75 ? 1 : 0;
		// Weighted toward anchor similarity.
		return typeBonus + lengthBonus + anchorSimilarity * 8;
	};
	// First column / first row: aligning against nothing is a run of gaps.
	for (let row = 1; row <= defaultLength; row += 1) {
		scoreMatrix[row][0] = scoreMatrix[row - 1][0] + gapPenalty;
		traceMatrix[row][0] = "up";
	}
	for (let column = 1; column <= secondaryLength; column += 1) {
		scoreMatrix[0][column] = scoreMatrix[0][column - 1] + gapPenalty;
		traceMatrix[0][column] = "left";
	}
	// Fill: ties prefer a match, then a gap in the secondary list ("up").
	for (let row = 1; row <= defaultLength; row += 1) {
		for (let column = 1; column <= secondaryLength; column += 1) {
			const match = scoreMatrix[row - 1][column - 1] + computeMatchScore(row - 1, column - 1);
			const deleteGap = scoreMatrix[row - 1][column] + gapPenalty;
			const insertGap = scoreMatrix[row][column - 1] + gapPenalty;
			const best = Math.max(match, deleteGap, insertGap);
			scoreMatrix[row][column] = best;
			traceMatrix[row][column] = best === match ? "diagonal" : best === deleteGap ? "up" : "left";
		}
	}
	// Traceback walks from the end to the start, so pairs are collected in
	// reverse and flipped once at the end.
	const reversedPairs = [];
	let i = defaultLength;
	let j = secondaryLength;
	while (i > 0 || j > 0) {
		if (i > 0 && j > 0 && traceMatrix[i][j] === "diagonal") {
			reversedPairs.push({
				englishIndex: i - 1,
				frenchIndex: j - 1,
				similarityScore: similarityMatrix[i - 1][j - 1]
			});
			i -= 1;
			j -= 1;
		} else if (i > 0 && (j === 0 || traceMatrix[i][j] === "up")) {
			reversedPairs.push({
				englishIndex: i - 1,
				frenchIndex: null,
				similarityScore: 0
			});
			i -= 1;
		} else {
			// Only "left" remains here (j > 0): a secondary block with no counterpart.
			reversedPairs.push({
				englishIndex: -1,
				frenchIndex: j - 1,
				similarityScore: 0
			});
			j -= 1;
		}
	}
	return reversedPairs.reverse();
};
|
|
64
|
+
|
|
65
|
+
//#endregion
|
|
66
|
+
export { alignEnglishAndFrenchBlocks };
|
|
67
|
+
//# sourceMappingURL=alignBlocks.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"alignBlocks.mjs","names":["scoreMatrix: number[][]","traceMatrix: ('diagonal' | 'up' | 'left')[][]","i","j","result: AlignmentPair[]"],"sources":["../../../src/translation-alignment/alignBlocks.ts"],"sourcesContent":["import { computeJaccardSimilarity } from './computeSimilarity';\nimport type { AlignmentPair, FingerprintedBlock } from './types';\n\nexport const alignEnglishAndFrenchBlocks = (\n defaultBlocks: FingerprintedBlock[],\n secondaryBlocks: FingerprintedBlock[]\n): AlignmentPair[] => {\n // Needleman–Wunsch style global alignment using anchor similarity and type equality\n const defaultLength = defaultBlocks.length;\n const secondaryLength = secondaryBlocks.length;\n\n const scoreMatrix: number[][] = Array.from(\n { length: defaultLength + 1 },\n () => Array.from({ length: secondaryLength + 1 }, () => 0)\n );\n const traceMatrix: ('diagonal' | 'up' | 'left')[][] = Array.from(\n { length: defaultLength + 1 },\n () => Array.from({ length: secondaryLength + 1 }, () => 'diagonal')\n );\n\n const gapPenalty = -2;\n\n const computeMatchScore = (\n defaultIndex: number,\n secondaryIndex: number\n ): number => {\n const defaultBlock = defaultBlocks[defaultIndex];\n const secondaryBlock = secondaryBlocks[secondaryIndex];\n const typeBonus = defaultBlock.type === secondaryBlock.type ? 2 : 0;\n const anchorSimilarity = computeJaccardSimilarity(\n defaultBlock.anchorText,\n secondaryBlock.anchorText,\n 3\n );\n const lengthRatio =\n Math.min(defaultBlock.content.length, secondaryBlock.content.length) /\n Math.max(defaultBlock.content.length, secondaryBlock.content.length);\n const lengthBonus = lengthRatio > 0.75 ? 
1 : 0;\n return typeBonus + lengthBonus + anchorSimilarity * 8; // weighted toward anchor similarity\n };\n\n // initialize first row and column\n for (let i = 1; i <= defaultLength; i += 1) {\n scoreMatrix[i][0] = scoreMatrix[i - 1][0] + gapPenalty;\n traceMatrix[i][0] = 'up';\n }\n for (let j = 1; j <= secondaryLength; j += 1) {\n scoreMatrix[0][j] = scoreMatrix[0][j - 1] + gapPenalty;\n traceMatrix[0][j] = 'left';\n }\n\n // fill\n for (let i = 1; i <= defaultLength; i += 1) {\n for (let j = 1; j <= secondaryLength; j += 1) {\n const match = scoreMatrix[i - 1][j - 1] + computeMatchScore(i - 1, j - 1);\n const deleteGap = scoreMatrix[i - 1][j] + gapPenalty;\n const insertGap = scoreMatrix[i][j - 1] + gapPenalty;\n\n const best = Math.max(match, deleteGap, insertGap);\n scoreMatrix[i][j] = best;\n traceMatrix[i][j] =\n best === match ? 'diagonal' : best === deleteGap ? 'up' : 'left';\n }\n }\n\n // traceback\n const result: AlignmentPair[] = [];\n let i = defaultLength;\n let j = secondaryLength;\n while (i > 0 || j > 0) {\n if (i > 0 && j > 0 && traceMatrix[i][j] === 'diagonal') {\n const englishIndex = i - 1;\n const frenchIndex = j - 1;\n const similarityScore = computeJaccardSimilarity(\n defaultBlocks[englishIndex].anchorText,\n secondaryBlocks[frenchIndex].anchorText,\n 3\n );\n result.unshift({ englishIndex, frenchIndex, similarityScore });\n i -= 1;\n j -= 1;\n } else if (i > 0 && (j === 0 || traceMatrix[i][j] === 'up')) {\n result.unshift({\n englishIndex: i - 1,\n frenchIndex: null,\n similarityScore: 0,\n });\n i -= 1;\n } else if (j > 0 && (i === 0 || traceMatrix[i][j] === 'left')) {\n // french block has no corresponding english block (deleted)\n result.unshift({\n englishIndex: -1,\n frenchIndex: j - 1,\n similarityScore: 0,\n });\n j -= 1;\n }\n }\n return 
result;\n};\n"],"mappings":";;;AAGA,MAAa,+BACX,eACA,oBACoB;CAEpB,MAAM,gBAAgB,cAAc;CACpC,MAAM,kBAAkB,gBAAgB;CAExC,MAAMA,cAA0B,MAAM,KACpC,EAAE,QAAQ,gBAAgB,GAAG,QACvB,MAAM,KAAK,EAAE,QAAQ,kBAAkB,GAAG,QAAQ,EAAE,CAC3D;CACD,MAAMC,cAAgD,MAAM,KAC1D,EAAE,QAAQ,gBAAgB,GAAG,QACvB,MAAM,KAAK,EAAE,QAAQ,kBAAkB,GAAG,QAAQ,WAAW,CACpE;CAED,MAAM,aAAa;CAEnB,MAAM,qBACJ,cACA,mBACW;EACX,MAAM,eAAe,cAAc;EACnC,MAAM,iBAAiB,gBAAgB;EACvC,MAAM,YAAY,aAAa,SAAS,eAAe,OAAO,IAAI;EAClE,MAAM,mBAAmB,yBACvB,aAAa,YACb,eAAe,YACf,EACD;AAKD,SAAO,aAHL,KAAK,IAAI,aAAa,QAAQ,QAAQ,eAAe,QAAQ,OAAO,GACpE,KAAK,IAAI,aAAa,QAAQ,QAAQ,eAAe,QAAQ,OAAO,GACpC,MAAO,IAAI,KACZ,mBAAmB;;AAItD,MAAK,IAAIC,MAAI,GAAGA,OAAK,eAAe,OAAK,GAAG;AAC1C,cAAYA,KAAG,KAAK,YAAYA,MAAI,GAAG,KAAK;AAC5C,cAAYA,KAAG,KAAK;;AAEtB,MAAK,IAAIC,MAAI,GAAGA,OAAK,iBAAiB,OAAK,GAAG;AAC5C,cAAY,GAAGA,OAAK,YAAY,GAAGA,MAAI,KAAK;AAC5C,cAAY,GAAGA,OAAK;;AAItB,MAAK,IAAID,MAAI,GAAGA,OAAK,eAAe,OAAK,EACvC,MAAK,IAAIC,MAAI,GAAGA,OAAK,iBAAiB,OAAK,GAAG;EAC5C,MAAM,QAAQ,YAAYD,MAAI,GAAGC,MAAI,KAAK,kBAAkBD,MAAI,GAAGC,MAAI,EAAE;EACzE,MAAM,YAAY,YAAYD,MAAI,GAAGC,OAAK;EAC1C,MAAM,YAAY,YAAYD,KAAGC,MAAI,KAAK;EAE1C,MAAM,OAAO,KAAK,IAAI,OAAO,WAAW,UAAU;AAClD,cAAYD,KAAGC,OAAK;AACpB,cAAYD,KAAGC,OACb,SAAS,QAAQ,aAAa,SAAS,YAAY,OAAO;;CAKhE,MAAMC,SAA0B,EAAE;CAClC,IAAI,IAAI;CACR,IAAI,IAAI;AACR,QAAO,IAAI,KAAK,IAAI,EAClB,KAAI,IAAI,KAAK,IAAI,KAAK,YAAY,GAAG,OAAO,YAAY;EACtD,MAAM,eAAe,IAAI;EACzB,MAAM,cAAc,IAAI;EACxB,MAAM,kBAAkB,yBACtB,cAAc,cAAc,YAC5B,gBAAgB,aAAa,YAC7B,EACD;AACD,SAAO,QAAQ;GAAE;GAAc;GAAa;GAAiB,CAAC;AAC9D,OAAK;AACL,OAAK;YACI,IAAI,MAAM,MAAM,KAAK,YAAY,GAAG,OAAO,OAAO;AAC3D,SAAO,QAAQ;GACb,cAAc,IAAI;GAClB,aAAa;GACb,iBAAiB;GAClB,CAAC;AACF,OAAK;YACI,IAAI,MAAM,MAAM,KAAK,YAAY,GAAG,OAAO,SAAS;AAE7D,SAAO,QAAQ;GACb,cAAc;GACd,aAAa,IAAI;GACjB,iBAAiB;GAClB,CAAC;AACF,OAAK;;AAGT,QAAO"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
//#region src/translation-alignment/computeSimilarity.ts
|
|
2
|
+
/**
 * Builds the set of character shingles (fixed-length substrings) of a text.
 *
 * Whitespace runs are collapsed to one space and the text is trimmed first.
 * A non-empty text shorter than `shingleLength` yields itself as the only
 * shingle; an empty text yields an empty set.
 *
 * @param text - Input string.
 * @param shingleLength - Number of UTF-16 code units per shingle.
 * @returns Set of distinct shingles.
 */
const generateCharacterShingles = (text, shingleLength) => {
	const cleaned = text.replace(/\s+/g, " ").trim();
	if (cleaned.length < shingleLength) {
		return new Set(cleaned.length > 0 ? [cleaned] : []);
	}
	const shingles = new Set();
	const lastStart = cleaned.length - shingleLength;
	let start = 0;
	while (start <= lastStart) {
		shingles.add(cleaned.slice(start, start + shingleLength));
		start += 1;
	}
	return shingles;
};
|
|
12
|
+
/**
 * Jaccard similarity of two strings over their character shingles:
 * |A ∩ B| / |A ∪ B|.
 *
 * @param a - First string.
 * @param b - Second string.
 * @param shingleLength - Shingle size passed to `generateCharacterShingles` (default 3).
 * @returns 1 when both shingle sets are empty, otherwise a ratio in [0, 1].
 */
const computeJaccardSimilarity = (a, b, shingleLength = 3) => {
	const setA = generateCharacterShingles(a, shingleLength);
	const setB = generateCharacterShingles(b, shingleLength);
	// Two empty inputs are treated as identical.
	if (setA.size === 0 && setB.size === 0) return 1;
	// Walk the smaller set; membership tests run against the larger one.
	const [smaller, larger] = setA.size <= setB.size ? [setA, setB] : [setB, setA];
	let intersectionSize = 0;
	for (const shingle of smaller) {
		if (larger.has(shingle)) intersectionSize += 1;
	}
	// |A ∪ B| = |A| + |B| - |A ∩ B|; at least one set is non-empty here, so this is never 0.
	return intersectionSize / (setA.size + setB.size - intersectionSize);
};
|
|
20
|
+
|
|
21
|
+
//#endregion
|
|
22
|
+
export { computeJaccardSimilarity, generateCharacterShingles };
|
|
23
|
+
//# sourceMappingURL=computeSimilarity.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"computeSimilarity.mjs","names":[],"sources":["../../../src/translation-alignment/computeSimilarity.ts"],"sourcesContent":["// Character shingle Jaccard similarity (language agnostic)\nexport const generateCharacterShingles = (\n text: string,\n shingleLength: number\n): Set<string> => {\n const normalized = text.replace(/\\s+/g, ' ').trim();\n const set = new Set<string>();\n if (normalized.length < shingleLength) {\n if (normalized.length > 0) {\n set.add(normalized);\n }\n return set;\n }\n for (let index = 0; index <= normalized.length - shingleLength; index += 1) {\n set.add(normalized.slice(index, index + shingleLength));\n }\n return set;\n};\n\nexport const computeJaccardSimilarity = (\n a: string,\n b: string,\n shingleLength: number = 3\n): number => {\n const setA = generateCharacterShingles(a, shingleLength);\n const setB = generateCharacterShingles(b, shingleLength);\n if (setA.size === 0 && setB.size === 0) return 1;\n const intersectionSize = Array.from(setA).filter((token) =>\n setB.has(token)\n ).length;\n const unionSize = new Set([...Array.from(setA), ...Array.from(setB)]).size;\n return unionSize === 0 ? 0 : intersectionSize / unionSize;\n};\n"],"mappings":";AACA,MAAa,6BACX,MACA,kBACgB;CAChB,MAAM,aAAa,KAAK,QAAQ,QAAQ,IAAI,CAAC,MAAM;CACnD,MAAM,sBAAM,IAAI,KAAa;AAC7B,KAAI,WAAW,SAAS,eAAe;AACrC,MAAI,WAAW,SAAS,EACtB,KAAI,IAAI,WAAW;AAErB,SAAO;;AAET,MAAK,IAAI,QAAQ,GAAG,SAAS,WAAW,SAAS,eAAe,SAAS,EACvE,KAAI,IAAI,WAAW,MAAM,OAAO,QAAQ,cAAc,CAAC;AAEzD,QAAO;;AAGT,MAAa,4BACX,GACA,GACA,gBAAwB,MACb;CACX,MAAM,OAAO,0BAA0B,GAAG,cAAc;CACxD,MAAM,OAAO,0BAA0B,GAAG,cAAc;AACxD,KAAI,KAAK,SAAS,KAAK,KAAK,SAAS,EAAG,QAAO;CAC/C,MAAM,mBAAmB,MAAM,KAAK,KAAK,CAAC,QAAQ,UAChD,KAAK,IAAI,MAAM,CAChB,CAAC;CACF,MAAM,YAAY,IAAI,IAAI,CAAC,GAAG,MAAM,KAAK,KAAK,EAAE,GAAG,MAAM,KAAK,KAAK,CAAC,CAAC,CAAC;AACtE,QAAO,cAAc,IAAI,IAAI,mBAAmB"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import crypto from "node:crypto";
|
|
2
|
+
|
|
3
|
+
//#region src/translation-alignment/fingerprintBlock.ts
|
|
4
|
+
/** Returns the hex-encoded SHA-256 digest of `text`. */
const computeStringDigest = (text) => {
	const hasher = crypto.createHash("sha256");
	hasher.update(text);
	return hasher.digest("hex");
};
|
|
5
|
+
/**
 * Adds content digests to a normalized block.
 *
 * @param block - Block with `semanticText` and `anchorText`.
 * @param previousBlock - Preceding block, or null/undefined when there is none.
 * @param nextBlock - Following block, or null/undefined when there is none.
 * @returns A copy of `block` with `semanticDigest`, `anchorDigest`,
 *   `compositeKey` ("semantic:anchor") and `contextKey` (digest of the
 *   neighbours' semantic digests; a missing neighbour counts as "").
 */
const fingerprintBlock = (block, previousBlock, nextBlock) => {
	const semanticDigest = computeStringDigest(block.semanticText);
	const anchorDigest = computeStringDigest(block.anchorText);
	const previousDigest = computeStringDigest(previousBlock?.semanticText ?? "");
	const nextDigest = computeStringDigest(nextBlock?.semanticText ?? "");
	const fingerprint = {
		semanticDigest,
		anchorDigest,
		compositeKey: `${semanticDigest}:${anchorDigest}`,
		contextKey: computeStringDigest(`${previousDigest}:${nextDigest}`)
	};
	return { ...block, ...fingerprint };
};
|
|
18
|
+
|
|
19
|
+
//#endregion
|
|
20
|
+
export { fingerprintBlock };
|
|
21
|
+
//# sourceMappingURL=fingerprintBlock.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fingerprintBlock.mjs","names":[],"sources":["../../../src/translation-alignment/fingerprintBlock.ts"],"sourcesContent":["import crypto from 'node:crypto';\nimport type { FingerprintedBlock, NormalizedBlock } from './types';\n\nconst computeStringDigest = (text: string): string =>\n crypto.createHash('sha256').update(text).digest('hex');\n\nexport const fingerprintBlock = (\n block: NormalizedBlock,\n previousBlock: NormalizedBlock | null,\n nextBlock: NormalizedBlock | null\n): FingerprintedBlock => {\n const semanticDigest = computeStringDigest(block.semanticText);\n const anchorDigest = computeStringDigest(block.anchorText);\n const compositeKey = `${semanticDigest}:${anchorDigest}`;\n\n const previousDigest = computeStringDigest(previousBlock?.semanticText ?? '');\n const nextDigest = computeStringDigest(nextBlock?.semanticText ?? '');\n const contextKey = computeStringDigest(`${previousDigest}:${nextDigest}`);\n\n return {\n ...block,\n semanticDigest,\n anchorDigest,\n compositeKey,\n contextKey,\n };\n};\n"],"mappings":";;;AAGA,MAAM,uBAAuB,SAC3B,OAAO,WAAW,SAAS,CAAC,OAAO,KAAK,CAAC,OAAO,MAAM;AAExD,MAAa,oBACX,OACA,eACA,cACuB;CACvB,MAAM,iBAAiB,oBAAoB,MAAM,aAAa;CAC9D,MAAM,eAAe,oBAAoB,MAAM,WAAW;CAC1D,MAAM,eAAe,GAAG,eAAe,GAAG;CAI1C,MAAM,aAAa,oBAAoB,GAFhB,oBAAoB,eAAe,gBAAgB,GAAG,CAEpB,GADtC,oBAAoB,WAAW,gBAAgB,GAAG,GACI;AAEzE,QAAO;EACL,GAAG;EACH;EACA;EACA;EACA;EACD"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { computeJaccardSimilarity, generateCharacterShingles } from "./computeSimilarity.mjs";
|
|
2
|
+
import { alignEnglishAndFrenchBlocks } from "./alignBlocks.mjs";
|
|
3
|
+
import { fingerprintBlock } from "./fingerprintBlock.mjs";
|
|
4
|
+
import { mapChangedLinesToBlocks } from "./mapChangedLinesToBlocks.mjs";
|
|
5
|
+
import { normalizeBlock } from "./normalizeBlock.mjs";
|
|
6
|
+
import { planAlignmentActions } from "./planActions.mjs";
|
|
7
|
+
import { identifySegmentsToReview, mergeReviewedSegments } from "./rebuildDocument.mjs";
|
|
8
|
+
import { segmentDocument } from "./segmentDocument.mjs";
|
|
9
|
+
import { buildAlignmentPlan } from "./pipeline.mjs";
|
|
10
|
+
|
|
11
|
+
export { alignEnglishAndFrenchBlocks, buildAlignmentPlan, computeJaccardSimilarity, fingerprintBlock, generateCharacterShingles, identifySegmentsToReview, mapChangedLinesToBlocks, mergeReviewedSegments, normalizeBlock, planAlignmentActions, segmentDocument };
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
//#region src/translation-alignment/mapChangedLinesToBlocks.ts
|
|
2
|
+
/**
 * Finds which blocks contain at least one changed line.
 *
 * @param blocks - Blocks with inclusive `lineStart` / `lineEnd` line numbers.
 * @param changedLines - Changed line numbers; may be empty or missing.
 * @returns Set of indexes into `blocks` whose line range includes a changed line.
 */
const mapChangedLinesToBlocks = (blocks, changedLines) => {
	const touchedIndexes = new Set();
	if (!changedLines || changedLines.length === 0) return touchedIndexes;
	const lineLookup = new Set(changedLines);
	blocks.forEach((block, position) => {
		// Advance until a changed line is hit or the block's range is exhausted.
		let line = block.lineStart;
		while (line <= block.lineEnd && !lineLookup.has(line)) {
			line += 1;
		}
		if (line <= block.lineEnd) touchedIndexes.add(position);
	});
	return touchedIndexes;
};
|
|
14
|
+
|
|
15
|
+
//#endregion
|
|
16
|
+
export { mapChangedLinesToBlocks };
|
|
17
|
+
//# sourceMappingURL=mapChangedLinesToBlocks.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mapChangedLinesToBlocks.mjs","names":[],"sources":["../../../src/translation-alignment/mapChangedLinesToBlocks.ts"],"sourcesContent":["import type { Block, LineChange } from './types';\n\nexport const mapChangedLinesToBlocks = (\n blocks: Block[],\n changedLines: LineChange[]\n): Set<number> => {\n const changedSet = new Set<number>();\n if (!changedLines || changedLines.length === 0) return changedSet;\n\n const changedLookup = new Set<number>(changedLines);\n\n blocks.forEach((block, index) => {\n for (let line = block.lineStart; line <= block.lineEnd; line += 1) {\n if (changedLookup.has(line)) {\n changedSet.add(index);\n break;\n }\n }\n });\n\n return changedSet;\n};\n"],"mappings":";AAEA,MAAa,2BACX,QACA,iBACgB;CAChB,MAAM,6BAAa,IAAI,KAAa;AACpC,KAAI,CAAC,gBAAgB,aAAa,WAAW,EAAG,QAAO;CAEvD,MAAM,gBAAgB,IAAI,IAAY,aAAa;AAEnD,QAAO,SAAS,OAAO,UAAU;AAC/B,OAAK,IAAI,OAAO,MAAM,WAAW,QAAQ,MAAM,SAAS,QAAQ,EAC9D,KAAI,cAAc,IAAI,KAAK,EAAE;AAC3B,cAAW,IAAI,MAAM;AACrB;;GAGJ;AAEF,QAAO"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
//#region src/translation-alignment/normalizeBlock.ts
|
|
2
|
+
/**
 * Strips common Markdown markup from a text.
 *
 * Rules run in order: backtick spans and links/images become a space;
 * bold, emphasis and strikethrough keep their inner text; heading,
 * blockquote and list markers at line starts are removed.
 *
 * @param text - Markdown source.
 * @returns Text with the markup removed.
 */
const removeMarkdownFormatting = (text) => {
	const rules = [
		[/`{1,3}[^`]*`{1,3}/g, " "],
		[/\*\*([^*]+)\*\*/g, "$1"],
		[/\*([^*]+)\*/g, "$1"],
		[/_([^_]+)_/g, "$1"],
		[/~~([^~]+)~~/g, "$1"],
		[/!?\[[^\]]*\]\([^)]*\)/g, " "],
		[/^\s*#{1,6}\s+/gm, ""],
		[/^\s*>\s?/gm, ""],
		[/^\s*[-*+]\s+/gm, ""],
		[/^\s*\d+\.\s+/gm, ""]
	];
	let stripped = text;
	for (const [pattern, replacement] of rules) {
		stripped = stripped.replace(pattern, replacement);
	}
	return stripped;
};
|
|
5
|
+
/** Trims `text` and collapses every whitespace run to a single space. */
const collapseWhitespace = (text) => {
	const trimmed = text.trim();
	return trimmed.replace(/\s+/g, " ");
};
|
|
6
|
+
/**
 * Removes every Unicode letter (accented ones included) from `text`,
 * leaving digits, punctuation, symbols and whitespace in place.
 */
const stripLettersKeepDigitsAndSymbols = (text) => text.replace(/\p{L}+/gu, "");
|
|
9
|
+
/**
 * Derives the two comparison texts of a block from its `content`.
 *
 * @param block - Block with a `content` string.
 * @returns A copy of `block` plus `semanticText` (Markdown removed,
 *   lowercased, whitespace collapsed) and `anchorText` (letters removed,
 *   whitespace collapsed).
 */
const normalizeBlock = (block) => {
	const { content } = block;
	const plainText = removeMarkdownFormatting(content);
	const semanticText = collapseWhitespace(plainText.toLowerCase());
	const anchorText = collapseWhitespace(stripLettersKeepDigitsAndSymbols(content));
	return {
		...block,
		semanticText,
		anchorText
	};
};
|
|
18
|
+
|
|
19
|
+
//#endregion
|
|
20
|
+
export { normalizeBlock };
|
|
21
|
+
//# sourceMappingURL=normalizeBlock.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalizeBlock.mjs","names":[],"sources":["../../../src/translation-alignment/normalizeBlock.ts"],"sourcesContent":["import type { Block, NormalizedBlock } from './types';\n\nconst removeMarkdownFormatting = (text: string): string => {\n return text\n .replace(/`{1,3}[^`]*`{1,3}/g, ' ')\n .replace(/\\*\\*([^*]+)\\*\\*/g, '$1')\n .replace(/\\*([^*]+)\\*/g, '$1')\n .replace(/_([^_]+)_/g, '$1')\n .replace(/~~([^~]+)~~/g, '$1')\n .replace(/!?\\[[^\\]]*\\]\\([^)]*\\)/g, ' ')\n .replace(/^\\s*#{1,6}\\s+/gm, '')\n .replace(/^\\s*>\\s?/gm, '')\n .replace(/^\\s*[-*+]\\s+/gm, '')\n .replace(/^\\s*\\d+\\.\\s+/gm, '');\n};\n\nconst collapseWhitespace = (text: string): string =>\n text.replace(/\\s+/g, ' ').trim();\n\nconst stripLettersKeepDigitsAndSymbols = (text: string): string => {\n // Keep digits and non-letter characters, remove all letters (including accents)\n return text.replace(/\\p{L}+/gu, '');\n};\n\nexport const normalizeBlock = (block: Block): NormalizedBlock => {\n const contentWithoutMarkdown = removeMarkdownFormatting(block.content);\n const semanticLowercased = contentWithoutMarkdown.toLowerCase();\n const semanticCollapsed = collapseWhitespace(semanticLowercased);\n\n const anchorOnlySymbols = stripLettersKeepDigitsAndSymbols(block.content);\n const anchorCollapsed = collapseWhitespace(anchorOnlySymbols);\n\n return {\n ...block,\n semanticText: semanticCollapsed,\n anchorText: anchorCollapsed,\n 
};\n};\n"],"mappings":";AAEA,MAAM,4BAA4B,SAAyB;AACzD,QAAO,KACJ,QAAQ,sBAAsB,IAAI,CAClC,QAAQ,oBAAoB,KAAK,CACjC,QAAQ,gBAAgB,KAAK,CAC7B,QAAQ,cAAc,KAAK,CAC3B,QAAQ,gBAAgB,KAAK,CAC7B,QAAQ,0BAA0B,IAAI,CACtC,QAAQ,mBAAmB,GAAG,CAC9B,QAAQ,cAAc,GAAG,CACzB,QAAQ,kBAAkB,GAAG,CAC7B,QAAQ,kBAAkB,GAAG;;AAGlC,MAAM,sBAAsB,SAC1B,KAAK,QAAQ,QAAQ,IAAI,CAAC,MAAM;AAElC,MAAM,oCAAoC,SAAyB;AAEjE,QAAO,KAAK,QAAQ,YAAY,GAAG;;AAGrC,MAAa,kBAAkB,UAAkC;CAG/D,MAAM,oBAAoB,mBAFK,yBAAyB,MAAM,QAAQ,CACpB,aAAa,CACC;CAGhE,MAAM,kBAAkB,mBADE,iCAAiC,MAAM,QAAQ,CACZ;AAE7D,QAAO;EACL,GAAG;EACH,cAAc;EACd,YAAY;EACb"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { alignEnglishAndFrenchBlocks } from "./alignBlocks.mjs";
|
|
2
|
+
import { fingerprintBlock } from "./fingerprintBlock.mjs";
|
|
3
|
+
import { mapChangedLinesToBlocks } from "./mapChangedLinesToBlocks.mjs";
|
|
4
|
+
import { normalizeBlock } from "./normalizeBlock.mjs";
|
|
5
|
+
import { planAlignmentActions } from "./planActions.mjs";
|
|
6
|
+
import { identifySegmentsToReview, mergeReviewedSegments } from "./rebuildDocument.mjs";
|
|
7
|
+
import { segmentDocument } from "./segmentDocument.mjs";
|
|
8
|
+
|
|
9
|
+
//#region src/translation-alignment/pipeline.ts
|
|
10
|
+
/**
 * Runs the alignment pipeline on two document texts: segment, normalize,
 * fingerprint, align, then plan actions and collect the segments to review.
 *
 * @param englishText - Text of the reference document.
 * @param frenchText - Text of the document being aligned to it.
 * @param changedLines - Changed line numbers of the reference document; anything that is not an array is treated as "no changes".
 * @param similarityOptions - Optional thresholds; `minimumMatchForReuse` defaults to 0.9 and `minimumMatchForNearDuplicate` to 0.8.
 * @returns `{ englishBlocks, frenchBlocks, plan, segmentsToReview }`.
 */
const buildAlignmentPlan = ({ englishText, frenchText, changedLines, similarityOptions }) => {
	// Fingerprint a block together with its neighbours (null at either end).
	const withNeighbours = (block, position, all) => fingerprintBlock(block, all[position - 1] ?? null, all[position + 1] ?? null);
	const rawEnglish = segmentDocument(englishText);
	const rawFrench = segmentDocument(frenchText);
	const normalizedEnglish = rawEnglish.map(normalizeBlock);
	const normalizedFrench = rawFrench.map(normalizeBlock);
	const englishBlocks = normalizedEnglish.map(withNeighbours);
	const frenchBlocks = normalizedFrench.map(withNeighbours);
	const alignment = alignEnglishAndFrenchBlocks(englishBlocks, frenchBlocks);
	const changedBlockIndexes = mapChangedLinesToBlocks(englishBlocks, Array.isArray(changedLines) ? changedLines : []);
	const thresholds = {
		minimumMatchForReuse: similarityOptions?.minimumMatchForReuse ?? .9,
		minimumMatchForNearDuplicate: similarityOptions?.minimumMatchForNearDuplicate ?? .8
	};
	const plan = planAlignmentActions(alignment, changedBlockIndexes, thresholds);
	const review = identifySegmentsToReview({
		englishBlocks,
		frenchBlocks,
		plan
	});
	return {
		englishBlocks,
		frenchBlocks,
		plan,
		segmentsToReview: review.segmentsToReview
	};
};
|
|
33
|
+
|
|
34
|
+
//#endregion
|
|
35
|
+
export { buildAlignmentPlan, mergeReviewedSegments };
|
|
36
|
+
//# sourceMappingURL=pipeline.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.mjs","names":["englishBlocks: FingerprintedBlock[]","frenchBlocks: FingerprintedBlock[]"],"sources":["../../../src/translation-alignment/pipeline.ts"],"sourcesContent":["import { alignEnglishAndFrenchBlocks } from './alignBlocks';\nimport { fingerprintBlock } from './fingerprintBlock';\nimport { mapChangedLinesToBlocks } from './mapChangedLinesToBlocks';\nimport { normalizeBlock } from './normalizeBlock';\nimport { planAlignmentActions } from './planActions';\nimport {\n identifySegmentsToReview,\n mergeReviewedSegments,\n type SegmentToReview,\n} from './rebuildDocument';\nimport { segmentDocument } from './segmentDocument';\nimport type {\n AlignmentPlan,\n FingerprintedBlock,\n SimilarityOptions,\n} from './types';\n\nexport type BuildAlignmentPlanInput = {\n englishText: string;\n frenchText: string;\n changedLines: number[] | undefined;\n similarityOptions?: Partial<SimilarityOptions>;\n};\n\nexport type BuildAlignmentPlanOutput = {\n englishBlocks: FingerprintedBlock[];\n frenchBlocks: FingerprintedBlock[];\n plan: AlignmentPlan;\n segmentsToReview: SegmentToReview[];\n};\n\nexport const buildAlignmentPlan = ({\n englishText,\n frenchText,\n changedLines,\n similarityOptions,\n}: BuildAlignmentPlanInput): BuildAlignmentPlanOutput => {\n const englishBlocksRaw = segmentDocument(englishText);\n const frenchBlocksRaw = segmentDocument(frenchText);\n\n const englishNormalized = englishBlocksRaw.map(normalizeBlock);\n const frenchNormalized = frenchBlocksRaw.map(normalizeBlock);\n\n const englishBlocks: FingerprintedBlock[] = englishNormalized.map(\n (block, index, array) =>\n fingerprintBlock(\n block,\n array[index - 1] ?? null,\n array[index + 1] ?? null\n )\n );\n const frenchBlocks: FingerprintedBlock[] = frenchNormalized.map(\n (block, index, array) =>\n fingerprintBlock(\n block,\n array[index - 1] ?? null,\n array[index + 1] ?? 
null\n )\n );\n\n const alignment = alignEnglishAndFrenchBlocks(englishBlocks, frenchBlocks);\n\n const changedIndexes = mapChangedLinesToBlocks(\n englishBlocks,\n Array.isArray(changedLines) ? changedLines : []\n );\n\n const plan = planAlignmentActions(alignment, changedIndexes, {\n minimumMatchForReuse: similarityOptions?.minimumMatchForReuse ?? 0.9,\n minimumMatchForNearDuplicate:\n similarityOptions?.minimumMatchForNearDuplicate ?? 0.8,\n });\n\n const { segmentsToReview } = identifySegmentsToReview({\n englishBlocks,\n frenchBlocks,\n plan,\n });\n\n return { englishBlocks, frenchBlocks, plan, segmentsToReview };\n};\n\nexport { mergeReviewedSegments };\nexport type { SegmentToReview };\n"],"mappings":";;;;;;;;;AA+BA,MAAa,sBAAsB,EACjC,aACA,YACA,cACA,wBACuD;CACvD,MAAM,mBAAmB,gBAAgB,YAAY;CACrD,MAAM,kBAAkB,gBAAgB,WAAW;CAEnD,MAAM,oBAAoB,iBAAiB,IAAI,eAAe;CAC9D,MAAM,mBAAmB,gBAAgB,IAAI,eAAe;CAE5D,MAAMA,gBAAsC,kBAAkB,KAC3D,OAAO,OAAO,UACb,iBACE,OACA,MAAM,QAAQ,MAAM,MACpB,MAAM,QAAQ,MAAM,KACrB,CACJ;CACD,MAAMC,eAAqC,iBAAiB,KACzD,OAAO,OAAO,UACb,iBACE,OACA,MAAM,QAAQ,MAAM,MACpB,MAAM,QAAQ,MAAM,KACrB,CACJ;CASD,MAAM,OAAO,qBAPK,4BAA4B,eAAe,aAAa,EAEnD,wBACrB,eACA,MAAM,QAAQ,aAAa,GAAG,eAAe,EAAE,CAChD,EAE4D;EAC3D,sBAAsB,mBAAmB,wBAAwB;EACjE,8BACE,mBAAmB,gCAAgC;EACtD,CAAC;CAEF,MAAM,EAAE,qBAAqB,yBAAyB;EACpD;EACA;EACA;EACD,CAAC;AAEF,QAAO;EAAE;EAAe;EAAc;EAAM;EAAkB"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
//#region src/translation-alignment/planActions.ts
|
|
2
|
+
/**
 * Converts alignment pairs into a list of planned actions.
 *
 * - A pair with `englishIndex === -1` and a French index becomes `delete` (once per French index).
 * - A pair with an English index and `frenchIndex === null` becomes `insert_new`.
 * - A matched pair becomes `reuse` when its English block is unchanged and its
 *   similarity reaches `minimumMatchForReuse`; otherwise `review`.
 *
 * @param alignment - Pairs of `{ englishIndex, frenchIndex, similarityScore }`.
 * @param changedEnglishBlockIndexes - Set of English block indexes flagged as changed.
 * @param similarityOptions - Thresholds; only `minimumMatchForReuse` is read here.
 * @returns `{ actions }` in alignment order.
 */
const planAlignmentActions = (alignment, changedEnglishBlockIndexes, similarityOptions) => {
	const plannedActions = [];
	const handledFrench = new Set();
	for (const pair of alignment) {
		const { englishIndex, frenchIndex } = pair;
		const hasFrench = frenchIndex !== null;
		if (englishIndex === -1 && hasFrench) {
			// French-only block: schedule a single delete for it.
			if (!handledFrench.has(frenchIndex)) {
				plannedActions.push({ kind: "delete", frenchIndex });
				handledFrench.add(frenchIndex);
			}
			continue;
		}
		// Anything without a usable English index produces no action.
		if (!(englishIndex >= 0)) continue;
		if (!hasFrench) {
			plannedActions.push({ kind: "insert_new", englishIndex });
			continue;
		}
		const wasEdited = changedEnglishBlockIndexes.has(englishIndex);
		const meetsThreshold = pair.similarityScore >= similarityOptions.minimumMatchForReuse;
		const kind = !wasEdited && meetsThreshold ? "reuse" : "review";
		plannedActions.push({ kind, englishIndex, frenchIndex });
		handledFrench.add(frenchIndex);
	}
	return { actions: plannedActions };
};
|
|
44
|
+
|
|
45
|
+
//#endregion
|
|
46
|
+
export { planAlignmentActions };
|
|
47
|
+
//# sourceMappingURL=planActions.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"planActions.mjs","names":["actions: PlannedAction[]"],"sources":["../../../src/translation-alignment/planActions.ts"],"sourcesContent":["import type {\n AlignmentPair,\n AlignmentPlan,\n PlannedAction,\n SimilarityOptions,\n} from './types';\n\nexport const planAlignmentActions = (\n alignment: AlignmentPair[],\n changedEnglishBlockIndexes: Set<number>,\n similarityOptions: SimilarityOptions\n): AlignmentPlan => {\n const actions: PlannedAction[] = [];\n const seenFrench = new Set<number>();\n\n alignment.forEach((pair) => {\n const englishIndex = pair.englishIndex;\n const frenchIndex = pair.frenchIndex;\n\n if (englishIndex === -1 && frenchIndex !== null) {\n // french only -> delete\n if (!seenFrench.has(frenchIndex)) {\n actions.push({ kind: 'delete', frenchIndex });\n seenFrench.add(frenchIndex);\n }\n return;\n }\n\n if (englishIndex >= 0 && frenchIndex === null) {\n // new english block\n actions.push({ kind: 'insert_new', englishIndex });\n return;\n }\n\n if (englishIndex >= 0 && frenchIndex !== null) {\n // matched pair\n const isChanged = changedEnglishBlockIndexes.has(englishIndex);\n const isHighSimilarity =\n pair.similarityScore >= similarityOptions.minimumMatchForReuse;\n\n if (!isChanged && isHighSimilarity) {\n actions.push({ kind: 'reuse', englishIndex, frenchIndex });\n } else {\n actions.push({ kind: 'review', englishIndex, frenchIndex });\n }\n seenFrench.add(frenchIndex);\n return;\n }\n });\n\n return { actions 
};\n};\n"],"mappings":";AAOA,MAAa,wBACX,WACA,4BACA,sBACkB;CAClB,MAAMA,UAA2B,EAAE;CACnC,MAAM,6BAAa,IAAI,KAAa;AAEpC,WAAU,SAAS,SAAS;EAC1B,MAAM,eAAe,KAAK;EAC1B,MAAM,cAAc,KAAK;AAEzB,MAAI,iBAAiB,MAAM,gBAAgB,MAAM;AAE/C,OAAI,CAAC,WAAW,IAAI,YAAY,EAAE;AAChC,YAAQ,KAAK;KAAE,MAAM;KAAU;KAAa,CAAC;AAC7C,eAAW,IAAI,YAAY;;AAE7B;;AAGF,MAAI,gBAAgB,KAAK,gBAAgB,MAAM;AAE7C,WAAQ,KAAK;IAAE,MAAM;IAAc;IAAc,CAAC;AAClD;;AAGF,MAAI,gBAAgB,KAAK,gBAAgB,MAAM;GAE7C,MAAM,YAAY,2BAA2B,IAAI,aAAa;GAC9D,MAAM,mBACJ,KAAK,mBAAmB,kBAAkB;AAE5C,OAAI,CAAC,aAAa,iBAChB,SAAQ,KAAK;IAAE,MAAM;IAAS;IAAc;IAAa,CAAC;OAE1D,SAAQ,KAAK;IAAE,MAAM;IAAU;IAAc;IAAa,CAAC;AAE7D,cAAW,IAAI,YAAY;AAC3B;;GAEF;AAEF,QAAO,EAAE,SAAS"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
//#region src/translation-alignment/rebuildDocument.ts
|
|
2
|
+
/**
 * Collects the segments of a plan that still need review or translation.
 * Produces no output text; merging happens in mergeReviewedSegments once the
 * reviewed content is available.
 *
 * `review` actions carry the existing French content (null when the action
 * has no French index); `insert_new` actions always carry null. Every
 * segment records the index of its action within the plan.
 */
const identifySegmentsToReview = ({ englishBlocks, frenchBlocks, plan }) => {
	const segmentsToReview = plan.actions.flatMap((action, actionIndex) => {
		switch (action.kind) {
			case "review": {
				const englishBlock = englishBlocks[action.englishIndex];
				const frenchBlockText = action.frenchIndex === null ? null : frenchBlocks[action.frenchIndex].content;
				return [{ englishBlock, frenchBlockText, actionIndex }];
			}
			case "insert_new":
				return [{
					englishBlock: englishBlocks[action.englishIndex],
					frenchBlockText: null,
					actionIndex
				}];
			default:
				// "reuse" and "delete" need no review.
				return [];
		}
	});
	return { segmentsToReview };
};
|
|
28
|
+
/**
 * Rebuilds the final document text by walking the plan's actions in order.
 *
 * - `reuse` emits the existing French block content.
 * - `review` / `insert_new` emit the reviewed content stored under the
 *   action's index; when none is stored, `review` falls back to the existing
 *   French content (if it has a French index) and everything else to "\n".
 * - Other kinds (e.g. `delete`) emit nothing.
 */
const mergeReviewedSegments = (plan, frenchBlocks, reviewedSegments) => {
	const pieces = [];
	plan.actions.forEach((action, actionIndex) => {
		switch (action.kind) {
			case "reuse":
				pieces.push(frenchBlocks[action.frenchIndex].content);
				break;
			case "review":
			case "insert_new": {
				const reviewed = reviewedSegments.get(actionIndex);
				if (reviewed !== undefined) {
					pieces.push(reviewed);
					break;
				}
				// No reviewed text available: keep the old translation or leave a blank line.
				const canFallBack = action.kind === "review" && action.frenchIndex !== null;
				pieces.push(canFallBack ? frenchBlocks[action.frenchIndex].content : "\n");
				break;
			}
			default:
				break;
		}
	});
	return pieces.join("");
};
|
|
44
|
+
|
|
45
|
+
//#endregion
|
|
46
|
+
export { identifySegmentsToReview, mergeReviewedSegments };
|
|
47
|
+
//# sourceMappingURL=rebuildDocument.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rebuildDocument.mjs","names":["segmentsToReview: SegmentToReview[]","outputParts: string[]"],"sources":["../../../src/translation-alignment/rebuildDocument.ts"],"sourcesContent":["import type { AlignmentPlan, FingerprintedBlock } from './types';\n\nexport type SegmentToReview = {\n englishBlock: FingerprintedBlock;\n frenchBlockText: string | null;\n actionIndex: number;\n};\n\nexport type RebuildInput = {\n englishBlocks: FingerprintedBlock[];\n frenchBlocks: FingerprintedBlock[];\n plan: AlignmentPlan;\n};\n\nexport type RebuildResult = {\n segmentsToReview: SegmentToReview[];\n};\n\n/**\n * Analyzes the alignment plan and returns only the segments that need review/translation.\n * Does not generate output text - that's done by mergeReviewedSegments after translation.\n */\nexport const identifySegmentsToReview = ({\n englishBlocks,\n frenchBlocks,\n plan,\n}: RebuildInput): RebuildResult => {\n const segmentsToReview: SegmentToReview[] = [];\n\n plan.actions.forEach((action, actionIndex) => {\n if (action.kind === 'review') {\n const englishBlock = englishBlocks[action.englishIndex];\n const frenchBlockText =\n action.frenchIndex !== null\n ? 
frenchBlocks[action.frenchIndex].content\n : null;\n\n segmentsToReview.push({ englishBlock, frenchBlockText, actionIndex });\n } else if (action.kind === 'insert_new') {\n const englishBlock = englishBlocks[action.englishIndex];\n\n segmentsToReview.push({\n englishBlock,\n frenchBlockText: null,\n actionIndex,\n });\n }\n });\n\n return { segmentsToReview };\n};\n\n/**\n * Merges reviewed translations back into the final document following the alignment plan.\n */\nexport const mergeReviewedSegments = (\n plan: AlignmentPlan,\n frenchBlocks: FingerprintedBlock[],\n reviewedSegments: Map<number, string>\n): string => {\n const outputParts: string[] = [];\n\n plan.actions.forEach((action, actionIndex) => {\n if (action.kind === 'reuse') {\n outputParts.push(frenchBlocks[action.frenchIndex].content);\n } else if (action.kind === 'review' || action.kind === 'insert_new') {\n const reviewedContent = reviewedSegments.get(actionIndex);\n\n if (reviewedContent !== undefined) {\n outputParts.push(reviewedContent);\n } else {\n // Fallback: if review failed, use existing or blank\n if (action.kind === 'review' && action.frenchIndex !== null) {\n outputParts.push(frenchBlocks[action.frenchIndex].content);\n } else {\n outputParts.push('\\n');\n }\n }\n }\n // \"delete\" actions are simply skipped - no output\n });\n\n return 
outputParts.join('');\n};\n"],"mappings":";;;;;AAsBA,MAAa,4BAA4B,EACvC,eACA,cACA,WACiC;CACjC,MAAMA,mBAAsC,EAAE;AAE9C,MAAK,QAAQ,SAAS,QAAQ,gBAAgB;AAC5C,MAAI,OAAO,SAAS,UAAU;GAC5B,MAAM,eAAe,cAAc,OAAO;GAC1C,MAAM,kBACJ,OAAO,gBAAgB,OACnB,aAAa,OAAO,aAAa,UACjC;AAEN,oBAAiB,KAAK;IAAE;IAAc;IAAiB;IAAa,CAAC;aAC5D,OAAO,SAAS,cAAc;GACvC,MAAM,eAAe,cAAc,OAAO;AAE1C,oBAAiB,KAAK;IACpB;IACA,iBAAiB;IACjB;IACD,CAAC;;GAEJ;AAEF,QAAO,EAAE,kBAAkB;;;;;AAM7B,MAAa,yBACX,MACA,cACA,qBACW;CACX,MAAMC,cAAwB,EAAE;AAEhC,MAAK,QAAQ,SAAS,QAAQ,gBAAgB;AAC5C,MAAI,OAAO,SAAS,QAClB,aAAY,KAAK,aAAa,OAAO,aAAa,QAAQ;WACjD,OAAO,SAAS,YAAY,OAAO,SAAS,cAAc;GACnE,MAAM,kBAAkB,iBAAiB,IAAI,YAAY;AAEzD,OAAI,oBAAoB,OACtB,aAAY,KAAK,gBAAgB;YAG7B,OAAO,SAAS,YAAY,OAAO,gBAAgB,KACrD,aAAY,KAAK,aAAa,OAAO,aAAa,QAAQ;OAE1D,aAAY,KAAK,KAAK;;GAK5B;AAEF,QAAO,YAAY,KAAK,GAAG"}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
//#region src/translation-alignment/segmentDocument.ts
|
|
2
|
+
/**
 * Tells whether a line holds nothing but whitespace.
 *
 * @param {string} line - A single line of text.
 * @returns {boolean} `true` when the line is empty once trimmed.
 */
const isBlankLine = (line) => {
  const trimmed = line.trim();
  return trimmed === "";
};
|
|
3
|
+
/**
 * Tells whether a line opens or closes a fenced code block, i.e. starts with
 * three backticks after optional leading whitespace.
 *
 * @param {string} line - A single line of text.
 * @returns {boolean} `true` when the line is a code fence delimiter.
 */
const isFencedCodeDelimiter = (line) => {
  const fencePattern = /^\s*```/;
  return fencePattern.test(line);
};
|
|
4
|
+
/**
 * Tells whether a line is an ATX-style heading: one to six `#` characters
 * followed by at least one whitespace character, after optional leading
 * whitespace.
 *
 * @param {string} line - A single line of text.
 * @returns {boolean} `true` when the line looks like a heading.
 */
const isHeading = (line) => {
  const headingPattern = /^\s*#{1,6}\s+/;
  return headingPattern.test(line);
};
|
|
5
|
+
/**
 * Tells whether a line is a horizontal rule (thematic break): three or more
 * occurrences of the same marker (`-`, `*` or `_`), optionally separated and
 * surrounded by whitespace, with nothing else on the line.
 *
 * The marker is captured and back-referenced so that mixed sequences such as
 * `-*_` are rejected; only a run of one and the same character is a rule.
 *
 * @param {string} line - A single line of text.
 * @returns {boolean} `true` when the line is a horizontal rule.
 */
const isHorizontalRule = (line) => /^\s*([-*_])(?:\s*\1){2,}\s*$/.test(line);
|
|
6
|
+
/**
 * Tells whether a line starts a list item: a `-`, `*` or `+` bullet followed
 * by whitespace, or digits followed by a dot and whitespace, after optional
 * leading whitespace.
 *
 * @param {string} line - A single line of text.
 * @returns {boolean} `true` when the line begins with a list marker.
 */
const isListItem = (line) => {
  const listMarkerPattern = /^\s*([-*+]\s+|\d+\.[\t\s]+)/;
  return listMarkerPattern.test(line);
};
|
|
7
|
+
/**
 * Tells whether a line is a blockquote line, i.e. its first non-whitespace
 * character is `>`.
 *
 * @param {string} line - A single line of text.
 * @returns {boolean} `true` when the line starts with a blockquote marker.
 */
const isBlockquote = (line) => {
  const quotePattern = /^\s*>\s?/;
  return quotePattern.test(line);
};
|
|
8
|
+
/**
 * Simple table heuristic: a line is table-like when it contains a pipe
 * character and is not the start of a fenced code block.
 *
 * @param {string} line - A single line of text.
 * @returns {boolean} `true` when the line may belong to a table.
 */
const isTableLike = (line) => {
  const hasPipe = /\|/.test(line);
  if (!hasPipe) {
    return false;
  }
  return !isCodeFenceStart(line);
};
|
|
9
|
+
/**
 * Tells whether a line starts with a code fence (three backticks after
 * optional leading whitespace).
 *
 * @param {string} line - A single line of text.
 * @returns {boolean} `true` when the line begins a fenced code block.
 */
const isCodeFenceStart = (line) => {
  const fenceStartPattern = /^\s*```/;
  return fenceStartPattern.test(line);
};
|
|
10
|
+
/**
 * Collapses a run of newlines at the very end of the text into a single
 * newline. Text that does not end with a newline is returned unchanged.
 *
 * @param {string} text - Text to normalise.
 * @returns {string} The text with at most one trailing newline.
 */
const trimTrailingNewlines = (text) => {
  const trailingNewlines = /\n+$/g;
  return text.replace(trailingNewlines, "\n");
};
|
|
11
|
+
/**
 * Splits a Markdown document into an ordered list of structural blocks.
 *
 * Lines are scanned from top to bottom and each one is claimed by the first
 * matching rule, in this order: fenced code block, horizontal rule, heading,
 * list, blockquote, table, paragraph, blank line.
 *
 * @param {string} text - Full document text; it is split on "\n".
 * @returns {Array<{type: string, content: string, lineStart: number, lineEnd: number}>}
 *   Blocks in document order. `lineStart` and `lineEnd` are 1-based and
 *   inclusive; `content` always ends with a newline.
 */
const segmentDocument = (text) => {
  const lines = text.split("\n");
  const blocks = [];

  // Joins the collected lines into block content terminated by a newline.
  const joinContent = (contentLines) =>
    `${trimTrailingNewlines(contentLines.join("\n"))}\n`;

  // A blockquote may absorb plain continuation lines, but a blank line or the
  // start of a heading, code fence or horizontal rule has to end it.
  // Otherwise an opening fence would be swallowed by the quote and its closing
  // fence would later be read as the start of a new code block.
  const endsBlockquote = (line) =>
    isBlankLine(line) ||
    isHeading(line) ||
    isFencedCodeDelimiter(line) ||
    isHorizontalRule(line);

  let index = 0;
  while (index < lines.length) {
    const startIndex = index;
    const currentLine = lines[index];

    // Code block (fenced)
    if (isFencedCodeDelimiter(currentLine)) {
      const contentLines = [currentLine];
      index += 1;
      while (index < lines.length && !isFencedCodeDelimiter(lines[index])) {
        contentLines.push(lines[index]);
        index += 1;
      }
      // Include the closing fence when present; an unterminated fence runs
      // to the end of the document.
      if (index < lines.length) {
        contentLines.push(lines[index]);
        index += 1;
      }
      blocks.push({
        type: "code_block",
        content: joinContent(contentLines),
        lineStart: startIndex + 1,
        lineEnd: index,
      });
      continue;
    }

    // Horizontal rule
    if (isHorizontalRule(currentLine)) {
      blocks.push({
        type: "horizontal_rule",
        content: `${currentLine}\n`,
        lineStart: startIndex + 1,
        lineEnd: startIndex + 1,
      });
      index += 1;
      continue;
    }

    // Heading
    if (isHeading(currentLine)) {
      blocks.push({
        type: "heading",
        content: `${currentLine}\n`,
        lineStart: startIndex + 1,
        lineEnd: startIndex + 1,
      });
      index += 1;
      continue;
    }

    // List block: consecutive list items plus non-blank lines indented by at
    // least two whitespace characters (continuation lines).
    if (isListItem(currentLine)) {
      const contentLines = [];
      while (
        index < lines.length &&
        (isListItem(lines[index]) ||
          (!isBlankLine(lines[index]) && /^\s{2,}/.test(lines[index])))
      ) {
        contentLines.push(lines[index]);
        index += 1;
      }
      blocks.push({
        type: "list_item",
        content: joinContent(contentLines),
        lineStart: startIndex + 1,
        lineEnd: index,
      });
      continue;
    }

    // Blockquote (may span multiple lines, including lazy continuations)
    if (isBlockquote(currentLine)) {
      const contentLines = [];
      while (
        index < lines.length &&
        (isBlockquote(lines[index]) || !endsBlockquote(lines[index]))
      ) {
        contentLines.push(lines[index]);
        index += 1;
      }
      blocks.push({
        type: "blockquote",
        content: joinContent(contentLines),
        lineStart: startIndex + 1,
        lineEnd: index,
      });
      continue;
    }

    // Table-like (simple heuristic): consecutive non-blank lines with a pipe
    if (isTableLike(currentLine)) {
      const contentLines = [];
      while (
        index < lines.length &&
        /\|/.test(lines[index]) &&
        !isBlankLine(lines[index])
      ) {
        contentLines.push(lines[index]);
        index += 1;
      }
      blocks.push({
        type: "table",
        content: joinContent(contentLines),
        lineStart: startIndex + 1,
        lineEnd: index,
      });
      continue;
    }

    // Paragraph (gathers until a blank line)
    if (!isBlankLine(currentLine)) {
      const contentLines = [];
      while (index < lines.length && !isBlankLine(lines[index])) {
        // Stop if we detect a new structural block start
        if (
          isHeading(lines[index]) ||
          isFencedCodeDelimiter(lines[index]) ||
          isHorizontalRule(lines[index]) ||
          isListItem(lines[index]) ||
          isBlockquote(lines[index]) ||
          isTableLike(lines[index])
        ) {
          break;
        }
        contentLines.push(lines[index]);
        index += 1;
      }
      // Consume a single trailing blank line if present
      if (index < lines.length && isBlankLine(lines[index])) {
        contentLines.push(lines[index]);
        index += 1;
      }
      blocks.push({
        type: "paragraph",
        content: joinContent(contentLines),
        lineStart: startIndex + 1,
        lineEnd: index,
      });
      continue;
    }

    // Blank line outside of a paragraph: keep it to preserve spacing minimally
    blocks.push({
      type: "unknown",
      content: `${currentLine}\n`,
      lineStart: startIndex + 1,
      lineEnd: startIndex + 1,
    });
    index += 1;
  }

  return blocks;
};
|
|
128
|
+
|
|
129
|
+
//#endregion
|
|
130
|
+
export { segmentDocument };
|
|
131
|
+
//# sourceMappingURL=segmentDocument.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"segmentDocument.mjs","names":["blocks: Block[]","contentLines: string[]"],"sources":["../../../src/translation-alignment/segmentDocument.ts"],"sourcesContent":["import type { Block } from './types';\n\nconst isBlankLine = (line: string): boolean => line.trim().length === 0;\n\nconst isFencedCodeDelimiter = (line: string): boolean => /^\\s*```/.test(line);\n\nconst isHeading = (line: string): boolean => /^\\s*#{1,6}\\s+/.test(line);\n\nconst isHorizontalRule = (line: string): boolean =>\n /^(\\s*[-*_]){3,}\\s*$/.test(line);\n\nconst isListItem = (line: string): boolean =>\n /^\\s*([-*+]\\s+|\\d+\\.[\\t\\s]+)/.test(line);\n\nconst isBlockquote = (line: string): boolean => /^\\s*>\\s?/.test(line);\n\nconst isTableLike = (line: string): boolean =>\n /\\|/.test(line) && !isCodeFenceStart(line);\n\nconst isCodeFenceStart = (line: string): boolean => /^\\s*```/.test(line);\n\nconst trimTrailingNewlines = (text: string): string =>\n text.replace(/\\n+$/g, '\\n');\n\nexport const segmentDocument = (text: string): Block[] => {\n const lines = text.split('\\n');\n const blocks: Block[] = [];\n\n let index = 0;\n while (index < lines.length) {\n const startIndex = index;\n const currentLine = lines[index];\n\n // Code block (fenced)\n if (isFencedCodeDelimiter(currentLine)) {\n const contentLines: string[] = [currentLine];\n index += 1;\n while (index < lines.length && !isFencedCodeDelimiter(lines[index])) {\n contentLines.push(lines[index]);\n index += 1;\n }\n if (index < lines.length) {\n contentLines.push(lines[index]);\n index += 1;\n }\n blocks.push({\n type: 'code_block',\n content: `${trimTrailingNewlines(contentLines.join('\\n'))}\\n`,\n lineStart: startIndex + 1,\n lineEnd: index,\n });\n continue;\n }\n\n // Horizontal rule\n if (isHorizontalRule(currentLine)) {\n blocks.push({\n type: 'horizontal_rule',\n content: `${currentLine}\\n`,\n lineStart: startIndex + 1,\n lineEnd: startIndex + 1,\n });\n index += 1;\n continue;\n }\n\n // Heading\n if 
(isHeading(currentLine)) {\n blocks.push({\n type: 'heading',\n content: `${currentLine}\\n`,\n lineStart: startIndex + 1,\n lineEnd: startIndex + 1,\n });\n index += 1;\n continue;\n }\n\n // List block (one or more consecutive list items)\n if (isListItem(currentLine)) {\n const contentLines: string[] = [];\n while (\n index < lines.length &&\n (isListItem(lines[index]) ||\n (!isBlankLine(lines[index]) && /^\\s{2,}/.test(lines[index])))\n ) {\n contentLines.push(lines[index]);\n index += 1;\n }\n blocks.push({\n type: 'list_item',\n content: `${trimTrailingNewlines(contentLines.join('\\n'))}\\n`,\n lineStart: startIndex + 1,\n lineEnd: index,\n });\n continue;\n }\n\n // Blockquote (may span multiple lines)\n if (isBlockquote(currentLine)) {\n const contentLines: string[] = [];\n while (\n index < lines.length &&\n (isBlockquote(lines[index]) || !isBlankLine(lines[index]))\n ) {\n contentLines.push(lines[index]);\n index += 1;\n }\n blocks.push({\n type: 'blockquote',\n content: `${trimTrailingNewlines(contentLines.join('\\n'))}\\n`,\n lineStart: startIndex + 1,\n lineEnd: index,\n });\n continue;\n }\n\n // Table-like (simple heuristic)\n if (isTableLike(currentLine)) {\n const contentLines: string[] = [];\n while (\n index < lines.length &&\n /\\|/.test(lines[index]) &&\n !isBlankLine(lines[index])\n ) {\n contentLines.push(lines[index]);\n index += 1;\n }\n blocks.push({\n type: 'table',\n content: `${trimTrailingNewlines(contentLines.join('\\n'))}\\n`,\n lineStart: startIndex + 1,\n lineEnd: index,\n });\n continue;\n }\n\n // Paragraph (gathers until blank line)\n if (!isBlankLine(currentLine)) {\n const contentLines: string[] = [];\n while (index < lines.length && !isBlankLine(lines[index])) {\n // stop if we detect a new structural block start\n if (\n isHeading(lines[index]) ||\n isFencedCodeDelimiter(lines[index]) ||\n isHorizontalRule(lines[index]) ||\n isListItem(lines[index]) ||\n isBlockquote(lines[index]) ||\n isTableLike(lines[index])\n ) {\n 
break;\n }\n contentLines.push(lines[index]);\n index += 1;\n }\n // consume a single trailing blank line if present\n if (index < lines.length && isBlankLine(lines[index])) {\n contentLines.push(lines[index]);\n index += 1;\n }\n blocks.push({\n type: 'paragraph',\n content: `${trimTrailingNewlines(contentLines.join('\\n'))}\\n`,\n lineStart: startIndex + 1,\n lineEnd: index,\n });\n continue;\n }\n\n // Blank line outside of a paragraph: keep to preserve spacing minimally\n blocks.push({\n type: 'unknown',\n content: `${currentLine}\\n`,\n lineStart: startIndex + 1,\n lineEnd: startIndex + 1,\n });\n index += 1;\n }\n\n return blocks;\n};\n"],"mappings":";AAEA,MAAM,eAAe,SAA0B,KAAK,MAAM,CAAC,WAAW;AAEtE,MAAM,yBAAyB,SAA0B,UAAU,KAAK,KAAK;AAE7E,MAAM,aAAa,SAA0B,gBAAgB,KAAK,KAAK;AAEvE,MAAM,oBAAoB,SACxB,sBAAsB,KAAK,KAAK;AAElC,MAAM,cAAc,SAClB,8BAA8B,KAAK,KAAK;AAE1C,MAAM,gBAAgB,SAA0B,WAAW,KAAK,KAAK;AAErE,MAAM,eAAe,SACnB,KAAK,KAAK,KAAK,IAAI,CAAC,iBAAiB,KAAK;AAE5C,MAAM,oBAAoB,SAA0B,UAAU,KAAK,KAAK;AAExE,MAAM,wBAAwB,SAC5B,KAAK,QAAQ,SAAS,KAAK;AAE7B,MAAa,mBAAmB,SAA0B;CACxD,MAAM,QAAQ,KAAK,MAAM,KAAK;CAC9B,MAAMA,SAAkB,EAAE;CAE1B,IAAI,QAAQ;AACZ,QAAO,QAAQ,MAAM,QAAQ;EAC3B,MAAM,aAAa;EACnB,MAAM,cAAc,MAAM;AAG1B,MAAI,sBAAsB,YAAY,EAAE;GACtC,MAAMC,eAAyB,CAAC,YAAY;AAC5C,YAAS;AACT,UAAO,QAAQ,MAAM,UAAU,CAAC,sBAAsB,MAAM,OAAO,EAAE;AACnE,iBAAa,KAAK,MAAM,OAAO;AAC/B,aAAS;;AAEX,OAAI,QAAQ,MAAM,QAAQ;AACxB,iBAAa,KAAK,MAAM,OAAO;AAC/B,aAAS;;AAEX,UAAO,KAAK;IACV,MAAM;IACN,SAAS,GAAG,qBAAqB,aAAa,KAAK,KAAK,CAAC,CAAC;IAC1D,WAAW,aAAa;IACxB,SAAS;IACV,CAAC;AACF;;AAIF,MAAI,iBAAiB,YAAY,EAAE;AACjC,UAAO,KAAK;IACV,MAAM;IACN,SAAS,GAAG,YAAY;IACxB,WAAW,aAAa;IACxB,SAAS,aAAa;IACvB,CAAC;AACF,YAAS;AACT;;AAIF,MAAI,UAAU,YAAY,EAAE;AAC1B,UAAO,KAAK;IACV,MAAM;IACN,SAAS,GAAG,YAAY;IACxB,WAAW,aAAa;IACxB,SAAS,aAAa;IACvB,CAAC;AACF,YAAS;AACT;;AAIF,MAAI,WAAW,YAAY,EAAE;GAC3B,MAAMA,eAAyB,EAAE;AACjC,UACE,QAAQ,MAAM,WACb,WAAW,MAAM,OAAO,IACtB,CAAC,YAAY,MAAM,OAAO,IAAI,UAAU,KAAK,MAAM,OAAO,GAC7D;AACA,iBAAa,KAAK,MAAM,OAAO;AAC/B,aAAS;;AAEX,UAAO,KAAK;
IACV,MAAM;IACN,SAAS,GAAG,qBAAqB,aAAa,KAAK,KAAK,CAAC,CAAC;IAC1D,WAAW,aAAa;IACxB,SAAS;IACV,CAAC;AACF;;AAIF,MAAI,aAAa,YAAY,EAAE;GAC7B,MAAMA,eAAyB,EAAE;AACjC,UACE,QAAQ,MAAM,WACb,aAAa,MAAM,OAAO,IAAI,CAAC,YAAY,MAAM,OAAO,GACzD;AACA,iBAAa,KAAK,MAAM,OAAO;AAC/B,aAAS;;AAEX,UAAO,KAAK;IACV,MAAM;IACN,SAAS,GAAG,qBAAqB,aAAa,KAAK,KAAK,CAAC,CAAC;IAC1D,WAAW,aAAa;IACxB,SAAS;IACV,CAAC;AACF;;AAIF,MAAI,YAAY,YAAY,EAAE;GAC5B,MAAMA,eAAyB,EAAE;AACjC,UACE,QAAQ,MAAM,UACd,KAAK,KAAK,MAAM,OAAO,IACvB,CAAC,YAAY,MAAM,OAAO,EAC1B;AACA,iBAAa,KAAK,MAAM,OAAO;AAC/B,aAAS;;AAEX,UAAO,KAAK;IACV,MAAM;IACN,SAAS,GAAG,qBAAqB,aAAa,KAAK,KAAK,CAAC,CAAC;IAC1D,WAAW,aAAa;IACxB,SAAS;IACV,CAAC;AACF;;AAIF,MAAI,CAAC,YAAY,YAAY,EAAE;GAC7B,MAAMA,eAAyB,EAAE;AACjC,UAAO,QAAQ,MAAM,UAAU,CAAC,YAAY,MAAM,OAAO,EAAE;AAEzD,QACE,UAAU,MAAM,OAAO,IACvB,sBAAsB,MAAM,OAAO,IACnC,iBAAiB,MAAM,OAAO,IAC9B,WAAW,MAAM,OAAO,IACxB,aAAa,MAAM,OAAO,IAC1B,YAAY,MAAM,OAAO,CAEzB;AAEF,iBAAa,KAAK,MAAM,OAAO;AAC/B,aAAS;;AAGX,OAAI,QAAQ,MAAM,UAAU,YAAY,MAAM,OAAO,EAAE;AACrD,iBAAa,KAAK,MAAM,OAAO;AAC/B,aAAS;;AAEX,UAAO,KAAK;IACV,MAAM;IACN,SAAS,GAAG,qBAAqB,aAAa,KAAK,KAAK,CAAC,CAAC;IAC1D,WAAW,aAAa;IACxB,SAAS;IACV,CAAC;AACF;;AAIF,SAAO,KAAK;GACV,MAAM;GACN,SAAS,GAAG,YAAY;GACxB,WAAW,aAAa;GACxB,SAAS,aAAa;GACvB,CAAC;AACF,WAAS;;AAGX,QAAO"}
|
|
File without changes
|
package/dist/types/cli.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.d.ts","names":[],"sources":["../../src/cli.ts"],"sourcesContent":[],"mappings":";;;cAgCa;KAsIR,UAAA;EAtIQ,MAAA,CAAA,EAAA,MAEA;EAoIR,OAAA,CAAA,EAAA,OAAU;AAKf,CAAA;AA2Da,KA3DD,oBAAA,
|
|
1
|
+
{"version":3,"file":"cli.d.ts","names":[],"sources":["../../src/cli.ts"],"sourcesContent":[],"mappings":";;;cAgCa;KAsIR,UAAA;EAtIQ,MAAA,CAAA,EAAA,MAEA;EAoIR,OAAA,CAAA,EAAA,OAAU;AAKf,CAAA;AA2Da,KA3DD,oBAAA,GA0kBX;;;;;IArkBG;;;;;;;;;cAsDS,cAAa"}
|
package/dist/types/index.d.ts
CHANGED
|
@@ -7,9 +7,9 @@ import { liveSync } from "./liveSync.js";
|
|
|
7
7
|
import { pull } from "./pull.js";
|
|
8
8
|
import { push } from "./push/push.js";
|
|
9
9
|
import { pushConfig } from "./pushConfig.js";
|
|
10
|
-
import { reviewDoc
|
|
10
|
+
import { reviewDoc } from "./reviewDoc.js";
|
|
11
11
|
import { listMissingTranslations } from "./test/listMissingTranslations.js";
|
|
12
12
|
import { testMissingTranslations } from "./test/index.js";
|
|
13
13
|
import { translateDoc, translateFile } from "./translateDoc.js";
|
|
14
14
|
export * from "@intlayer/chokidar";
|
|
15
|
-
export { ConfigurationOptions, FillOptions, build, dirname, fill, listContentDeclaration, listContentDeclarationRows, listMissingTranslations, liveSync, pull, push, pushConfig, reviewDoc,
|
|
15
|
+
export { ConfigurationOptions, FillOptions, build, dirname, fill, listContentDeclaration, listContentDeclarationRows, listMissingTranslations, liveSync, pull, push, pushConfig, reviewDoc, setAPI, startEditor, testMissingTranslations, translateDoc, translateFile };
|