@llm-translate/cli 1.0.0-next.2 → 1.0.0-next.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +7 -2
- package/dist/cli/index.js +391 -1
- package/dist/cli/index.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.js +384 -1
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/core/engine.ts +215 -2
- package/src/parsers/html.ts +597 -0
- package/tests/unit/html-parser.test.ts +382 -0
package/Dockerfile
CHANGED
|
@@ -38,9 +38,13 @@ COPY --from=builder --chown=llmtranslate:nodejs /app/node_modules ./node_modules
|
|
|
38
38
|
COPY --from=builder --chown=llmtranslate:nodejs /app/dist ./dist
|
|
39
39
|
COPY --from=builder --chown=llmtranslate:nodejs /app/package.json ./
|
|
40
40
|
|
|
41
|
+
# Create cache directory with correct ownership
|
|
42
|
+
RUN mkdir -p /app/cache && chown llmtranslate:nodejs /app/cache
|
|
43
|
+
|
|
41
44
|
# Environment
|
|
42
45
|
ENV NODE_ENV=production
|
|
43
46
|
ENV TRANSLATE_PORT=3000
|
|
47
|
+
ENV TRANSLATE_CACHE_DIR=/app/cache
|
|
44
48
|
|
|
45
49
|
# Switch to non-root user
|
|
46
50
|
USER llmtranslate
|
|
@@ -51,5 +55,6 @@ EXPOSE 3000
|
|
|
51
55
|
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
|
52
56
|
CMD node -e "fetch('http://localhost:3000/health/live').then(r => r.ok ? process.exit(0) : process.exit(1)).catch(() => process.exit(1))"
|
|
53
57
|
|
|
54
|
-
#
|
|
55
|
-
|
|
58
|
+
# ENTRYPOINT for CLI, CMD for default arguments
|
|
59
|
+
ENTRYPOINT ["node", "dist/cli/index.js"]
|
|
60
|
+
CMD ["serve", "--json", "--cors", "--no-auth", "--cache-dir", "/app/cache"]
|
package/dist/cli/index.js
CHANGED
|
@@ -8,6 +8,7 @@ import 'remark-parse';
|
|
|
8
8
|
import 'remark-stringify';
|
|
9
9
|
import 'remark-gfm';
|
|
10
10
|
import 'unist-util-visit';
|
|
11
|
+
import * as cheerio from 'cheerio';
|
|
11
12
|
import { createAnthropic } from '@ai-sdk/anthropic';
|
|
12
13
|
import { generateText, streamText } from 'ai';
|
|
13
14
|
import { createOpenAI } from '@ai-sdk/openai';
|
|
@@ -1789,6 +1790,243 @@ var init_markdown = __esm({
|
|
|
1789
1790
|
"src/parsers/markdown.ts"() {
|
|
1790
1791
|
}
|
|
1791
1792
|
});
|
|
1793
|
+
/**
 * Parse an HTML string with cheerio and collect its translatable sections.
 *
 * @param {string} content - Raw HTML, either a full document or a fragment.
 * @returns {{original: string, $: object, sections: object[], isFullDocument: boolean}}
 *   The untouched input, the loaded cheerio instance, the sections found by
 *   `extractTranslatableSections`, and whether the input looked like a full
 *   document (contains an `<html` or `<body` tag).
 */
function parseHTML(content) {
  const $ = cheerio.load(content, {
    decodeEntities: false,
    xmlMode: false
  });
  // HTML tag names are case-insensitive: `<HTML>` / `<BODY>` must also mark a
  // full document, otherwise the serializer later returns only the body's
  // inner HTML and the rest of the document is lost.
  const isFullDocument = /<(?:html|body)/i.test(content);
  const sections = extractTranslatableSections($, isFullDocument);
  return {
    original: content,
    $,
    sections,
    isFullDocument
  };
}
|
|
1807
|
+
/**
 * Walk the DOM and collect every translatable text node and attribute.
 *
 * Text nodes yield a section whose selector ends in `::text(N)`, where N is
 * the index of the node among ALL text nodes of its parent (blank ones are
 * counted but not emitted). Attributes listed in TRANSLATABLE_ATTRIBUTES yield
 * sections flagged with `isAttribute`. Elements whose tag is in SKIP_TAGS are
 * neither inspected nor descended into.
 *
 * @param {Function} $ - Loaded cheerio instance.
 * @param {boolean} isFullDocument - When true the walk starts at `<body>`,
 *   otherwise at the cheerio root.
 * @returns {object[]} Sections in document order, ids `section-0`, `section-1`, ...
 */
function extractTranslatableSections($, isFullDocument) {
  const sections = [];
  let sectionId = 0;
  const root = isFullDocument ? $("body") : $.root();

  function processElement(element, parentSelector) {
    let textNodeIndex = 0;
    element.contents().each((_index, node) => {
      if (node.type === "text") {
        const text = node.data;
        // The index advances for blank nodes too, so it stays aligned with the
        // plain text-node count used when translations are written back.
        const currentTextIndex = textNodeIndex++;
        if (!text || !text.trim()) return;
        const parent = $(node).parent();
        const tagName = parent[0]?.tagName?.toLowerCase() || "unknown";
        if (SKIP_TAGS.has(tagName)) return;
        const selector = buildSelector($, parent, parentSelector, currentTextIndex);
        sections.push({
          id: `section-${sectionId++}`,
          content: text,
          selector,
          tagName,
          tokenCount: estimateTokens(text),
          translatable: true
        });
      } else if (node.type === "tag") {
        const tagName = node.tagName?.toLowerCase();
        if (SKIP_TAGS.has(tagName)) return;
        const $elem = $(node);
        const selector = buildSelector($, $elem, parentSelector);
        for (const attrName of TRANSLATABLE_ATTRIBUTES) {
          const attrValue = $elem.attr(attrName);
          if (attrValue && attrValue.trim()) {
            sections.push({
              id: `section-${sectionId++}`,
              content: attrValue,
              selector,
              tagName,
              isAttribute: true,
              attributeName: attrName,
              tokenCount: estimateTokens(attrValue),
              translatable: true
            });
          }
        }
        processElement($elem, selector);
      }
    });
  }

  // Anchor selectors at `body` for full documents. With an empty parent
  // selector a direct child of <body> would get a bare selector such as `p`,
  // which matches every <p> in the document when it is queried again.
  processElement(root, isFullDocument ? "body" : "");
  return sections;
}
|
|
1859
|
+
/**
 * Build the selector string used to find an element (or one of its text
 * nodes) again when translations are applied.
 *
 * @param {Function} $ - Loaded cheerio instance.
 * @param {object} element - Cheerio selection; only its first node is used.
 * @param {string} parentSelector - Selector of the enclosing element ("" at the root).
 * @param {number} [textIndex] - When given, the result addresses the Nth text
 *   node and ends in `::text(N)`.
 * @returns {string} `#id`, a `parent > tag[:nth-of-type(k)]` chain, or either
 *   of those followed by `::text(N)`.
 */
function buildSelector($, element, parentSelector, textIndex) {
  const elem = element[0];
  const textSuffix = textIndex !== void 0 ? `::text(${textIndex})` : "";
  if (!elem || elem.type !== "tag") {
    return parentSelector + textSuffix;
  }
  const tagName = elem.tagName?.toLowerCase() || "unknown";
  const rawId = $(elem).attr("id");
  // Only ids that are plain CSS identifiers can be written as `#id`. An id
  // like "a.b" or "1 bad" would yield a selector that is invalid or matches
  // something else, so those fall back to the structural selector.
  const isSafeId = typeof rawId === "string" && /^-?[A-Za-z_\u0080-\uFFFF][\w\u0080-\uFFFF-]*$/.test(rawId);
  if (textIndex !== void 0) {
    if (isSafeId) {
      return `#${rawId}${textSuffix}`;
    }
    return `${parentSelector || tagName}${textSuffix}`;
  }
  if (isSafeId) {
    return `#${rawId}`;
  }
  const siblings = $(elem).parent().children(tagName);
  let selector = tagName;
  if (siblings.length > 1) {
    // nth-of-type is 1-based; index() is 0-based.
    selector += `:nth-of-type(${siblings.index(elem) + 1})`;
  }
  return parentSelector ? `${parentSelector} > ${selector}` : selector;
}
|
|
1888
|
+
/**
 * Group translatable sections into chunks whose summed section token counts
 * stay within a budget. A single section larger than the budget still gets a
 * chunk of its own.
 *
 * @param {object[]} sections - Sections as produced by the HTML parser.
 * @param {{maxTokens?: number}} [options] - Chunking overrides.
 * @returns {object[]} Chunks built by `createChunk`, ids `chunk-0`, `chunk-1`, ...
 */
function chunkHTMLSections(sections, options = {}) {
  // Spreading options over the defaults would let an explicit
  // `maxTokens: undefined` erase the default and disable chunking
  // (`n > undefined` is always false), so resolve the budget explicitly.
  const requestedMax = options?.maxTokens;
  const maxTokens = requestedMax == null || Number.isNaN(Number(requestedMax)) ? DEFAULT_HTML_CHUNKING.maxTokens : requestedMax;
  const translatableSections = sections.filter((s) => s.translatable);
  if (translatableSections.length === 0) {
    return [];
  }
  const chunks = [];
  let currentChunk = [];
  let currentTokens = 0;
  let chunkId = 0;
  for (const section of translatableSections) {
    const sectionTokens = section.tokenCount;
    // Flush before adding a section that would overflow the budget.
    if (currentTokens + sectionTokens > maxTokens && currentChunk.length > 0) {
      chunks.push(createChunk(currentChunk, chunkId++));
      currentChunk = [];
      currentTokens = 0;
    }
    currentChunk.push(section);
    currentTokens += sectionTokens;
  }
  if (currentChunk.length > 0) {
    chunks.push(createChunk(currentChunk, chunkId));
  }
  return chunks;
}
|
|
1913
|
+
/**
 * Serialize a group of sections into one translation unit.
 *
 * Each section becomes a `[section-N] text` entry (or `[section-N:attr] text`
 * for attribute sections); entries are separated by a blank line.
 *
 * @param {object[]} sections - Sections to include, in order.
 * @param {number} id - Numeric suffix for the chunk id.
 * @returns {{id: string, content: string, sections: object[], tokenCount: number}}
 */
function createChunk(sections, id) {
  const entries = sections.map((entry) => {
    const marker = entry.isAttribute ? `${entry.id}:${entry.attributeName}` : `${entry.id}`;
    return `[${marker}] ${entry.content}`;
  });
  const content = entries.join("\n\n");
  const tokenCount = estimateTokens(content);
  return { id: `chunk-${id}`, content, sections, tokenCount };
}
|
|
1930
|
+
/**
 * Split a translated chunk back into per-section translations.
 *
 * The text is cut at every `[section-N]` / `[section-N:attr]` marker; whatever
 * follows a marker up to the next marker (or the end) is that section's
 * translation, trimmed. Cutting at markers, rather than matching the text with
 * a pattern that excludes `[`, keeps translations containing square brackets
 * and tolerates markers separated by a single newline.
 *
 * @param {{sections: {id: string}[]}} chunk - The chunk that was translated.
 * @param {string} translatedContent - Raw model output for the chunk.
 * @returns {Object<string, string>} Map from section id to translated text. If
 *   no marker is found and the chunk has exactly one section, the whole
 *   trimmed output is assigned to that section.
 */
function parseTranslatedChunk(chunk, translatedContent) {
  const map = {};
  const text = typeof translatedContent === "string" ? translatedContent : String(translatedContent ?? "");
  const markers = [...text.matchAll(/\[(section-\d+)(?::[^\]\n]*)?\]/g)];
  markers.forEach((marker, position) => {
    const start = marker.index + marker[0].length;
    const end = position + 1 < markers.length ? markers[position + 1].index : text.length;
    map[marker[1]] = text.slice(start, end).trim();
  });
  // The model sometimes drops the marker when there is only one section.
  if (markers.length === 0 && chunk.sections.length === 1) {
    map[chunk.sections[0].id] = text.trim();
  }
  return map;
}
|
|
1947
|
+
/**
 * Write translations back into the parsed DOM and serialize it.
 *
 * Sections without a (truthy) translation are left untouched. A failure on one
 * section is logged with `console.warn` and does not stop the others.
 *
 * @param {{$: Function, sections: object[], isFullDocument: boolean}} document
 *   Parsed document as returned by the HTML parser.
 * @param {Object<string, string>} translations - Map from section id to text.
 * @returns {string} Whole-document HTML for full documents; otherwise the
 *   `<body>` inner HTML, falling back to the whole document when that is empty.
 */
function applyHTMLTranslations(document, translations) {
  const $ = document.$;

  // Translations arrive trimmed, but the whitespace around a text node is
  // significant next to inline elements ("Hello <b>x</b>"), so carry the
  // original node's leading/trailing whitespace over to the new text.
  const withOriginalPadding = (originalText, translated) => {
    if (typeof originalText !== "string" || !originalText.trim()) {
      return translated;
    }
    const leading = originalText.slice(0, originalText.length - originalText.trimStart().length);
    const trailing = originalText.slice(originalText.trimEnd().length);
    return `${leading}${String(translated).trim()}${trailing}`;
  };

  for (const section of document.sections) {
    const translation = translations[section.id];
    if (!translation) continue;
    try {
      if (section.isAttribute && section.attributeName) {
        const elem = $(section.selector.replace(/::text\(\d+\)$/, ""));
        elem.attr(section.attributeName, translation);
        continue;
      }
      const textMatch = section.selector.match(/^(.*)::text\((\d+)\)$/);
      if (textMatch) {
        const [, parentSelector, textIndexStr] = textMatch;
        const textIndex = parseInt(textIndexStr, 10);
        let currentTextIndex = 0;
        $(parentSelector).contents().each((_i, node) => {
          if (node.type !== "text") return;
          if (currentTextIndex === textIndex) {
            node.data = withOriginalPadding(node.data, translation);
            return false; // stop iterating once the target node is updated
          }
          currentTextIndex++;
        });
      } else {
        // Selector without a text index: replace the first text node only.
        let updated = false;
        $(section.selector).contents().each((_i, node) => {
          if (node.type === "text" && !updated) {
            node.data = withOriginalPadding(node.data, translation);
            updated = true;
          }
        });
      }
    } catch (error) {
      console.warn(`Failed to apply translation for ${section.id}:`, error);
    }
  }
  if (document.isFullDocument) {
    return $.html();
  }
  return $("body").html() || $.html();
}
|
|
1994
|
+
/**
 * Summarize the sections of a parsed HTML document.
 *
 * @param {{sections: object[]}} document - Parsed document.
 * @returns {{totalSections: number, translatableSections: number,
 *   attributeSections: number, totalTokens: number, avgTokensPerSection: number}}
 *   Counts and token totals over ALL sections; the average is rounded and is 0
 *   when there are no sections.
 */
function getHTMLStats(document) {
  const all = document.sections;
  let translatableCount = 0;
  let attributeCount = 0;
  let totalTokens = 0;
  for (const entry of all) {
    if (entry.translatable) translatableCount += 1;
    if (entry.isAttribute) attributeCount += 1;
    totalTokens = totalTokens + entry.tokenCount;
  }
  let avgTokensPerSection = 0;
  if (all.length > 0) {
    avgTokensPerSection = Math.round(totalTokens / all.length);
  }
  return {
    totalSections: all.length,
    translatableSections: translatableCount,
    attributeSections: attributeCount,
    totalTokens,
    avgTokensPerSection
  };
}
|
|
2006
|
+
// Constants of the HTML parser module. They are declared here and assigned
// when init_html() runs its module initializer.
var SKIP_TAGS;
var TRANSLATABLE_ATTRIBUTES;
var DEFAULT_HTML_CHUNKING;
var init_html = __esm({
  "src/parsers/html.ts"() {
    init_tokens();
    // Tags whose text and subtree are never offered for translation.
    const skippedTagNames = [
      "script",
      "style",
      "code",
      "pre",
      "kbd",
      "samp",
      "var",
      "noscript",
      "template",
      "svg",
      "math"
    ];
    SKIP_TAGS = new Set(skippedTagNames);
    // Attributes whose values are extracted as translatable sections.
    TRANSLATABLE_ATTRIBUTES = ["alt", "title", "placeholder", "aria-label"];
    DEFAULT_HTML_CHUNKING = { maxTokens: 2048, minTokensForChunk: 100 };
  }
});
|
|
1792
2030
|
function mapFinishReason(reason) {
|
|
1793
2031
|
switch (reason) {
|
|
1794
2032
|
case "stop":
|
|
@@ -2949,6 +3187,7 @@ var init_engine = __esm({
|
|
|
2949
3187
|
init_agent();
|
|
2950
3188
|
init_chunker();
|
|
2951
3189
|
init_markdown();
|
|
3190
|
+
init_html();
|
|
2952
3191
|
init_glossary();
|
|
2953
3192
|
init_registry();
|
|
2954
3193
|
init_logger();
|
|
@@ -3037,7 +3276,7 @@ var init_engine = __esm({
|
|
|
3037
3276
|
result = await this.translateMarkdown(options, glossary);
|
|
3038
3277
|
break;
|
|
3039
3278
|
case "html":
|
|
3040
|
-
result = await this.
|
|
3279
|
+
result = await this.translateHTML(options, glossary);
|
|
3041
3280
|
break;
|
|
3042
3281
|
case "text":
|
|
3043
3282
|
default:
|
|
@@ -3176,6 +3415,157 @@ var init_engine = __esm({
|
|
|
3176
3415
|
}
|
|
3177
3416
|
};
|
|
3178
3417
|
}
|
|
3418
|
+
async translateHTML(options, glossary) {
|
|
3419
|
+
const document = parseHTML(options.content);
|
|
3420
|
+
if (this.verbose) {
|
|
3421
|
+
const stats = getHTMLStats(document);
|
|
3422
|
+
logger.info(`Parsed HTML: ${stats.translatableSections} translatable sections, ${stats.totalTokens} tokens`);
|
|
3423
|
+
}
|
|
3424
|
+
if (document.sections.length === 0) {
|
|
3425
|
+
return {
|
|
3426
|
+
content: options.content,
|
|
3427
|
+
chunks: [],
|
|
3428
|
+
metadata: {
|
|
3429
|
+
totalTokensUsed: 0,
|
|
3430
|
+
totalDuration: 0,
|
|
3431
|
+
averageQuality: 100,
|
|
3432
|
+
provider: this.provider.name,
|
|
3433
|
+
model: this.config.provider.model ?? this.provider.defaultModel,
|
|
3434
|
+
totalIterations: 0,
|
|
3435
|
+
tokensUsed: { input: 0, output: 0 },
|
|
3436
|
+
cache: { hits: 0, misses: 0 }
|
|
3437
|
+
}
|
|
3438
|
+
};
|
|
3439
|
+
}
|
|
3440
|
+
const chunks = chunkHTMLSections(document.sections, {
|
|
3441
|
+
maxTokens: this.config.chunking.maxTokens
|
|
3442
|
+
});
|
|
3443
|
+
if (this.verbose) {
|
|
3444
|
+
logger.info(`Chunked into ${chunks.length} translation units`);
|
|
3445
|
+
}
|
|
3446
|
+
const agent = createTranslationAgent({
|
|
3447
|
+
provider: this.provider,
|
|
3448
|
+
qualityThreshold: options.qualityThreshold ?? this.config.quality.threshold,
|
|
3449
|
+
maxIterations: options.maxIterations ?? this.config.quality.maxIterations,
|
|
3450
|
+
verbose: this.verbose,
|
|
3451
|
+
strictQuality: options.strictQuality
|
|
3452
|
+
});
|
|
3453
|
+
const allTranslations = {};
|
|
3454
|
+
const chunkResults = [];
|
|
3455
|
+
let totalInputTokens = 0;
|
|
3456
|
+
let totalOutputTokens = 0;
|
|
3457
|
+
let totalIterations = 0;
|
|
3458
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
3459
|
+
const chunk = chunks[i];
|
|
3460
|
+
if (!chunk) continue;
|
|
3461
|
+
if (this.verbose) {
|
|
3462
|
+
logger.info(`Translating HTML chunk ${i + 1}/${chunks.length} (${chunk.sections.length} sections)...`);
|
|
3463
|
+
}
|
|
3464
|
+
const glossaryString = glossary ? JSON.stringify(glossary.terms.map((t) => ({ s: t.source, t: t.target }))) : void 0;
|
|
3465
|
+
const cacheKey = {
|
|
3466
|
+
content: chunk.content,
|
|
3467
|
+
sourceLang: options.sourceLang,
|
|
3468
|
+
targetLang: options.targetLang,
|
|
3469
|
+
glossary: glossaryString,
|
|
3470
|
+
provider: this.provider.name,
|
|
3471
|
+
model: this.config.provider.model ?? this.provider.defaultModel
|
|
3472
|
+
};
|
|
3473
|
+
const cacheResult = this.cache.get(cacheKey);
|
|
3474
|
+
if (cacheResult.hit && cacheResult.entry) {
|
|
3475
|
+
this.cacheHits++;
|
|
3476
|
+
if (this.verbose) {
|
|
3477
|
+
logger.info(` \u21B3 Cache hit (quality: ${cacheResult.entry.qualityScore})`);
|
|
3478
|
+
}
|
|
3479
|
+
const chunkTranslations = parseTranslatedChunk(chunk, cacheResult.entry.translation);
|
|
3480
|
+
Object.assign(allTranslations, chunkTranslations);
|
|
3481
|
+
chunkResults.push({
|
|
3482
|
+
original: chunk.content,
|
|
3483
|
+
translated: cacheResult.entry.translation,
|
|
3484
|
+
startOffset: 0,
|
|
3485
|
+
endOffset: chunk.content.length,
|
|
3486
|
+
qualityScore: cacheResult.entry.qualityScore,
|
|
3487
|
+
iterations: 0,
|
|
3488
|
+
tokensUsed: { input: 0, output: 0, cacheRead: 1 },
|
|
3489
|
+
cached: true
|
|
3490
|
+
});
|
|
3491
|
+
continue;
|
|
3492
|
+
}
|
|
3493
|
+
this.cacheMisses++;
|
|
3494
|
+
const resolvedStyleInstruction = options.styleInstruction ?? this.config.languages.styles?.[options.targetLang];
|
|
3495
|
+
const request = {
|
|
3496
|
+
content: chunk.content,
|
|
3497
|
+
sourceLang: options.sourceLang,
|
|
3498
|
+
targetLang: options.targetLang,
|
|
3499
|
+
format: "html",
|
|
3500
|
+
glossary,
|
|
3501
|
+
context: {
|
|
3502
|
+
documentPurpose: options.context,
|
|
3503
|
+
styleInstruction: resolvedStyleInstruction,
|
|
3504
|
+
documentSummary: "HTML document with structured sections. Preserve the [section-N] markers exactly as they appear. Translate only the text after each marker."
|
|
3505
|
+
}
|
|
3506
|
+
};
|
|
3507
|
+
try {
|
|
3508
|
+
const result = await agent.translate(request);
|
|
3509
|
+
const chunkTranslations = parseTranslatedChunk(chunk, result.content);
|
|
3510
|
+
Object.assign(allTranslations, chunkTranslations);
|
|
3511
|
+
this.cache.set(cacheKey, result.content, result.metadata.qualityScore);
|
|
3512
|
+
chunkResults.push({
|
|
3513
|
+
original: chunk.content,
|
|
3514
|
+
translated: result.content,
|
|
3515
|
+
startOffset: 0,
|
|
3516
|
+
endOffset: chunk.content.length,
|
|
3517
|
+
qualityScore: result.metadata.qualityScore,
|
|
3518
|
+
iterations: result.metadata.iterations,
|
|
3519
|
+
tokensUsed: result.metadata.tokensUsed
|
|
3520
|
+
});
|
|
3521
|
+
if (result.metadata.tokensUsed) {
|
|
3522
|
+
totalInputTokens += result.metadata.tokensUsed.input;
|
|
3523
|
+
totalOutputTokens += result.metadata.tokensUsed.output;
|
|
3524
|
+
}
|
|
3525
|
+
totalIterations += result.metadata.iterations;
|
|
3526
|
+
} catch (error) {
|
|
3527
|
+
logger.error(`Failed to translate HTML chunk ${i + 1}: ${error}`);
|
|
3528
|
+
for (const section of chunk.sections) {
|
|
3529
|
+
allTranslations[section.id] = section.content;
|
|
3530
|
+
}
|
|
3531
|
+
chunkResults.push({
|
|
3532
|
+
original: chunk.content,
|
|
3533
|
+
translated: chunk.content,
|
|
3534
|
+
startOffset: 0,
|
|
3535
|
+
endOffset: chunk.content.length,
|
|
3536
|
+
qualityScore: 0,
|
|
3537
|
+
iterations: 0,
|
|
3538
|
+
tokensUsed: { input: 0, output: 0 }
|
|
3539
|
+
});
|
|
3540
|
+
}
|
|
3541
|
+
}
|
|
3542
|
+
const finalContent = applyHTMLTranslations(document, allTranslations);
|
|
3543
|
+
const qualityScores = chunkResults.filter((r) => r.qualityScore > 0).map((r) => r.qualityScore);
|
|
3544
|
+
const averageQuality = qualityScores.length > 0 ? qualityScores.reduce((a, b) => a + b, 0) / qualityScores.length : 0;
|
|
3545
|
+
const cacheHits = chunkResults.filter((r) => r.cached).length;
|
|
3546
|
+
const cacheMisses = chunkResults.filter((r) => !r.cached && r.qualityScore > 0).length;
|
|
3547
|
+
return {
|
|
3548
|
+
content: finalContent,
|
|
3549
|
+
chunks: chunkResults,
|
|
3550
|
+
metadata: {
|
|
3551
|
+
totalTokensUsed: totalInputTokens + totalOutputTokens,
|
|
3552
|
+
totalDuration: 0,
|
|
3553
|
+
// Will be set by caller
|
|
3554
|
+
averageQuality,
|
|
3555
|
+
provider: this.provider.name,
|
|
3556
|
+
model: this.config.provider.model ?? this.provider.defaultModel,
|
|
3557
|
+
totalIterations,
|
|
3558
|
+
tokensUsed: {
|
|
3559
|
+
input: totalInputTokens,
|
|
3560
|
+
output: totalOutputTokens
|
|
3561
|
+
},
|
|
3562
|
+
cache: {
|
|
3563
|
+
hits: cacheHits,
|
|
3564
|
+
misses: cacheMisses
|
|
3565
|
+
}
|
|
3566
|
+
}
|
|
3567
|
+
};
|
|
3568
|
+
}
|
|
3179
3569
|
async translatePlainText(options, glossary) {
|
|
3180
3570
|
const chunks = chunkContent(options.content, {
|
|
3181
3571
|
maxTokens: this.config.chunking.maxTokens,
|