@vertana/core 0.1.0-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/dist/_virtual/rolldown_runtime.cjs +29 -0
- package/dist/accumulator.cjs +64 -0
- package/dist/accumulator.d.cts +51 -0
- package/dist/accumulator.d.ts +51 -0
- package/dist/accumulator.js +61 -0
- package/dist/chunking.cjs +76 -0
- package/dist/chunking.d.cts +124 -0
- package/dist/chunking.d.ts +124 -0
- package/dist/chunking.js +74 -0
- package/dist/context.cjs +51 -0
- package/dist/context.d.cts +148 -0
- package/dist/context.d.ts +148 -0
- package/dist/context.js +49 -0
- package/dist/evaluation.cjs +120 -0
- package/dist/evaluation.d.cts +111 -0
- package/dist/evaluation.d.ts +111 -0
- package/dist/evaluation.js +119 -0
- package/dist/glossary.cjs +0 -0
- package/dist/glossary.d.cts +25 -0
- package/dist/glossary.d.ts +25 -0
- package/dist/glossary.js +0 -0
- package/dist/html.cjs +253 -0
- package/dist/html.d.cts +41 -0
- package/dist/html.d.ts +41 -0
- package/dist/html.js +250 -0
- package/dist/index.cjs +39 -0
- package/dist/index.d.cts +17 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.js +16 -0
- package/dist/markdown.cjs +300 -0
- package/dist/markdown.d.cts +17 -0
- package/dist/markdown.d.ts +17 -0
- package/dist/markdown.js +300 -0
- package/dist/plaintext.cjs +70 -0
- package/dist/plaintext.d.cts +17 -0
- package/dist/plaintext.d.ts +17 -0
- package/dist/plaintext.js +70 -0
- package/dist/prompt.cjs +91 -0
- package/dist/prompt.d.cts +74 -0
- package/dist/prompt.d.ts +74 -0
- package/dist/prompt.js +86 -0
- package/dist/refine.cjs +243 -0
- package/dist/refine.d.cts +148 -0
- package/dist/refine.d.ts +148 -0
- package/dist/refine.js +241 -0
- package/dist/select.cjs +62 -0
- package/dist/select.d.cts +83 -0
- package/dist/select.d.ts +83 -0
- package/dist/select.js +61 -0
- package/dist/terms.cjs +60 -0
- package/dist/terms.d.cts +36 -0
- package/dist/terms.d.ts +36 -0
- package/dist/terms.js +59 -0
- package/dist/tokens.cjs +40 -0
- package/dist/tokens.d.cts +24 -0
- package/dist/tokens.d.ts +24 -0
- package/dist/tokens.js +38 -0
- package/dist/tools.cjs +35 -0
- package/dist/tools.d.cts +20 -0
- package/dist/tools.d.ts +20 -0
- package/dist/tools.js +34 -0
- package/dist/translate.cjs +200 -0
- package/dist/translate.d.cts +190 -0
- package/dist/translate.d.ts +190 -0
- package/dist/translate.js +199 -0
- package/dist/window.cjs +0 -0
- package/dist/window.d.cts +48 -0
- package/dist/window.d.ts +48 -0
- package/dist/window.js +0 -0
- package/package.json +215 -0
package/dist/prompt.d.ts
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { Glossary } from "./glossary.js";
|
|
2
|
+
|
|
3
|
+
//#region src/prompt.d.ts
|
|
4
|
+
|
|
5
|
+
/**
 * Media types accepted for the text being translated:
 *
 * - `"text/plain"`: plain text
 * - `"text/html"`: HTML content
 * - `"text/markdown"`: Markdown content
 */
type MediaType =
  | "text/plain"
  | "text/html"
  | "text/markdown";
|
|
13
|
+
/**
 * Tones that can be requested for the translated text.
 */
type TranslationTone =
  | "formal"
  | "informal"
  | "technical"
  | "casual"
  | "professional"
  | "literary"
  | "journalistic";
|
|
17
|
+
/**
 * Looks up the English display name of a language.
 *
 * @param language An `Intl.Locale` instance or a BCP 47 language tag.
 * @returns The English display name of the language.
 */
declare function getLanguageName(
  language: Intl.Locale | string,
): string;
|
|
24
|
+
/**
 * Optional settings that add extra instructions to the system prompt.
 * Every field is optional; a field that is left unset adds nothing.
 */
interface SystemPromptOptions {
  /**
   * The language of the input text.  When set, the prompt names it using
   * its English display name.
   */
  readonly sourceLanguage?: Intl.Locale | string;

  /**
   * The tone the translation should use.
   */
  readonly tone?: TranslationTone;

  /**
   * The subject domain of the text, used to ask for field-appropriate
   * terminology.
   */
  readonly domain?: string;

  /**
   * The format of the input.  For HTML and Markdown the prompt asks the
   * model to preserve the formatting structure; `"text/plain"` adds
   * no instruction.
   */
  readonly mediaType?: MediaType;

  /**
   * Free-form background information passed to the model as additional
   * context.
   */
  readonly context?: string;

  /**
   * Term pairs to list in the prompt for consistent terminology.
   * An empty glossary adds nothing.
   */
  readonly glossary?: Glossary;
}
|
|
35
|
+
/**
 * Assembles the system prompt that tells the model how to translate.
 *
 * @param targetLanguage The language to translate into.
 * @param options Optional settings that add further instructions.
 * @returns The system prompt string.
 */
declare function buildSystemPrompt(
  targetLanguage: Intl.Locale | string,
  options?: SystemPromptOptions,
): string;
|
|
43
|
+
/**
 * A chunk that has already been translated, supplied as context when
 * translating a later chunk.
 */
interface TranslatedChunk {
  /** The chunk's original text. */
  readonly source: string;

  /** The translation produced for {@link source}. */
  readonly translation: string;
}
|
|
50
|
+
/**
 * Builds the user prompt for the translation.
 *
 * @param text The text to translate.
 * @param title An optional title; when given it is placed on a leading
 *              `Title:` line ahead of the text.
 * @returns The user prompt string.
 */
declare function buildUserPrompt(
  text: string,
  title?: string,
): string;
|
|
58
|
+
/**
 * Builds a user prompt that shows the model previously translated chunks
 * before the text to translate.
 *
 * @param text The text to translate.
 * @param previousChunks Chunks that were already translated.  When empty,
 *                       the text is returned unchanged.
 * @returns The user prompt string with context.
 */
declare function buildUserPromptWithContext(
  text: string,
  previousChunks: readonly TranslatedChunk[],
): string;
|
|
66
|
+
/**
 * Extracts the translated title from the translated text.
 *
 * A leading `Title:` line is preferred; when the text does not start with
 * one, the first line of the text is used instead.
 *
 * @param translatedText The translated text that may contain a title.
 * @returns The extracted title, or `undefined` when no `Title:` line is
 *          present and the first line is blank.
 */
declare function extractTitle(
  translatedText: string,
): string | undefined;
|
|
73
|
+
//#endregion
|
|
74
|
+
export { MediaType, SystemPromptOptions, TranslatedChunk, TranslationTone, buildSystemPrompt, buildUserPrompt, buildUserPromptWithContext, extractTitle, getLanguageName };
|
package/dist/prompt.js
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
//#region src/prompt.ts
|
|
2
|
+
const languageNames = new Intl.DisplayNames(["en"], { type: "language" });
/**
 * Gets the English display name for a language.
 *
 * If the tag cannot be resolved, because it is not a well-formed language
 * identifier or has no known display name, the tag itself is returned
 * instead of throwing.
 *
 * @param language The language as an `Intl.Locale` or BCP 47 tag.
 * @returns The English display name for the language, or the tag itself
 *          as a fallback.
 */
function getLanguageName(language) {
  // Use `baseName` rather than `toString()` so that Unicode extension
  // subtags (e.g. `-u-ca-gregory`) are not part of the lookup key.
  const tag = typeof language === "string" ? language : language.baseName;
  try {
    return languageNames.of(tag) ?? tag;
  } catch {
    // `Intl.DisplayNames.prototype.of()` throws a RangeError for malformed
    // codes; a prompt can still be built from the raw tag.
    return tag;
  }
}
|
|
13
|
+
/**
|
|
14
|
+
* Builds the system prompt for the translation.
|
|
15
|
+
*
|
|
16
|
+
* @param targetLanguage The target language for translation.
|
|
17
|
+
* @param options Additional options for the prompt.
|
|
18
|
+
* @returns The system prompt string.
|
|
19
|
+
*/
|
|
20
|
+
function buildSystemPrompt(targetLanguage, options) {
|
|
21
|
+
const parts = [
|
|
22
|
+
"You are a professional translator.",
|
|
23
|
+
`Translate the given text into ${getLanguageName(targetLanguage)}.`,
|
|
24
|
+
"Preserve the original meaning, tone, and nuance as accurately as possible.",
|
|
25
|
+
"Output only the translated text without any explanations or notes."
|
|
26
|
+
];
|
|
27
|
+
if (options?.sourceLanguage != null) {
|
|
28
|
+
const sourceLangName = getLanguageName(options.sourceLanguage);
|
|
29
|
+
parts.push(`The source language is ${sourceLangName}.`);
|
|
30
|
+
}
|
|
31
|
+
if (options?.tone != null) parts.push(`Use a ${options.tone} tone in the translation.`);
|
|
32
|
+
if (options?.domain != null) parts.push(`This text is from the ${options.domain} domain. Use appropriate terminology for this field.`);
|
|
33
|
+
if (options?.mediaType != null && options.mediaType !== "text/plain") {
|
|
34
|
+
const formatName = options.mediaType === "text/html" ? "HTML" : "Markdown";
|
|
35
|
+
parts.push(`The input is formatted as ${formatName}. Preserve the formatting structure in your translation.`);
|
|
36
|
+
}
|
|
37
|
+
if (options?.context != null) parts.push(`Additional context: ${options.context}`);
|
|
38
|
+
if (options?.glossary != null && options.glossary.length > 0) {
|
|
39
|
+
const glossaryLines = options.glossary.map((entry) => {
|
|
40
|
+
const contextNote = entry.context != null ? ` (${entry.context})` : "";
|
|
41
|
+
return ` - "${entry.original}" → "${entry.translated}"${contextNote}`;
|
|
42
|
+
});
|
|
43
|
+
parts.push("Use the following glossary for consistent terminology:\n" + glossaryLines.join("\n"));
|
|
44
|
+
}
|
|
45
|
+
return parts.join("\n\n");
|
|
46
|
+
}
|
|
47
|
+
/**
 * Builds the user prompt for the translation.
 *
 * @param text The text to translate.
 * @param title An optional title.  When given, it is placed on a leading
 *              `Title:` line, followed by a blank line and the text.
 * @returns The user prompt string.
 */
function buildUserPrompt(text, title) {
  return title == null ? text : `Title: ${title}\n\n${text}`;
}
|
|
58
|
+
/**
 * Builds a user prompt that shows previously translated chunks ahead of
 * the text to translate.
 *
 * @param text The text to translate.
 * @param previousChunks Previously translated chunks for context.  When
 *                       empty, `text` is returned unchanged.
 * @returns The user prompt string with context.
 */
function buildUserPromptWithContext(text, previousChunks) {
  if (previousChunks.length === 0) return text;

  const intro = "The following sections have already been translated. Maintain consistency in terminology, style, and tone with the previous translations.";
  // Sections are numbered from 1 in the order given.
  const priorSections = previousChunks.map((chunk, index) => {
    return [
      `[Previous section ${index + 1}]`,
      `Original: ${chunk.source}`,
      `Translation: ${chunk.translation}`
    ].join("\n");
  });
  return `${intro}\n\n${priorSections.join("\n\n")}\n\n[Current section to translate]\n${text}`;
}
|
|
73
|
+
/**
 * Extracts the translated title from the translated text.
 *
 * A leading `Title:` line wins; otherwise the first line of the text is
 * used as the title.
 *
 * @param translatedText The translated text that may contain a title.
 * @returns The extracted title, or `undefined` when there is no `Title:`
 *          line and the first line is blank.
 */
function extractTitle(translatedText) {
  const titled = /^Title:\s*(.+?)(?:\n|$)/.exec(translatedText);
  if (titled !== null) return titled[1].trim();

  // No explicit title line: fall back to the first line of the text.
  const [firstLine] = translatedText.split("\n");
  const candidate = firstLine?.trim();
  return candidate ? candidate : undefined;
}
|
|
84
|
+
|
|
85
|
+
//#endregion
|
|
86
|
+
export { buildSystemPrompt, buildUserPrompt, buildUserPromptWithContext, extractTitle, getLanguageName };
|
package/dist/refine.cjs
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
const require_rolldown_runtime = require('./_virtual/rolldown_runtime.cjs');
|
|
2
|
+
const require_evaluation = require('./evaluation.cjs');
|
|
3
|
+
let _logtape_logtape = require("@logtape/logtape");
|
|
4
|
+
let ai = require("ai");
|
|
5
|
+
|
|
6
|
+
//#region src/refine.ts
|
|
7
|
+
// Logger for this module, registered under the category
// ["vertana", "core", "refine"].
const logger = (0, _logtape_logtape.getLogger)(["vertana", "core", "refine"]);
|
|
12
|
+
/**
 * Resolves a locale to the English display name of its language.
 *
 * For an `Intl.Locale` the lookup uses its `baseName`; a string is used
 * as-is.  If the name cannot be resolved (including when the lookup
 * throws), the tag itself is returned.
 */
function getLanguageName(locale) {
  let tag;
  if (typeof locale === "string") tag = locale;
  else tag = locale.baseName;

  let displayName;
  try {
    const englishNames = new Intl.DisplayNames(["en"], { type: "language" });
    displayName = englishNames.of(tag);
  } catch {
    return tag;
  }
  return displayName ?? tag;
}
|
|
23
|
+
/**
|
|
24
|
+
* Builds the system prompt for chunk refinement.
|
|
25
|
+
*/
|
|
26
|
+
function buildRefineSystemPrompt(options, issues) {
|
|
27
|
+
const targetLang = getLanguageName(options.targetLanguage);
|
|
28
|
+
let prompt = `You are an expert translator refining a translation from ${(options.sourceLanguage ? getLanguageName(options.sourceLanguage) : null) ?? "the source language"} to ${targetLang}.
|
|
29
|
+
|
|
30
|
+
You will be given:
|
|
31
|
+
1. The original text
|
|
32
|
+
2. The current translation
|
|
33
|
+
3. A list of issues found in the translation
|
|
34
|
+
|
|
35
|
+
Your task is to fix the issues while preserving the parts that are correct.
|
|
36
|
+
Output ONLY the improved translation, nothing else.
|
|
37
|
+
|
|
38
|
+
## Issues to fix
|
|
39
|
+
|
|
40
|
+
`;
|
|
41
|
+
for (const issue of issues) prompt += `- [${issue.type}] ${issue.description}\n`;
|
|
42
|
+
if (options.glossary != null && options.glossary.length > 0) {
|
|
43
|
+
prompt += `\n## Glossary (must follow exactly)\n\n`;
|
|
44
|
+
for (const entry of options.glossary) prompt += `- "${entry.original}" → "${entry.translated}"\n`;
|
|
45
|
+
}
|
|
46
|
+
return prompt;
|
|
47
|
+
}
|
|
48
|
+
/**
 * Builds the user prompt for chunk refinement: the original text and the
 * current translation under their own headings, followed by the request
 * for an improved translation.
 */
function buildRefineUserPrompt(original, translated) {
  const lines = [
    "## Original Text",
    "",
    `${original}`,
    "",
    "## Current Translation",
    "",
    `${translated}`,
    "",
    "Please provide the improved translation:"
  ];
  return lines.join("\n");
}
|
|
62
|
+
/**
|
|
63
|
+
* Refines a single chunk based on evaluation feedback.
|
|
64
|
+
*/
|
|
65
|
+
async function refineChunk(model, original, translated, issues, options) {
|
|
66
|
+
return (await (0, ai.generateText)({
|
|
67
|
+
model,
|
|
68
|
+
system: buildRefineSystemPrompt(options, issues),
|
|
69
|
+
prompt: buildRefineUserPrompt(original, translated),
|
|
70
|
+
abortSignal: options.signal
|
|
71
|
+
})).text.trim();
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Evaluates the boundary between two chunks for coherence.
|
|
75
|
+
*/
|
|
76
|
+
async function evaluateBoundary(model, chunk1Translated, chunk2Translated, chunk1Original, chunk2Original, options) {
|
|
77
|
+
const targetLang = getLanguageName(options.targetLanguage);
|
|
78
|
+
const boundarySize = 200;
|
|
79
|
+
const chunk1End = chunk1Translated.slice(-boundarySize);
|
|
80
|
+
const chunk2Start = chunk2Translated.slice(0, boundarySize);
|
|
81
|
+
const result = await (0, ai.generateText)({
|
|
82
|
+
model,
|
|
83
|
+
system: `You are an expert translation quality evaluator.
|
|
84
|
+
|
|
85
|
+
Evaluate the coherence at the boundary between two consecutive translation chunks.
|
|
86
|
+
|
|
87
|
+
Check for:
|
|
88
|
+
1. **Coherence**: Does the text flow naturally from one chunk to the next?
|
|
89
|
+
2. **Style**: Is the style consistent across the boundary?
|
|
90
|
+
3. **Reference**: Are pronouns and references consistent?
|
|
91
|
+
4. **Terminology**: Are terms used consistently?
|
|
92
|
+
|
|
93
|
+
Respond in this exact JSON format:
|
|
94
|
+
{
|
|
95
|
+
"score": <number between 0 and 1>,
|
|
96
|
+
"issues": [
|
|
97
|
+
{"type": "<coherence|style|reference|terminology>", "description": "<description>"}
|
|
98
|
+
]
|
|
99
|
+
}`,
|
|
100
|
+
prompt: `## End of chunk 1 (original)
|
|
101
|
+
${chunk1Original.slice(-boundarySize)}
|
|
102
|
+
|
|
103
|
+
## End of chunk 1 (translated to ${targetLang})
|
|
104
|
+
${chunk1End}
|
|
105
|
+
|
|
106
|
+
## Start of chunk 2 (original)
|
|
107
|
+
${chunk2Original.slice(0, boundarySize)}
|
|
108
|
+
|
|
109
|
+
## Start of chunk 2 (translated to ${targetLang})
|
|
110
|
+
${chunk2Start}
|
|
111
|
+
|
|
112
|
+
Evaluate the boundary coherence:`,
|
|
113
|
+
abortSignal: options.signal
|
|
114
|
+
});
|
|
115
|
+
try {
|
|
116
|
+
const parsed = JSON.parse(result.text);
|
|
117
|
+
return {
|
|
118
|
+
score: Math.max(0, Math.min(1, parsed.score)),
|
|
119
|
+
issues: parsed.issues ?? []
|
|
120
|
+
};
|
|
121
|
+
} catch {
|
|
122
|
+
return {
|
|
123
|
+
score: 1,
|
|
124
|
+
issues: []
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
/**
|
|
129
|
+
* Refines translated chunks to improve quality using an iterative
|
|
130
|
+
* evaluate-fix loop.
|
|
131
|
+
*
|
|
132
|
+
* @param model The language model to use for refinement.
|
|
133
|
+
* @param originalChunks The original text chunks that were translated.
|
|
134
|
+
* @param translatedChunks The translated chunks to refine.
|
|
135
|
+
* @param options Refinement options.
|
|
136
|
+
* @returns A promise that resolves to the refinement result.
|
|
137
|
+
* @throws {RangeError} If the number of original and translated chunks
|
|
138
|
+
* do not match.
|
|
139
|
+
*/
|
|
140
|
+
async function refineChunks(model, originalChunks, translatedChunks, options) {
|
|
141
|
+
if (originalChunks.length !== translatedChunks.length) throw new RangeError(`Chunk count mismatch: ${originalChunks.length} original vs ${translatedChunks.length} translated`);
|
|
142
|
+
const targetScore = options.targetScore ?? .85;
|
|
143
|
+
const maxIterations = options.maxIterations ?? 3;
|
|
144
|
+
const shouldEvaluateBoundaries = options.evaluateBoundaries ?? true;
|
|
145
|
+
logger.info("Starting refinement of {chunkCount} chunks...", {
|
|
146
|
+
chunkCount: originalChunks.length,
|
|
147
|
+
targetScore,
|
|
148
|
+
maxIterations
|
|
149
|
+
});
|
|
150
|
+
const refinedChunks = [...translatedChunks];
|
|
151
|
+
const scores = new Array(translatedChunks.length).fill(0);
|
|
152
|
+
const history = [];
|
|
153
|
+
let totalIterations = 0;
|
|
154
|
+
for (let i = 0; i < refinedChunks.length; i++) {
|
|
155
|
+
options.signal?.throwIfAborted();
|
|
156
|
+
logger.debug("Evaluating chunk {index} of {total}...", {
|
|
157
|
+
index: i + 1,
|
|
158
|
+
total: refinedChunks.length
|
|
159
|
+
});
|
|
160
|
+
let currentText = refinedChunks[i];
|
|
161
|
+
let evaluation;
|
|
162
|
+
evaluation = await require_evaluation.evaluate(model, originalChunks[i], currentText, {
|
|
163
|
+
targetLanguage: options.targetLanguage,
|
|
164
|
+
sourceLanguage: options.sourceLanguage,
|
|
165
|
+
glossary: options.glossary,
|
|
166
|
+
signal: options.signal
|
|
167
|
+
});
|
|
168
|
+
scores[i] = evaluation.score;
|
|
169
|
+
logger.debug("Chunk {index} initial score: {score}.", {
|
|
170
|
+
index: i + 1,
|
|
171
|
+
score: evaluation.score,
|
|
172
|
+
issues: evaluation.issues.length
|
|
173
|
+
});
|
|
174
|
+
let iteration = 0;
|
|
175
|
+
while (evaluation.score < targetScore && iteration < maxIterations) {
|
|
176
|
+
options.signal?.throwIfAborted();
|
|
177
|
+
iteration++;
|
|
178
|
+
totalIterations++;
|
|
179
|
+
const beforeText = currentText;
|
|
180
|
+
const scoreBefore = evaluation.score;
|
|
181
|
+
const issuesAddressed = evaluation.issues;
|
|
182
|
+
currentText = await refineChunk(model, originalChunks[i], currentText, evaluation.issues, options);
|
|
183
|
+
evaluation = await require_evaluation.evaluate(model, originalChunks[i], currentText, {
|
|
184
|
+
targetLanguage: options.targetLanguage,
|
|
185
|
+
sourceLanguage: options.sourceLanguage,
|
|
186
|
+
glossary: options.glossary,
|
|
187
|
+
signal: options.signal
|
|
188
|
+
});
|
|
189
|
+
history.push({
|
|
190
|
+
chunkIndex: i,
|
|
191
|
+
iteration,
|
|
192
|
+
before: beforeText,
|
|
193
|
+
after: currentText,
|
|
194
|
+
scoreBefore,
|
|
195
|
+
scoreAfter: evaluation.score,
|
|
196
|
+
issuesAddressed
|
|
197
|
+
});
|
|
198
|
+
logger.debug("Chunk {chunkIndex} iteration {iteration}: {scoreBefore} → {scoreAfter}.", {
|
|
199
|
+
chunkIndex: i + 1,
|
|
200
|
+
iteration,
|
|
201
|
+
scoreBefore,
|
|
202
|
+
scoreAfter: evaluation.score
|
|
203
|
+
});
|
|
204
|
+
scores[i] = evaluation.score;
|
|
205
|
+
}
|
|
206
|
+
refinedChunks[i] = currentText;
|
|
207
|
+
}
|
|
208
|
+
let boundaryEvaluations;
|
|
209
|
+
if (shouldEvaluateBoundaries && refinedChunks.length > 1) {
|
|
210
|
+
logger.debug("Evaluating {count} chunk boundaries...", { count: refinedChunks.length - 1 });
|
|
211
|
+
boundaryEvaluations = [];
|
|
212
|
+
for (let i = 0; i < refinedChunks.length - 1; i++) {
|
|
213
|
+
options.signal?.throwIfAborted();
|
|
214
|
+
const boundaryResult = await evaluateBoundary(model, refinedChunks[i], refinedChunks[i + 1], originalChunks[i], originalChunks[i + 1], options);
|
|
215
|
+
boundaryEvaluations.push({
|
|
216
|
+
chunkIndex: i,
|
|
217
|
+
...boundaryResult
|
|
218
|
+
});
|
|
219
|
+
if (boundaryResult.issues.length > 0) logger.warn("Boundary {index} has {issueCount} issue(s), score: {score}.", {
|
|
220
|
+
index: i + 1,
|
|
221
|
+
issueCount: boundaryResult.issues.length,
|
|
222
|
+
score: boundaryResult.score
|
|
223
|
+
});
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
const averageScore = scores.reduce((a, b) => a + b, 0) / scores.length;
|
|
227
|
+
logger.info("Refinement completed.", {
|
|
228
|
+
totalIterations,
|
|
229
|
+
averageScore,
|
|
230
|
+
chunkCount: refinedChunks.length
|
|
231
|
+
});
|
|
232
|
+
return {
|
|
233
|
+
chunks: refinedChunks,
|
|
234
|
+
scores,
|
|
235
|
+
totalIterations,
|
|
236
|
+
history,
|
|
237
|
+
boundaryEvaluations
|
|
238
|
+
};
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
//#endregion
|
|
242
|
+
exports.evaluateBoundary = evaluateBoundary;
|
|
243
|
+
exports.refineChunks = refineChunks;
|
package/dist/refine.d.cts
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import { Glossary } from "./glossary.cjs";
|
|
2
|
+
import { TranslationIssue } from "./evaluation.cjs";
|
|
3
|
+
import { LanguageModel } from "ai";
|
|
4
|
+
|
|
5
|
+
//#region src/refine.d.ts
|
|
6
|
+
|
|
7
|
+
/**
 * Settings accepted by {@link refineChunks}.
 */
interface RefineChunksOptions {
  /**
   * The language the chunks were translated into.
   */
  readonly targetLanguage: Intl.Locale | string;

  /**
   * The language of the original text.
   */
  readonly sourceLanguage?: Intl.Locale | string;

  /**
   * The minimum acceptable quality score (0-1).  A chunk scoring below
   * this threshold is refined.  Defaults to 0.85.
   */
  readonly targetScore?: number;

  /**
   * Upper limit on refinement iterations for a single chunk.
   * Defaults to 3.
   */
  readonly maxIterations?: number;

  /**
   * Terms that must be translated consistently.
   */
  readonly glossary?: Glossary;

  /**
   * Whether the boundaries between consecutive chunks are evaluated for
   * coherence.  Defaults to true.
   */
  readonly evaluateBoundaries?: boolean;

  /**
   * An optional `AbortSignal` to cancel the refinement.
   */
  readonly signal?: AbortSignal;
}
|
|
42
|
+
/**
 * Outcome of checking the boundary between two consecutive chunks.
 */
interface BoundaryEvaluation {
  /**
   * Index of the first chunk of the pair; the boundary lies between
   * chunk `chunkIndex` and chunk `chunkIndex + 1`.
   */
  readonly chunkIndex: number;

  /**
   * Coherence score, from 0 to 1.
   */
  readonly score: number;

  /**
   * Problems detected at the boundary.
   */
  readonly issues: readonly BoundaryIssue[];
}
|
|
59
|
+
/**
 * A single problem detected at a chunk boundary.
 */
interface BoundaryIssue {
  /**
   * The category of the problem.
   */
  readonly type:
    | "coherence"
    | "style"
    | "reference"
    | "terminology";

  /**
   * A human-readable explanation of the problem.
   */
  readonly description: string;
}
|
|
72
|
+
/**
 * Log entry describing one refinement pass over one chunk.
 */
interface RefineIteration {
  /**
   * Index of the chunk that was refined.
   */
  readonly chunkIndex: number;

  /**
   * Iteration number for this chunk, starting at 1.
   */
  readonly iteration: number;

  /**
   * The chunk's text going into this iteration.
   */
  readonly before: string;

  /**
   * The chunk's text produced by this iteration.
   */
  readonly after: string;

  /**
   * Evaluation score of {@link before}.
   */
  readonly scoreBefore: number;

  /**
   * Evaluation score of {@link after}.
   */
  readonly scoreAfter: number;

  /**
   * The issues the model was asked to fix in this iteration.
   */
  readonly issuesAddressed: readonly TranslationIssue[];
}
|
|
105
|
+
/**
 * What {@link refineChunks} resolves to.
 */
interface RefineChunksResult {
  /**
   * The translated chunks after refinement, in input order.
   */
  readonly chunks: readonly string[];

  /**
   * The final evaluation score of each chunk, index-aligned with
   * {@link chunks}.
   */
  readonly scores: readonly number[];

  /**
   * How many refinement iterations ran in total, across all chunks.
   */
  readonly totalIterations: number;

  /**
   * One record per refinement iteration that was performed.
   */
  readonly history: readonly RefineIteration[];

  /**
   * Boundary evaluations (if evaluateBoundaries was enabled).
   */
  readonly boundaryEvaluations?: readonly BoundaryEvaluation[];
}
|
|
130
|
+
/**
 * Evaluates the boundary between two chunks for coherence.
 *
 * @param model The language model to use for the evaluation.
 * @param chunk1Translated The translation of the first chunk.
 * @param chunk2Translated The translation of the second chunk.
 * @param chunk1Original The original text of the first chunk.
 * @param chunk2Original The original text of the second chunk.
 * @param options Refinement options.
 * @returns The coherence score and the issues found at the boundary.
 */
declare function evaluateBoundary(
  model: LanguageModel,
  chunk1Translated: string,
  chunk2Translated: string,
  chunk1Original: string,
  chunk2Original: string,
  options: RefineChunksOptions,
): Promise<Omit<BoundaryEvaluation, "chunkIndex">>;
|
|
134
|
+
/**
 * Improves translated chunks by repeatedly evaluating each one and asking
 * the model to fix the issues found.
 *
 * @param model The language model to use for refinement.
 * @param originalChunks The original text chunks that were translated.
 * @param translatedChunks The translated chunks to refine.
 * @param options Refinement options.
 * @returns A promise that resolves to the refinement result.
 * @throws {RangeError} If the number of original and translated chunks
 *                      do not match.
 */
declare function refineChunks(
  model: LanguageModel,
  originalChunks: readonly string[],
  translatedChunks: readonly string[],
  options: RefineChunksOptions,
): Promise<RefineChunksResult>;
|
|
147
|
+
//#endregion
|
|
148
|
+
export { BoundaryEvaluation, BoundaryIssue, RefineChunksOptions, RefineChunksResult, RefineIteration, evaluateBoundary, refineChunks };
|
package/dist/refine.d.ts
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import { Glossary } from "./glossary.js";
|
|
2
|
+
import { TranslationIssue } from "./evaluation.js";
|
|
3
|
+
import { LanguageModel } from "ai";
|
|
4
|
+
|
|
5
|
+
//#region src/refine.d.ts
|
|
6
|
+
|
|
7
|
+
/**
 * Settings accepted by {@link refineChunks}.
 */
interface RefineChunksOptions {
  /**
   * The language the chunks were translated into.
   */
  readonly targetLanguage: Intl.Locale | string;

  /**
   * The language of the original text.
   */
  readonly sourceLanguage?: Intl.Locale | string;

  /**
   * The minimum acceptable quality score (0-1).  A chunk scoring below
   * this threshold is refined.  Defaults to 0.85.
   */
  readonly targetScore?: number;

  /**
   * Upper limit on refinement iterations for a single chunk.
   * Defaults to 3.
   */
  readonly maxIterations?: number;

  /**
   * Terms that must be translated consistently.
   */
  readonly glossary?: Glossary;

  /**
   * Whether the boundaries between consecutive chunks are evaluated for
   * coherence.  Defaults to true.
   */
  readonly evaluateBoundaries?: boolean;

  /**
   * An optional `AbortSignal` to cancel the refinement.
   */
  readonly signal?: AbortSignal;
}
|
|
42
|
+
/**
 * Outcome of checking the boundary between two consecutive chunks.
 */
interface BoundaryEvaluation {
  /**
   * Index of the first chunk of the pair; the boundary lies between
   * chunk `chunkIndex` and chunk `chunkIndex + 1`.
   */
  readonly chunkIndex: number;

  /**
   * Coherence score, from 0 to 1.
   */
  readonly score: number;

  /**
   * Problems detected at the boundary.
   */
  readonly issues: readonly BoundaryIssue[];
}
|
|
59
|
+
/**
 * A single problem detected at a chunk boundary.
 */
interface BoundaryIssue {
  /**
   * The category of the problem.
   */
  readonly type:
    | "coherence"
    | "style"
    | "reference"
    | "terminology";

  /**
   * A human-readable explanation of the problem.
   */
  readonly description: string;
}
|
|
72
|
+
/**
 * Log entry describing one refinement pass over one chunk.
 */
interface RefineIteration {
  /**
   * Index of the chunk that was refined.
   */
  readonly chunkIndex: number;

  /**
   * Iteration number for this chunk, starting at 1.
   */
  readonly iteration: number;

  /**
   * The chunk's text going into this iteration.
   */
  readonly before: string;

  /**
   * The chunk's text produced by this iteration.
   */
  readonly after: string;

  /**
   * Evaluation score of {@link before}.
   */
  readonly scoreBefore: number;

  /**
   * Evaluation score of {@link after}.
   */
  readonly scoreAfter: number;

  /**
   * The issues the model was asked to fix in this iteration.
   */
  readonly issuesAddressed: readonly TranslationIssue[];
}
|
|
105
|
+
/**
 * What {@link refineChunks} resolves to.
 */
interface RefineChunksResult {
  /**
   * The translated chunks after refinement, in input order.
   */
  readonly chunks: readonly string[];

  /**
   * The final evaluation score of each chunk, index-aligned with
   * {@link chunks}.
   */
  readonly scores: readonly number[];

  /**
   * How many refinement iterations ran in total, across all chunks.
   */
  readonly totalIterations: number;

  /**
   * One record per refinement iteration that was performed.
   */
  readonly history: readonly RefineIteration[];

  /**
   * Boundary evaluations (if evaluateBoundaries was enabled).
   */
  readonly boundaryEvaluations?: readonly BoundaryEvaluation[];
}
|
|
130
|
+
/**
 * Evaluates the boundary between two chunks for coherence.
 *
 * @param model The language model to use for the evaluation.
 * @param chunk1Translated The translation of the first chunk.
 * @param chunk2Translated The translation of the second chunk.
 * @param chunk1Original The original text of the first chunk.
 * @param chunk2Original The original text of the second chunk.
 * @param options Refinement options.
 * @returns The coherence score and the issues found at the boundary.
 */
declare function evaluateBoundary(
  model: LanguageModel,
  chunk1Translated: string,
  chunk2Translated: string,
  chunk1Original: string,
  chunk2Original: string,
  options: RefineChunksOptions,
): Promise<Omit<BoundaryEvaluation, "chunkIndex">>;
|
|
134
|
+
/**
 * Improves translated chunks by repeatedly evaluating each one and asking
 * the model to fix the issues found.
 *
 * @param model The language model to use for refinement.
 * @param originalChunks The original text chunks that were translated.
 * @param translatedChunks The translated chunks to refine.
 * @param options Refinement options.
 * @returns A promise that resolves to the refinement result.
 * @throws {RangeError} If the number of original and translated chunks
 *                      do not match.
 */
declare function refineChunks(
  model: LanguageModel,
  originalChunks: readonly string[],
  translatedChunks: readonly string[],
  options: RefineChunksOptions,
): Promise<RefineChunksResult>;
|
|
147
|
+
//#endregion
|
|
148
|
+
export { BoundaryEvaluation, BoundaryIssue, RefineChunksOptions, RefineChunksResult, RefineIteration, evaluateBoundary, refineChunks };
|