@vertana/core 0.1.0-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -0
- package/dist/_virtual/rolldown_runtime.cjs +29 -0
- package/dist/accumulator.cjs +64 -0
- package/dist/accumulator.d.cts +51 -0
- package/dist/accumulator.d.ts +51 -0
- package/dist/accumulator.js +61 -0
- package/dist/chunking.cjs +76 -0
- package/dist/chunking.d.cts +124 -0
- package/dist/chunking.d.ts +124 -0
- package/dist/chunking.js +74 -0
- package/dist/context.cjs +51 -0
- package/dist/context.d.cts +148 -0
- package/dist/context.d.ts +148 -0
- package/dist/context.js +49 -0
- package/dist/evaluation.cjs +120 -0
- package/dist/evaluation.d.cts +111 -0
- package/dist/evaluation.d.ts +111 -0
- package/dist/evaluation.js +119 -0
- package/dist/glossary.cjs +0 -0
- package/dist/glossary.d.cts +25 -0
- package/dist/glossary.d.ts +25 -0
- package/dist/glossary.js +0 -0
- package/dist/html.cjs +253 -0
- package/dist/html.d.cts +41 -0
- package/dist/html.d.ts +41 -0
- package/dist/html.js +250 -0
- package/dist/index.cjs +39 -0
- package/dist/index.d.cts +17 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.js +16 -0
- package/dist/markdown.cjs +300 -0
- package/dist/markdown.d.cts +17 -0
- package/dist/markdown.d.ts +17 -0
- package/dist/markdown.js +300 -0
- package/dist/plaintext.cjs +70 -0
- package/dist/plaintext.d.cts +17 -0
- package/dist/plaintext.d.ts +17 -0
- package/dist/plaintext.js +70 -0
- package/dist/prompt.cjs +91 -0
- package/dist/prompt.d.cts +74 -0
- package/dist/prompt.d.ts +74 -0
- package/dist/prompt.js +86 -0
- package/dist/refine.cjs +243 -0
- package/dist/refine.d.cts +148 -0
- package/dist/refine.d.ts +148 -0
- package/dist/refine.js +241 -0
- package/dist/select.cjs +62 -0
- package/dist/select.d.cts +83 -0
- package/dist/select.d.ts +83 -0
- package/dist/select.js +61 -0
- package/dist/terms.cjs +60 -0
- package/dist/terms.d.cts +36 -0
- package/dist/terms.d.ts +36 -0
- package/dist/terms.js +59 -0
- package/dist/tokens.cjs +40 -0
- package/dist/tokens.d.cts +24 -0
- package/dist/tokens.d.ts +24 -0
- package/dist/tokens.js +38 -0
- package/dist/tools.cjs +35 -0
- package/dist/tools.d.cts +20 -0
- package/dist/tools.d.ts +20 -0
- package/dist/tools.js +34 -0
- package/dist/translate.cjs +200 -0
- package/dist/translate.d.cts +190 -0
- package/dist/translate.d.ts +190 -0
- package/dist/translate.js +199 -0
- package/dist/window.cjs +0 -0
- package/dist/window.d.cts +48 -0
- package/dist/window.d.ts +48 -0
- package/dist/window.js +0 -0
- package/package.json +215 -0
package/dist/refine.js
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import { evaluate } from "./evaluation.js";
|
|
2
|
+
import { getLogger } from "@logtape/logtape";
|
|
3
|
+
import { generateText } from "ai";
|
|
4
|
+
|
|
5
|
+
//#region src/refine.ts
|
|
6
|
+
// Module-level LogTape logger for the refinement step, registered under the
// category path "vertana" > "core" > "refine".
const logger = getLogger(["vertana", "core", "refine"]);
|
|
11
|
+
/**
|
|
12
|
+
* Gets the language name from a locale.
|
|
13
|
+
*/
|
|
14
|
+
function getLanguageName(locale) {
|
|
15
|
+
const tag = typeof locale === "string" ? locale : locale.baseName;
|
|
16
|
+
try {
|
|
17
|
+
return new Intl.DisplayNames(["en"], { type: "language" }).of(tag) ?? tag;
|
|
18
|
+
} catch {
|
|
19
|
+
return tag;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
/**
 * Assembles the system prompt used when asking the model to refine a chunk.
 *
 * The prompt names the source and target languages, states the task, lists
 * every issue as `- [type] description`, and, when a non-empty glossary is
 * supplied, appends a glossary section with one `"original" → "translated"`
 * line per entry.
 *
 * @param options Refinement options; `targetLanguage`, `sourceLanguage` and
 *                `glossary` are read here.
 * @param issues The issues the refinement should address.
 * @returns The complete system prompt.
 */
function buildRefineSystemPrompt(options, issues) {
	const targetLang = getLanguageName(options.targetLanguage);
	const sourceLang = (options.sourceLanguage ? getLanguageName(options.sourceLanguage) : null) ?? "the source language";
	const header = [
		`You are an expert translator refining a translation from ${sourceLang} to ${targetLang}.`,
		"",
		"You will be given:",
		"1. The original text",
		"2. The current translation",
		"3. A list of issues found in the translation",
		"",
		"Your task is to fix the issues while preserving the parts that are correct.",
		"Output ONLY the improved translation, nothing else.",
		"",
		"## Issues to fix",
		"",
		""
	].join("\n");
	const parts = [header];
	for (const issue of issues) parts.push(`- [${issue.type}] ${issue.description}\n`);
	const glossary = options.glossary;
	if (glossary != null && glossary.length > 0) {
		parts.push(`\n## Glossary (must follow exactly)\n\n`);
		for (const entry of glossary) parts.push(`- "${entry.original}" → "${entry.translated}"\n`);
	}
	return parts.join("");
}
|
|
47
|
+
/**
 * Assembles the user prompt for chunk refinement: the original text and the
 * current translation under Markdown headings, followed by the request for
 * an improved translation.
 *
 * @param original The source text of the chunk.
 * @param translated The current translation of the chunk.
 * @returns The complete user prompt.
 */
function buildRefineUserPrompt(original, translated) {
	const lines = [
		"## Original Text",
		"",
		`${original}`,
		"",
		"## Current Translation",
		"",
		`${translated}`,
		"",
		"Please provide the improved translation:"
	];
	return lines.join("\n");
}
|
|
61
|
+
/**
 * Asks the model for an improved translation of one chunk.
 *
 * @param model The language model passed to `generateText`.
 * @param original The source text of the chunk.
 * @param translated The current translation of the chunk.
 * @param issues The issues the rewrite should address.
 * @param options Refinement options; `signal` is forwarded as the abort
 *                signal and the rest feed the system prompt.
 * @returns The model's reply with surrounding whitespace trimmed.
 */
async function refineChunk(model, original, translated, issues, options) {
	const system = buildRefineSystemPrompt(options, issues);
	const prompt = buildRefineUserPrompt(original, translated);
	const response = await generateText({
		model,
		system,
		prompt,
		abortSignal: options.signal
	});
	return response.text.trim();
}
|
|
72
|
+
/**
 * Converts the raw text of a boundary-evaluation reply into a normalized
 * result.
 *
 * The reply is expected to contain a JSON object shaped like
 * `{ "score": number, "issues": [...] }`. Models often wrap such output in
 * Markdown code fences or add commentary, so only the span from the first
 * `{` to the last `}` is parsed.
 *
 * @param text The raw reply text.
 * @returns `score` clamped to the range 0..1 and `issues` as an array.
 *          A reply without a parseable JSON object yields
 *          `{ score: 1, issues: [] }`. A missing or non-numeric `score`
 *          yields 1 instead of `NaN`. A non-array `issues` yields `[]`.
 */
function parseBoundaryEvaluation(text) {
	const fallback = {
		score: 1,
		issues: []
	};
	if (typeof text !== "string") return fallback;
	const start = text.indexOf("{");
	const end = text.lastIndexOf("}");
	if (start === -1 || end < start) return fallback;
	let parsed;
	try {
		parsed = JSON.parse(text.slice(start, end + 1));
	} catch {
		// Best effort: an unreadable reply is treated as "no issues found".
		return fallback;
	}
	if (parsed === null || typeof parsed !== "object") return fallback;
	// Accept a numeric string such as "0.8"; anything else non-numeric falls back to 1.
	const rawScore = typeof parsed.score === "string" ? Number(parsed.score) : parsed.score;
	const score = typeof rawScore === "number" && !Number.isNaN(rawScore) ? Math.max(0, Math.min(1, rawScore)) : 1;
	return {
		score,
		issues: Array.isArray(parsed.issues) ? parsed.issues : []
	};
}
/**
 * Evaluates the boundary between two consecutive chunks for coherence.
 *
 * Only the last 200 characters of the first chunk and the first 200
 * characters of the second chunk (original and translated) are sent to the
 * model, which is asked to reply with a JSON object holding a score and a
 * list of issues.
 *
 * @param model The language model passed to `generateText`.
 * @param chunk1Translated The translation of the first chunk.
 * @param chunk2Translated The translation of the second chunk.
 * @param chunk1Original The source text of the first chunk.
 * @param chunk2Original The source text of the second chunk.
 * @param options Options; `targetLanguage` and `signal` are read here.
 * @returns An object with `score` (0..1) and `issues` (an array). If the
 *          reply cannot be interpreted, the result is a score of 1 with no
 *          issues.
 */
async function evaluateBoundary(model, chunk1Translated, chunk2Translated, chunk1Original, chunk2Original, options) {
	const targetLang = getLanguageName(options.targetLanguage);
	const boundarySize = 200;
	const chunk1End = chunk1Translated.slice(-boundarySize);
	const chunk2Start = chunk2Translated.slice(0, boundarySize);
	const result = await generateText({
		model,
		system: `You are an expert translation quality evaluator.

Evaluate the coherence at the boundary between two consecutive translation chunks.

Check for:
1. **Coherence**: Does the text flow naturally from one chunk to the next?
2. **Style**: Is the style consistent across the boundary?
3. **Reference**: Are pronouns and references consistent?
4. **Terminology**: Are terms used consistently?

Respond in this exact JSON format:
{
"score": <number between 0 and 1>,
"issues": [
{"type": "<coherence|style|reference|terminology>", "description": "<description>"}
]
}`,
		prompt: `## End of chunk 1 (original)
${chunk1Original.slice(-boundarySize)}

## End of chunk 1 (translated to ${targetLang})
${chunk1End}

## Start of chunk 2 (original)
${chunk2Original.slice(0, boundarySize)}

## Start of chunk 2 (translated to ${targetLang})
${chunk2Start}

Evaluate the boundary coherence:`,
		abortSignal: options.signal
	});
	return parseBoundaryEvaluation(result.text);
}
|
|
127
|
+
/**
 * Refines translated chunks with an iterative evaluate-and-fix loop.
 *
 * Each chunk is scored with `evaluate`. While its score is below the target
 * and iterations remain, `refineChunk` proposes a rewrite addressing the
 * reported issues, and the rewrite is scored again. Every attempt is
 * recorded in the history. A rewrite that scores lower than the text it was
 * derived from is not adopted, so refinement never leaves a chunk with a
 * lower score than it started with. Afterwards, unless disabled, each pair
 * of adjacent chunks is checked with `evaluateBoundary`.
 *
 * @param model The language model to use for refinement.
 * @param originalChunks The original text chunks that were translated.
 * @param translatedChunks The translated chunks to refine.
 * @param options Refinement options. Read here: `targetLanguage`,
 *                `sourceLanguage`, `glossary`, `signal`, `targetScore`
 *                (default 0.85), `maxIterations` (default 3) and
 *                `evaluateBoundaries` (default true).
 * @returns A promise that resolves to the refinement result: `chunks`,
 *          `scores`, `totalIterations`, `history` and `boundaryEvaluations`
 *          (undefined when boundary evaluation is skipped).
 * @throws {RangeError} If the number of original and translated chunks
 * do not match.
 */
async function refineChunks(model, originalChunks, translatedChunks, options) {
	if (originalChunks.length !== translatedChunks.length) throw new RangeError(`Chunk count mismatch: ${originalChunks.length} original vs ${translatedChunks.length} translated`);
	const targetScore = options.targetScore ?? .85;
	const maxIterations = options.maxIterations ?? 3;
	const shouldEvaluateBoundaries = options.evaluateBoundaries ?? true;
	// The same evaluation settings are used for every scoring call.
	const evaluationOptions = {
		targetLanguage: options.targetLanguage,
		sourceLanguage: options.sourceLanguage,
		glossary: options.glossary,
		signal: options.signal
	};
	logger.info("Starting refinement of {chunkCount} chunks...", {
		chunkCount: originalChunks.length,
		targetScore,
		maxIterations
	});
	const refinedChunks = [...translatedChunks];
	const scores = new Array(translatedChunks.length).fill(0);
	const history = [];
	let totalIterations = 0;
	for (let i = 0; i < refinedChunks.length; i++) {
		options.signal?.throwIfAborted();
		logger.debug("Evaluating chunk {index} of {total}...", {
			index: i + 1,
			total: refinedChunks.length
		});
		// Best text seen so far for this chunk, with its evaluation.
		let currentText = refinedChunks[i];
		let evaluation = await evaluate(model, originalChunks[i], currentText, evaluationOptions);
		logger.debug("Chunk {index} initial score: {score}.", {
			index: i + 1,
			score: evaluation.score,
			issues: evaluation.issues.length
		});
		let iteration = 0;
		while (evaluation.score < targetScore && iteration < maxIterations) {
			options.signal?.throwIfAborted();
			iteration++;
			totalIterations++;
			const candidateText = await refineChunk(model, originalChunks[i], currentText, evaluation.issues, options);
			const candidateEvaluation = await evaluate(model, originalChunks[i], candidateText, evaluationOptions);
			history.push({
				chunkIndex: i,
				iteration,
				before: currentText,
				after: candidateText,
				scoreBefore: evaluation.score,
				scoreAfter: candidateEvaluation.score,
				issuesAddressed: evaluation.issues
			});
			logger.debug("Chunk {chunkIndex} iteration {iteration}: {scoreBefore} → {scoreAfter}.", {
				chunkIndex: i + 1,
				iteration,
				scoreBefore: evaluation.score,
				scoreAfter: candidateEvaluation.score
			});
			// Adopt the rewrite unless it scored strictly worse; a regression
			// is kept in the history but must not replace a better text.
			if (candidateEvaluation.score >= evaluation.score) {
				currentText = candidateText;
				evaluation = candidateEvaluation;
			}
		}
		refinedChunks[i] = currentText;
		scores[i] = evaluation.score;
	}
	let boundaryEvaluations;
	if (shouldEvaluateBoundaries && refinedChunks.length > 1) {
		logger.debug("Evaluating {count} chunk boundaries...", { count: refinedChunks.length - 1 });
		boundaryEvaluations = [];
		for (let i = 0; i < refinedChunks.length - 1; i++) {
			options.signal?.throwIfAborted();
			const boundaryResult = await evaluateBoundary(model, refinedChunks[i], refinedChunks[i + 1], originalChunks[i], originalChunks[i + 1], options);
			boundaryEvaluations.push({
				chunkIndex: i,
				...boundaryResult
			});
			if (boundaryResult.issues.length > 0) logger.warn("Boundary {index} has {issueCount} issue(s), score: {score}.", {
				index: i + 1,
				issueCount: boundaryResult.issues.length,
				score: boundaryResult.score
			});
		}
	}
	// Avoid 0 / 0 = NaN in the summary log when there are no chunks.
	const averageScore = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0;
	logger.info("Refinement completed.", {
		totalIterations,
		averageScore,
		chunkCount: refinedChunks.length
	});
	return {
		chunks: refinedChunks,
		scores,
		totalIterations,
		history,
		boundaryEvaluations
	};
}
|
|
239
|
+
|
|
240
|
+
//#endregion
|
|
241
|
+
export { evaluateBoundary, refineChunks };
|
package/dist/select.cjs
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
const require_rolldown_runtime = require('./_virtual/rolldown_runtime.cjs');
|
|
2
|
+
const require_evaluation = require('./evaluation.cjs');
|
|
3
|
+
let _logtape_logtape = require("@logtape/logtape");
|
|
4
|
+
|
|
5
|
+
//#region src/select.ts
|
|
6
|
+
// Module-level LogTape logger for candidate selection, registered under the
// category path "vertana" > "core" > "select".
const logger = (0, _logtape_logtape.getLogger)(["vertana", "core", "select"]);
|
|
11
|
+
/**
 * Scores every translation candidate and returns them ranked, best first.
 *
 * Candidates are evaluated one after another; the abort signal, if any, is
 * checked before each evaluation. The ranked list is ordered by descending
 * score and ranks are 1-based.
 *
 * @param evaluatorModel The language model to use for evaluation.
 * @param original The original text that was translated.
 * @param candidates The translation candidates to evaluate.
 * @param options Selection options.
 * @returns A promise that resolves to the best candidate (`best`) and the
 *          full ranking (`all`).
 * @throws {RangeError} If no candidates are provided.
 */
async function selectBest(evaluatorModel, original, candidates, options) {
	if (candidates.length === 0) throw new RangeError("At least one candidate is required.");
	logger.debug("Selecting best from {count} candidates...", { count: candidates.length });
	const scored = [];
	for (const candidate of candidates) {
		options.signal?.throwIfAborted();
		const { score, issues } = await require_evaluation.evaluate(evaluatorModel, original, candidate.text, {
			targetLanguage: options.targetLanguage,
			sourceLanguage: options.sourceLanguage,
			glossary: options.glossary,
			signal: options.signal
		});
		scored.push({
			candidate,
			score,
			issues
		});
		logger.debug("Candidate {index} score: {score}.", {
			index: scored.length,
			score,
			issues: issues.length
		});
	}
	// Sort a copy so the evaluation order above is left untouched.
	const byScoreDescending = scored.slice();
	byScoreDescending.sort((left, right) => right.score - left.score);
	const rankedCandidates = byScoreDescending.map(({ candidate, score, issues }, position) => ({
		text: candidate.text,
		metadata: candidate.metadata,
		score,
		issues,
		rank: position + 1
	}));
	const best = rankedCandidates[0];
	logger.debug("Selected best candidate with score: {score}.", {
		score: best.score,
		totalCandidates: candidates.length
	});
	return {
		best,
		all: rankedCandidates
	};
}
|
|
60
|
+
|
|
61
|
+
//#endregion
|
|
62
|
+
exports.selectBest = selectBest;
|
package/dist/select.d.cts
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { Glossary } from "./glossary.cjs";
|
|
2
|
+
import { TranslationIssue } from "./evaluation.cjs";
|
|
3
|
+
import { LanguageModel } from "ai";
|
|
4
|
+
|
|
5
|
+
//#region src/select.d.ts
|
|
6
|
+
|
|
7
|
+
/**
 * One translation of a source text, submitted to {@link selectBest} for
 * scoring.
 *
 * @typeParam T The type of the caller-defined metadata carried with the text.
 */
interface Candidate<T = unknown> {
  /** The translated text to be evaluated. */
  readonly text: string;

  /**
   * Optional caller-defined data attached to this candidate (e.g., which
   * model produced it).
   */
  readonly metadata?: T;
}
|
|
20
|
+
/**
 * A {@link Candidate} together with its evaluation outcome and its position
 * in the ranking.
 *
 * @typeParam T The type of the caller-defined metadata carried with the text.
 */
interface RankedCandidate<T = unknown> extends Candidate<T> {
  /** The evaluation score, from 0 to 1. */
  readonly score: number;

  /** The issues the evaluation found in this translation. */
  readonly issues: readonly TranslationIssue[];

  /** The position in the ranking; 1-based, where 1 is the best candidate. */
  readonly rank: number;
}
|
|
37
|
+
/**
 * Settings accepted by {@link selectBest}.
 */
interface SelectBestOptions {
  /** The language the candidates were translated into. */
  readonly targetLanguage: Intl.Locale | string;

  /** The language of the original text, if known. */
  readonly sourceLanguage?: Intl.Locale | string;

  /** Terms that the translation is expected to use consistently. */
  readonly glossary?: Glossary;

  /** An optional `AbortSignal` for cancelling the selection. */
  readonly signal?: AbortSignal;
}
|
|
58
|
+
/**
 * What {@link selectBest} resolves to.
 *
 * @typeParam T The type of the caller-defined metadata carried with each
 *              candidate.
 */
interface SelectBestResult<T = unknown> {
  /** The candidate with the highest evaluation score. */
  readonly best: RankedCandidate<T>;

  /** Every candidate with its evaluation results, ordered by rank. */
  readonly all: readonly RankedCandidate<T>[];
}
|
|
71
|
+
/**
 * Scores several translation candidates and picks the best one.
 *
 * @typeParam T The type of the caller-defined metadata carried with each
 *              candidate.
 * @param evaluatorModel The language model that performs the evaluation.
 * @param original The source text the candidates translate.
 * @param candidates The translation candidates to score.
 * @param options Selection settings.
 * @returns A promise resolving to the best candidate and the full ranking.
 * @throws {RangeError} If `candidates` is empty.
 */
declare function selectBest<T = unknown>(
  evaluatorModel: LanguageModel,
  original: string,
  candidates: readonly Candidate<T>[],
  options: SelectBestOptions,
): Promise<SelectBestResult<T>>;
|
|
82
|
+
//#endregion
|
|
83
|
+
export { Candidate, RankedCandidate, SelectBestOptions, SelectBestResult, selectBest };
|
package/dist/select.d.ts
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { Glossary } from "./glossary.js";
|
|
2
|
+
import { TranslationIssue } from "./evaluation.js";
|
|
3
|
+
import { LanguageModel } from "ai";
|
|
4
|
+
|
|
5
|
+
//#region src/select.d.ts
|
|
6
|
+
|
|
7
|
+
/**
 * One translation of a source text, submitted to {@link selectBest} for
 * scoring.
 *
 * @typeParam T The type of the caller-defined metadata carried with the text.
 */
interface Candidate<T = unknown> {
  /** The translated text to be evaluated. */
  readonly text: string;

  /**
   * Optional caller-defined data attached to this candidate (e.g., which
   * model produced it).
   */
  readonly metadata?: T;
}
|
|
20
|
+
/**
 * A {@link Candidate} together with its evaluation outcome and its position
 * in the ranking.
 *
 * @typeParam T The type of the caller-defined metadata carried with the text.
 */
interface RankedCandidate<T = unknown> extends Candidate<T> {
  /** The evaluation score, from 0 to 1. */
  readonly score: number;

  /** The issues the evaluation found in this translation. */
  readonly issues: readonly TranslationIssue[];

  /** The position in the ranking; 1-based, where 1 is the best candidate. */
  readonly rank: number;
}
|
|
37
|
+
/**
 * Settings accepted by {@link selectBest}.
 */
interface SelectBestOptions {
  /** The language the candidates were translated into. */
  readonly targetLanguage: Intl.Locale | string;

  /** The language of the original text, if known. */
  readonly sourceLanguage?: Intl.Locale | string;

  /** Terms that the translation is expected to use consistently. */
  readonly glossary?: Glossary;

  /** An optional `AbortSignal` for cancelling the selection. */
  readonly signal?: AbortSignal;
}
|
|
58
|
+
/**
 * What {@link selectBest} resolves to.
 *
 * @typeParam T The type of the caller-defined metadata carried with each
 *              candidate.
 */
interface SelectBestResult<T = unknown> {
  /** The candidate with the highest evaluation score. */
  readonly best: RankedCandidate<T>;

  /** Every candidate with its evaluation results, ordered by rank. */
  readonly all: readonly RankedCandidate<T>[];
}
|
|
71
|
+
/**
 * Scores several translation candidates and picks the best one.
 *
 * @typeParam T The type of the caller-defined metadata carried with each
 *              candidate.
 * @param evaluatorModel The language model that performs the evaluation.
 * @param original The source text the candidates translate.
 * @param candidates The translation candidates to score.
 * @param options Selection settings.
 * @returns A promise resolving to the best candidate and the full ranking.
 * @throws {RangeError} If `candidates` is empty.
 */
declare function selectBest<T = unknown>(
  evaluatorModel: LanguageModel,
  original: string,
  candidates: readonly Candidate<T>[],
  options: SelectBestOptions,
): Promise<SelectBestResult<T>>;
|
|
82
|
+
//#endregion
|
|
83
|
+
export { Candidate, RankedCandidate, SelectBestOptions, SelectBestResult, selectBest };
|
package/dist/select.js
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { evaluate } from "./evaluation.js";
|
|
2
|
+
import { getLogger } from "@logtape/logtape";
|
|
3
|
+
|
|
4
|
+
//#region src/select.ts
|
|
5
|
+
// Module-level LogTape logger for candidate selection, registered under the
// category path "vertana" > "core" > "select".
const logger = getLogger(["vertana", "core", "select"]);
|
|
10
|
+
/**
 * Scores every translation candidate and returns them ranked, best first.
 *
 * Candidates are evaluated one after another; the abort signal, if any, is
 * checked before each evaluation. The ranked list is ordered by descending
 * score and ranks are 1-based.
 *
 * @param evaluatorModel The language model to use for evaluation.
 * @param original The original text that was translated.
 * @param candidates The translation candidates to evaluate.
 * @param options Selection options.
 * @returns A promise that resolves to the best candidate (`best`) and the
 *          full ranking (`all`).
 * @throws {RangeError} If no candidates are provided.
 */
async function selectBest(evaluatorModel, original, candidates, options) {
	if (candidates.length === 0) throw new RangeError("At least one candidate is required.");
	logger.debug("Selecting best from {count} candidates...", { count: candidates.length });
	const results = [];
	for (const candidate of candidates) {
		options.signal?.throwIfAborted();
		const verdict = await evaluate(evaluatorModel, original, candidate.text, {
			targetLanguage: options.targetLanguage,
			sourceLanguage: options.sourceLanguage,
			glossary: options.glossary,
			signal: options.signal
		});
		const entry = {
			candidate,
			score: verdict.score,
			issues: verdict.issues
		};
		results.push(entry);
		logger.debug("Candidate {index} score: {score}.", {
			index: results.length,
			score: verdict.score,
			issues: verdict.issues.length
		});
	}
	// Highest score first; the sort runs on a copy of the results.
	const descending = Array.from(results).sort((first, second) => second.score - first.score);
	const rankedCandidates = [];
	descending.forEach((item, position) => {
		rankedCandidates.push({
			text: item.candidate.text,
			metadata: item.candidate.metadata,
			score: item.score,
			issues: item.issues,
			rank: position + 1
		});
	});
	const [winner] = rankedCandidates;
	logger.debug("Selected best candidate with score: {score}.", {
		score: winner.score,
		totalCandidates: candidates.length
	});
	return {
		best: winner,
		all: rankedCandidates
	};
}
|
|
59
|
+
|
|
60
|
+
//#endregion
|
|
61
|
+
export { selectBest };
|
package/dist/terms.cjs
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
const require_rolldown_runtime = require('./_virtual/rolldown_runtime.cjs');
|
|
2
|
+
let ai = require("ai");
|
|
3
|
+
let zod = require("zod");
|
|
4
|
+
|
|
5
|
+
//#region src/terms.ts
|
|
6
|
+
/**
 * Zod schema for the structured output requested from the model: an object
 * whose `terms` array holds `{ original, translated, context? }` entries.
 */
const extractedTermsSchema = (() => {
	const schema = zod.z;
	const term = schema.object({
		original: schema.string().describe("The original term in the source text"),
		translated: schema.string().describe("The translated term"),
		context: schema.string().optional().describe("Optional context for when to use this translation")
	});
	return schema.object({ terms: schema.array(term) });
})();
|
|
14
|
+
/**
 * Extracts key terminology pairs from a source text and its translation.
 *
 * An LLM is asked, via structured output, for the important terms, proper
 * nouns, technical vocabulary and key phrases that should be translated
 * consistently. The abort signal, if any, is checked before the request is
 * made, and the returned list is cut to `maxTerms` entries even if the
 * model returns more.
 *
 * @param model The language model to use for extraction.
 * @param sourceText The original source text.
 * @param translatedText The translated text.
 * @param options Optional extraction options: `maxTerms` (default 10) and
 *                `signal`.
 * @returns An array of glossary entries.
 */
async function extractTerms(model, sourceText, translatedText, options) {
	const maxTerms = options?.maxTerms ?? 10;
	const signal = options?.signal;
	signal?.throwIfAborted();
	const system = `You are a terminology extraction expert. Your task is to identify key terms from a source text and its translation that should be translated consistently.

Focus on extracting:
- Technical terms and domain-specific vocabulary
- Proper nouns (names, organizations, products)
- Key concepts and phrases
- Terms that appear multiple times or are central to the meaning

Do NOT extract:
- Common words that don't need consistent translation
- Function words (articles, prepositions, conjunctions)
- Terms that are already well-known in both languages

Extract at most ${maxTerms} of the most important terms.`;
	const prompt = `Source text:
${sourceText}

Translated text:
${translatedText}

Extract the key terminology pairs from the above texts.`;
	const generateObject = ai.generateObject;
	const response = await generateObject({
		model,
		schema: extractedTermsSchema,
		system,
		prompt,
		abortSignal: signal
	});
	return response.object.terms.slice(0, maxTerms);
}
|
|
58
|
+
|
|
59
|
+
//#endregion
|
|
60
|
+
exports.extractTerms = extractTerms;
|
package/dist/terms.d.cts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { GlossaryEntry } from "./glossary.cjs";
|
|
2
|
+
import { LanguageModel } from "ai";
|
|
3
|
+
|
|
4
|
+
//#region src/terms.d.ts
|
|
5
|
+
|
|
6
|
+
/**
 * Settings for {@link extractTerms}.
 */
interface ExtractTermsOptions {
  /**
   * The largest number of terms to return.
   *
   * @default `10`
   */
  readonly maxTerms?: number;

  /** An optional `AbortSignal` for cancelling the extraction. */
  readonly signal?: AbortSignal;
}
|
|
21
|
+
/**
 * Pulls key terminology pairs out of a source text and its translation.
 *
 * An LLM identifies the important terms, proper nouns, technical vocabulary
 * and other key phrases that should be translated consistently throughout a
 * document.
 *
 * @param model The language model that performs the extraction.
 * @param sourceText The original source text.
 * @param translatedText The translation of `sourceText`.
 * @param options Optional extraction settings.
 * @returns A promise resolving to the extracted glossary entries.
 */
declare function extractTerms(
  model: LanguageModel,
  sourceText: string,
  translatedText: string,
  options?: ExtractTermsOptions,
): Promise<readonly GlossaryEntry[]>;
|
|
35
|
+
//#endregion
|
|
36
|
+
export { ExtractTermsOptions, extractTerms };
|
package/dist/terms.d.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { GlossaryEntry } from "./glossary.js";
|
|
2
|
+
import { LanguageModel } from "ai";
|
|
3
|
+
|
|
4
|
+
//#region src/terms.d.ts
|
|
5
|
+
|
|
6
|
+
/**
 * Settings for {@link extractTerms}.
 */
interface ExtractTermsOptions {
  /**
   * The largest number of terms to return.
   *
   * @default `10`
   */
  readonly maxTerms?: number;

  /** An optional `AbortSignal` for cancelling the extraction. */
  readonly signal?: AbortSignal;
}
|
|
21
|
+
/**
 * Pulls key terminology pairs out of a source text and its translation.
 *
 * An LLM identifies the important terms, proper nouns, technical vocabulary
 * and other key phrases that should be translated consistently throughout a
 * document.
 *
 * @param model The language model that performs the extraction.
 * @param sourceText The original source text.
 * @param translatedText The translation of `sourceText`.
 * @param options Optional extraction settings.
 * @returns A promise resolving to the extracted glossary entries.
 */
declare function extractTerms(
  model: LanguageModel,
  sourceText: string,
  translatedText: string,
  options?: ExtractTermsOptions,
): Promise<readonly GlossaryEntry[]>;
|
|
35
|
+
//#endregion
|
|
36
|
+
export { ExtractTermsOptions, extractTerms };
|