@vertana/core 0.1.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/LICENSE +20 -0
  2. package/dist/_virtual/rolldown_runtime.cjs +29 -0
  3. package/dist/accumulator.cjs +64 -0
  4. package/dist/accumulator.d.cts +51 -0
  5. package/dist/accumulator.d.ts +51 -0
  6. package/dist/accumulator.js +61 -0
  7. package/dist/chunking.cjs +76 -0
  8. package/dist/chunking.d.cts +124 -0
  9. package/dist/chunking.d.ts +124 -0
  10. package/dist/chunking.js +74 -0
  11. package/dist/context.cjs +51 -0
  12. package/dist/context.d.cts +148 -0
  13. package/dist/context.d.ts +148 -0
  14. package/dist/context.js +49 -0
  15. package/dist/evaluation.cjs +120 -0
  16. package/dist/evaluation.d.cts +111 -0
  17. package/dist/evaluation.d.ts +111 -0
  18. package/dist/evaluation.js +119 -0
  19. package/dist/glossary.cjs +0 -0
  20. package/dist/glossary.d.cts +25 -0
  21. package/dist/glossary.d.ts +25 -0
  22. package/dist/glossary.js +0 -0
  23. package/dist/html.cjs +253 -0
  24. package/dist/html.d.cts +41 -0
  25. package/dist/html.d.ts +41 -0
  26. package/dist/html.js +250 -0
  27. package/dist/index.cjs +39 -0
  28. package/dist/index.d.cts +17 -0
  29. package/dist/index.d.ts +17 -0
  30. package/dist/index.js +16 -0
  31. package/dist/markdown.cjs +300 -0
  32. package/dist/markdown.d.cts +17 -0
  33. package/dist/markdown.d.ts +17 -0
  34. package/dist/markdown.js +300 -0
  35. package/dist/plaintext.cjs +70 -0
  36. package/dist/plaintext.d.cts +17 -0
  37. package/dist/plaintext.d.ts +17 -0
  38. package/dist/plaintext.js +70 -0
  39. package/dist/prompt.cjs +91 -0
  40. package/dist/prompt.d.cts +74 -0
  41. package/dist/prompt.d.ts +74 -0
  42. package/dist/prompt.js +86 -0
  43. package/dist/refine.cjs +243 -0
  44. package/dist/refine.d.cts +148 -0
  45. package/dist/refine.d.ts +148 -0
  46. package/dist/refine.js +241 -0
  47. package/dist/select.cjs +62 -0
  48. package/dist/select.d.cts +83 -0
  49. package/dist/select.d.ts +83 -0
  50. package/dist/select.js +61 -0
  51. package/dist/terms.cjs +60 -0
  52. package/dist/terms.d.cts +36 -0
  53. package/dist/terms.d.ts +36 -0
  54. package/dist/terms.js +59 -0
  55. package/dist/tokens.cjs +40 -0
  56. package/dist/tokens.d.cts +24 -0
  57. package/dist/tokens.d.ts +24 -0
  58. package/dist/tokens.js +38 -0
  59. package/dist/tools.cjs +35 -0
  60. package/dist/tools.d.cts +20 -0
  61. package/dist/tools.d.ts +20 -0
  62. package/dist/tools.js +34 -0
  63. package/dist/translate.cjs +200 -0
  64. package/dist/translate.d.cts +190 -0
  65. package/dist/translate.d.ts +190 -0
  66. package/dist/translate.js +199 -0
  67. package/dist/window.cjs +0 -0
  68. package/dist/window.d.cts +48 -0
  69. package/dist/window.d.ts +48 -0
  70. package/dist/window.js +0 -0
  71. package/package.json +215 -0
package/dist/refine.js ADDED
@@ -0,0 +1,241 @@
1
+ import { evaluate } from "./evaluation.js";
2
+ import { getLogger } from "@logtape/logtape";
3
+ import { generateText } from "ai";
4
+
5
+ //#region src/refine.ts
6
// Module-level logger for the refinement code, registered under the
// category ["vertana", "core", "refine"].
const logger = getLogger(["vertana", "core", "refine"]);
11
/**
 * Resolves a locale to its English display name.
 *
 * @param locale Either a language tag string or an object exposing
 *               `baseName` (such as an `Intl.Locale`).
 * @returns The English language name, or the tag itself when
 *          `Intl.DisplayNames` throws or produces no name.
 */
function getLanguageName(locale) {
  let tag;
  if (typeof locale === "string") tag = locale;
  else tag = locale.baseName;
  try {
    const englishNames = new Intl.DisplayNames(["en"], { type: "language" });
    const displayName = englishNames.of(tag);
    return displayName ?? tag;
  } catch {
    // Malformed tags make Intl.DisplayNames throw; fall back to the raw tag.
    return tag;
  }
}
22
/**
 * Assembles the system prompt used when asking the model to refine a chunk.
 *
 * The prompt names the source and target languages, explains the task,
 * lists every issue as `- [type] description`, and, when a non-empty
 * glossary is supplied, appends the glossary pairs that must be followed.
 *
 * @param options Refinement options; reads `targetLanguage`,
 *                `sourceLanguage` and `glossary`.
 * @param issues The issues found in the current translation.
 * @returns The complete system prompt.
 */
function buildRefineSystemPrompt(options, issues) {
  const targetLang = getLanguageName(options.targetLanguage);
  const sourceLang = (options.sourceLanguage ? getLanguageName(options.sourceLanguage) : null) ?? "the source language";
  const sections = [`You are an expert translator refining a translation from ${sourceLang} to ${targetLang}.

You will be given:
1. The original text
2. The current translation
3. A list of issues found in the translation

Your task is to fix the issues while preserving the parts that are correct.
Output ONLY the improved translation, nothing else.

## Issues to fix

`];
  for (const issue of issues) {
    sections.push(`- [${issue.type}] ${issue.description}\n`);
  }
  const hasGlossary = options.glossary != null && options.glossary.length > 0;
  if (hasGlossary) {
    sections.push(`\n## Glossary (must follow exactly)\n\n`);
    for (const entry of options.glossary) {
      sections.push(`- "${entry.original}" → "${entry.translated}"\n`);
    }
  }
  return sections.join("");
}
47
/**
 * Assembles the user prompt for chunk refinement: the original text and
 * the current translation under their own headings, followed by the
 * request for an improved translation.
 *
 * @param original The original (source) text of the chunk.
 * @param translated The current translation of the chunk.
 * @returns The complete user prompt.
 */
function buildRefineUserPrompt(original, translated) {
  const lines = [
    "## Original Text",
    "",
    `${original}`,
    "",
    "## Current Translation",
    "",
    `${translated}`,
    "",
    "Please provide the improved translation:"
  ];
  return lines.join("\n");
}
61
/**
 * Asks the model for an improved translation of one chunk.
 *
 * @param model The language model to use.
 * @param original The original text of the chunk.
 * @param translated The current translation of the chunk.
 * @param issues The issues the model is asked to fix.
 * @param options Refinement options; `signal` is forwarded as the abort
 *                signal of the model call.
 * @returns The model's reply with surrounding whitespace trimmed.
 */
async function refineChunk(model, original, translated, issues, options) {
  const system = buildRefineSystemPrompt(options, issues);
  const prompt = buildRefineUserPrompt(original, translated);
  const response = await generateText({
    model,
    system,
    prompt,
    abortSignal: options.signal
  });
  return response.text.trim();
}
72
/**
 * Parses the model's boundary-evaluation reply into `{ score, issues }`.
 *
 * Only the span from the first `{` to the last `}` is parsed, so a reply
 * wrapped in a Markdown code fence or surrounded by extra prose is still
 * understood.  A reply that cannot be parsed yields `{ score: 1, issues: [] }`
 * (best effort: no problem reported).  A score that is not a number also
 * falls back to 1, and anything other than an array of issues becomes `[]`.
 */
function parseBoundaryResponse(text) {
  const fallback = { score: 1, issues: [] };
  if (typeof text !== "string") return fallback;
  const start = text.indexOf("{");
  const end = text.lastIndexOf("}");
  if (start === -1 || end < start) return fallback;
  let parsed;
  try {
    parsed = JSON.parse(text.slice(start, end + 1));
  } catch {
    return fallback;
  }
  if (parsed == null || typeof parsed !== "object") return fallback;
  const issues = Array.isArray(parsed.issues) ? parsed.issues : [];
  // Number() applies the same coercion Math.min would, but lets a missing
  // or non-numeric score be detected instead of leaking NaN to callers.
  const rawScore = Number(parsed.score);
  if (Number.isNaN(rawScore)) return { score: 1, issues };
  return {
    score: Math.max(0, Math.min(1, rawScore)),
    issues
  };
}
/**
 * Evaluates the boundary between two chunks for coherence.
 *
 * Sends the last 200 characters of the first chunk and the first 200
 * characters of the second chunk (both original and translated) to the
 * model and asks for a JSON verdict covering coherence, style, reference
 * and terminology.
 *
 * @param model The language model to use for the evaluation.
 * @param chunk1Translated The translation of the first chunk.
 * @param chunk2Translated The translation of the second chunk.
 * @param chunk1Original The original text of the first chunk.
 * @param chunk2Original The original text of the second chunk.
 * @param options Options; reads `targetLanguage` and `signal`.
 * @returns A promise resolving to `{ score, issues }`, where `score` is
 *          clamped to the range 0 to 1.  If the reply cannot be
 *          interpreted, the result is a score of 1 with no issues.
 */
async function evaluateBoundary(model, chunk1Translated, chunk2Translated, chunk1Original, chunk2Original, options) {
  const targetLang = getLanguageName(options.targetLanguage);
  const boundarySize = 200;
  const chunk1End = chunk1Translated.slice(-boundarySize);
  const chunk2Start = chunk2Translated.slice(0, boundarySize);
  const result = await generateText({
    model,
    system: `You are an expert translation quality evaluator.

Evaluate the coherence at the boundary between two consecutive translation chunks.

Check for:
1. **Coherence**: Does the text flow naturally from one chunk to the next?
2. **Style**: Is the style consistent across the boundary?
3. **Reference**: Are pronouns and references consistent?
4. **Terminology**: Are terms used consistently?

Respond in this exact JSON format:
{
"score": <number between 0 and 1>,
"issues": [
{"type": "<coherence|style|reference|terminology>", "description": "<description>"}
]
}`,
    prompt: `## End of chunk 1 (original)
${chunk1Original.slice(-boundarySize)}

## End of chunk 1 (translated to ${targetLang})
${chunk1End}

## Start of chunk 2 (original)
${chunk2Original.slice(0, boundarySize)}

## Start of chunk 2 (translated to ${targetLang})
${chunk2Start}

Evaluate the boundary coherence:`,
    abortSignal: options.signal
  });
  return parseBoundaryResponse(result.text);
}
127
/**
 * Refines translated chunks to improve quality using an iterative
 * evaluate-fix loop.
 *
 * Each chunk is evaluated once; while its score is below the target and
 * the iteration budget is not used up, the model is asked to fix the
 * reported issues and the result is re-evaluated.  Every attempt is
 * recorded in `history`.  The chunk and score that are returned are those
 * of the best-scoring version seen (the input translation included), so a
 * refinement pass that lowers the score never replaces a better text.
 *
 * When boundary evaluation is enabled and there is more than one chunk,
 * each pair of neighbouring refined chunks is also checked for coherence.
 *
 * @param model The language model to use for refinement.
 * @param originalChunks The original text chunks that were translated.
 * @param translatedChunks The translated chunks to refine.
 * @param options Refinement options: `targetLanguage`, `sourceLanguage`,
 *                `glossary`, `signal`, `targetScore` (default 0.85),
 *                `maxIterations` (default 3) and `evaluateBoundaries`
 *                (default true).
 * @returns A promise that resolves to the refinement result: `chunks`,
 *          `scores`, `totalIterations`, `history` and, when boundaries
 *          were evaluated, `boundaryEvaluations`.
 * @throws {RangeError} If the number of original and translated chunks
 *                      do not match.
 */
async function refineChunks(model, originalChunks, translatedChunks, options) {
  if (originalChunks.length !== translatedChunks.length) throw new RangeError(`Chunk count mismatch: ${originalChunks.length} original vs ${translatedChunks.length} translated`);
  const targetScore = options.targetScore ?? .85;
  const maxIterations = options.maxIterations ?? 3;
  const shouldEvaluateBoundaries = options.evaluateBoundaries ?? true;
  logger.info("Starting refinement of {chunkCount} chunks...", {
    chunkCount: originalChunks.length,
    targetScore,
    maxIterations
  });
  // The same evaluation settings are used for every evaluate() call.
  const evaluationOptions = {
    targetLanguage: options.targetLanguage,
    sourceLanguage: options.sourceLanguage,
    glossary: options.glossary,
    signal: options.signal
  };
  const refinedChunks = [...translatedChunks];
  const scores = new Array(translatedChunks.length).fill(0);
  const history = [];
  let totalIterations = 0;
  for (let i = 0; i < refinedChunks.length; i++) {
    options.signal?.throwIfAborted();
    logger.debug("Evaluating chunk {index} of {total}...", {
      index: i + 1,
      total: refinedChunks.length
    });
    let currentText = refinedChunks[i];
    let evaluation = await evaluate(model, originalChunks[i], currentText, evaluationOptions);
    logger.debug("Chunk {index} initial score: {score}.", {
      index: i + 1,
      score: evaluation.score,
      issues: evaluation.issues.length
    });
    // Track the best version so a regressing pass cannot replace it.
    let bestText = currentText;
    let bestScore = evaluation.score;
    let iteration = 0;
    while (evaluation.score < targetScore && iteration < maxIterations) {
      options.signal?.throwIfAborted();
      iteration++;
      totalIterations++;
      const beforeText = currentText;
      const scoreBefore = evaluation.score;
      const issuesAddressed = evaluation.issues;
      currentText = await refineChunk(model, originalChunks[i], currentText, evaluation.issues, options);
      evaluation = await evaluate(model, originalChunks[i], currentText, evaluationOptions);
      history.push({
        chunkIndex: i,
        iteration,
        before: beforeText,
        after: currentText,
        scoreBefore,
        scoreAfter: evaluation.score,
        issuesAddressed
      });
      logger.debug("Chunk {chunkIndex} iteration {iteration}: {scoreBefore} → {scoreAfter}.", {
        chunkIndex: i + 1,
        iteration,
        scoreBefore,
        scoreAfter: evaluation.score
      });
      if (evaluation.score > bestScore) {
        bestScore = evaluation.score;
        bestText = currentText;
      }
    }
    refinedChunks[i] = bestText;
    scores[i] = bestScore;
  }
  let boundaryEvaluations;
  if (shouldEvaluateBoundaries && refinedChunks.length > 1) {
    logger.debug("Evaluating {count} chunk boundaries...", { count: refinedChunks.length - 1 });
    boundaryEvaluations = [];
    for (let i = 0; i < refinedChunks.length - 1; i++) {
      options.signal?.throwIfAborted();
      const boundaryResult = await evaluateBoundary(model, refinedChunks[i], refinedChunks[i + 1], originalChunks[i], originalChunks[i + 1], options);
      boundaryEvaluations.push({
        chunkIndex: i,
        ...boundaryResult
      });
      if (boundaryResult.issues.length > 0) logger.warn("Boundary {index} has {issueCount} issue(s), score: {score}.", {
        index: i + 1,
        issueCount: boundaryResult.issues.length,
        score: boundaryResult.score
      });
    }
  }
  // Avoid 0 / 0 = NaN when there are no chunks at all.
  const averageScore = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0;
  logger.info("Refinement completed.", {
    totalIterations,
    averageScore,
    chunkCount: refinedChunks.length
  });
  return {
    chunks: refinedChunks,
    scores,
    totalIterations,
    history,
    boundaryEvaluations
  };
}
239
+
240
+ //#endregion
241
+ export { evaluateBoundary, refineChunks };
@@ -0,0 +1,62 @@
1
+ const require_rolldown_runtime = require('./_virtual/rolldown_runtime.cjs');
2
+ const require_evaluation = require('./evaluation.cjs');
3
+ let _logtape_logtape = require("@logtape/logtape");
4
+
5
+ //#region src/select.ts
6
// Module-level logger for candidate selection, registered under the
// category ["vertana", "core", "select"].
const logger = (0, _logtape_logtape.getLogger)(["vertana", "core", "select"]);
11
/**
 * Scores every translation candidate with the evaluator model and returns
 * them ranked from highest to lowest score.
 *
 * Candidates are evaluated one after another; the abort signal, if any,
 * is checked before each evaluation.
 *
 * @param evaluatorModel The language model to use for evaluation.
 * @param original The original text that was translated.
 * @param candidates The translation candidates to evaluate.
 * @param options Selection options.
 * @returns A promise that resolves to `{ best, all }`, where `all` is
 *          sorted by descending score and `best` is its first element.
 * @throws {RangeError} If no candidates are provided.
 */
async function selectBest(evaluatorModel, original, candidates, options) {
  if (candidates.length === 0) {
    throw new RangeError("At least one candidate is required.");
  }
  logger.debug("Selecting best from {count} candidates...", { count: candidates.length });
  const evaluatedCandidates = [];
  for (const candidate of candidates) {
    options.signal?.throwIfAborted();
    const { score, issues } = await require_evaluation.evaluate(evaluatorModel, original, candidate.text, {
      targetLanguage: options.targetLanguage,
      sourceLanguage: options.sourceLanguage,
      glossary: options.glossary,
      signal: options.signal
    });
    evaluatedCandidates.push({ candidate, score, issues });
    logger.debug("Candidate {index} score: {score}.", {
      index: evaluatedCandidates.length,
      score,
      issues: issues.length
    });
  }
  // Sort a copy so the evaluation order above stays untouched.
  const byScoreDescending = evaluatedCandidates.slice().sort((a, b) => b.score - a.score);
  const rankedCandidates = byScoreDescending.map(({ candidate, score, issues }, index) => ({
    text: candidate.text,
    metadata: candidate.metadata,
    score,
    issues,
    rank: index + 1
  }));
  const best = rankedCandidates[0];
  logger.debug("Selected best candidate with score: {score}.", {
    score: best.score,
    totalCandidates: candidates.length
  });
  return {
    best,
    all: rankedCandidates
  };
}
60
+
61
+ //#endregion
62
+ exports.selectBest = selectBest;
@@ -0,0 +1,83 @@
1
+ import { Glossary } from "./glossary.cjs";
2
+ import { TranslationIssue } from "./evaluation.cjs";
3
+ import { LanguageModel } from "ai";
4
+
5
+ //#region src/select.d.ts
6
+
7
/**
 * One candidate translation submitted for evaluation.
 *
 * @typeParam T The type of the optional metadata carried by the candidate.
 */
interface Candidate<T = unknown> {
  /** The candidate's translated text. */
  readonly text: string;
  /** Optional metadata attached to this candidate (e.g., model info). */
  readonly metadata?: T;
}
20
/**
 * A {@link Candidate} together with its evaluation outcome and its
 * position in the ranking.
 */
interface RankedCandidate<T = unknown> extends Candidate<T> {
  /** The evaluation score, between 0 and 1. */
  readonly score: number;
  /** The issues the evaluation found in this translation. */
  readonly issues: ReadonlyArray<TranslationIssue>;
  /** The 1-based rank of this candidate; 1 is the best. */
  readonly rank: number;
}
37
/**
 * Options accepted by {@link selectBest}.
 */
interface SelectBestOptions {
  /** The language the candidates were translated into. */
  readonly targetLanguage: string | Intl.Locale;
  /** The language of the original text, if known. */
  readonly sourceLanguage?: string | Intl.Locale;
  /** A glossary of terms that should be used consistently. */
  readonly glossary?: Glossary;
  /** An optional `AbortSignal` used to cancel the selection. */
  readonly signal?: AbortSignal;
}
58
/**
 * What {@link selectBest} resolves to.
 */
interface SelectBestResult<T = unknown> {
  /** The candidate with the best evaluation score. */
  readonly best: RankedCandidate<T>;
  /** Every candidate with its evaluation results, sorted by rank. */
  readonly all: ReadonlyArray<RankedCandidate<T>>;
}
71
/**
 * Scores several translation candidates and picks the best one.
 *
 * @param evaluatorModel The language model that performs the evaluation.
 * @param original The original text that was translated.
 * @param candidates The translation candidates to evaluate.
 * @param options Selection options.
 * @returns A promise that resolves to the selection result.
 * @throws {RangeError} If no candidates are provided.
 */
declare function selectBest<T = unknown>(
  evaluatorModel: LanguageModel,
  original: string,
  candidates: ReadonlyArray<Candidate<T>>,
  options: SelectBestOptions,
): Promise<SelectBestResult<T>>;
82
+ //#endregion
83
+ export { Candidate, RankedCandidate, SelectBestOptions, SelectBestResult, selectBest };
@@ -0,0 +1,83 @@
1
+ import { Glossary } from "./glossary.js";
2
+ import { TranslationIssue } from "./evaluation.js";
3
+ import { LanguageModel } from "ai";
4
+
5
+ //#region src/select.d.ts
6
+
7
/**
 * One candidate translation submitted for evaluation.
 *
 * @typeParam T The type of the optional metadata carried by the candidate.
 */
interface Candidate<T = unknown> {
  /** The candidate's translated text. */
  readonly text: string;
  /** Optional metadata attached to this candidate (e.g., model info). */
  readonly metadata?: T;
}
20
/**
 * A {@link Candidate} together with its evaluation outcome and its
 * position in the ranking.
 */
interface RankedCandidate<T = unknown> extends Candidate<T> {
  /** The evaluation score, between 0 and 1. */
  readonly score: number;
  /** The issues the evaluation found in this translation. */
  readonly issues: ReadonlyArray<TranslationIssue>;
  /** The 1-based rank of this candidate; 1 is the best. */
  readonly rank: number;
}
37
/**
 * Options accepted by {@link selectBest}.
 */
interface SelectBestOptions {
  /** The language the candidates were translated into. */
  readonly targetLanguage: string | Intl.Locale;
  /** The language of the original text, if known. */
  readonly sourceLanguage?: string | Intl.Locale;
  /** A glossary of terms that should be used consistently. */
  readonly glossary?: Glossary;
  /** An optional `AbortSignal` used to cancel the selection. */
  readonly signal?: AbortSignal;
}
58
/**
 * What {@link selectBest} resolves to.
 */
interface SelectBestResult<T = unknown> {
  /** The candidate with the best evaluation score. */
  readonly best: RankedCandidate<T>;
  /** Every candidate with its evaluation results, sorted by rank. */
  readonly all: ReadonlyArray<RankedCandidate<T>>;
}
71
/**
 * Scores several translation candidates and picks the best one.
 *
 * @param evaluatorModel The language model that performs the evaluation.
 * @param original The original text that was translated.
 * @param candidates The translation candidates to evaluate.
 * @param options Selection options.
 * @returns A promise that resolves to the selection result.
 * @throws {RangeError} If no candidates are provided.
 */
declare function selectBest<T = unknown>(
  evaluatorModel: LanguageModel,
  original: string,
  candidates: ReadonlyArray<Candidate<T>>,
  options: SelectBestOptions,
): Promise<SelectBestResult<T>>;
82
+ //#endregion
83
+ export { Candidate, RankedCandidate, SelectBestOptions, SelectBestResult, selectBest };
package/dist/select.js ADDED
@@ -0,0 +1,61 @@
1
+ import { evaluate } from "./evaluation.js";
2
+ import { getLogger } from "@logtape/logtape";
3
+
4
+ //#region src/select.ts
5
// Module-level logger for candidate selection, registered under the
// category ["vertana", "core", "select"].
const logger = getLogger(["vertana", "core", "select"]);
10
/**
 * Scores every translation candidate with the evaluator model and returns
 * them ranked from highest to lowest score.
 *
 * Candidates are evaluated one after another; the abort signal, if any,
 * is checked before each evaluation.
 *
 * @param evaluatorModel The language model to use for evaluation.
 * @param original The original text that was translated.
 * @param candidates The translation candidates to evaluate.
 * @param options Selection options.
 * @returns A promise that resolves to `{ best, all }`, where `all` is
 *          sorted by descending score and `best` is its first element.
 * @throws {RangeError} If no candidates are provided.
 */
async function selectBest(evaluatorModel, original, candidates, options) {
  if (candidates.length === 0) {
    throw new RangeError("At least one candidate is required.");
  }
  logger.debug("Selecting best from {count} candidates...", { count: candidates.length });
  const evaluatedCandidates = [];
  for (const candidate of candidates) {
    options.signal?.throwIfAborted();
    const { score, issues } = await evaluate(evaluatorModel, original, candidate.text, {
      targetLanguage: options.targetLanguage,
      sourceLanguage: options.sourceLanguage,
      glossary: options.glossary,
      signal: options.signal
    });
    evaluatedCandidates.push({ candidate, score, issues });
    logger.debug("Candidate {index} score: {score}.", {
      index: evaluatedCandidates.length,
      score,
      issues: issues.length
    });
  }
  // Sort a copy so the evaluation order above stays untouched.
  const byScoreDescending = evaluatedCandidates.slice().sort((a, b) => b.score - a.score);
  const rankedCandidates = byScoreDescending.map(({ candidate, score, issues }, index) => ({
    text: candidate.text,
    metadata: candidate.metadata,
    score,
    issues,
    rank: index + 1
  }));
  const best = rankedCandidates[0];
  logger.debug("Selected best candidate with score: {score}.", {
    score: best.score,
    totalCandidates: candidates.length
  });
  return {
    best,
    all: rankedCandidates
  };
}
59
+
60
+ //#endregion
61
+ export { selectBest };
package/dist/terms.cjs ADDED
@@ -0,0 +1,60 @@
1
+ const require_rolldown_runtime = require('./_virtual/rolldown_runtime.cjs');
2
+ let ai = require("ai");
3
+ let zod = require("zod");
4
+
5
+ //#region src/terms.ts
6
/**
 * Zod schema for the structured reply of the term-extraction call: an
 * object holding a `terms` array of `{ original, translated, context? }`.
 */
const extractedTermsSchema = zod.z.object({
  terms: zod.z.array(
    zod.z.object({
      original: zod.z.string().describe("The original term in the source text"),
      translated: zod.z.string().describe("The translated term"),
      context: zod.z.string().optional().describe("Optional context for when to use this translation")
    })
  )
});
14
/**
 * Extracts key terminology pairs from source text and its translation.
 *
 * This function uses an LLM to identify important terms, proper nouns,
 * technical vocabulary, and other key phrases that should be translated
 * consistently throughout a document.
 *
 * `maxTerms` (default 10) is normalized to a non-negative whole number:
 * fractions are rounded down, and negative values or `NaN` count as 0.
 * When the limit is 0 no model call is made and an empty array is
 * returned.  The model is told the limit and its reply is additionally
 * truncated to it.
 *
 * @param model The language model to use for extraction.
 * @param sourceText The original source text.
 * @param translatedText The translated text.
 * @param options Optional extraction options (`maxTerms`, `signal`).
 * @returns An array of glossary entries, at most `maxTerms` long.
 */
async function extractTerms(model, sourceText, translatedText, options) {
  const requested = options?.maxTerms ?? 10;
  // A negative limit would make slice(0, -n) drop terms from the end
  // instead of capping the count, so clamp it to a non-negative integer.
  const maxTerms = Number.isNaN(requested) ? 0 : Math.max(0, Math.floor(requested));
  const signal = options?.signal;
  signal?.throwIfAborted();
  // Nothing can be returned with a limit of 0; skip the model call.
  if (maxTerms === 0) return [];
  const result = await (0, ai.generateObject)({
    model,
    schema: extractedTermsSchema,
    system: `You are a terminology extraction expert. Your task is to identify key terms from a source text and its translation that should be translated consistently.

Focus on extracting:
- Technical terms and domain-specific vocabulary
- Proper nouns (names, organizations, products)
- Key concepts and phrases
- Terms that appear multiple times or are central to the meaning

Do NOT extract:
- Common words that don't need consistent translation
- Function words (articles, prepositions, conjunctions)
- Terms that are already well-known in both languages

Extract at most ${maxTerms} of the most important terms.`,
    prompt: `Source text:
${sourceText}

Translated text:
${translatedText}

Extract the key terminology pairs from the above texts.`,
    abortSignal: signal
  });
  return result.object.terms.slice(0, maxTerms);
}
58
+
59
+ //#endregion
60
+ exports.extractTerms = extractTerms;
@@ -0,0 +1,36 @@
1
+ import { GlossaryEntry } from "./glossary.cjs";
2
+ import { LanguageModel } from "ai";
3
+
4
+ //#region src/terms.d.ts
5
+
6
/**
 * Settings that control how terms are extracted from a translation.
 */
interface ExtractTermsOptions {
  /**
   * Upper bound on the number of terms to extract.
   *
   * @default `10`
   */
  readonly maxTerms?: number;
  /** Optional signal used to abort the extraction. */
  readonly signal?: AbortSignal;
}
21
/**
 * Extracts key terminology pairs from a source text and its translation.
 *
 * An LLM is used to pick out important terms, proper nouns, technical
 * vocabulary, and other key phrases that should be translated
 * consistently throughout a document.
 *
 * @param model The language model that performs the extraction.
 * @param sourceText The original source text.
 * @param translatedText The translated text.
 * @param options Optional extraction options.
 * @returns An array of glossary entries.
 */
declare function extractTerms(
  model: LanguageModel,
  sourceText: string,
  translatedText: string,
  options?: ExtractTermsOptions,
): Promise<ReadonlyArray<GlossaryEntry>>;
35
+ //#endregion
36
+ export { ExtractTermsOptions, extractTerms };
@@ -0,0 +1,36 @@
1
+ import { GlossaryEntry } from "./glossary.js";
2
+ import { LanguageModel } from "ai";
3
+
4
+ //#region src/terms.d.ts
5
+
6
/**
 * Settings that control how terms are extracted from a translation.
 */
interface ExtractTermsOptions {
  /**
   * Upper bound on the number of terms to extract.
   *
   * @default `10`
   */
  readonly maxTerms?: number;
  /** Optional signal used to abort the extraction. */
  readonly signal?: AbortSignal;
}
21
/**
 * Extracts key terminology pairs from a source text and its translation.
 *
 * An LLM is used to pick out important terms, proper nouns, technical
 * vocabulary, and other key phrases that should be translated
 * consistently throughout a document.
 *
 * @param model The language model that performs the extraction.
 * @param sourceText The original source text.
 * @param translatedText The translated text.
 * @param options Optional extraction options.
 * @returns An array of glossary entries.
 */
declare function extractTerms(
  model: LanguageModel,
  sourceText: string,
  translatedText: string,
  options?: ExtractTermsOptions,
): Promise<ReadonlyArray<GlossaryEntry>>;
35
+ //#endregion
36
+ export { ExtractTermsOptions, extractTerms };