@vertana/core 0.1.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/LICENSE +20 -0
  2. package/dist/_virtual/rolldown_runtime.cjs +29 -0
  3. package/dist/accumulator.cjs +64 -0
  4. package/dist/accumulator.d.cts +51 -0
  5. package/dist/accumulator.d.ts +51 -0
  6. package/dist/accumulator.js +61 -0
  7. package/dist/chunking.cjs +76 -0
  8. package/dist/chunking.d.cts +124 -0
  9. package/dist/chunking.d.ts +124 -0
  10. package/dist/chunking.js +74 -0
  11. package/dist/context.cjs +51 -0
  12. package/dist/context.d.cts +148 -0
  13. package/dist/context.d.ts +148 -0
  14. package/dist/context.js +49 -0
  15. package/dist/evaluation.cjs +120 -0
  16. package/dist/evaluation.d.cts +111 -0
  17. package/dist/evaluation.d.ts +111 -0
  18. package/dist/evaluation.js +119 -0
  19. package/dist/glossary.cjs +0 -0
  20. package/dist/glossary.d.cts +25 -0
  21. package/dist/glossary.d.ts +25 -0
  22. package/dist/glossary.js +0 -0
  23. package/dist/html.cjs +253 -0
  24. package/dist/html.d.cts +41 -0
  25. package/dist/html.d.ts +41 -0
  26. package/dist/html.js +250 -0
  27. package/dist/index.cjs +39 -0
  28. package/dist/index.d.cts +17 -0
  29. package/dist/index.d.ts +17 -0
  30. package/dist/index.js +16 -0
  31. package/dist/markdown.cjs +300 -0
  32. package/dist/markdown.d.cts +17 -0
  33. package/dist/markdown.d.ts +17 -0
  34. package/dist/markdown.js +300 -0
  35. package/dist/plaintext.cjs +70 -0
  36. package/dist/plaintext.d.cts +17 -0
  37. package/dist/plaintext.d.ts +17 -0
  38. package/dist/plaintext.js +70 -0
  39. package/dist/prompt.cjs +91 -0
  40. package/dist/prompt.d.cts +74 -0
  41. package/dist/prompt.d.ts +74 -0
  42. package/dist/prompt.js +86 -0
  43. package/dist/refine.cjs +243 -0
  44. package/dist/refine.d.cts +148 -0
  45. package/dist/refine.d.ts +148 -0
  46. package/dist/refine.js +241 -0
  47. package/dist/select.cjs +62 -0
  48. package/dist/select.d.cts +83 -0
  49. package/dist/select.d.ts +83 -0
  50. package/dist/select.js +61 -0
  51. package/dist/terms.cjs +60 -0
  52. package/dist/terms.d.cts +36 -0
  53. package/dist/terms.d.ts +36 -0
  54. package/dist/terms.js +59 -0
  55. package/dist/tokens.cjs +40 -0
  56. package/dist/tokens.d.cts +24 -0
  57. package/dist/tokens.d.ts +24 -0
  58. package/dist/tokens.js +38 -0
  59. package/dist/tools.cjs +35 -0
  60. package/dist/tools.d.cts +20 -0
  61. package/dist/tools.d.ts +20 -0
  62. package/dist/tools.js +34 -0
  63. package/dist/translate.cjs +200 -0
  64. package/dist/translate.d.cts +190 -0
  65. package/dist/translate.d.ts +190 -0
  66. package/dist/translate.js +199 -0
  67. package/dist/window.cjs +0 -0
  68. package/dist/window.d.cts +48 -0
  69. package/dist/window.d.ts +48 -0
  70. package/dist/window.js +0 -0
  71. package/package.json +215 -0
package/dist/terms.js ADDED
@@ -0,0 +1,59 @@
import { generateObject } from "ai";
import { z } from "zod";

//#region src/terms.ts
/**
 * Structured-output schema for the extraction call: an object holding a
 * `terms` array of source/target term pairs with optional usage context.
 */
const extractedTermsSchema = z.object({
	terms: z.array(z.object({
		original: z.string().describe("The original term in the source text"),
		translated: z.string().describe("The translated term"),
		context: z.string().optional().describe("Optional context for when to use this translation")
	}))
});
/**
 * Extracts key terminology pairs from source text and its translation.
 *
 * Uses an LLM structured-output call to identify important terms, proper
 * nouns, technical vocabulary, and other key phrases that should be
 * translated consistently throughout a document.
 *
 * @param model The language model to use for extraction.
 * @param sourceText The original source text.
 * @param translatedText The translated text.
 * @param options Optional extraction options (`maxTerms`, `signal`).
 * @returns An array of glossary entries, capped at `maxTerms`.
 */
async function extractTerms(model, sourceText, translatedText, options) {
	const maxTerms = options?.maxTerms ?? 10;
	const abortSignal = options?.signal;
	abortSignal?.throwIfAborted();
	const response = await generateObject({
		model,
		schema: extractedTermsSchema,
		system: `You are a terminology extraction expert. Your task is to identify key terms from a source text and its translation that should be translated consistently.

Focus on extracting:
- Technical terms and domain-specific vocabulary
- Proper nouns (names, organizations, products)
- Key concepts and phrases
- Terms that appear multiple times or are central to the meaning

Do NOT extract:
- Common words that don't need consistent translation
- Function words (articles, prepositions, conjunctions)
- Terms that are already well-known in both languages

Extract at most ${maxTerms} of the most important terms.`,
		prompt: `Source text:
${sourceText}

Translated text:
${translatedText}

Extract the key terminology pairs from the above texts.`,
		abortSignal
	});
	// The model may ignore the limit stated in the prompt, so enforce it here.
	return response.object.terms.slice(0, maxTerms);
}

//#endregion
export { extractTerms };
@@ -0,0 +1,40 @@
const require_rolldown_runtime = require('./_virtual/rolldown_runtime.cjs');
let js_tiktoken = require("js-tiktoken");

//#region src/tokens.ts
// Shared encoder instance, created lazily on first use and reused afterwards.
let encoder;
/**
 * Gets the default tiktoken encoder (cl100k_base), constructing it on
 * demand and caching the instance for subsequent calls.
 *
 * @returns The tiktoken encoder instance.
 */
function getEncoder() {
	if (encoder == null) {
		encoder = (0, js_tiktoken.getEncoding)("cl100k_base");
	}
	return encoder;
}
/**
 * Counts the number of tokens in a string using the cl100k_base encoding.
 *
 * This is the default token counter used by the chunker when no custom
 * counter is provided.
 *
 * @param text The text to count tokens for.
 * @returns The number of tokens.
 * @since 0.1.0
 */
function countTokens(text) {
	const tokens = getEncoder().encode(text);
	return tokens.length;
}
/**
 * Creates a token counter using the default tiktoken encoder (cl100k_base).
 *
 * @returns A token counter function.
 * @since 0.1.0
 */
function createDefaultTokenCounter() {
	return countTokens;
}

//#endregion
exports.countTokens = countTokens;
exports.createDefaultTokenCounter = createDefaultTokenCounter;
@@ -0,0 +1,24 @@
1
+ import { TokenCounter } from "./chunking.cjs";
2
+
3
+ //#region src/tokens.d.ts
4
+
5
+ /**
6
+ * Counts the number of tokens in a string using the cl100k_base encoding.
7
+ *
8
+ * This is the default token counter used by the chunker when no custom
9
+ * counter is provided.
10
+ *
11
+ * @param text The text to count tokens for.
12
+ * @returns The number of tokens.
13
+ * @since 0.1.0
14
+ */
15
+ declare function countTokens(text: string): number;
16
+ /**
17
+ * Creates a token counter using the default tiktoken encoder (cl100k_base).
18
+ *
19
+ * @returns A token counter function.
20
+ * @since 0.1.0
21
+ */
22
+ declare function createDefaultTokenCounter(): TokenCounter;
23
+ //#endregion
24
+ export { countTokens, createDefaultTokenCounter };
@@ -0,0 +1,24 @@
1
+ import { TokenCounter } from "./chunking.js";
2
+
3
+ //#region src/tokens.d.ts
4
+
5
+ /**
6
+ * Counts the number of tokens in a string using the cl100k_base encoding.
7
+ *
8
+ * This is the default token counter used by the chunker when no custom
9
+ * counter is provided.
10
+ *
11
+ * @param text The text to count tokens for.
12
+ * @returns The number of tokens.
13
+ * @since 0.1.0
14
+ */
15
+ declare function countTokens(text: string): number;
16
+ /**
17
+ * Creates a token counter using the default tiktoken encoder (cl100k_base).
18
+ *
19
+ * @returns A token counter function.
20
+ * @since 0.1.0
21
+ */
22
+ declare function createDefaultTokenCounter(): TokenCounter;
23
+ //#endregion
24
+ export { countTokens, createDefaultTokenCounter };
package/dist/tokens.js ADDED
@@ -0,0 +1,38 @@
import { getEncoding } from "js-tiktoken";

//#region src/tokens.ts
// Shared encoder instance, created lazily on first use and reused afterwards.
let encoder;
/**
 * Gets the default tiktoken encoder (cl100k_base), constructing it on
 * demand and caching the instance for subsequent calls.
 *
 * @returns The tiktoken encoder instance.
 */
function getEncoder() {
	if (encoder == null) {
		encoder = getEncoding("cl100k_base");
	}
	return encoder;
}
/**
 * Counts the number of tokens in a string using the cl100k_base encoding.
 *
 * This is the default token counter used by the chunker when no custom
 * counter is provided.
 *
 * @param text The text to count tokens for.
 * @returns The number of tokens.
 * @since 0.1.0
 */
function countTokens(text) {
	const tokens = getEncoder().encode(text);
	return tokens.length;
}
/**
 * Creates a token counter using the default tiktoken encoder (cl100k_base).
 *
 * @returns A token counter function.
 * @since 0.1.0
 */
function createDefaultTokenCounter() {
	return countTokens;
}

//#endregion
export { countTokens, createDefaultTokenCounter };
package/dist/tools.cjs ADDED
@@ -0,0 +1,35 @@
const require_rolldown_runtime = require('./_virtual/rolldown_runtime.cjs');
let ai = require("ai");
let _standard_community_standard_json = require("@standard-community/standard-json");

//#region src/tools.ts
/**
 * Creates an AI SDK ToolSet from passive context sources.
 *
 * Converts each passive context source into a tool definition usable with
 * the AI SDK's `generateText` or `streamText`, so the LLM can invoke the
 * sources on demand to gather additional context during translation.
 *
 * @param sources The passive context sources to convert into tools.
 * @param signal Optional abort signal to cancel the operation.
 * @returns A promise that resolves to a ToolSet keyed by source name.
 */
async function createToolSet(sources, signal) {
	const toolSet = {};
	for (const source of sources) {
		signal?.throwIfAborted();
		// Convert the source's standard-schema parameters into JSON Schema
		// so the AI SDK can describe the tool inputs to the model.
		const jsonParams = await (0, _standard_community_standard_json.toJsonSchema)(source.parameters);
		toolSet[source.name] = (0, ai.tool)({
			description: source.description,
			inputSchema: (0, ai.jsonSchema)(jsonParams),
			execute: async (params) => {
				const gathered = await source.gather(params, { signal });
				return gathered.content;
			}
		});
	}
	return toolSet;
}

//#endregion
exports.createToolSet = createToolSet;
@@ -0,0 +1,20 @@
1
+ import { PassiveContextSource } from "./context.cjs";
2
+ import { ToolSet } from "ai";
3
+
4
+ //#region src/tools.d.ts
5
+
6
+ /**
7
+ * Creates an AI SDK ToolSet from passive context sources.
8
+ *
9
+ * This function converts passive context sources into tool definitions that
10
+ * can be used with the AI SDK's `generateText` or `streamText` functions.
11
+ * The LLM can then invoke these tools during translation to gather additional
12
+ * context on demand.
13
+ *
14
+ * @param sources The passive context sources to convert into tools.
15
+ * @param signal Optional abort signal to cancel the operation.
16
+ * @returns A promise that resolves to a ToolSet keyed by source name.
17
+ */
18
+ declare function createToolSet(sources: readonly PassiveContextSource<unknown>[], signal?: AbortSignal): Promise<ToolSet>;
19
+ //#endregion
20
+ export { createToolSet };
@@ -0,0 +1,20 @@
1
+ import { PassiveContextSource } from "./context.js";
2
+ import { ToolSet } from "ai";
3
+
4
+ //#region src/tools.d.ts
5
+
6
+ /**
7
+ * Creates an AI SDK ToolSet from passive context sources.
8
+ *
9
+ * This function converts passive context sources into tool definitions that
10
+ * can be used with the AI SDK's `generateText` or `streamText` functions.
11
+ * The LLM can then invoke these tools during translation to gather additional
12
+ * context on demand.
13
+ *
14
+ * @param sources The passive context sources to convert into tools.
15
+ * @param signal Optional abort signal to cancel the operation.
16
+ * @returns A promise that resolves to a ToolSet keyed by source name.
17
+ */
18
+ declare function createToolSet(sources: readonly PassiveContextSource<unknown>[], signal?: AbortSignal): Promise<ToolSet>;
19
+ //#endregion
20
+ export { createToolSet };
package/dist/tools.js ADDED
@@ -0,0 +1,34 @@
import { jsonSchema, tool } from "ai";
import { toJsonSchema } from "@standard-community/standard-json";

//#region src/tools.ts
/**
 * Creates an AI SDK ToolSet from passive context sources.
 *
 * Converts each passive context source into a tool definition usable with
 * the AI SDK's `generateText` or `streamText`, so the LLM can invoke the
 * sources on demand to gather additional context during translation.
 *
 * @param sources The passive context sources to convert into tools.
 * @param signal Optional abort signal to cancel the operation.
 * @returns A promise that resolves to a ToolSet keyed by source name.
 */
async function createToolSet(sources, signal) {
	const toolSet = {};
	for (const source of sources) {
		signal?.throwIfAborted();
		// Convert the source's standard-schema parameters into JSON Schema
		// so the AI SDK can describe the tool inputs to the model.
		const jsonParams = await toJsonSchema(source.parameters);
		toolSet[source.name] = tool({
			description: source.description,
			inputSchema: jsonSchema(jsonParams),
			execute: async (params) => {
				const gathered = await source.gather(params, { signal });
				return gathered.content;
			}
		});
	}
	return toolSet;
}

//#endregion
export { createToolSet };
@@ -0,0 +1,200 @@
const require_rolldown_runtime = require('./_virtual/rolldown_runtime.cjs');
const require_refine = require('./refine.cjs');
const require_select = require('./select.cjs');
const require_prompt = require('./prompt.cjs');
const require_terms = require('./terms.cjs');
let _logtape_logtape = require("@logtape/logtape");
let ai = require("ai");

//#region src/translate.ts
const logger = (0, _logtape_logtape.getLogger)([
	"vertana",
	"core",
	"translate"
]);
/**
 * Translates a single chunk of text with one model.
 *
 * @returns The translated text and the number of tokens consumed
 *   (0 when the provider reports no usage).
 */
async function translateSingleChunk(model, systemPrompt, text, previousChunks, tools, hasPassiveSources, signal, title) {
	const result = await (0, ai.generateText)({
		model,
		system: systemPrompt,
		// Earlier chunk pairs are passed as context for consistency; the
		// title is only supplied (and translated) with the first chunk.
		prompt: previousChunks.length > 0 ? require_prompt.buildUserPromptWithContext(text, previousChunks) : require_prompt.buildUserPrompt(text, title),
		tools,
		// Allow multi-step tool use only when passive context sources exist.
		stopWhen: hasPassiveSources ? (0, ai.stepCountIs)(10) : void 0,
		abortSignal: signal
	});
	return {
		text: result.text,
		tokenUsed: result.usage?.totalTokens ?? 0
	};
}
/**
 * Translates source chunks using the provided models and options.
 *
 * This function returns an async iterable that yields events for each
 * translated chunk, allowing consumers to process chunks incrementally
 * and track progress.
 *
 * Features:
 * - Per-chunk parallel translation with multiple models (best-of-N selection)
 * - Previous chunk context passing for consistency
 * - Dynamic glossary accumulation across chunks
 * - Streaming results via AsyncIterable
 *
 * @param sourceChunks The source text chunks to translate.
 * @param options Translation options.
 * @returns An async iterable of translation events.
 */
async function* translateChunks(sourceChunks, options) {
	const { targetLanguage, sourceLanguage, title, tone, domain, mediaType, context, glossary: initialGlossary = [], models, evaluatorModel, dynamicGlossary, refinement, tools, signal } = options;
	const primaryModel = models[0];
	const useBestOfN = models.length > 1;
	const hasPassiveSources = tools != null && Object.keys(tools).length > 0;
	logger.info("Starting translation of {chunkCount} chunks with {modelCount} model(s)...", {
		chunkCount: sourceChunks.length,
		modelCount: models.length,
		targetLanguage: targetLanguage.toString(),
		useBestOfN,
		dynamicGlossary: dynamicGlossary != null,
		refinement: refinement != null
	});
	const baseSystemPromptOptions = {
		sourceLanguage,
		tone,
		domain,
		mediaType,
		context
	};
	const accumulatedGlossary = [];
	/**
	 * Builds system prompt with the current glossary state.
	 */
	function buildCurrentSystemPrompt() {
		const currentGlossary = accumulatedGlossary.length > 0 ? [...initialGlossary, ...accumulatedGlossary] : initialGlossary;
		return require_prompt.buildSystemPrompt(targetLanguage, {
			...baseSystemPromptOptions,
			glossary: currentGlossary.length > 0 ? currentGlossary : void 0
		});
	}
	const translations = [];
	let totalTokensUsed = 0;
	const previousChunks = [];
	// Per-chunk best-of-N scores.  The declared contract (translate.d.ts)
	// says the complete event's qualityScore is present whenever best-of-N
	// selection was used, so these are averaged when refinement does not
	// produce a score of its own.
	const chunkScores = [];
	for (let i = 0; i < sourceChunks.length; i++) {
		signal?.throwIfAborted();
		logger.debug("Translating chunk {index} of {total}...", {
			index: i + 1,
			total: sourceChunks.length
		});
		const currentSystemPrompt = dynamicGlossary != null ? buildCurrentSystemPrompt() : require_prompt.buildSystemPrompt(targetLanguage, {
			...baseSystemPromptOptions,
			glossary: initialGlossary.length > 0 ? initialGlossary : void 0
		});
		const currentGlossary = accumulatedGlossary.length > 0 ? [...initialGlossary, ...accumulatedGlossary] : initialGlossary;
		const chunkTitle = i === 0 ? title : void 0;
		// Translate the chunk with every model in parallel.
		const chunkResults = await Promise.all(models.map(async (model) => {
			return {
				model,
				...await translateSingleChunk(model, currentSystemPrompt, sourceChunks[i], previousChunks, tools, hasPassiveSources, signal, chunkTitle)
			};
		}));
		let chunkTokensUsed = 0;
		for (const result of chunkResults) chunkTokensUsed += result.tokenUsed;
		totalTokensUsed += chunkTokensUsed;
		let selectedTranslation;
		let selectedModel;
		let qualityScore;
		if (useBestOfN) {
			const candidates = chunkResults.map((r) => ({
				text: r.text,
				metadata: r.model
			}));
			const selectionResult = await require_select.selectBest(evaluatorModel ?? primaryModel, sourceChunks[i], candidates, {
				targetLanguage,
				sourceLanguage,
				glossary: currentGlossary.length > 0 ? currentGlossary : void 0,
				signal
			});
			selectedTranslation = selectionResult.best.text;
			selectedModel = selectionResult.best.metadata;
			qualityScore = selectionResult.best.score;
			if (qualityScore != null) chunkScores.push(qualityScore);
			logger.debug("Best-of-N selection for chunk {index}: score {score}.", {
				index: i + 1,
				score: qualityScore
			});
		} else selectedTranslation = chunkResults[0].text;
		translations.push(selectedTranslation);
		let extractedTerms;
		if (dynamicGlossary != null) {
			const extractorModel = dynamicGlossary.extractorModel ?? primaryModel;
			const maxTermsPerChunk = dynamicGlossary.maxTermsPerChunk ?? 10;
			extractedTerms = await require_terms.extractTerms(extractorModel, sourceChunks[i], selectedTranslation, {
				maxTerms: maxTermsPerChunk,
				signal
			});
			// Deduplicate case-insensitively against both the initial and the
			// accumulated glossary before adding newly extracted terms.
			let addedTerms = 0;
			for (const term of extractedTerms) if (!(accumulatedGlossary.some((existing) => existing.original.toLowerCase() === term.original.toLowerCase()) || initialGlossary.some((existing) => existing.original.toLowerCase() === term.original.toLowerCase()))) {
				accumulatedGlossary.push(term);
				addedTerms++;
			}
			logger.debug("Extracted {extracted} terms from chunk {index}, added {added} new terms.", {
				extracted: extractedTerms.length,
				index: i + 1,
				added: addedTerms,
				totalGlossary: accumulatedGlossary.length
			});
		}
		previousChunks.push({
			source: sourceChunks[i],
			translation: selectedTranslation
		});
		yield {
			type: "chunk",
			index: i,
			translation: selectedTranslation,
			tokensUsed: chunkTokensUsed,
			qualityScore,
			selectedModel,
			extractedTerms
		};
	}
	let finalTranslations = translations;
	let finalQualityScore;
	let refinementIterations;
	if (refinement != null) {
		logger.info("Starting refinement phase...");
		const refinementGlossary = accumulatedGlossary.length > 0 ? [...initialGlossary, ...accumulatedGlossary] : initialGlossary;
		const refineResult = await require_refine.refineChunks(primaryModel, sourceChunks, translations, {
			targetLanguage,
			sourceLanguage,
			targetScore: refinement.qualityThreshold ?? .85,
			maxIterations: refinement.maxIterations ?? 3,
			glossary: refinementGlossary.length > 0 ? refinementGlossary : void 0,
			evaluateBoundaries: sourceChunks.length > 1,
			signal
		});
		finalTranslations = [...refineResult.chunks];
		finalQualityScore = refineResult.scores.reduce((a, b) => a + b, 0) / refineResult.scores.length;
		refinementIterations = refineResult.totalIterations;
		logger.info("Refinement completed after {iterations} iteration(s), average score: {score}.", {
			iterations: refinementIterations,
			score: finalQualityScore
		});
	}
	// Fix: when refinement did not run (or produced no score) but best-of-N
	// selection did, report the average per-chunk selection score so the
	// complete event matches its declared contract.
	if (finalQualityScore == null && chunkScores.length > 0) {
		finalQualityScore = chunkScores.reduce((a, b) => a + b, 0) / chunkScores.length;
	}
	logger.info("Translation completed.", {
		totalChunks: sourceChunks.length,
		totalTokensUsed,
		glossaryTerms: accumulatedGlossary.length
	});
	yield {
		type: "complete",
		translations: finalTranslations,
		totalTokensUsed,
		accumulatedGlossary,
		qualityScore: finalQualityScore,
		refinementIterations
	};
}

//#endregion
exports.translateChunks = translateChunks;
@@ -0,0 +1,190 @@
1
+ import { Glossary, GlossaryEntry } from "./glossary.cjs";
2
+ import { MediaType, TranslationTone } from "./prompt.cjs";
3
+ import { LanguageModel, ToolSet } from "ai";
4
+
5
+ //#region src/translate.d.ts
6
+
7
+ /**
8
+ * Options for dynamic glossary accumulation during chunk translation.
9
+ */
10
+ interface DynamicGlossaryOptions {
11
+ /**
12
+ * Maximum number of terms to extract from each chunk.
13
+ *
14
+ * @default `10`
15
+ */
16
+ readonly maxTermsPerChunk?: number;
17
+ /**
18
+ * The model to use for extracting terms.
19
+ * If not specified, the primary translation model is used.
20
+ */
21
+ readonly extractorModel?: LanguageModel;
22
+ }
23
+ /**
24
+ * Options for iterative refinement of translations.
25
+ */
26
+ interface RefinementOptions {
27
+ /**
28
+ * The minimum acceptable quality score (0-1). Chunks with scores below
29
+ * this threshold will be refined.
30
+ *
31
+ * @default `0.85`
32
+ */
33
+ readonly qualityThreshold?: number;
34
+ /**
35
+ * Maximum number of refinement iterations per chunk.
36
+ *
37
+ * @default `3`
38
+ */
39
+ readonly maxIterations?: number;
40
+ }
41
+ /**
42
+ * Options for translating chunks.
43
+ */
44
+ interface TranslateChunksOptions {
45
+ /**
46
+ * The target language for translation.
47
+ */
48
+ readonly targetLanguage: Intl.Locale | string;
49
+ /**
50
+ * The source language of the input text.
51
+ */
52
+ readonly sourceLanguage?: Intl.Locale | string;
53
+ /**
54
+ * An optional title for the input text. It's translated along with
55
+ * the first chunk if provided.
56
+ */
57
+ readonly title?: string;
58
+ /**
59
+ * The desired tone for the translated text.
60
+ */
61
+ readonly tone?: TranslationTone;
62
+ /**
63
+ * The domain or context of the text.
64
+ */
65
+ readonly domain?: string;
66
+ /**
67
+ * The media type of the input text.
68
+ */
69
+ readonly mediaType?: MediaType;
70
+ /**
71
+ * Additional context for the translation.
72
+ */
73
+ readonly context?: string;
74
+ /**
75
+ * Initial glossary for consistent terminology.
76
+ */
77
+ readonly glossary?: Glossary;
78
+ /**
79
+ * The language models to use for translation.
80
+ * If multiple models are provided, best-of-N selection is used.
81
+ */
82
+ readonly models: readonly LanguageModel[];
83
+ /**
84
+ * The model to use for evaluating and selecting the best translation.
85
+ * If not specified, the first model in the array is used.
86
+ */
87
+ readonly evaluatorModel?: LanguageModel;
88
+ /**
89
+ * Dynamic glossary accumulation settings.
90
+ * When enabled, terms are extracted from each translated chunk
91
+ * and accumulated for use in subsequent chunks.
92
+ */
93
+ readonly dynamicGlossary?: DynamicGlossaryOptions | null;
94
+ /**
95
+ * Refinement settings for iterative translation improvement.
96
+ * When enabled, chunks are evaluated and refined until they meet
97
+ * the quality threshold or reach maximum iterations.
98
+ */
99
+ readonly refinement?: RefinementOptions | null;
100
+ /**
101
+ * Optional tools for passive context sources.
102
+ */
103
+ readonly tools?: ToolSet;
104
+ /**
105
+ * Optional abort signal.
106
+ */
107
+ readonly signal?: AbortSignal;
108
+ }
109
+ /**
110
+ * Event yielded for each translated chunk.
111
+ */
112
+ interface TranslatedChunkEvent {
113
+ readonly type: "chunk";
114
+ /**
115
+ * The index of the chunk (0-based).
116
+ */
117
+ readonly index: number;
118
+ /**
119
+ * The translated text for this chunk.
120
+ */
121
+ readonly translation: string;
122
+ /**
123
+ * The number of tokens used for this chunk.
124
+ */
125
+ readonly tokensUsed: number;
126
+ /**
127
+ * The quality score if best-of-N selection was used.
128
+ */
129
+ readonly qualityScore?: number;
130
+ /**
131
+ * The model that produced the best translation for this chunk.
132
+ */
133
+ readonly selectedModel?: LanguageModel;
134
+ /**
135
+ * Terms extracted from this chunk if dynamic glossary is enabled.
136
+ */
137
+ readonly extractedTerms?: readonly GlossaryEntry[];
138
+ }
139
+ /**
140
+ * Event yielded when all chunks are translated.
141
+ */
142
+ interface TranslateChunksComplete {
143
+ readonly type: "complete";
144
+ /**
145
+ * All translated chunks in order.
146
+ */
147
+ readonly translations: readonly string[];
148
+ /**
149
+ * Total tokens used across all chunks.
150
+ */
151
+ readonly totalTokensUsed: number;
152
+ /**
153
+ * All accumulated glossary terms from dynamic extraction.
154
+ */
155
+ readonly accumulatedGlossary: readonly GlossaryEntry[];
156
+ /**
157
+ * Average quality score across all chunks.
158
+ * Only present if best-of-N selection or refinement was used.
159
+ */
160
+ readonly qualityScore?: number;
161
+ /**
162
+ * Total number of refinement iterations performed.
163
+ * Only present if refinement was enabled.
164
+ */
165
+ readonly refinementIterations?: number;
166
+ }
167
+ /**
168
+ * Events yielded during chunk translation.
169
+ */
170
+ type TranslateChunksEvent = TranslatedChunkEvent | TranslateChunksComplete;
171
+ /**
172
+ * Translates source chunks using the provided models and options.
173
+ *
174
+ * This function returns an async iterable that yields events for each
175
+ * translated chunk, allowing consumers to process chunks incrementally
176
+ * and track progress.
177
+ *
178
+ * Features:
179
+ * - Per-chunk parallel translation with multiple models (best-of-N selection)
180
+ * - Previous chunk context passing for consistency
181
+ * - Dynamic glossary accumulation across chunks
182
+ * - Streaming results via AsyncIterable
183
+ *
184
+ * @param sourceChunks The source text chunks to translate.
185
+ * @param options Translation options.
186
+ * @returns An async iterable of translation events.
187
+ */
188
+ declare function translateChunks(sourceChunks: readonly string[], options: TranslateChunksOptions): AsyncIterable<TranslateChunksEvent>;
189
+ //#endregion
190
+ export { DynamicGlossaryOptions, RefinementOptions, TranslateChunksComplete, TranslateChunksEvent, TranslateChunksOptions, TranslatedChunkEvent, translateChunks };