@vertana/core 0.1.0-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/LICENSE +20 -0
  2. package/dist/_virtual/rolldown_runtime.cjs +29 -0
  3. package/dist/accumulator.cjs +64 -0
  4. package/dist/accumulator.d.cts +51 -0
  5. package/dist/accumulator.d.ts +51 -0
  6. package/dist/accumulator.js +61 -0
  7. package/dist/chunking.cjs +76 -0
  8. package/dist/chunking.d.cts +124 -0
  9. package/dist/chunking.d.ts +124 -0
  10. package/dist/chunking.js +74 -0
  11. package/dist/context.cjs +51 -0
  12. package/dist/context.d.cts +148 -0
  13. package/dist/context.d.ts +148 -0
  14. package/dist/context.js +49 -0
  15. package/dist/evaluation.cjs +120 -0
  16. package/dist/evaluation.d.cts +111 -0
  17. package/dist/evaluation.d.ts +111 -0
  18. package/dist/evaluation.js +119 -0
  19. package/dist/glossary.cjs +0 -0
  20. package/dist/glossary.d.cts +25 -0
  21. package/dist/glossary.d.ts +25 -0
  22. package/dist/glossary.js +0 -0
  23. package/dist/html.cjs +253 -0
  24. package/dist/html.d.cts +41 -0
  25. package/dist/html.d.ts +41 -0
  26. package/dist/html.js +250 -0
  27. package/dist/index.cjs +39 -0
  28. package/dist/index.d.cts +17 -0
  29. package/dist/index.d.ts +17 -0
  30. package/dist/index.js +16 -0
  31. package/dist/markdown.cjs +300 -0
  32. package/dist/markdown.d.cts +17 -0
  33. package/dist/markdown.d.ts +17 -0
  34. package/dist/markdown.js +300 -0
  35. package/dist/plaintext.cjs +70 -0
  36. package/dist/plaintext.d.cts +17 -0
  37. package/dist/plaintext.d.ts +17 -0
  38. package/dist/plaintext.js +70 -0
  39. package/dist/prompt.cjs +91 -0
  40. package/dist/prompt.d.cts +74 -0
  41. package/dist/prompt.d.ts +74 -0
  42. package/dist/prompt.js +86 -0
  43. package/dist/refine.cjs +243 -0
  44. package/dist/refine.d.cts +148 -0
  45. package/dist/refine.d.ts +148 -0
  46. package/dist/refine.js +241 -0
  47. package/dist/select.cjs +62 -0
  48. package/dist/select.d.cts +83 -0
  49. package/dist/select.d.ts +83 -0
  50. package/dist/select.js +61 -0
  51. package/dist/terms.cjs +60 -0
  52. package/dist/terms.d.cts +36 -0
  53. package/dist/terms.d.ts +36 -0
  54. package/dist/terms.js +59 -0
  55. package/dist/tokens.cjs +40 -0
  56. package/dist/tokens.d.cts +24 -0
  57. package/dist/tokens.d.ts +24 -0
  58. package/dist/tokens.js +38 -0
  59. package/dist/tools.cjs +35 -0
  60. package/dist/tools.d.cts +20 -0
  61. package/dist/tools.d.ts +20 -0
  62. package/dist/tools.js +34 -0
  63. package/dist/translate.cjs +200 -0
  64. package/dist/translate.d.cts +190 -0
  65. package/dist/translate.d.ts +190 -0
  66. package/dist/translate.js +199 -0
  67. package/dist/window.cjs +0 -0
  68. package/dist/window.d.cts +48 -0
  69. package/dist/window.d.ts +48 -0
  70. package/dist/window.js +0 -0
  71. package/package.json +215 -0
package/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ MIT License
2
+
3
+ Copyright 2025 Hong Minhee
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/dist/_virtual/rolldown_runtime.cjs ADDED
@@ -0,0 +1,29 @@
1
+ //#region rolldown:runtime
// Local aliases for the built-in Object reflection functions that the
// helpers in this runtime region call (__copyProps and __toESM).
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
// Exposes the own properties of `from` on `to` as getters and returns `to`.
//
// - Nothing is copied unless `from` is a non-null object or a function.
// - Keys that `to` already owns, and the one key equal to `except`, are skipped.
// - Each defined getter reads `from[key]` when accessed, so it reflects the
//   current value on `from` rather than a snapshot.
// - The new property is enumerable when the source descriptor is enumerable,
//   or when no descriptor could be looked up.
//
// `desc` is only scratch storage for the descriptor looked up inside the loop.
8
+ var __copyProps = (to, from, except, desc) => {
9
// `&&` binds tighter than `||`: (truthy AND an object) OR a function.
+ if (from && typeof from === "object" || typeof from === "function") {
10
+ for (var keys = __getOwnPropNames(from), i = 0, n = keys.length, key; i < n; i++) {
11
+ key = keys[i];
12
+ if (!__hasOwnProp.call(to, key) && key !== except) {
13
+ __defProp(to, key, {
14
// `key` is a single `var` shared by every iteration, so the current value is
// fixed into the getter through bind() instead of being captured by closure.
+ get: ((k) => from[k]).bind(null, key),
15
+ enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
16
+ });
17
+ }
18
+ }
19
+ }
20
+ return to;
21
+ };
// Builds an ES-module-namespace-like wrapper object for the value `mod`.
//
// - The wrapper is created with the same prototype as `mod`, or is a plain
//   `{}` when `mod` is null/undefined.
// - When `isNodeMode` is truthy, `mod` is falsy, or `mod.__esModule` is not
//   truthy, an enumerable `default` property holding `mod` itself is defined
//   on the wrapper first.
// - The own properties of `mod` are then exposed on the wrapper as getters via
//   __copyProps. Because __copyProps skips keys the wrapper already owns, a
//   `default` defined in the previous step wins over any `mod.default`.
//
// `target` is a scratch parameter; it is overwritten before being read.
22
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", {
23
+ value: mod,
24
+ enumerable: true
25
+ }) : target, mod));
26
+
27
+ //#endregion
28
+
29
+ exports.__toESM = __toESM;
package/dist/accumulator.cjs ADDED
@@ -0,0 +1,64 @@
1
+
2
+ //#region src/accumulator.ts
/**
 * Builds the starting value for a fold over translation stream events.
 *
 * @returns A new state whose quality-score sum and count are both zero and
 *          whose per-model win tally is a fresh, empty `Map`. No `complete`
 *          property is set.
 */
function createInitialAccumulatorState() {
  const modelWinCounts = /* @__PURE__ */ new Map();
  const initialState = {
    totalQualityScore: 0,
    qualityScoreCount: 0,
    modelWinCounts
  };
  return initialState;
}
/**
 * Folds one translation stream event into an accumulator state.
 *
 * The input state is never mutated. A `"complete"` event is stored under
 * `complete` and nothing else is touched. For any other event, a non-null
 * `qualityScore` is added to the running sum (and bumps the count), and a
 * non-null `selectedModel` gets one more win in a copied tally map. An event
 * carrying neither field returns the very same state object.
 *
 * @param state The current accumulator state.
 * @param event The event to accumulate.
 * @returns The resulting state (a new object whenever something changed).
 */
function accumulateEvent(state, event) {
  if (event.type === "complete") {
    return { ...state, complete: event };
  }

  const { qualityScore, selectedModel } = event;
  const hasScore = qualityScore != null;
  const hasWinner = selectedModel != null;
  // Nothing to record: hand back the identical object.
  if (!hasScore && !hasWinner) return state;

  const next = { ...state };
  if (hasScore) {
    next.totalQualityScore = state.totalQualityScore + qualityScore;
    next.qualityScoreCount = state.qualityScoreCount + 1;
  }
  if (hasWinner) {
    // Copy before updating so the previous state's map stays untouched.
    const wins = new Map(state.modelWinCounts);
    wins.set(selectedModel, (wins.get(selectedModel) ?? 0) + 1);
    next.modelWinCounts = wins;
  }
  return next;
}
/**
 * Returns the key with the highest value in a map.
 *
 * When several keys share the highest value, the first one in the map's
 * iteration order is returned. `NaN` values are never preferred over a
 * comparable number; a key is still returned when every value is `-Infinity`
 * or `NaN`, so `undefined` really does mean "the map is empty".
 *
 * @param map A map of keys to numeric values.
 * @returns The key with the highest value, or undefined if the map is empty.
 */
function maxByValue(map) {
  let maxKey;
  let maxValue;
  let hasCandidate = false;
  for (const [key, value] of map) {
    // The first entry always seeds the result; seeding with -Infinity and a
    // strict `>` would drop maps whose values are all -Infinity or NaN.
    const replacesNaN = Number.isNaN(maxValue) && !Number.isNaN(value);
    if (!hasCandidate || value > maxValue || replacesNaN) {
      maxValue = value;
      maxKey = key;
      hasCandidate = true;
    }
  }
  return maxKey;
}
60
+
61
+ //#endregion
62
+ exports.accumulateEvent = accumulateEvent;
63
+ exports.createInitialAccumulatorState = createInitialAccumulatorState;
64
+ exports.maxByValue = maxByValue;
package/dist/accumulator.d.cts ADDED
@@ -0,0 +1,51 @@
1
+ import { TranslateChunksComplete, TranslateChunksEvent } from "./translate.cjs";
2
+ import { LanguageModel } from "ai";
3
+
4
+ //#region src/accumulator.d.ts
5
+
6
+ /**
7
+ * Accumulated state from processing translation stream events.
8
+ */
9
+ interface AccumulatorState {
10
+ /**
11
+ * The completion event, if received.
12
+ */
13
+ readonly complete?: TranslateChunksComplete;
14
+ /**
15
+ * Sum of quality scores from chunk events.
16
+ */
17
+ readonly totalQualityScore: number;
18
+ /**
19
+ * Number of chunks that had quality scores.
20
+ */
21
+ readonly qualityScoreCount: number;
22
+ /**
23
+ * Count of wins per model during best-of-N selection.
24
+ */
25
+ readonly modelWinCounts: ReadonlyMap<LanguageModel, number>;
26
+ }
27
+ /**
28
+ * Creates the initial accumulator state.
29
+ *
30
+ * @returns A fresh accumulator state with zeroed counters.
31
+ */
32
+ declare function createInitialAccumulatorState(): AccumulatorState;
33
+ /**
34
+ * Accumulates a translation stream event into the state.
35
+ *
36
+ * This is a pure function that returns a new state without modifying the input.
37
+ *
38
+ * @param state The current accumulator state.
39
+ * @param event The event to accumulate.
40
+ * @returns A new state with the event accumulated.
41
+ */
42
+ declare function accumulateEvent(state: AccumulatorState, event: TranslateChunksEvent): AccumulatorState;
43
+ /**
44
+ * Returns the key with the highest value in a map. When several keys share the highest value, the first one in the map's iteration order is returned.
45
+ *
46
+ * @param map A map of keys to numeric values.
47
+ * @returns The key with the highest value, or undefined if the map is empty.
48
+ */
49
+ declare function maxByValue<K>(map: ReadonlyMap<K, number>): K | undefined;
50
+ //#endregion
51
+ export { AccumulatorState, accumulateEvent, createInitialAccumulatorState, maxByValue };
package/dist/accumulator.d.ts ADDED
@@ -0,0 +1,51 @@
1
+ import { TranslateChunksComplete, TranslateChunksEvent } from "./translate.js";
2
+ import { LanguageModel } from "ai";
3
+
4
+ //#region src/accumulator.d.ts
5
+
6
+ /**
7
+ * Accumulated state from processing translation stream events.
8
+ */
9
+ interface AccumulatorState {
10
+ /**
11
+ * The completion event, if received.
12
+ */
13
+ readonly complete?: TranslateChunksComplete;
14
+ /**
15
+ * Sum of quality scores from chunk events.
16
+ */
17
+ readonly totalQualityScore: number;
18
+ /**
19
+ * Number of chunks that had quality scores.
20
+ */
21
+ readonly qualityScoreCount: number;
22
+ /**
23
+ * Count of wins per model during best-of-N selection.
24
+ */
25
+ readonly modelWinCounts: ReadonlyMap<LanguageModel, number>;
26
+ }
27
+ /**
28
+ * Creates the initial accumulator state.
29
+ *
30
+ * @returns A fresh accumulator state with zeroed counters.
31
+ */
32
+ declare function createInitialAccumulatorState(): AccumulatorState;
33
+ /**
34
+ * Accumulates a translation stream event into the state.
35
+ *
36
+ * This is a pure function that returns a new state without modifying the input.
37
+ *
38
+ * @param state The current accumulator state.
39
+ * @param event The event to accumulate.
40
+ * @returns A new state with the event accumulated.
41
+ */
42
+ declare function accumulateEvent(state: AccumulatorState, event: TranslateChunksEvent): AccumulatorState;
43
+ /**
44
+ * Returns the key with the highest value in a map. When several keys share the highest value, the first one in the map's iteration order is returned.
45
+ *
46
+ * @param map A map of keys to numeric values.
47
+ * @returns The key with the highest value, or undefined if the map is empty.
48
+ */
49
+ declare function maxByValue<K>(map: ReadonlyMap<K, number>): K | undefined;
50
+ //#endregion
51
+ export { AccumulatorState, accumulateEvent, createInitialAccumulatorState, maxByValue };
package/dist/accumulator.js ADDED
@@ -0,0 +1,61 @@
1
+ //#region src/accumulator.ts
/**
 * Builds the starting value for a fold over translation stream events.
 *
 * @returns A new state whose quality-score sum and count are both zero and
 *          whose per-model win tally is a fresh, empty `Map`. No `complete`
 *          property is set.
 */
function createInitialAccumulatorState() {
  const modelWinCounts = /* @__PURE__ */ new Map();
  const initialState = {
    totalQualityScore: 0,
    qualityScoreCount: 0,
    modelWinCounts
  };
  return initialState;
}
/**
 * Folds one translation stream event into an accumulator state.
 *
 * The input state is never mutated. A `"complete"` event is stored under
 * `complete` and nothing else is touched. For any other event, a non-null
 * `qualityScore` is added to the running sum (and bumps the count), and a
 * non-null `selectedModel` gets one more win in a copied tally map. An event
 * carrying neither field returns the very same state object.
 *
 * @param state The current accumulator state.
 * @param event The event to accumulate.
 * @returns The resulting state (a new object whenever something changed).
 */
function accumulateEvent(state, event) {
  if (event.type === "complete") {
    return { ...state, complete: event };
  }

  const { qualityScore, selectedModel } = event;
  const hasScore = qualityScore != null;
  const hasWinner = selectedModel != null;
  // Nothing to record: hand back the identical object.
  if (!hasScore && !hasWinner) return state;

  const next = { ...state };
  if (hasScore) {
    next.totalQualityScore = state.totalQualityScore + qualityScore;
    next.qualityScoreCount = state.qualityScoreCount + 1;
  }
  if (hasWinner) {
    // Copy before updating so the previous state's map stays untouched.
    const wins = new Map(state.modelWinCounts);
    wins.set(selectedModel, (wins.get(selectedModel) ?? 0) + 1);
    next.modelWinCounts = wins;
  }
  return next;
}
/**
 * Returns the key with the highest value in a map.
 *
 * When several keys share the highest value, the first one in the map's
 * iteration order is returned. `NaN` values are never preferred over a
 * comparable number; a key is still returned when every value is `-Infinity`
 * or `NaN`, so `undefined` really does mean "the map is empty".
 *
 * @param map A map of keys to numeric values.
 * @returns The key with the highest value, or undefined if the map is empty.
 */
function maxByValue(map) {
  let maxKey;
  let maxValue;
  let hasCandidate = false;
  for (const [key, value] of map) {
    // The first entry always seeds the result; seeding with -Infinity and a
    // strict `>` would drop maps whose values are all -Infinity or NaN.
    const replacesNaN = Number.isNaN(maxValue) && !Number.isNaN(value);
    if (!hasCandidate || value > maxValue || replacesNaN) {
      maxValue = value;
      maxKey = key;
      hasCandidate = true;
    }
  }
  return maxKey;
}
59
+
60
+ //#endregion
61
+ export { accumulateEvent, createInitialAccumulatorState, maxByValue };
package/dist/chunking.cjs ADDED
@@ -0,0 +1,76 @@
1
+ const require_rolldown_runtime = require('./_virtual/rolldown_runtime.cjs');
2
+ let _logtape_logtape = require("@logtape/logtape");
3
+
4
+ //#region src/chunking.ts
// Module-level logger for the ["vertana", "core", "chunking"] category.
// The `(0, fn)(...)` form calls getLogger as a plain function instead of as
// a method of the required module object.
const logger = (0, _logtape_logtape.getLogger)(["vertana", "core", "chunking"]);
/**
 * Picks the built-in chunker that matches a media type.
 *
 * The chunker module is loaded lazily, only for the branch that is taken.
 * `"text/html"` and `"text/plain"` have dedicated chunkers; every other
 * value, including `undefined`, falls through to the Markdown chunker.
 *
 * @param mediaType The media type of the text.
 * @returns A promise that resolves to the appropriate chunker for the media type.
 */
async function getDefaultChunker(mediaType) {
  switch (mediaType) {
    case "text/html": {
      const { createHtmlChunker: makeChunker } = await Promise.resolve().then(() => require("./html.cjs"));
      return makeChunker();
    }
    case "text/plain": {
      const { createPlainTextChunker: makeChunker } = await Promise.resolve().then(() => require("./plaintext.cjs"));
      return makeChunker();
    }
    default: {
      const { createMarkdownChunker: makeChunker } = await Promise.resolve().then(() => require("./markdown.cjs"));
      return makeChunker();
    }
  }
}
/**
 * Splits text into translation-sized pieces and returns their contents.
 *
 * Combines chunker selection and execution in one call:
 *
 * - Throws right away if `options.signal` is already aborted.
 * - With `options.chunker === null`, chunking is disabled and the text is
 *   returned as a single-element array.
 * - Otherwise the supplied chunker is used, or the default one for
 *   `options.mediaType`; the token counter defaults to the one exported by
 *   the `tokens` module, and `maxTokens` defaults to 4096.
 * - If the chunker yields no chunks, the text is again returned as a
 *   single-element array.
 *
 * @param text The text to chunk.
 * @param options Options for chunking.
 * @returns A promise that resolves to an array of chunk content strings.
 */
async function chunkText(text, options) {
  const signal = options?.signal;
  signal?.throwIfAborted();

  const textLength = text.length;
  // Used for log output only; chunker selection receives the raw option.
  const mediaType = options?.mediaType ?? "text/markdown";
  const customChunker = options?.chunker;
  if (customChunker === null) {
    logger.debug("Chunking disabled, returning as single chunk.", { textLength });
    return [text];
  }

  const maxTokens = options?.maxTokens ?? 4096;
  logger.debug("Chunking text...", {
    mediaType,
    textLength,
    maxTokens
  });

  const chunker = customChunker ?? await getDefaultChunker(options?.mediaType);
  // Load the default tokenizer only when the caller did not bring one.
  const countTokens = options?.countTokens
    ?? (await Promise.resolve().then(() => require("./tokens.cjs"))).countTokens;

  const chunks = await chunker(text, { maxTokens, countTokens, signal });
  if (chunks.length === 0) {
    logger.debug("No chunks produced, returning as single chunk.", { textLength });
    return [text];
  }

  logger.debug("Chunking completed.", {
    chunkCount: chunks.length,
    mediaType
  });
  return chunks.map(({ content }) => content);
}
73
+
74
+ //#endregion
75
+ exports.chunkText = chunkText;
76
+ exports.getDefaultChunker = getDefaultChunker;
package/dist/chunking.d.cts ADDED
@@ -0,0 +1,124 @@
1
+ import { MediaType } from "./prompt.cjs";
2
+
3
+ //#region src/chunking.d.ts
4
+
5
+ /**
6
+ * A function that counts the number of tokens in a string.
7
+ *
8
+ * @param text The text to count tokens for.
9
+ * @returns The number of tokens.
10
+ */
11
+ type TokenCounter = (text: string) => number;
12
+ /**
13
+ * Options for {@link Chunker}.
14
+ */
15
+ interface ChunkerOptions {
16
+ /**
17
+ * The maximum number of tokens per chunk.
18
+ *
19
+ * @default `4096`
20
+ */
21
+ readonly maxTokens?: number;
22
+ /**
23
+ * A custom token counter function. If not provided, a default
24
+ * implementation using js-tiktoken (cl100k_base encoding) is used.
25
+ */
26
+ readonly countTokens?: TokenCounter;
27
+ /**
28
+ * An optional `AbortSignal` to cancel the chunking operation.
29
+ */
30
+ readonly signal?: AbortSignal;
31
+ }
32
+ /**
33
+ * Splits text into chunks for translation.
34
+ *
35
+ * @param text The text to split into chunks.
36
+ * @param options Optional settings for the chunking operation.
37
+ * @returns A promise that resolves to an array of chunks.
38
+ */
39
+ type Chunker = (text: string, options?: ChunkerOptions) => Promise<readonly Chunk[]>;
40
+ /**
41
+ * The type of content in a chunk.
42
+ *
43
+ * - `"paragraph"`: A paragraph of text.
44
+ * - `"section"`: A section of the document.
45
+ * - `"heading"`: A heading or title.
46
+ * - `"list"`: A list of items.
47
+ * - `"code"`: A code block.
48
+ */
49
+ type ChunkType = "paragraph" | "section" | "heading" | "list" | "code";
50
+ /**
51
+ * A chunk of text to be translated.
52
+ */
53
+ interface Chunk {
54
+ /**
55
+ * The text content of the chunk.
56
+ */
57
+ readonly content: string;
58
+ /**
59
+ * The type of content in the chunk.
60
+ */
61
+ readonly type: ChunkType;
62
+ /**
63
+ * The zero-based index of the chunk in the document.
64
+ */
65
+ readonly index: number;
66
+ }
67
+ /**
68
+ * Options for {@link chunkText}.
69
+ */
70
+ interface ChunkTextOptions {
71
+ /**
72
+ * The media type of the text. Used to select the default chunker
73
+ * when {@link chunker} is not provided.
74
+ *
75
+ * - `"text/html"`: Uses the HTML chunker.
76
+ * - `"text/markdown"`: Uses the Markdown chunker.
77
+ * - `"text/plain"`: Uses the plain text chunker.
78
+ *
79
+ * @default `"text/markdown"`
80
+ */
81
+ readonly mediaType?: MediaType;
82
+ /**
83
+ * A custom chunker function. If not provided, a default chunker
84
+ * based on {@link mediaType} is used. Set to `null` to disable
85
+ * chunking entirely (text will be returned as a single chunk).
86
+ */
87
+ readonly chunker?: Chunker | null;
88
+ /**
89
+ * The maximum number of tokens per chunk.
90
+ *
91
+ * @default `4096`
92
+ */
93
+ readonly maxTokens?: number;
94
+ /**
95
+ * A custom token counter function. If not provided, a default
96
+ * implementation using js-tiktoken (cl100k_base encoding) is used.
97
+ */
98
+ readonly countTokens?: TokenCounter;
99
+ /**
100
+ * An optional `AbortSignal` to cancel the chunking operation.
101
+ */
102
+ readonly signal?: AbortSignal;
103
+ }
104
+ /**
105
+ * Gets the default chunker based on media type.
106
+ *
107
+ * @param mediaType The media type of the text. `"text/html"` and `"text/plain"` select their dedicated chunkers; any other value, including `undefined`, selects the Markdown chunker.
108
+ * @returns A promise that resolves to the appropriate chunker for the media type.
109
+ */
110
+ declare function getDefaultChunker(mediaType?: MediaType): Promise<Chunker>;
111
+ /**
112
+ * Chunks text into smaller pieces for translation.
113
+ *
114
+ * This is a convenience function that combines chunker selection and execution.
115
+ * If chunking is disabled (chunker is `null`), the text is returned as a
116
+ * single-element array. The same single-element result is returned when the chunker produces no chunks.
117
+ *
118
+ * @param text The text to chunk.
119
+ * @param options Options for chunking. If `options.signal` is already aborted when the function is called, the call rejects before any chunking is attempted.
120
+ * @returns A promise that resolves to an array of chunk content strings.
121
+ */
122
+ declare function chunkText(text: string, options?: ChunkTextOptions): Promise<readonly string[]>;
123
+ //#endregion
124
+ export { Chunk, ChunkTextOptions, ChunkType, Chunker, ChunkerOptions, type MediaType, TokenCounter, chunkText, getDefaultChunker };
package/dist/chunking.d.ts ADDED
@@ -0,0 +1,124 @@
1
+ import { MediaType } from "./prompt.js";
2
+
3
+ //#region src/chunking.d.ts
4
+
5
+ /**
6
+ * A function that counts the number of tokens in a string.
7
+ *
8
+ * @param text The text to count tokens for.
9
+ * @returns The number of tokens.
10
+ */
11
+ type TokenCounter = (text: string) => number;
12
+ /**
13
+ * Options for {@link Chunker}.
14
+ */
15
+ interface ChunkerOptions {
16
+ /**
17
+ * The maximum number of tokens per chunk.
18
+ *
19
+ * @default `4096`
20
+ */
21
+ readonly maxTokens?: number;
22
+ /**
23
+ * A custom token counter function. If not provided, a default
24
+ * implementation using js-tiktoken (cl100k_base encoding) is used.
25
+ */
26
+ readonly countTokens?: TokenCounter;
27
+ /**
28
+ * An optional `AbortSignal` to cancel the chunking operation.
29
+ */
30
+ readonly signal?: AbortSignal;
31
+ }
32
+ /**
33
+ * Splits text into chunks for translation.
34
+ *
35
+ * @param text The text to split into chunks.
36
+ * @param options Optional settings for the chunking operation.
37
+ * @returns A promise that resolves to an array of chunks.
38
+ */
39
+ type Chunker = (text: string, options?: ChunkerOptions) => Promise<readonly Chunk[]>;
40
+ /**
41
+ * The type of content in a chunk.
42
+ *
43
+ * - `"paragraph"`: A paragraph of text.
44
+ * - `"section"`: A section of the document.
45
+ * - `"heading"`: A heading or title.
46
+ * - `"list"`: A list of items.
47
+ * - `"code"`: A code block.
48
+ */
49
+ type ChunkType = "paragraph" | "section" | "heading" | "list" | "code";
50
+ /**
51
+ * A chunk of text to be translated.
52
+ */
53
+ interface Chunk {
54
+ /**
55
+ * The text content of the chunk.
56
+ */
57
+ readonly content: string;
58
+ /**
59
+ * The type of content in the chunk.
60
+ */
61
+ readonly type: ChunkType;
62
+ /**
63
+ * The zero-based index of the chunk in the document.
64
+ */
65
+ readonly index: number;
66
+ }
67
+ /**
68
+ * Options for {@link chunkText}.
69
+ */
70
+ interface ChunkTextOptions {
71
+ /**
72
+ * The media type of the text. Used to select the default chunker
73
+ * when {@link chunker} is not provided.
74
+ *
75
+ * - `"text/html"`: Uses the HTML chunker.
76
+ * - `"text/markdown"`: Uses the Markdown chunker.
77
+ * - `"text/plain"`: Uses the plain text chunker.
78
+ *
79
+ * @default `"text/markdown"`
80
+ */
81
+ readonly mediaType?: MediaType;
82
+ /**
83
+ * A custom chunker function. If not provided, a default chunker
84
+ * based on {@link mediaType} is used. Set to `null` to disable
85
+ * chunking entirely (text will be returned as a single chunk).
86
+ */
87
+ readonly chunker?: Chunker | null;
88
+ /**
89
+ * The maximum number of tokens per chunk.
90
+ *
91
+ * @default `4096`
92
+ */
93
+ readonly maxTokens?: number;
94
+ /**
95
+ * A custom token counter function. If not provided, a default
96
+ * implementation using js-tiktoken (cl100k_base encoding) is used.
97
+ */
98
+ readonly countTokens?: TokenCounter;
99
+ /**
100
+ * An optional `AbortSignal` to cancel the chunking operation.
101
+ */
102
+ readonly signal?: AbortSignal;
103
+ }
104
+ /**
105
+ * Gets the default chunker based on media type.
106
+ *
107
+ * @param mediaType The media type of the text. `"text/html"` and `"text/plain"` select their dedicated chunkers; any other value, including `undefined`, selects the Markdown chunker.
108
+ * @returns A promise that resolves to the appropriate chunker for the media type.
109
+ */
110
+ declare function getDefaultChunker(mediaType?: MediaType): Promise<Chunker>;
111
+ /**
112
+ * Chunks text into smaller pieces for translation.
113
+ *
114
+ * This is a convenience function that combines chunker selection and execution.
115
+ * If chunking is disabled (chunker is `null`), the text is returned as a
116
+ * single-element array. The same single-element result is returned when the chunker produces no chunks.
117
+ *
118
+ * @param text The text to chunk.
119
+ * @param options Options for chunking. If `options.signal` is already aborted when the function is called, the call rejects before any chunking is attempted.
120
+ * @returns A promise that resolves to an array of chunk content strings.
121
+ */
122
+ declare function chunkText(text: string, options?: ChunkTextOptions): Promise<readonly string[]>;
123
+ //#endregion
124
+ export { Chunk, ChunkTextOptions, ChunkType, Chunker, ChunkerOptions, type MediaType, TokenCounter, chunkText, getDefaultChunker };
package/dist/chunking.js ADDED
@@ -0,0 +1,74 @@
1
+ import { getLogger } from "@logtape/logtape";
2
+
3
+ //#region src/chunking.ts
// Module-level logger for the ["vertana", "core", "chunking"] category.
const logger = getLogger(["vertana", "core", "chunking"]);
/**
 * Picks the built-in chunker that matches a media type.
 *
 * The chunker module is loaded lazily, only for the branch that is taken.
 * `"text/html"` and `"text/plain"` have dedicated chunkers; every other
 * value, including `undefined`, falls through to the Markdown chunker.
 *
 * @param mediaType The media type of the text.
 * @returns A promise that resolves to the appropriate chunker for the media type.
 */
async function getDefaultChunker(mediaType) {
  switch (mediaType) {
    case "text/html": {
      const { createHtmlChunker: makeChunker } = await import("./html.js");
      return makeChunker();
    }
    case "text/plain": {
      const { createPlainTextChunker: makeChunker } = await import("./plaintext.js");
      return makeChunker();
    }
    default: {
      const { createMarkdownChunker: makeChunker } = await import("./markdown.js");
      return makeChunker();
    }
  }
}
/**
 * Splits text into translation-sized pieces and returns their contents.
 *
 * Combines chunker selection and execution in one call:
 *
 * - Throws right away if `options.signal` is already aborted.
 * - With `options.chunker === null`, chunking is disabled and the text is
 *   returned as a single-element array.
 * - Otherwise the supplied chunker is used, or the default one for
 *   `options.mediaType`; the token counter defaults to the one exported by
 *   the `tokens` module, and `maxTokens` defaults to 4096.
 * - If the chunker yields no chunks, the text is again returned as a
 *   single-element array.
 *
 * @param text The text to chunk.
 * @param options Options for chunking.
 * @returns A promise that resolves to an array of chunk content strings.
 */
async function chunkText(text, options) {
  const signal = options?.signal;
  signal?.throwIfAborted();

  const textLength = text.length;
  // Used for log output only; chunker selection receives the raw option.
  const mediaType = options?.mediaType ?? "text/markdown";
  const customChunker = options?.chunker;
  if (customChunker === null) {
    logger.debug("Chunking disabled, returning as single chunk.", { textLength });
    return [text];
  }

  const maxTokens = options?.maxTokens ?? 4096;
  logger.debug("Chunking text...", {
    mediaType,
    textLength,
    maxTokens
  });

  const chunker = customChunker ?? await getDefaultChunker(options?.mediaType);
  // Load the default tokenizer only when the caller did not bring one.
  const countTokens = options?.countTokens ?? (await import("./tokens.js")).countTokens;

  const chunks = await chunker(text, { maxTokens, countTokens, signal });
  if (chunks.length === 0) {
    logger.debug("No chunks produced, returning as single chunk.", { textLength });
    return [text];
  }

  logger.debug("Chunking completed.", {
    chunkCount: chunks.length,
    mediaType
  });
  return chunks.map(({ content }) => content);
}
72
+
73
+ //#endregion
74
+ export { chunkText, getDefaultChunker };