@smart-cloud/ai-kit-ui 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +9 -19
- package/dist/index.js +9 -19
- package/package.json +2 -2
- package/src/ai-feature/AiFeature.tsx +0 -51
- package/src/ai-feature/chunked-features.ts +0 -254
- package/src/ai-feature/chunking-utils.ts +0 -211
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@smart-cloud/ai-kit-ui",
|
|
3
|
-
"version": "1.4.1",
|
|
3
|
+
"version": "1.4.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "./dist/index.cjs",
|
|
6
6
|
"module": "./dist/index.js",
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"@emotion/cache": "^11.14.0",
|
|
21
21
|
"@emotion/react": "^11.14.0",
|
|
22
22
|
"@mantine/colors-generator": "^8.3.16",
|
|
23
|
-
"@smart-cloud/ai-kit-core": "^1.4.
|
|
23
|
+
"@smart-cloud/ai-kit-core": "^1.4.4",
|
|
24
24
|
"@smart-cloud/wpsuite-core": "^2.2.10",
|
|
25
25
|
"@tabler/icons-react": "^3.40.0",
|
|
26
26
|
"chroma-js": "^3.2.0",
|
|
package/src/ai-feature/AiFeature.tsx
CHANGED
|
@@ -55,12 +55,6 @@ import {
|
|
|
55
55
|
|
|
56
56
|
import { translations } from "../i18n";
|
|
57
57
|
import { PoweredBy } from "../poweredBy";
|
|
58
|
-
import { shouldChunkInput } from "./chunking-utils";
|
|
59
|
-
import {
|
|
60
|
-
chunkedSummarize,
|
|
61
|
-
chunkedTranslate,
|
|
62
|
-
chunkedRewrite,
|
|
63
|
-
} from "./chunked-features";
|
|
64
58
|
import {
|
|
65
59
|
isBackendConfigured,
|
|
66
60
|
readDefaultOutputLanguage,
|
|
@@ -566,21 +560,6 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
|
|
|
566
560
|
onDeviceTimeoutOverride: onDeviceTimeout,
|
|
567
561
|
};
|
|
568
562
|
|
|
569
|
-
// Determine if we're using on-device mode
|
|
570
|
-
const isOnDevice =
|
|
571
|
-
modeOverride === "local-only" ||
|
|
572
|
-
(!modeOverride && context === "admin");
|
|
573
|
-
|
|
574
|
-
// Check if chunking is needed
|
|
575
|
-
if (shouldChunkInput(text!.trim(), "summarize", isOnDevice)) {
|
|
576
|
-
return await chunkedSummarize(
|
|
577
|
-
text!.trim(),
|
|
578
|
-
args,
|
|
579
|
-
featureOptions,
|
|
580
|
-
isOnDevice,
|
|
581
|
-
);
|
|
582
|
-
}
|
|
583
|
-
|
|
584
563
|
// Normal single-pass summarization
|
|
585
564
|
const out = await summarize(args, featureOptions);
|
|
586
565
|
return out.result;
|
|
@@ -675,21 +654,6 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
|
|
|
675
654
|
onDeviceTimeoutOverride: onDeviceTimeout,
|
|
676
655
|
};
|
|
677
656
|
|
|
678
|
-
// Determine if we're using on-device mode
|
|
679
|
-
const isOnDevice =
|
|
680
|
-
modeOverride === "local-only" ||
|
|
681
|
-
(!modeOverride && context === "admin");
|
|
682
|
-
|
|
683
|
-
// Check if chunking is needed (both on-device quota and AWS Translate limit)
|
|
684
|
-
if (shouldChunkInput(text!.trim(), "translate", isOnDevice)) {
|
|
685
|
-
return await chunkedTranslate(
|
|
686
|
-
text!.trim(),
|
|
687
|
-
args,
|
|
688
|
-
featureOptions,
|
|
689
|
-
isOnDevice,
|
|
690
|
-
);
|
|
691
|
-
}
|
|
692
|
-
|
|
693
657
|
// Normal single-pass translation
|
|
694
658
|
const out = await translate(args, featureOptions);
|
|
695
659
|
return out.result;
|
|
@@ -732,21 +696,6 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
|
|
|
732
696
|
onDeviceTimeoutOverride: onDeviceTimeout,
|
|
733
697
|
};
|
|
734
698
|
|
|
735
|
-
// Determine if we're using on-device mode
|
|
736
|
-
const isOnDevice =
|
|
737
|
-
modeOverride === "local-only" ||
|
|
738
|
-
(!modeOverride && context === "admin");
|
|
739
|
-
|
|
740
|
-
// Check if chunking is needed
|
|
741
|
-
if (shouldChunkInput(text!.trim(), "rewrite", isOnDevice)) {
|
|
742
|
-
return await chunkedRewrite(
|
|
743
|
-
text!.trim(),
|
|
744
|
-
args,
|
|
745
|
-
featureOptions,
|
|
746
|
-
isOnDevice,
|
|
747
|
-
);
|
|
748
|
-
}
|
|
749
|
-
|
|
750
699
|
// Normal single-pass rewrite
|
|
751
700
|
const out = await rewrite(args, featureOptions);
|
|
752
701
|
return out.result;
|
|
package/src/ai-feature/chunked-features.ts
DELETED
|
@@ -1,254 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Chunked versions of AI features for handling large inputs
|
|
3
|
-
*
|
|
4
|
-
* These wrappers split large inputs into smaller chunks, process them
|
|
5
|
-
* sequentially, and combine the results.
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import type {
|
|
9
|
-
AiKitStatusEvent,
|
|
10
|
-
FeatureOptions,
|
|
11
|
-
SummarizeArgs,
|
|
12
|
-
SummarizeResult,
|
|
13
|
-
TranslateArgs,
|
|
14
|
-
TranslateResult,
|
|
15
|
-
RewriteArgs,
|
|
16
|
-
RewriteResult,
|
|
17
|
-
} from "@smart-cloud/ai-kit-core";
|
|
18
|
-
import { summarize, translate, rewrite } from "@smart-cloud/ai-kit-core";
|
|
19
|
-
import {
|
|
20
|
-
splitTextIntoChunks,
|
|
21
|
-
getChunkSize,
|
|
22
|
-
estimateTokenCount,
|
|
23
|
-
} from "./chunking-utils";
|
|
24
|
-
|
|
25
|
-
/**
|
|
26
|
-
* Chunked summarize implementation
|
|
27
|
-
*
|
|
28
|
-
* Strategy:
|
|
29
|
-
* 1. Split text into chunks
|
|
30
|
-
* 2. Summarize each chunk
|
|
31
|
-
* 3. If combined summaries are still large, recursively summarize them
|
|
32
|
-
* 4. Return final summary
|
|
33
|
-
*/
|
|
34
|
-
export async function chunkedSummarize(
|
|
35
|
-
text: string,
|
|
36
|
-
args: SummarizeArgs,
|
|
37
|
-
options: FeatureOptions,
|
|
38
|
-
isOnDevice: boolean,
|
|
39
|
-
recursionLevel: number = 0,
|
|
40
|
-
): Promise<SummarizeResult> {
|
|
41
|
-
const maxChunkSize = getChunkSize("summarize", isOnDevice);
|
|
42
|
-
const chunks = splitTextIntoChunks(text, maxChunkSize);
|
|
43
|
-
|
|
44
|
-
if (chunks.length === 1) {
|
|
45
|
-
// No chunking needed
|
|
46
|
-
return await summarize(args, options);
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
// Prevent infinite recursion (max 2 levels)
|
|
50
|
-
if (recursionLevel >= 2) {
|
|
51
|
-
throw new Error(
|
|
52
|
-
"Text is too large to summarize. Please try using backend mode or reduce the input size.",
|
|
53
|
-
);
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
// Phase 1: Summarize each chunk
|
|
57
|
-
const chunkSummaries: string[] = [];
|
|
58
|
-
|
|
59
|
-
for (let i = 0; i < chunks.length; i++) {
|
|
60
|
-
const chunkResult = await summarize(
|
|
61
|
-
{
|
|
62
|
-
...args,
|
|
63
|
-
text: chunks[i].text,
|
|
64
|
-
},
|
|
65
|
-
{
|
|
66
|
-
...options,
|
|
67
|
-
onStatus: (e: AiKitStatusEvent) => {
|
|
68
|
-
if (options.onStatus) {
|
|
69
|
-
// Modify progress to reflect chunking
|
|
70
|
-
const baseProgress =
|
|
71
|
-
typeof e.progress === "number" ? e.progress : 0;
|
|
72
|
-
const chunkProgress = (i + baseProgress) / chunks.length;
|
|
73
|
-
|
|
74
|
-
options.onStatus({
|
|
75
|
-
...e,
|
|
76
|
-
message:
|
|
77
|
-
recursionLevel === 0
|
|
78
|
-
? `Summarizing part ${i + 1}/${chunks.length}...`
|
|
79
|
-
: `Combining summaries (${i + 1}/${chunks.length})...`,
|
|
80
|
-
progress: chunkProgress,
|
|
81
|
-
});
|
|
82
|
-
}
|
|
83
|
-
},
|
|
84
|
-
},
|
|
85
|
-
);
|
|
86
|
-
|
|
87
|
-
chunkSummaries.push(chunkResult.result);
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
// Phase 2: Combine summaries
|
|
91
|
-
const combinedSummaries = chunkSummaries.join("\n\n");
|
|
92
|
-
|
|
93
|
-
// Check if we need another round of summarization
|
|
94
|
-
if (estimateTokenCount(combinedSummaries) > maxChunkSize / 3.5) {
|
|
95
|
-
// Recursively summarize
|
|
96
|
-
return await chunkedSummarize(
|
|
97
|
-
combinedSummaries,
|
|
98
|
-
{
|
|
99
|
-
...args,
|
|
100
|
-
// Adjust length for recursive summarization
|
|
101
|
-
length: args.length === "short" ? "short" : "medium",
|
|
102
|
-
},
|
|
103
|
-
{
|
|
104
|
-
...options,
|
|
105
|
-
onStatus: (e: AiKitStatusEvent) => {
|
|
106
|
-
if (options.onStatus) {
|
|
107
|
-
options.onStatus({
|
|
108
|
-
...e,
|
|
109
|
-
message: "Creating final summary...",
|
|
110
|
-
});
|
|
111
|
-
}
|
|
112
|
-
},
|
|
113
|
-
},
|
|
114
|
-
isOnDevice,
|
|
115
|
-
recursionLevel + 1,
|
|
116
|
-
);
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
// Final summarization
|
|
120
|
-
return await summarize(
|
|
121
|
-
{
|
|
122
|
-
...args,
|
|
123
|
-
text: combinedSummaries,
|
|
124
|
-
length: args.length === "short" ? "short" : "medium",
|
|
125
|
-
},
|
|
126
|
-
{
|
|
127
|
-
...options,
|
|
128
|
-
onStatus: (e: AiKitStatusEvent) => {
|
|
129
|
-
if (options.onStatus) {
|
|
130
|
-
options.onStatus({
|
|
131
|
-
...e,
|
|
132
|
-
message: "Creating final summary...",
|
|
133
|
-
});
|
|
134
|
-
}
|
|
135
|
-
},
|
|
136
|
-
},
|
|
137
|
-
);
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
/**
|
|
141
|
-
* Chunked translate implementation
|
|
142
|
-
*
|
|
143
|
-
* Strategy:
|
|
144
|
-
* 1. Split text into chunks (respecting AWS Translate 10k char limit)
|
|
145
|
-
* 2. Translate each chunk sequentially
|
|
146
|
-
* 3. Join translated chunks
|
|
147
|
-
*/
|
|
148
|
-
export async function chunkedTranslate(
|
|
149
|
-
text: string,
|
|
150
|
-
args: TranslateArgs,
|
|
151
|
-
options: FeatureOptions,
|
|
152
|
-
isOnDevice: boolean,
|
|
153
|
-
): Promise<TranslateResult> {
|
|
154
|
-
const maxChunkSize = getChunkSize("translate", isOnDevice);
|
|
155
|
-
const chunks = splitTextIntoChunks(text, maxChunkSize);
|
|
156
|
-
|
|
157
|
-
if (chunks.length === 1) {
|
|
158
|
-
// No chunking needed
|
|
159
|
-
return await translate(args, options);
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
// Translate each chunk sequentially
|
|
163
|
-
const translatedChunks: string[] = [];
|
|
164
|
-
|
|
165
|
-
for (let i = 0; i < chunks.length; i++) {
|
|
166
|
-
const chunkResult = await translate(
|
|
167
|
-
{
|
|
168
|
-
...args,
|
|
169
|
-
text: chunks[i].text,
|
|
170
|
-
},
|
|
171
|
-
{
|
|
172
|
-
...options,
|
|
173
|
-
onStatus: (e: AiKitStatusEvent) => {
|
|
174
|
-
if (options.onStatus) {
|
|
175
|
-
const baseProgress =
|
|
176
|
-
typeof e.progress === "number" ? e.progress : 0;
|
|
177
|
-
const chunkProgress = (i + baseProgress) / chunks.length;
|
|
178
|
-
|
|
179
|
-
options.onStatus({
|
|
180
|
-
...e,
|
|
181
|
-
message: `Translating part ${i + 1}/${chunks.length}...`,
|
|
182
|
-
progress: chunkProgress,
|
|
183
|
-
});
|
|
184
|
-
}
|
|
185
|
-
},
|
|
186
|
-
},
|
|
187
|
-
);
|
|
188
|
-
|
|
189
|
-
translatedChunks.push(chunkResult.result);
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
// Join with paragraph breaks to maintain structure
|
|
193
|
-
return {
|
|
194
|
-
result: translatedChunks.join("\n\n"),
|
|
195
|
-
};
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
/**
|
|
199
|
-
* Chunked rewrite implementation
|
|
200
|
-
*
|
|
201
|
-
* Strategy:
|
|
202
|
-
* 1. Split text into chunks
|
|
203
|
-
* 2. Rewrite each chunk sequentially
|
|
204
|
-
* 3. Join rewritten chunks
|
|
205
|
-
*/
|
|
206
|
-
export async function chunkedRewrite(
|
|
207
|
-
text: string,
|
|
208
|
-
args: RewriteArgs,
|
|
209
|
-
options: FeatureOptions,
|
|
210
|
-
isOnDevice: boolean,
|
|
211
|
-
): Promise<RewriteResult> {
|
|
212
|
-
const maxChunkSize = getChunkSize("rewrite", isOnDevice);
|
|
213
|
-
const chunks = splitTextIntoChunks(text, maxChunkSize);
|
|
214
|
-
|
|
215
|
-
if (chunks.length === 1) {
|
|
216
|
-
// No chunking needed
|
|
217
|
-
return await rewrite(args, options);
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
// Rewrite each chunk sequentially
|
|
221
|
-
const rewrittenChunks: string[] = [];
|
|
222
|
-
|
|
223
|
-
for (let i = 0; i < chunks.length; i++) {
|
|
224
|
-
const chunkResult = await rewrite(
|
|
225
|
-
{
|
|
226
|
-
...args,
|
|
227
|
-
text: chunks[i].text,
|
|
228
|
-
},
|
|
229
|
-
{
|
|
230
|
-
...options,
|
|
231
|
-
onStatus: (e: AiKitStatusEvent) => {
|
|
232
|
-
if (options.onStatus) {
|
|
233
|
-
const baseProgress =
|
|
234
|
-
typeof e.progress === "number" ? e.progress : 0;
|
|
235
|
-
const chunkProgress = (i + baseProgress) / chunks.length;
|
|
236
|
-
|
|
237
|
-
options.onStatus({
|
|
238
|
-
...e,
|
|
239
|
-
message: `Rewriting part ${i + 1}/${chunks.length}...`,
|
|
240
|
-
progress: chunkProgress,
|
|
241
|
-
});
|
|
242
|
-
}
|
|
243
|
-
},
|
|
244
|
-
},
|
|
245
|
-
);
|
|
246
|
-
|
|
247
|
-
rewrittenChunks.push(chunkResult.result);
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
// Join with paragraph breaks
|
|
251
|
-
return {
|
|
252
|
-
result: rewrittenChunks.join("\n\n"),
|
|
253
|
-
};
|
|
254
|
-
}
|
|
package/src/ai-feature/chunking-utils.ts
DELETED
|
@@ -1,211 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Text chunking utilities for handling large inputs in AI features
|
|
3
|
-
*
|
|
4
|
-
* Chunking is needed for:
|
|
5
|
-
* - On-device models with token quotas (~8000 tokens)
|
|
6
|
-
* - AWS Translate backend (10,000 character limit)
|
|
7
|
-
*/
|
|
8
|
-
|
|
9
|
-
export interface TextChunk {
|
|
10
|
-
text: string;
|
|
11
|
-
start: number;
|
|
12
|
-
end: number;
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
/**
|
|
16
|
-
* Estimate token count from text
|
|
17
|
-
* Approximation: 1 token ≈ 3.5 characters for Hungarian text
|
|
18
|
-
*/
|
|
19
|
-
export function estimateTokenCount(text: string): number {
|
|
20
|
-
return Math.ceil(text.length / 3.5);
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* Check if input should be chunked based on mode and size
|
|
25
|
-
*/
|
|
26
|
-
export function shouldChunkInput(
|
|
27
|
-
text: string,
|
|
28
|
-
mode: "summarize" | "translate" | "rewrite" | "proofread",
|
|
29
|
-
isOnDevice: boolean,
|
|
30
|
-
): boolean {
|
|
31
|
-
const tokens = estimateTokenCount(text);
|
|
32
|
-
|
|
33
|
-
if (isOnDevice) {
|
|
34
|
-
// On-device models have token quotas
|
|
35
|
-
const quotas = {
|
|
36
|
-
summarize: 8000,
|
|
37
|
-
translate: 8000,
|
|
38
|
-
rewrite: 8000,
|
|
39
|
-
proofread: 10000, // Proofreader has higher quota
|
|
40
|
-
};
|
|
41
|
-
|
|
42
|
-
const quota = quotas[mode] || 8000;
|
|
43
|
-
// Use 80% threshold for safety (buffer for output)
|
|
44
|
-
return tokens > quota * 0.8;
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
// Backend: only AWS Translate has character limit
|
|
48
|
-
if (mode === "translate") {
|
|
49
|
-
// AWS Translate limit is 10,000 characters
|
|
50
|
-
// Use 90% threshold (9,000 chars) for safety
|
|
51
|
-
return text.length > 9000;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
// Other backends can handle large inputs
|
|
55
|
-
return false;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
/**
|
|
59
|
-
* Find the last sentence boundary before the given position
|
|
60
|
-
*/
|
|
61
|
-
function findLastSentenceBoundary(
|
|
62
|
-
text: string,
|
|
63
|
-
start: number,
|
|
64
|
-
end: number,
|
|
65
|
-
): number {
|
|
66
|
-
// Look for sentence enders: . ! ? followed by space or newline
|
|
67
|
-
let lastBoundary = -1;
|
|
68
|
-
|
|
69
|
-
for (let i = end - 1; i >= start; i--) {
|
|
70
|
-
const char = text[i];
|
|
71
|
-
const nextChar = i + 1 < text.length ? text[i + 1] : "";
|
|
72
|
-
|
|
73
|
-
if (
|
|
74
|
-
(char === "." || char === "!" || char === "?") &&
|
|
75
|
-
(nextChar === " " ||
|
|
76
|
-
nextChar === "\n" ||
|
|
77
|
-
nextChar === "\r" ||
|
|
78
|
-
i === text.length - 1)
|
|
79
|
-
) {
|
|
80
|
-
lastBoundary = i + 1;
|
|
81
|
-
break;
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
// Only accept if we found a boundary in the latter half of the chunk
|
|
86
|
-
return lastBoundary > start + (end - start) * 0.5 ? lastBoundary : -1;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
/**
|
|
90
|
-
* Find the last clause boundary (comma, semicolon, colon)
|
|
91
|
-
*/
|
|
92
|
-
function findLastClauseBoundary(
|
|
93
|
-
text: string,
|
|
94
|
-
start: number,
|
|
95
|
-
end: number,
|
|
96
|
-
): number {
|
|
97
|
-
let lastBoundary = -1;
|
|
98
|
-
|
|
99
|
-
for (let i = end - 1; i >= start; i--) {
|
|
100
|
-
const char = text[i];
|
|
101
|
-
const nextChar = i + 1 < text.length ? text[i + 1] : "";
|
|
102
|
-
|
|
103
|
-
if (
|
|
104
|
-
(char === "," || char === ";" || char === ":") &&
|
|
105
|
-
(nextChar === " " || nextChar === "\n" || nextChar === "\r")
|
|
106
|
-
) {
|
|
107
|
-
lastBoundary = i + 1;
|
|
108
|
-
break;
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
return lastBoundary > start + (end - start) * 0.5 ? lastBoundary : -1;
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
/**
|
|
116
|
-
* Split text into chunks at intelligent boundaries
|
|
117
|
-
*
|
|
118
|
-
* Priority order for splitting:
|
|
119
|
-
* 1. Paragraph breaks (\n\n)
|
|
120
|
-
* 2. Sentence endings (. ! ?)
|
|
121
|
-
* 3. Clause markers (, ; :)
|
|
122
|
-
* 4. Word boundaries (space)
|
|
123
|
-
*/
|
|
124
|
-
export function splitTextIntoChunks(
|
|
125
|
-
text: string,
|
|
126
|
-
maxCharsPerChunk: number,
|
|
127
|
-
): TextChunk[] {
|
|
128
|
-
const chunks: TextChunk[] = [];
|
|
129
|
-
let currentPos = 0;
|
|
130
|
-
|
|
131
|
-
while (currentPos < text.length) {
|
|
132
|
-
let chunkEnd = Math.min(currentPos + maxCharsPerChunk, text.length);
|
|
133
|
-
|
|
134
|
-
if (chunkEnd < text.length) {
|
|
135
|
-
// Try to split at paragraph break
|
|
136
|
-
const paragraphBreakPos = text.lastIndexOf("\n\n", chunkEnd);
|
|
137
|
-
if (paragraphBreakPos > currentPos + maxCharsPerChunk * 0.5) {
|
|
138
|
-
chunkEnd = paragraphBreakPos + 2;
|
|
139
|
-
} else {
|
|
140
|
-
// Try to split at sentence boundary
|
|
141
|
-
const sentenceEnd = findLastSentenceBoundary(
|
|
142
|
-
text,
|
|
143
|
-
currentPos,
|
|
144
|
-
chunkEnd,
|
|
145
|
-
);
|
|
146
|
-
if (sentenceEnd > 0) {
|
|
147
|
-
chunkEnd = sentenceEnd;
|
|
148
|
-
} else {
|
|
149
|
-
// Try to split at clause boundary
|
|
150
|
-
const clauseEnd = findLastClauseBoundary(text, currentPos, chunkEnd);
|
|
151
|
-
if (clauseEnd > 0) {
|
|
152
|
-
chunkEnd = clauseEnd;
|
|
153
|
-
} else {
|
|
154
|
-
// Last resort: split at word boundary
|
|
155
|
-
const wordEnd = text.lastIndexOf(" ", chunkEnd);
|
|
156
|
-
if (wordEnd > currentPos + maxCharsPerChunk * 0.5) {
|
|
157
|
-
chunkEnd = wordEnd + 1;
|
|
158
|
-
}
|
|
159
|
-
// If no good boundary found, just cut at maxCharsPerChunk
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
const chunkText = text.substring(currentPos, chunkEnd).trim();
|
|
166
|
-
if (chunkText.length > 0) {
|
|
167
|
-
chunks.push({
|
|
168
|
-
text: chunkText,
|
|
169
|
-
start: currentPos,
|
|
170
|
-
end: chunkEnd,
|
|
171
|
-
});
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
currentPos = chunkEnd;
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
return chunks;
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
/**
|
|
181
|
-
* Calculate appropriate chunk size based on mode and whether it's on-device
|
|
182
|
-
*/
|
|
183
|
-
export function getChunkSize(
|
|
184
|
-
mode: "summarize" | "translate" | "rewrite" | "proofread",
|
|
185
|
-
isOnDevice: boolean,
|
|
186
|
-
): number {
|
|
187
|
-
if (isOnDevice) {
|
|
188
|
-
// On-device: use token-based chunking
|
|
189
|
-
// Convert tokens to characters (80% of quota for safety)
|
|
190
|
-
const quotas = {
|
|
191
|
-
summarize: 8000,
|
|
192
|
-
translate: 8000,
|
|
193
|
-
rewrite: 8000,
|
|
194
|
-
proofread: 10000,
|
|
195
|
-
};
|
|
196
|
-
|
|
197
|
-
const quota = quotas[mode] || 8000;
|
|
198
|
-
const safeQuota = quota * 0.8;
|
|
199
|
-
// Convert tokens to chars (1 token ≈ 3.5 chars)
|
|
200
|
-
return Math.floor(safeQuota * 3.5);
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
// Backend: only for AWS Translate
|
|
204
|
-
if (mode === "translate") {
|
|
205
|
-
// AWS Translate: 10,000 char limit, use 9,000 for safety
|
|
206
|
-
return 9000;
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
// Should not reach here if shouldChunkInput is used correctly
|
|
210
|
-
return 10000;
|
|
211
|
-
}
|