@smart-cloud/ai-kit-ui 1.4.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +19 -9
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +19 -9
- package/package.json +5 -2
- package/src/ai-feature/AiFeature.tsx +98 -8
- package/src/ai-feature/chunked-features.ts +254 -0
- package/src/ai-feature/chunking-utils.ts +211 -0
- package/tsup.config.ts +2 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@smart-cloud/ai-kit-ui",
|
|
3
|
-
"version": "1.4.0",
|
|
3
|
+
"version": "1.4.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "./dist/index.cjs",
|
|
6
6
|
"module": "./dist/index.js",
|
|
@@ -20,17 +20,20 @@
|
|
|
20
20
|
"@emotion/cache": "^11.14.0",
|
|
21
21
|
"@emotion/react": "^11.14.0",
|
|
22
22
|
"@mantine/colors-generator": "^8.3.16",
|
|
23
|
-
"@smart-cloud/ai-kit-core": "^1.4.
|
|
23
|
+
"@smart-cloud/ai-kit-core": "^1.4.3",
|
|
24
24
|
"@smart-cloud/wpsuite-core": "^2.2.10",
|
|
25
25
|
"@tabler/icons-react": "^3.40.0",
|
|
26
26
|
"chroma-js": "^3.2.0",
|
|
27
27
|
"react-markdown": "^10.1.0",
|
|
28
|
+
"rehype-parse": "^9.0.1",
|
|
28
29
|
"rehype-raw": "^7.0.0",
|
|
30
|
+
"rehype-remark": "^10.0.0",
|
|
29
31
|
"rehype-sanitize": "^6.0.0",
|
|
30
32
|
"rehype-stringify": "^10.0.1",
|
|
31
33
|
"remark-gfm": "^4.0.1",
|
|
32
34
|
"remark-parse": "^11.0.0",
|
|
33
35
|
"remark-rehype": "^11.1.2",
|
|
36
|
+
"remark-stringify": "^11.0.0",
|
|
34
37
|
"unified": "^11.0.5"
|
|
35
38
|
},
|
|
36
39
|
"peerDependencies": {
|
|
@@ -55,6 +55,12 @@ import {
|
|
|
55
55
|
|
|
56
56
|
import { translations } from "../i18n";
|
|
57
57
|
import { PoweredBy } from "../poweredBy";
|
|
58
|
+
import { shouldChunkInput } from "./chunking-utils";
|
|
59
|
+
import {
|
|
60
|
+
chunkedSummarize,
|
|
61
|
+
chunkedTranslate,
|
|
62
|
+
chunkedRewrite,
|
|
63
|
+
} from "./chunked-features";
|
|
58
64
|
import {
|
|
59
65
|
isBackendConfigured,
|
|
60
66
|
readDefaultOutputLanguage,
|
|
@@ -472,7 +478,31 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
|
|
|
472
478
|
}, [text, defaults]);
|
|
473
479
|
|
|
474
480
|
const canGenerate = useMemo(() => {
|
|
475
|
-
|
|
481
|
+
// If inputText is a function (async or sync getText), we can't determine
|
|
482
|
+
// if it has content without calling it. Assume it's valid if provided.
|
|
483
|
+
const input = inputText;
|
|
484
|
+
if (typeof input === "function") {
|
|
485
|
+
switch (mode) {
|
|
486
|
+
case "generateImageMetadata":
|
|
487
|
+
return Boolean(image);
|
|
488
|
+
case "translate":
|
|
489
|
+
// For translate, we need outputLanguage check, but can't check text without calling getText
|
|
490
|
+
return Boolean(
|
|
491
|
+
!outputLanguage || detectedLanguage !== outputLanguage,
|
|
492
|
+
);
|
|
493
|
+
case "summarize":
|
|
494
|
+
case "proofread":
|
|
495
|
+
case "rewrite":
|
|
496
|
+
case "write":
|
|
497
|
+
case "generatePostMetadata":
|
|
498
|
+
return true; // Assume getText will provide valid content
|
|
499
|
+
default:
|
|
500
|
+
return false;
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
// If inputText is a string, check it directly
|
|
505
|
+
const text = input as string | undefined;
|
|
476
506
|
switch (mode) {
|
|
477
507
|
case "generateImageMetadata":
|
|
478
508
|
return Boolean(image);
|
|
@@ -507,8 +537,11 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
|
|
|
507
537
|
setError(null);
|
|
508
538
|
setGenerated(null);
|
|
509
539
|
|
|
540
|
+
const input = await inputText;
|
|
510
541
|
try {
|
|
511
|
-
|
|
542
|
+
// Support both sync and async getText functions
|
|
543
|
+
const text =
|
|
544
|
+
typeof input === "function" ? await Promise.resolve(input()) : input;
|
|
512
545
|
switch (mode) {
|
|
513
546
|
case "summarize": {
|
|
514
547
|
const res = await ai.run(async ({ signal, onStatus }) => {
|
|
@@ -524,13 +557,32 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
|
|
|
524
557
|
type: type as SummarizerType,
|
|
525
558
|
outputLanguage: outLang as SummarizeArgs["outputLanguage"],
|
|
526
559
|
};
|
|
527
|
-
|
|
560
|
+
|
|
561
|
+
const featureOptions: FeatureOptions = {
|
|
528
562
|
signal,
|
|
529
563
|
onStatus,
|
|
530
564
|
context,
|
|
531
565
|
modeOverride,
|
|
532
566
|
onDeviceTimeoutOverride: onDeviceTimeout,
|
|
533
|
-
}
|
|
567
|
+
};
|
|
568
|
+
|
|
569
|
+
// Determine if we're using on-device mode
|
|
570
|
+
const isOnDevice =
|
|
571
|
+
modeOverride === "local-only" ||
|
|
572
|
+
(!modeOverride && context === "admin");
|
|
573
|
+
|
|
574
|
+
// Check if chunking is needed
|
|
575
|
+
if (shouldChunkInput(text!.trim(), "summarize", isOnDevice)) {
|
|
576
|
+
return await chunkedSummarize(
|
|
577
|
+
text!.trim(),
|
|
578
|
+
args,
|
|
579
|
+
featureOptions,
|
|
580
|
+
isOnDevice,
|
|
581
|
+
);
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
// Normal single-pass summarization
|
|
585
|
+
const out = await summarize(args, featureOptions);
|
|
534
586
|
return out.result;
|
|
535
587
|
});
|
|
536
588
|
setGenerated((res as never) ?? "");
|
|
@@ -614,13 +666,32 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
|
|
|
614
666
|
sourceLanguage: inputLang!,
|
|
615
667
|
targetLanguage: outLang,
|
|
616
668
|
};
|
|
617
|
-
|
|
669
|
+
|
|
670
|
+
const featureOptions: FeatureOptions = {
|
|
618
671
|
signal,
|
|
619
672
|
onStatus,
|
|
620
673
|
context,
|
|
621
674
|
modeOverride,
|
|
622
675
|
onDeviceTimeoutOverride: onDeviceTimeout,
|
|
623
|
-
}
|
|
676
|
+
};
|
|
677
|
+
|
|
678
|
+
// Determine if we're using on-device mode
|
|
679
|
+
const isOnDevice =
|
|
680
|
+
modeOverride === "local-only" ||
|
|
681
|
+
(!modeOverride && context === "admin");
|
|
682
|
+
|
|
683
|
+
// Check if chunking is needed (both on-device quota and AWS Translate limit)
|
|
684
|
+
if (shouldChunkInput(text!.trim(), "translate", isOnDevice)) {
|
|
685
|
+
return await chunkedTranslate(
|
|
686
|
+
text!.trim(),
|
|
687
|
+
args,
|
|
688
|
+
featureOptions,
|
|
689
|
+
isOnDevice,
|
|
690
|
+
);
|
|
691
|
+
}
|
|
692
|
+
|
|
693
|
+
// Normal single-pass translation
|
|
694
|
+
const out = await translate(args, featureOptions);
|
|
624
695
|
return out.result;
|
|
625
696
|
});
|
|
626
697
|
setGenerated((res as never) ?? "");
|
|
@@ -652,13 +723,32 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
|
|
|
652
723
|
length: length as RewriterLength,
|
|
653
724
|
outputLanguage: outLang as RewriteArgs["outputLanguage"],
|
|
654
725
|
};
|
|
655
|
-
|
|
726
|
+
|
|
727
|
+
const featureOptions: FeatureOptions = {
|
|
656
728
|
signal,
|
|
657
729
|
onStatus,
|
|
658
730
|
context,
|
|
659
731
|
modeOverride,
|
|
660
732
|
onDeviceTimeoutOverride: onDeviceTimeout,
|
|
661
|
-
}
|
|
733
|
+
};
|
|
734
|
+
|
|
735
|
+
// Determine if we're using on-device mode
|
|
736
|
+
const isOnDevice =
|
|
737
|
+
modeOverride === "local-only" ||
|
|
738
|
+
(!modeOverride && context === "admin");
|
|
739
|
+
|
|
740
|
+
// Check if chunking is needed
|
|
741
|
+
if (shouldChunkInput(text!.trim(), "rewrite", isOnDevice)) {
|
|
742
|
+
return await chunkedRewrite(
|
|
743
|
+
text!.trim(),
|
|
744
|
+
args,
|
|
745
|
+
featureOptions,
|
|
746
|
+
isOnDevice,
|
|
747
|
+
);
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
// Normal single-pass rewrite
|
|
751
|
+
const out = await rewrite(args, featureOptions);
|
|
662
752
|
return out.result;
|
|
663
753
|
});
|
|
664
754
|
setGenerated((res as never) ?? "");
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunked versions of AI features for handling large inputs
|
|
3
|
+
*
|
|
4
|
+
* These wrappers split large inputs into smaller chunks, process them
|
|
5
|
+
* sequentially, and combine the results.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type {
|
|
9
|
+
AiKitStatusEvent,
|
|
10
|
+
FeatureOptions,
|
|
11
|
+
SummarizeArgs,
|
|
12
|
+
SummarizeResult,
|
|
13
|
+
TranslateArgs,
|
|
14
|
+
TranslateResult,
|
|
15
|
+
RewriteArgs,
|
|
16
|
+
RewriteResult,
|
|
17
|
+
} from "@smart-cloud/ai-kit-core";
|
|
18
|
+
import { summarize, translate, rewrite } from "@smart-cloud/ai-kit-core";
|
|
19
|
+
import {
|
|
20
|
+
splitTextIntoChunks,
|
|
21
|
+
getChunkSize,
|
|
22
|
+
estimateTokenCount,
|
|
23
|
+
} from "./chunking-utils";
|
|
24
|
+
|
|
25
|
+
/**
 * Summarizes a text that is too large for a single `summarize` call.
 *
 * Strategy:
 * 1. Split `text` into chunks sized by `getChunkSize("summarize", isOnDevice)`.
 * 2. Summarize every chunk sequentially, reporting chunk-aware progress.
 * 3. Join the chunk summaries with blank lines. If the joined summaries are
 *    still larger than one chunk, run this function again on them (one level
 *    deeper); otherwise produce the final summary with a single call.
 *
 * @param text - Text to summarize. When it fits into a single chunk the call
 *   is forwarded unchanged as `summarize(args, options)`, so `args.text` is
 *   expected to carry the same content.
 * @param args - Summarizer arguments; `text` is replaced per chunk, and
 *   `length` is reduced to at most "medium" when condensing chunk summaries.
 * @param options - Feature options; `onStatus` (if present) receives events
 *   with a chunk-aware `message` and, for per-chunk events, a rescaled
 *   `progress`.
 * @param isOnDevice - Whether the on-device chunk size applies.
 * @param recursionLevel - Internal recursion depth; callers should omit it.
 * @returns The result of the final `summarize` call.
 * @throws Error when the text still needs chunking at recursion level 2.
 */
export async function chunkedSummarize(
  text: string,
  args: SummarizeArgs,
  options: FeatureOptions,
  isOnDevice: boolean,
  recursionLevel: number = 0,
): Promise<SummarizeResult> {
  const maxChunkSize = getChunkSize("summarize", isOnDevice);
  const chunks = splitTextIntoChunks(text, maxChunkSize);

  if (chunks.length === 1) {
    // No chunking needed
    return await summarize(args, options);
  }

  // Prevent infinite recursion (max 2 levels)
  if (recursionLevel >= 2) {
    throw new Error(
      "Text is too large to summarize. Please try using backend mode or reduce the input size.",
    );
  }

  // Phase 1: summarize each chunk, one after another.
  const chunkSummaries: string[] = [];

  for (let i = 0; i < chunks.length; i++) {
    const chunkResult = await summarize(
      {
        ...args,
        text: chunks[i].text,
      },
      {
        ...options,
        onStatus: (e: AiKitStatusEvent) => {
          if (options.onStatus) {
            // Map the chunk-local progress onto the overall progress:
            // chunk i covers the range [i / n, (i + 1) / n].
            const baseProgress =
              typeof e.progress === "number" ? e.progress : 0;
            const chunkProgress = (i + baseProgress) / chunks.length;

            options.onStatus({
              ...e,
              message:
                recursionLevel === 0
                  ? `Summarizing part ${i + 1}/${chunks.length}...`
                  : `Combining summaries (${i + 1}/${chunks.length})...`,
              progress: chunkProgress,
            });
          }
        },
      },
    );

    chunkSummaries.push(chunkResult.result);
  }

  // Phase 2: condense the chunk summaries.
  const combinedSummaries = chunkSummaries.join("\n\n");

  // `text` must point at the combined summaries. Previously the recursive
  // call kept the original (oversized) `args.text`, which its single-chunk
  // path would then have summarized instead of the combined summaries.
  const condensedArgs: SummarizeArgs = {
    ...args,
    text: combinedSummaries,
    // A "long" summary of summaries would defeat the purpose of condensing.
    length: args.length === "short" ? "short" : "medium",
  };

  // Still too large for a single pass? Run another round of chunking.
  if (estimateTokenCount(combinedSummaries) > maxChunkSize / 3.5) {
    // The original `options` are passed on purpose: the nested call builds
    // its own status messages. Wrapping `onStatus` here would overwrite the
    // nested "Combining summaries (i/n)..." messages.
    return await chunkedSummarize(
      combinedSummaries,
      condensedArgs,
      options,
      isOnDevice,
      recursionLevel + 1,
    );
  }

  // Final summarization
  return await summarize(condensedArgs, {
    ...options,
    onStatus: (e: AiKitStatusEvent) => {
      if (options.onStatus) {
        options.onStatus({
          ...e,
          message: "Creating final summary...",
        });
      }
    },
  });
}
|
|
139
|
+
|
|
140
|
+
/**
 * Translates a text that is too large for a single `translate` call.
 *
 * Strategy:
 * 1. Split `text` into chunks sized by `getChunkSize("translate", isOnDevice)`.
 * 2. Translate each chunk sequentially, reporting chunk-aware progress.
 * 3. Re-assemble the translated chunks using the whitespace that separated
 *    them in the source text.
 *
 * @param text - Text to translate. When it fits into a single chunk the call
 *   is forwarded unchanged as `translate(args, options)`, so `args.text` is
 *   expected to carry the same content.
 * @param args - Translator arguments; `text` is replaced per chunk.
 * @param options - Feature options; `onStatus` (if present) receives events
 *   with a "Translating part i/n..." message and a rescaled `progress`.
 * @param isOnDevice - Whether the on-device chunk size applies.
 * @returns The re-assembled translation.
 */
export async function chunkedTranslate(
  text: string,
  args: TranslateArgs,
  options: FeatureOptions,
  isOnDevice: boolean,
): Promise<TranslateResult> {
  const maxChunkSize = getChunkSize("translate", isOnDevice);
  const chunks = splitTextIntoChunks(text, maxChunkSize);

  if (chunks.length === 1) {
    // No chunking needed
    return await translate(args, options);
  }

  let result = "";
  // Index in `text` just past the previous chunk's trimmed content.
  let previousContentEnd = 0;

  // Translate each chunk sequentially
  for (let i = 0; i < chunks.length; i++) {
    const chunk = chunks[i];

    const chunkResult = await translate(
      {
        ...args,
        text: chunk.text,
      },
      {
        ...options,
        onStatus: (e: AiKitStatusEvent) => {
          if (options.onStatus) {
            // Chunk i covers the overall progress range [i / n, (i + 1) / n].
            const baseProgress =
              typeof e.progress === "number" ? e.progress : 0;
            const chunkProgress = (i + baseProgress) / chunks.length;

            options.onStatus({
              ...e,
              message: `Translating part ${i + 1}/${chunks.length}...`,
              progress: chunkProgress,
            });
          }
        },
      },
    );

    // `chunk.text` is the trimmed content of text[chunk.start, chunk.end).
    // It starts with a non-whitespace character and everything between
    // `chunk.start` and it is whitespace, so the first match at or after
    // `chunk.start` is exactly where the content begins.
    const contentStart = text.indexOf(chunk.text, chunk.start);

    if (i > 0) {
      // Re-insert the original separator (space, newline, blank line, or
      // nothing). Joining with a fixed "\n\n" introduced paragraph breaks
      // wherever a chunk happened to end at a sentence or word boundary.
      result += text.substring(previousContentEnd, contentStart);
    }

    result += chunkResult.result;
    previousContentEnd = contentStart + chunk.text.length;
  }

  return { result };
}
|
|
197
|
+
|
|
198
|
+
/**
 * Rewrites a text that is too large for a single `rewrite` call.
 *
 * Strategy:
 * 1. Split `text` into chunks sized by `getChunkSize("rewrite", isOnDevice)`.
 * 2. Rewrite each chunk sequentially, reporting chunk-aware progress.
 * 3. Re-assemble the rewritten chunks using the whitespace that separated
 *    them in the source text.
 *
 * @param text - Text to rewrite. When it fits into a single chunk the call is
 *   forwarded unchanged as `rewrite(args, options)`, so `args.text` is
 *   expected to carry the same content.
 * @param args - Rewriter arguments; `text` is replaced per chunk.
 * @param options - Feature options; `onStatus` (if present) receives events
 *   with a "Rewriting part i/n..." message and a rescaled `progress`.
 * @param isOnDevice - Whether the on-device chunk size applies.
 * @returns The re-assembled rewritten text.
 */
export async function chunkedRewrite(
  text: string,
  args: RewriteArgs,
  options: FeatureOptions,
  isOnDevice: boolean,
): Promise<RewriteResult> {
  const maxChunkSize = getChunkSize("rewrite", isOnDevice);
  const chunks = splitTextIntoChunks(text, maxChunkSize);

  if (chunks.length === 1) {
    // No chunking needed
    return await rewrite(args, options);
  }

  let result = "";
  // Index in `text` just past the previous chunk's trimmed content.
  let previousContentEnd = 0;

  // Rewrite each chunk sequentially
  for (let i = 0; i < chunks.length; i++) {
    const chunk = chunks[i];

    const chunkResult = await rewrite(
      {
        ...args,
        text: chunk.text,
      },
      {
        ...options,
        onStatus: (e: AiKitStatusEvent) => {
          if (options.onStatus) {
            // Chunk i covers the overall progress range [i / n, (i + 1) / n].
            const baseProgress =
              typeof e.progress === "number" ? e.progress : 0;
            const chunkProgress = (i + baseProgress) / chunks.length;

            options.onStatus({
              ...e,
              message: `Rewriting part ${i + 1}/${chunks.length}...`,
              progress: chunkProgress,
            });
          }
        },
      },
    );

    // `chunk.text` is the trimmed content of text[chunk.start, chunk.end).
    // It starts with a non-whitespace character and everything between
    // `chunk.start` and it is whitespace, so the first match at or after
    // `chunk.start` is exactly where the content begins.
    const contentStart = text.indexOf(chunk.text, chunk.start);

    if (i > 0) {
      // Re-insert the original separator (space, newline, blank line, or
      // nothing). Joining with a fixed "\n\n" introduced paragraph breaks
      // wherever a chunk happened to end at a sentence or word boundary.
      result += text.substring(previousContentEnd, contentStart);
    }

    result += chunkResult.result;
    previousContentEnd = contentStart + chunk.text.length;
  }

  return { result };
}
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text chunking utilities for handling large inputs in AI features
|
|
3
|
+
*
|
|
4
|
+
* Chunking is needed for:
|
|
5
|
+
* - On-device models with token quotas (~8000 tokens)
|
|
6
|
+
* - AWS Translate backend (10,000 character limit)
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
 * One piece of a larger text, as produced by `splitTextIntoChunks`.
 */
export interface TextChunk {
  /** Chunk content with surrounding whitespace trimmed. */
  text: string;
  /** Offset in the source text where this chunk's (untrimmed) span begins. */
  start: number;
  /** Offset in the source text just past this chunk's (untrimmed) span. */
  end: number;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Roughly estimates how many model tokens a piece of text occupies.
 *
 * Heuristic: one token per 3.5 characters (the ratio assumed for Hungarian
 * text), rounded up to the next whole token.
 *
 * @param text - Text to measure.
 * @returns Estimated token count; `0` for an empty string.
 */
export function estimateTokenCount(text: string): number {
  const charsPerToken = 3.5;
  const fractionalTokens = text.length / charsPerToken;
  return Math.ceil(fractionalTokens);
}
|
|
22
|
+
|
|
23
|
+
/**
 * Decides whether an input is too large to be processed in a single call.
 *
 * - On-device: the estimated token count is compared against 80% of the
 *   per-mode token quota (the remainder is a buffer for the output).
 * - Backend: only "translate" is limited, at 9,000 characters (90% of the
 *   10,000 limit assumed for AWS Translate). Every other mode is never
 *   chunked.
 *
 * NOTE(review): AWS documents the TranslateText limit in bytes, not
 * characters, so non-ASCII text may hit it before 9,000 characters — confirm
 * against the backend.
 *
 * @param text - Input text to measure.
 * @param mode - Feature the text is going to be processed with.
 * @param isOnDevice - Whether the on-device quotas apply.
 * @returns `true` when the input should be split into chunks.
 */
export function shouldChunkInput(
  text: string,
  mode: "summarize" | "translate" | "rewrite" | "proofread",
  isOnDevice: boolean,
): boolean {
  if (!isOnDevice) {
    // Backend: only the translate path has a size limit.
    const backendTranslateCharLimit = 9000;
    return mode === "translate"
      ? text.length > backendTranslateCharLimit
      : false;
  }

  // On-device token quotas per feature (proofreader has a higher quota).
  const tokenQuotaByMode = {
    summarize: 8000,
    translate: 8000,
    rewrite: 8000,
    proofread: 10000,
  };

  const usableTokens = (tokenQuotaByMode[mode] || 8000) * 0.8;
  return estimateTokenCount(text) > usableTokens;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Searches backwards from `end` for the closest sentence boundary.
 *
 * A boundary is the position right after `.`, `!` or `?` when that character
 * is followed by a space, `\n` or `\r`, or is the very last character of
 * `text`.
 *
 * @param text - Full source text.
 * @param start - First index that belongs to the current chunk.
 * @param end - Index just past the last character to inspect.
 * @returns The boundary position, or `-1` when there is none or when the
 *   closest one does not lie in the latter half of `[start, end)`.
 */
function findLastSentenceBoundary(
  text: string,
  start: number,
  end: number,
): number {
  const lastIndex = text.length - 1;

  for (let index = end - 1; index >= start; index -= 1) {
    const current = text[index];
    const isTerminator = current === "." || current === "!" || current === "?";
    if (!isTerminator) {
      continue;
    }

    const following = index < lastIndex ? text[index + 1] : "";
    const endsSentence =
      index === lastIndex ||
      following === " " ||
      following === "\n" ||
      following === "\r";

    if (endsSentence) {
      // Only the boundary closest to `end` is considered; it is rejected if
      // it would leave the chunk less than half full.
      const boundary = index + 1;
      return boundary > start + (end - start) * 0.5 ? boundary : -1;
    }
  }

  return -1;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Searches backwards from `end` for the closest clause boundary.
 *
 * A boundary is the position right after `,`, `;` or `:` when that character
 * is followed by a space, `\n` or `\r`.
 *
 * @param text - Full source text.
 * @param start - First index that belongs to the current chunk.
 * @param end - Index just past the last character to inspect.
 * @returns The boundary position, or `-1` when there is none or when the
 *   closest one does not lie in the latter half of `[start, end)`.
 */
function findLastClauseBoundary(
  text: string,
  start: number,
  end: number,
): number {
  const lastIndex = text.length - 1;

  for (let index = end - 1; index >= start; index -= 1) {
    const current = text[index];
    const isClauseMark = current === "," || current === ";" || current === ":";
    if (!isClauseMark) {
      continue;
    }

    const following = index < lastIndex ? text[index + 1] : "";
    const followedByWhitespace =
      following === " " || following === "\n" || following === "\r";

    if (followedByWhitespace) {
      // Only the boundary closest to `end` is considered; it is rejected if
      // it would leave the chunk less than half full.
      const boundary = index + 1;
      return boundary > start + (end - start) * 0.5 ? boundary : -1;
    }
  }

  return -1;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Picks the end offset for a chunk that starts at `chunkStart` and may not
 * extend past `hardLimit`, preferring (in order) a paragraph break, a
 * sentence end, a clause marker and a word boundary.
 */
function chooseChunkEnd(
  text: string,
  chunkStart: number,
  hardLimit: number,
  maxCharsPerChunk: number,
): number {
  // Paragraph and word boundaries are only accepted when they leave the
  // chunk at least half full.
  const minimumEnd = chunkStart + maxCharsPerChunk * 0.5;

  const paragraphBreakPos = text.lastIndexOf("\n\n", hardLimit);
  if (paragraphBreakPos > minimumEnd) {
    return paragraphBreakPos + 2; // keep the blank line with this chunk
  }

  const sentenceEnd = findLastSentenceBoundary(text, chunkStart, hardLimit);
  if (sentenceEnd > 0) {
    return sentenceEnd;
  }

  const clauseEnd = findLastClauseBoundary(text, chunkStart, hardLimit);
  if (clauseEnd > 0) {
    return clauseEnd;
  }

  const wordEnd = text.lastIndexOf(" ", hardLimit);
  if (wordEnd > minimumEnd) {
    return wordEnd + 1;
  }

  // No good boundary found: cut at the size limit.
  return hardLimit;
}

/**
 * Splits text into chunks of roughly `maxCharsPerChunk` characters, cutting
 * at the most natural boundary available.
 *
 * Priority order for splitting:
 * 1. Paragraph breaks (\n\n)
 * 2. Sentence endings (. ! ?)
 * 3. Clause markers (, ; :)
 * 4. Word boundaries (space)
 * 5. A hard cut at the size limit
 *
 * Each chunk's `text` is trimmed; spans that contain only whitespace produce
 * no chunk. `start`/`end` describe the untrimmed span in `text`.
 *
 * @param text - Text to split.
 * @param maxCharsPerChunk - Size limit per chunk in characters; must be at
 *   least 1.
 * @returns The chunks in source order; empty when `text` is empty or
 *   whitespace-only.
 * @throws RangeError when `maxCharsPerChunk` is NaN or smaller than 1. A
 *   value of 0 used to loop forever because the position never advanced.
 */
export function splitTextIntoChunks(
  text: string,
  maxCharsPerChunk: number,
): TextChunk[] {
  if (!(maxCharsPerChunk >= 1)) {
    throw new RangeError(
      `maxCharsPerChunk must be at least 1, received ${maxCharsPerChunk}`,
    );
  }

  const chunks: TextChunk[] = [];
  let currentPos = 0;

  while (currentPos < text.length) {
    const hardLimit = Math.min(currentPos + maxCharsPerChunk, text.length);

    // The final piece is taken as-is; only earlier pieces need a boundary.
    const chunkEnd =
      hardLimit < text.length
        ? chooseChunkEnd(text, currentPos, hardLimit, maxCharsPerChunk)
        : hardLimit;

    const chunkText = text.substring(currentPos, chunkEnd).trim();
    if (chunkText.length > 0) {
      chunks.push({
        text: chunkText,
        start: currentPos,
        end: chunkEnd,
      });
    }

    currentPos = chunkEnd;
  }

  return chunks;
}
|
|
179
|
+
|
|
180
|
+
/**
 * Returns the maximum chunk size, in characters, for a feature.
 *
 * - On-device: 80% of the per-mode token quota, converted to characters at
 *   3.5 characters per token and rounded down.
 * - Backend "translate": 9,000 characters (safety margin below the 10,000
 *   limit assumed for AWS Translate).
 * - Any other backend mode: 10,000 characters. This is a fallback only, as
 *   `shouldChunkInput` never requests chunking for those modes.
 *
 * @param mode - Feature the chunks are going to be processed with.
 * @param isOnDevice - Whether the on-device quotas apply.
 * @returns Chunk size in characters.
 */
export function getChunkSize(
  mode: "summarize" | "translate" | "rewrite" | "proofread",
  isOnDevice: boolean,
): number {
  if (!isOnDevice) {
    return mode === "translate" ? 9000 : 10000;
  }

  // On-device token quotas per feature.
  const tokenQuotaByMode = {
    summarize: 8000,
    translate: 8000,
    rewrite: 8000,
    proofread: 10000,
  };

  // Keep 20% of the quota free, then convert tokens to characters.
  const usableTokens = (tokenQuotaByMode[mode] || 8000) * 0.8;
  return Math.floor(usableTokens * 3.5);
}
|
package/tsup.config.ts
CHANGED
|
@@ -2,7 +2,8 @@ import { defineConfig } from "tsup";
|
|
|
2
2
|
|
|
3
3
|
export default defineConfig({
|
|
4
4
|
// Copy non-hashed global CSS so consumers can import it (like Mantine styles)
|
|
5
|
-
onSuccess:
|
|
5
|
+
onSuccess:
|
|
6
|
+
"node -e \"const fs=require('fs'); const path=require('path'); fs.mkdirSync('dist',{recursive:true}); fs.copyFileSync(path.join('src','styles','ai-kit-ui.css'), path.join('dist','ai-kit-ui.css'));\"",
|
|
6
7
|
|
|
7
8
|
entry: ["src/index.tsx"],
|
|
8
9
|
format: ["cjs", "esm"],
|