@smart-cloud/ai-kit-ui 1.4.0 → 1.4.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@smart-cloud/ai-kit-ui",
3
- "version": "1.4.0",
3
+ "version": "1.4.1",
4
4
  "type": "module",
5
5
  "main": "./dist/index.cjs",
6
6
  "module": "./dist/index.js",
@@ -20,17 +20,20 @@
20
20
  "@emotion/cache": "^11.14.0",
21
21
  "@emotion/react": "^11.14.0",
22
22
  "@mantine/colors-generator": "^8.3.16",
23
- "@smart-cloud/ai-kit-core": "^1.4.2",
23
+ "@smart-cloud/ai-kit-core": "^1.4.3",
24
24
  "@smart-cloud/wpsuite-core": "^2.2.10",
25
25
  "@tabler/icons-react": "^3.40.0",
26
26
  "chroma-js": "^3.2.0",
27
27
  "react-markdown": "^10.1.0",
28
+ "rehype-parse": "^9.0.1",
28
29
  "rehype-raw": "^7.0.0",
30
+ "rehype-remark": "^10.0.0",
29
31
  "rehype-sanitize": "^6.0.0",
30
32
  "rehype-stringify": "^10.0.1",
31
33
  "remark-gfm": "^4.0.1",
32
34
  "remark-parse": "^11.0.0",
33
35
  "remark-rehype": "^11.1.2",
36
+ "remark-stringify": "^11.0.0",
34
37
  "unified": "^11.0.5"
35
38
  },
36
39
  "peerDependencies": {
@@ -55,6 +55,12 @@ import {
55
55
 
56
56
  import { translations } from "../i18n";
57
57
  import { PoweredBy } from "../poweredBy";
58
+ import { shouldChunkInput } from "./chunking-utils";
59
+ import {
60
+ chunkedSummarize,
61
+ chunkedTranslate,
62
+ chunkedRewrite,
63
+ } from "./chunked-features";
58
64
  import {
59
65
  isBackendConfigured,
60
66
  readDefaultOutputLanguage,
@@ -472,7 +478,31 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
472
478
  }, [text, defaults]);
473
479
 
474
480
  const canGenerate = useMemo(() => {
475
- const text = typeof inputText === "function" ? inputText() : inputText;
481
+ // If inputText is a function (async or sync getText), we can't determine
482
+ // if it has content without calling it. Assume it's valid if provided.
483
+ const input = inputText;
484
+ if (typeof input === "function") {
485
+ switch (mode) {
486
+ case "generateImageMetadata":
487
+ return Boolean(image);
488
+ case "translate":
489
+ // For translate, we need outputLanguage check, but can't check text without calling getText
490
+ return Boolean(
491
+ !outputLanguage || detectedLanguage !== outputLanguage,
492
+ );
493
+ case "summarize":
494
+ case "proofread":
495
+ case "rewrite":
496
+ case "write":
497
+ case "generatePostMetadata":
498
+ return true; // Assume getText will provide valid content
499
+ default:
500
+ return false;
501
+ }
502
+ }
503
+
504
+ // If inputText is a string, check it directly
505
+ const text = input as string | undefined;
476
506
  switch (mode) {
477
507
  case "generateImageMetadata":
478
508
  return Boolean(image);
@@ -507,8 +537,11 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
507
537
  setError(null);
508
538
  setGenerated(null);
509
539
 
540
+ const input = await inputText;
510
541
  try {
511
- const text = typeof inputText === "function" ? inputText() : inputText;
542
+ // Support both sync and async getText functions
543
+ const text =
544
+ typeof input === "function" ? await Promise.resolve(input()) : input;
512
545
  switch (mode) {
513
546
  case "summarize": {
514
547
  const res = await ai.run(async ({ signal, onStatus }) => {
@@ -524,13 +557,32 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
524
557
  type: type as SummarizerType,
525
558
  outputLanguage: outLang as SummarizeArgs["outputLanguage"],
526
559
  };
527
- const out = await summarize(args, {
560
+
561
+ const featureOptions: FeatureOptions = {
528
562
  signal,
529
563
  onStatus,
530
564
  context,
531
565
  modeOverride,
532
566
  onDeviceTimeoutOverride: onDeviceTimeout,
533
- });
567
+ };
568
+
569
+ // Determine if we're using on-device mode
570
+ const isOnDevice =
571
+ modeOverride === "local-only" ||
572
+ (!modeOverride && context === "admin");
573
+
574
+ // Check if chunking is needed
575
+ if (shouldChunkInput(text!.trim(), "summarize", isOnDevice)) {
576
+ return await chunkedSummarize(
577
+ text!.trim(),
578
+ args,
579
+ featureOptions,
580
+ isOnDevice,
581
+ );
582
+ }
583
+
584
+ // Normal single-pass summarization
585
+ const out = await summarize(args, featureOptions);
534
586
  return out.result;
535
587
  });
536
588
  setGenerated((res as never) ?? "");
@@ -614,13 +666,32 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
614
666
  sourceLanguage: inputLang!,
615
667
  targetLanguage: outLang,
616
668
  };
617
- const out = await translate(args, {
669
+
670
+ const featureOptions: FeatureOptions = {
618
671
  signal,
619
672
  onStatus,
620
673
  context,
621
674
  modeOverride,
622
675
  onDeviceTimeoutOverride: onDeviceTimeout,
623
- });
676
+ };
677
+
678
+ // Determine if we're using on-device mode
679
+ const isOnDevice =
680
+ modeOverride === "local-only" ||
681
+ (!modeOverride && context === "admin");
682
+
683
+ // Check if chunking is needed (both on-device quota and AWS Translate limit)
684
+ if (shouldChunkInput(text!.trim(), "translate", isOnDevice)) {
685
+ return await chunkedTranslate(
686
+ text!.trim(),
687
+ args,
688
+ featureOptions,
689
+ isOnDevice,
690
+ );
691
+ }
692
+
693
+ // Normal single-pass translation
694
+ const out = await translate(args, featureOptions);
624
695
  return out.result;
625
696
  });
626
697
  setGenerated((res as never) ?? "");
@@ -652,13 +723,32 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
652
723
  length: length as RewriterLength,
653
724
  outputLanguage: outLang as RewriteArgs["outputLanguage"],
654
725
  };
655
- const out = await rewrite(args, {
726
+
727
+ const featureOptions: FeatureOptions = {
656
728
  signal,
657
729
  onStatus,
658
730
  context,
659
731
  modeOverride,
660
732
  onDeviceTimeoutOverride: onDeviceTimeout,
661
- });
733
+ };
734
+
735
+ // Determine if we're using on-device mode
736
+ const isOnDevice =
737
+ modeOverride === "local-only" ||
738
+ (!modeOverride && context === "admin");
739
+
740
+ // Check if chunking is needed
741
+ if (shouldChunkInput(text!.trim(), "rewrite", isOnDevice)) {
742
+ return await chunkedRewrite(
743
+ text!.trim(),
744
+ args,
745
+ featureOptions,
746
+ isOnDevice,
747
+ );
748
+ }
749
+
750
+ // Normal single-pass rewrite
751
+ const out = await rewrite(args, featureOptions);
662
752
  return out.result;
663
753
  });
664
754
  setGenerated((res as never) ?? "");
@@ -0,0 +1,254 @@
1
+ /**
2
+ * Chunked versions of AI features for handling large inputs
3
+ *
4
+ * These wrappers split large inputs into smaller chunks, process them
5
+ * sequentially, and combine the results.
6
+ */
7
+
8
+ import type {
9
+ AiKitStatusEvent,
10
+ FeatureOptions,
11
+ SummarizeArgs,
12
+ SummarizeResult,
13
+ TranslateArgs,
14
+ TranslateResult,
15
+ RewriteArgs,
16
+ RewriteResult,
17
+ } from "@smart-cloud/ai-kit-core";
18
+ import { summarize, translate, rewrite } from "@smart-cloud/ai-kit-core";
19
+ import {
20
+ splitTextIntoChunks,
21
+ getChunkSize,
22
+ estimateTokenCount,
23
+ } from "./chunking-utils";
24
+
25
+ /**
26
+ * Chunked summarize implementation
27
+ *
28
+ * Strategy:
29
+ * 1. Split text into chunks
30
+ * 2. Summarize each chunk
31
+ * 3. If combined summaries are still large, recursively summarize them
32
+ * 4. Return final summary
33
+ */
34
+ export async function chunkedSummarize(
35
+ text: string,
36
+ args: SummarizeArgs,
37
+ options: FeatureOptions,
38
+ isOnDevice: boolean,
39
+ recursionLevel: number = 0,
40
+ ): Promise<SummarizeResult> {
41
+ const maxChunkSize = getChunkSize("summarize", isOnDevice);
42
+ const chunks = splitTextIntoChunks(text, maxChunkSize);
43
+
44
+ if (chunks.length === 1) {
45
+ // No chunking needed
46
+ return await summarize(args, options);
47
+ }
48
+
49
+ // Prevent infinite recursion (max 2 levels)
50
+ if (recursionLevel >= 2) {
51
+ throw new Error(
52
+ "Text is too large to summarize. Please try using backend mode or reduce the input size.",
53
+ );
54
+ }
55
+
56
+ // Phase 1: Summarize each chunk
57
+ const chunkSummaries: string[] = [];
58
+
59
+ for (let i = 0; i < chunks.length; i++) {
60
+ const chunkResult = await summarize(
61
+ {
62
+ ...args,
63
+ text: chunks[i].text,
64
+ },
65
+ {
66
+ ...options,
67
+ onStatus: (e: AiKitStatusEvent) => {
68
+ if (options.onStatus) {
69
+ // Modify progress to reflect chunking
70
+ const baseProgress =
71
+ typeof e.progress === "number" ? e.progress : 0;
72
+ const chunkProgress = (i + baseProgress) / chunks.length;
73
+
74
+ options.onStatus({
75
+ ...e,
76
+ message:
77
+ recursionLevel === 0
78
+ ? `Summarizing part ${i + 1}/${chunks.length}...`
79
+ : `Combining summaries (${i + 1}/${chunks.length})...`,
80
+ progress: chunkProgress,
81
+ });
82
+ }
83
+ },
84
+ },
85
+ );
86
+
87
+ chunkSummaries.push(chunkResult.result);
88
+ }
89
+
90
+ // Phase 2: Combine summaries
91
+ const combinedSummaries = chunkSummaries.join("\n\n");
92
+
93
+ // Check if we need another round of summarization
94
+ if (estimateTokenCount(combinedSummaries) > maxChunkSize / 3.5) {
95
+ // Recursively summarize
96
+ return await chunkedSummarize(
97
+ combinedSummaries,
98
+ {
99
+ ...args,
100
+ // Adjust length for recursive summarization
101
+ length: args.length === "short" ? "short" : "medium",
102
+ },
103
+ {
104
+ ...options,
105
+ onStatus: (e: AiKitStatusEvent) => {
106
+ if (options.onStatus) {
107
+ options.onStatus({
108
+ ...e,
109
+ message: "Creating final summary...",
110
+ });
111
+ }
112
+ },
113
+ },
114
+ isOnDevice,
115
+ recursionLevel + 1,
116
+ );
117
+ }
118
+
119
+ // Final summarization
120
+ return await summarize(
121
+ {
122
+ ...args,
123
+ text: combinedSummaries,
124
+ length: args.length === "short" ? "short" : "medium",
125
+ },
126
+ {
127
+ ...options,
128
+ onStatus: (e: AiKitStatusEvent) => {
129
+ if (options.onStatus) {
130
+ options.onStatus({
131
+ ...e,
132
+ message: "Creating final summary...",
133
+ });
134
+ }
135
+ },
136
+ },
137
+ );
138
+ }
139
+
140
+ /**
141
+ * Chunked translate implementation
142
+ *
143
+ * Strategy:
144
+ * 1. Split text into chunks (respecting AWS Translate 10k char limit)
145
+ * 2. Translate each chunk sequentially
146
+ * 3. Join translated chunks
147
+ */
148
+ export async function chunkedTranslate(
149
+ text: string,
150
+ args: TranslateArgs,
151
+ options: FeatureOptions,
152
+ isOnDevice: boolean,
153
+ ): Promise<TranslateResult> {
154
+ const maxChunkSize = getChunkSize("translate", isOnDevice);
155
+ const chunks = splitTextIntoChunks(text, maxChunkSize);
156
+
157
+ if (chunks.length === 1) {
158
+ // No chunking needed
159
+ return await translate(args, options);
160
+ }
161
+
162
+ // Translate each chunk sequentially
163
+ const translatedChunks: string[] = [];
164
+
165
+ for (let i = 0; i < chunks.length; i++) {
166
+ const chunkResult = await translate(
167
+ {
168
+ ...args,
169
+ text: chunks[i].text,
170
+ },
171
+ {
172
+ ...options,
173
+ onStatus: (e: AiKitStatusEvent) => {
174
+ if (options.onStatus) {
175
+ const baseProgress =
176
+ typeof e.progress === "number" ? e.progress : 0;
177
+ const chunkProgress = (i + baseProgress) / chunks.length;
178
+
179
+ options.onStatus({
180
+ ...e,
181
+ message: `Translating part ${i + 1}/${chunks.length}...`,
182
+ progress: chunkProgress,
183
+ });
184
+ }
185
+ },
186
+ },
187
+ );
188
+
189
+ translatedChunks.push(chunkResult.result);
190
+ }
191
+
192
+ // Join with paragraph breaks to maintain structure
193
+ return {
194
+ result: translatedChunks.join("\n\n"),
195
+ };
196
+ }
197
+
198
+ /**
199
+ * Chunked rewrite implementation
200
+ *
201
+ * Strategy:
202
+ * 1. Split text into chunks
203
+ * 2. Rewrite each chunk sequentially
204
+ * 3. Join rewritten chunks
205
+ */
206
+ export async function chunkedRewrite(
207
+ text: string,
208
+ args: RewriteArgs,
209
+ options: FeatureOptions,
210
+ isOnDevice: boolean,
211
+ ): Promise<RewriteResult> {
212
+ const maxChunkSize = getChunkSize("rewrite", isOnDevice);
213
+ const chunks = splitTextIntoChunks(text, maxChunkSize);
214
+
215
+ if (chunks.length === 1) {
216
+ // No chunking needed
217
+ return await rewrite(args, options);
218
+ }
219
+
220
+ // Rewrite each chunk sequentially
221
+ const rewrittenChunks: string[] = [];
222
+
223
+ for (let i = 0; i < chunks.length; i++) {
224
+ const chunkResult = await rewrite(
225
+ {
226
+ ...args,
227
+ text: chunks[i].text,
228
+ },
229
+ {
230
+ ...options,
231
+ onStatus: (e: AiKitStatusEvent) => {
232
+ if (options.onStatus) {
233
+ const baseProgress =
234
+ typeof e.progress === "number" ? e.progress : 0;
235
+ const chunkProgress = (i + baseProgress) / chunks.length;
236
+
237
+ options.onStatus({
238
+ ...e,
239
+ message: `Rewriting part ${i + 1}/${chunks.length}...`,
240
+ progress: chunkProgress,
241
+ });
242
+ }
243
+ },
244
+ },
245
+ );
246
+
247
+ rewrittenChunks.push(chunkResult.result);
248
+ }
249
+
250
+ // Join with paragraph breaks
251
+ return {
252
+ result: rewrittenChunks.join("\n\n"),
253
+ };
254
+ }
@@ -0,0 +1,211 @@
1
+ /**
2
+ * Text chunking utilities for handling large inputs in AI features
3
+ *
4
+ * Chunking is needed for:
5
+ * - On-device models with token quotas (~8000 tokens)
6
+ * - AWS Translate backend (10,000 character limit)
7
+ */
8
+
9
+ export interface TextChunk {
10
+ text: string;
11
+ start: number;
12
+ end: number;
13
+ }
14
+
15
+ /**
16
+ * Estimate token count from text
17
+ * Approximation: 1 token ≈ 3.5 characters for Hungarian text
18
+ */
19
+ export function estimateTokenCount(text: string): number {
20
+ return Math.ceil(text.length / 3.5);
21
+ }
22
+
23
+ /**
24
+ * Check if input should be chunked based on mode and size
25
+ */
26
+ export function shouldChunkInput(
27
+ text: string,
28
+ mode: "summarize" | "translate" | "rewrite" | "proofread",
29
+ isOnDevice: boolean,
30
+ ): boolean {
31
+ const tokens = estimateTokenCount(text);
32
+
33
+ if (isOnDevice) {
34
+ // On-device models have token quotas
35
+ const quotas = {
36
+ summarize: 8000,
37
+ translate: 8000,
38
+ rewrite: 8000,
39
+ proofread: 10000, // Proofreader has higher quota
40
+ };
41
+
42
+ const quota = quotas[mode] || 8000;
43
+ // Use 80% threshold for safety (buffer for output)
44
+ return tokens > quota * 0.8;
45
+ }
46
+
47
+ // Backend: only AWS Translate has character limit
48
+ if (mode === "translate") {
49
+ // AWS Translate limit is 10,000 characters
50
+ // Use 90% threshold (9,000 chars) for safety
51
+ return text.length > 9000;
52
+ }
53
+
54
+ // Other backends can handle large inputs
55
+ return false;
56
+ }
57
+
58
+ /**
59
+ * Find the last sentence boundary before the given position
60
+ */
61
+ function findLastSentenceBoundary(
62
+ text: string,
63
+ start: number,
64
+ end: number,
65
+ ): number {
66
+ // Look for sentence enders: . ! ? followed by space or newline
67
+ let lastBoundary = -1;
68
+
69
+ for (let i = end - 1; i >= start; i--) {
70
+ const char = text[i];
71
+ const nextChar = i + 1 < text.length ? text[i + 1] : "";
72
+
73
+ if (
74
+ (char === "." || char === "!" || char === "?") &&
75
+ (nextChar === " " ||
76
+ nextChar === "\n" ||
77
+ nextChar === "\r" ||
78
+ i === text.length - 1)
79
+ ) {
80
+ lastBoundary = i + 1;
81
+ break;
82
+ }
83
+ }
84
+
85
+ // Only accept if we found a boundary in the latter half of the chunk
86
+ return lastBoundary > start + (end - start) * 0.5 ? lastBoundary : -1;
87
+ }
88
+
89
+ /**
90
+ * Find the last clause boundary (comma, semicolon, colon)
91
+ */
92
+ function findLastClauseBoundary(
93
+ text: string,
94
+ start: number,
95
+ end: number,
96
+ ): number {
97
+ let lastBoundary = -1;
98
+
99
+ for (let i = end - 1; i >= start; i--) {
100
+ const char = text[i];
101
+ const nextChar = i + 1 < text.length ? text[i + 1] : "";
102
+
103
+ if (
104
+ (char === "," || char === ";" || char === ":") &&
105
+ (nextChar === " " || nextChar === "\n" || nextChar === "\r")
106
+ ) {
107
+ lastBoundary = i + 1;
108
+ break;
109
+ }
110
+ }
111
+
112
+ return lastBoundary > start + (end - start) * 0.5 ? lastBoundary : -1;
113
+ }
114
+
115
+ /**
116
+ * Split text into chunks at intelligent boundaries
117
+ *
118
+ * Priority order for splitting:
119
+ * 1. Paragraph breaks (\n\n)
120
+ * 2. Sentence endings (. ! ?)
121
+ * 3. Clause markers (, ; :)
122
+ * 4. Word boundaries (space)
123
+ */
124
+ export function splitTextIntoChunks(
125
+ text: string,
126
+ maxCharsPerChunk: number,
127
+ ): TextChunk[] {
128
+ const chunks: TextChunk[] = [];
129
+ let currentPos = 0;
130
+
131
+ while (currentPos < text.length) {
132
+ let chunkEnd = Math.min(currentPos + maxCharsPerChunk, text.length);
133
+
134
+ if (chunkEnd < text.length) {
135
+ // Try to split at paragraph break
136
+ const paragraphBreakPos = text.lastIndexOf("\n\n", chunkEnd);
137
+ if (paragraphBreakPos > currentPos + maxCharsPerChunk * 0.5) {
138
+ chunkEnd = paragraphBreakPos + 2;
139
+ } else {
140
+ // Try to split at sentence boundary
141
+ const sentenceEnd = findLastSentenceBoundary(
142
+ text,
143
+ currentPos,
144
+ chunkEnd,
145
+ );
146
+ if (sentenceEnd > 0) {
147
+ chunkEnd = sentenceEnd;
148
+ } else {
149
+ // Try to split at clause boundary
150
+ const clauseEnd = findLastClauseBoundary(text, currentPos, chunkEnd);
151
+ if (clauseEnd > 0) {
152
+ chunkEnd = clauseEnd;
153
+ } else {
154
+ // Last resort: split at word boundary
155
+ const wordEnd = text.lastIndexOf(" ", chunkEnd);
156
+ if (wordEnd > currentPos + maxCharsPerChunk * 0.5) {
157
+ chunkEnd = wordEnd + 1;
158
+ }
159
+ // If no good boundary found, just cut at maxCharsPerChunk
160
+ }
161
+ }
162
+ }
163
+ }
164
+
165
+ const chunkText = text.substring(currentPos, chunkEnd).trim();
166
+ if (chunkText.length > 0) {
167
+ chunks.push({
168
+ text: chunkText,
169
+ start: currentPos,
170
+ end: chunkEnd,
171
+ });
172
+ }
173
+
174
+ currentPos = chunkEnd;
175
+ }
176
+
177
+ return chunks;
178
+ }
179
+
180
+ /**
181
+ * Calculate appropriate chunk size based on mode and whether it's on-device
182
+ */
183
+ export function getChunkSize(
184
+ mode: "summarize" | "translate" | "rewrite" | "proofread",
185
+ isOnDevice: boolean,
186
+ ): number {
187
+ if (isOnDevice) {
188
+ // On-device: use token-based chunking
189
+ // Convert tokens to characters (80% of quota for safety)
190
+ const quotas = {
191
+ summarize: 8000,
192
+ translate: 8000,
193
+ rewrite: 8000,
194
+ proofread: 10000,
195
+ };
196
+
197
+ const quota = quotas[mode] || 8000;
198
+ const safeQuota = quota * 0.8;
199
+ // Convert tokens to chars (1 token ≈ 3.5 chars)
200
+ return Math.floor(safeQuota * 3.5);
201
+ }
202
+
203
+ // Backend: only for AWS Translate
204
+ if (mode === "translate") {
205
+ // AWS Translate: 10,000 char limit, use 9,000 for safety
206
+ return 9000;
207
+ }
208
+
209
+ // Should not reach here if shouldChunkInput is used correctly
210
+ return 10000;
211
+ }
package/tsup.config.ts CHANGED
@@ -2,7 +2,8 @@ import { defineConfig } from "tsup";
2
2
 
3
3
  export default defineConfig({
4
4
  // Copy non-hashed global CSS so consumers can import it (like Mantine styles)
5
- onSuccess: "node -e \"const fs=require('fs'); const path=require('path'); fs.mkdirSync('dist',{recursive:true}); fs.copyFileSync(path.join('src','styles','ai-kit-ui.css'), path.join('dist','ai-kit-ui.css'));\"",
5
+ onSuccess:
6
+ "node -e \"const fs=require('fs'); const path=require('path'); fs.mkdirSync('dist',{recursive:true}); fs.copyFileSync(path.join('src','styles','ai-kit-ui.css'), path.join('dist','ai-kit-ui.css'));\"",
6
7
 
7
8
  entry: ["src/index.tsx"],
8
9
  format: ["cjs", "esm"],