@smart-cloud/ai-kit-ui 1.3.15 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,211 @@
1
+ /**
2
+ * Text chunking utilities for handling large inputs in AI features
3
+ *
4
+ * Chunking is needed for:
5
+ * - On-device models with token quotas (~8000 tokens)
6
+ * - AWS Translate backend (10,000 character limit)
7
+ */
8
+
9
/**
 * One piece of a larger text, as produced by `splitTextIntoChunks`.
 */
export interface TextChunk {
  /**
   * Chunk content with surrounding whitespace trimmed, so it may be shorter
   * than `end - start`.
   */
  text: string;
  /** Offset in the source text where the untrimmed slice begins (inclusive). */
  start: number;
  /** Offset in the source text where the untrimmed slice stops (exclusive). */
  end: number;
}
14
+
15
/**
 * Roughly approximate how many model tokens a piece of text will consume.
 *
 * The estimate assumes a fixed ratio of 3.5 characters per token (a heuristic
 * chosen for Hungarian text) and always rounds up.
 *
 * @param text - Input to measure; its length is counted in UTF-16 code units.
 * @returns The estimated token count (0 for an empty string).
 */
export function estimateTokenCount(text: string): number {
  const charsPerToken = 3.5;
  const rawEstimate = text.length / charsPerToken;
  return Math.ceil(rawEstimate);
}
22
+
23
/**
 * Decide whether an input is large enough that it must be split before
 * being processed.
 *
 * - On-device: the estimated token count is compared against 80% of the
 *   per-mode token quota (8000 tokens, or 10000 for proofreading); the
 *   remaining 20% is left as headroom for the output.
 * - Backend: only translation is limited, and chunking starts once the text
 *   is longer than 9,000 characters (90% of the 10,000 character limit).
 *
 * @param text - The full input text.
 * @param mode - The AI feature that will process the text.
 * @param isOnDevice - Whether an on-device model (rather than the backend) is used.
 * @returns `true` when the text should be chunked.
 */
export function shouldChunkInput(
  text: string,
  mode: "summarize" | "translate" | "rewrite" | "proofread",
  isOnDevice: boolean,
): boolean {
  const estimatedTokens = estimateTokenCount(text);

  if (!isOnDevice) {
    // Backend path: translation is the only mode with a character limit,
    // every other backend mode accepts large inputs as-is.
    return mode === "translate" ? text.length > 9000 : false;
  }

  // On-device path: token quotas per mode (the proofreader allows more).
  const tokenQuotaByMode = {
    summarize: 8000,
    translate: 8000,
    rewrite: 8000,
    proofread: 10000,
  };
  const modeQuota = tokenQuotaByMode[mode] || 8000;
  const safeTokenBudget = modeQuota * 0.8;

  return estimatedTokens > safeTokenBudget;
}
57
+
58
/**
 * Locate the last sentence break inside `text[start, end)`.
 *
 * A sentence break is a `.`, `!` or `?` that is either followed by a space,
 * `\n` or `\r`, or is the very last character of `text`. The scan runs
 * backwards from `end - 1` and stops at the first match.
 *
 * @param text - The full text being chunked.
 * @param start - First index to consider (inclusive).
 * @param end - Index at which to stop considering (exclusive).
 * @returns The index just after the terminator, or -1 when no break exists or
 *   the break does not lie strictly past the midpoint of the range.
 */
function findLastSentenceBoundary(
  text: string,
  start: number,
  end: number,
): number {
  const isTerminator = (ch: string): boolean =>
    ch === "." || ch === "!" || ch === "?";
  const isLineOrWordBreak = (ch: string): boolean =>
    ch === " " || ch === "\n" || ch === "\r";

  const finalIndex = text.length - 1;
  let boundary = -1;
  let idx = end - 1;

  while (idx >= start) {
    // charAt yields "" outside the string, which matches none of the checks.
    if (isTerminator(text.charAt(idx))) {
      const endsText = idx === finalIndex;
      if (endsText || isLineOrWordBreak(text.charAt(idx + 1))) {
        boundary = idx + 1;
        break;
      }
    }
    idx -= 1;
  }

  // Reject breaks in the first half so chunks do not become needlessly short.
  const midpoint = start + (end - start) * 0.5;
  if (boundary > midpoint) {
    return boundary;
  }
  return -1;
}
88
+
89
/**
 * Locate the last clause break inside `text[start, end)`.
 *
 * A clause break is a `,`, `;` or `:` immediately followed by a space, `\n`
 * or `\r`. The scan runs backwards from `end - 1` and stops at the first
 * match.
 *
 * @param text - The full text being chunked.
 * @param start - First index to consider (inclusive).
 * @param end - Index at which to stop considering (exclusive).
 * @returns The index just after the separator, or -1 when no break exists or
 *   the break does not lie strictly past the midpoint of the range.
 */
function findLastClauseBoundary(
  text: string,
  start: number,
  end: number,
): number {
  const isSeparator = (ch: string): boolean =>
    ch === "," || ch === ";" || ch === ":";
  const isLineOrWordBreak = (ch: string): boolean =>
    ch === " " || ch === "\n" || ch === "\r";

  let boundary = -1;
  let idx = end - 1;

  while (idx >= start) {
    // charAt yields "" outside the string, which matches none of the checks.
    if (
      isSeparator(text.charAt(idx)) &&
      isLineOrWordBreak(text.charAt(idx + 1))
    ) {
      boundary = idx + 1;
      break;
    }
    idx -= 1;
  }

  // Reject breaks in the first half so chunks do not become needlessly short.
  const midpoint = start + (end - start) * 0.5;
  if (boundary > midpoint) {
    return boundary;
  }
  return -1;
}
114
+
115
/**
 * Split text into chunks of roughly `maxCharsPerChunk` characters, cutting at
 * the most natural boundary available.
 *
 * For every chunk that does not reach the end of the text, the split point is
 * chosen in this priority order, and a candidate is only accepted when it
 * lies in the latter half of the chunk window:
 * 1. Paragraph breaks (`\n\n`)
 * 2. Sentence endings (`. ! ?`)
 * 3. Clause markers (`, ; :`)
 * 4. Word boundaries (space)
 *
 * If none is found the text is cut at the size limit; that cut is nudged by
 * one position when it would otherwise separate the two halves of a UTF-16
 * surrogate pair (e.g. an emoji).
 *
 * Each chunk's `text` is trimmed, and chunks that are empty after trimming
 * are omitted; `start`/`end` always describe the untrimmed slice. Because
 * the paragraph and word searches may match a separator that begins right at
 * the window edge, `end - start` can exceed the limit by up to two
 * characters, but those extra characters are whitespace removed by trimming.
 *
 * @param text - The text to split.
 * @param maxCharsPerChunk - Maximum chunk size in UTF-16 code units; must be
 *   at least 1. Fractional values are rounded down.
 * @returns The chunks in source order; an empty array for empty text.
 * @throws RangeError if `text` is non-empty and `maxCharsPerChunk` is NaN or
 *   less than 1 (such a limit could never advance through the text).
 */
export function splitTextIntoChunks(
  text: string,
  maxCharsPerChunk: number,
): TextChunk[] {
  const chunks: TextChunk[] = [];
  if (text.length === 0) {
    return chunks;
  }

  // A zero or negative limit would leave the cursor stuck forever, and NaN
  // would silently discard the whole text, so reject them up front.
  if (!(maxCharsPerChunk >= 1)) {
    throw new RangeError(
      `maxCharsPerChunk must be a number >= 1, received ${maxCharsPerChunk}`,
    );
  }
  // Keep every offset an integer even when a fractional limit is supplied.
  const limit = Math.floor(maxCharsPerChunk);

  let currentPos = 0;

  while (currentPos < text.length) {
    let chunkEnd = Math.min(currentPos + limit, text.length);

    if (chunkEnd < text.length) {
      // Boundaries at or before this offset would make the chunk too short.
      const earliestSplit = currentPos + limit * 0.5;
      let splitAt = -1;

      // 1. Paragraph break.
      const paragraphBreakPos = text.lastIndexOf("\n\n", chunkEnd);
      if (paragraphBreakPos > earliestSplit) {
        splitAt = paragraphBreakPos + 2;
      }
      // 2. Sentence boundary.
      if (splitAt < 0) {
        splitAt = findLastSentenceBoundary(text, currentPos, chunkEnd);
      }
      // 3. Clause boundary.
      if (splitAt < 0) {
        splitAt = findLastClauseBoundary(text, currentPos, chunkEnd);
      }
      // 4. Word boundary.
      if (splitAt < 0) {
        const wordEnd = text.lastIndexOf(" ", chunkEnd);
        if (wordEnd > earliestSplit) {
          splitAt = wordEnd + 1;
        }
      }

      if (splitAt > 0) {
        chunkEnd = splitAt;
      } else {
        // Hard cut at the limit. Avoid slicing between a high and a low
        // surrogate, which would leave a broken character in both chunks.
        const before = text.charCodeAt(chunkEnd - 1);
        const after = text.charCodeAt(chunkEnd);
        const splitsSurrogatePair =
          before >= 0xd800 &&
          before <= 0xdbff &&
          after >= 0xdc00 &&
          after <= 0xdfff;
        if (splitsSurrogatePair) {
          // Prefer staying under the limit; step forward instead only when
          // stepping back would produce an empty chunk and stall the loop.
          chunkEnd = chunkEnd - 1 > currentPos ? chunkEnd - 1 : chunkEnd + 1;
        }
      }
    }

    const chunkText = text.substring(currentPos, chunkEnd).trim();
    if (chunkText.length > 0) {
      chunks.push({
        text: chunkText,
        start: currentPos,
        end: chunkEnd,
      });
    }

    currentPos = chunkEnd;
  }

  return chunks;
}
179
+
180
/**
 * Work out the maximum chunk length, in characters, for a given mode and
 * execution target.
 *
 * - On-device: 80% of the per-mode token quota (8000 tokens, or 10000 for
 *   proofreading), converted to characters at 3.5 characters per token and
 *   rounded down.
 * - Backend translation: 9,000 characters (below the 10,000 character limit).
 * - Any other backend mode: 10,000 characters. This is a fallback value;
 *   such inputs are not expected to be chunked at all.
 *
 * @param mode - The AI feature that will process the text.
 * @param isOnDevice - Whether an on-device model (rather than the backend) is used.
 * @returns The chunk size in characters.
 */
export function getChunkSize(
  mode: "summarize" | "translate" | "rewrite" | "proofread",
  isOnDevice: boolean,
): number {
  if (!isOnDevice) {
    // Backend: only translation needs a real limit.
    return mode === "translate" ? 9000 : 10000;
  }

  // On-device: sizes are derived from the token quota of the selected mode.
  const tokenQuotaByMode = {
    summarize: 8000,
    translate: 8000,
    rewrite: 8000,
    proofread: 10000,
  };
  const charsPerToken = 3.5;

  const modeQuota = tokenQuotaByMode[mode] || 8000;
  const safeTokenBudget = modeQuota * 0.8;
  return Math.floor(safeTokenBudget * charsPerToken);
}
package/tsup.config.ts CHANGED
@@ -2,7 +2,8 @@ import { defineConfig } from "tsup";
2
2
 
3
3
  export default defineConfig({
4
4
  // Copy non-hashed global CSS so consumers can import it (like Mantine styles)
5
- onSuccess: "node -e \"const fs=require('fs'); const path=require('path'); fs.mkdirSync('dist',{recursive:true}); fs.copyFileSync(path.join('src','styles','ai-kit-ui.css'), path.join('dist','ai-kit-ui.css'));\"",
5
+ onSuccess:
6
+ "node -e \"const fs=require('fs'); const path=require('path'); fs.mkdirSync('dist',{recursive:true}); fs.copyFileSync(path.join('src','styles','ai-kit-ui.css'), path.join('dist','ai-kit-ui.css'));\"",
6
7
 
7
8
  entry: ["src/index.tsx"],
8
9
  format: ["cjs", "esm"],