npm - @smart-cloud/ai-kit-ui - Versions diffs - 1.4.0 → 1.4.1 - Mend

@smart-cloud/ai-kit-ui 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/index.cjs +19 -9
package/dist/index.d.cts +1 -1
package/dist/index.d.ts +1 -1
package/dist/index.js +19 -9
package/package.json +5 -2
package/src/ai-feature/AiFeature.tsx +98 -8
package/src/ai-feature/chunked-features.ts +254 -0
package/src/ai-feature/chunking-utils.ts +211 -0
package/tsup.config.ts +2 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@smart-cloud/ai-kit-ui",
-  "version": "1.4.0",
+  "version": "1.4.1",
   "type": "module",
   "main": "./dist/index.cjs",
   "module": "./dist/index.js",
@@ -20,17 +20,20 @@
     "@emotion/cache": "^11.14.0",
     "@emotion/react": "^11.14.0",
     "@mantine/colors-generator": "^8.3.16",
-    "@smart-cloud/ai-kit-core": "^1.4.2",
+    "@smart-cloud/ai-kit-core": "^1.4.3",
     "@smart-cloud/wpsuite-core": "^2.2.10",
     "@tabler/icons-react": "^3.40.0",
     "chroma-js": "^3.2.0",
     "react-markdown": "^10.1.0",
+    "rehype-parse": "^9.0.1",
     "rehype-raw": "^7.0.0",
+    "rehype-remark": "^10.0.0",
     "rehype-sanitize": "^6.0.0",
     "rehype-stringify": "^10.0.1",
     "remark-gfm": "^4.0.1",
     "remark-parse": "^11.0.0",
     "remark-rehype": "^11.1.2",
+    "remark-stringify": "^11.0.0",
     "unified": "^11.0.5"
   },
   "peerDependencies": {

package/src/ai-feature/AiFeature.tsx CHANGED Viewed

@@ -55,6 +55,12 @@ import {
 import { translations } from "../i18n";
 import { PoweredBy } from "../poweredBy";
+import { shouldChunkInput } from "./chunking-utils";
+import {
+  chunkedSummarize,
+  chunkedTranslate,
+  chunkedRewrite,
+} from "./chunked-features";
 import {
   isBackendConfigured,
   readDefaultOutputLanguage,
@@ -472,7 +478,31 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
   }, [text, defaults]);
   const canGenerate = useMemo(() => {
-    const text = typeof inputText === "function" ? inputText() : inputText;
+    // If inputText is a function (async or sync getText), we can't determine
+    // if it has content without calling it. Assume it's valid if provided.
+    const input = inputText;
+    if (typeof input === "function") {
+      switch (mode) {
+        case "generateImageMetadata":
+          return Boolean(image);
+        case "translate":
+          // For translate, we need outputLanguage check, but can't check text without calling getText
+          return Boolean(
+            !outputLanguage || detectedLanguage !== outputLanguage,
+          );
+        case "summarize":
+        case "proofread":
+        case "rewrite":
+        case "write":
+        case "generatePostMetadata":
+          return true; // Assume getText will provide valid content
+        default:
+          return false;
+      }
+    }
+    // If inputText is a string, check it directly
+    const text = input as string | undefined;
     switch (mode) {
       case "generateImageMetadata":
         return Boolean(image);
@@ -507,8 +537,11 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
       setError(null);
       setGenerated(null);
+      const input = await inputText;
       try {
-        const text = typeof inputText === "function" ? inputText() : inputText;
+        // Support both sync and async getText functions
+        const text =
+          typeof input === "function" ? await Promise.resolve(input()) : input;
         switch (mode) {
           case "summarize": {
             const res = await ai.run(async ({ signal, onStatus }) => {
@@ -524,13 +557,32 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
                 type: type as SummarizerType,
                 outputLanguage: outLang as SummarizeArgs["outputLanguage"],
               };
-              const out = await summarize(args, {
+              const featureOptions: FeatureOptions = {
                 signal,
                 onStatus,
                 context,
                 modeOverride,
                 onDeviceTimeoutOverride: onDeviceTimeout,
-              });
+              };
+              // Determine if we're using on-device mode
+              const isOnDevice =
+                modeOverride === "local-only" ||
+                (!modeOverride && context === "admin");
+              // Check if chunking is needed
+              if (shouldChunkInput(text!.trim(), "summarize", isOnDevice)) {
+                return await chunkedSummarize(
+                  text!.trim(),
+                  args,
+                  featureOptions,
+                  isOnDevice,
+                );
+              }
+              // Normal single-pass summarization
+              const out = await summarize(args, featureOptions);
               return out.result;
             });
             setGenerated((res as never) ?? "");
@@ -614,13 +666,32 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
                 sourceLanguage: inputLang!,
                 targetLanguage: outLang,
               };
-              const out = await translate(args, {
+              const featureOptions: FeatureOptions = {
                 signal,
                 onStatus,
                 context,
                 modeOverride,
                 onDeviceTimeoutOverride: onDeviceTimeout,
-              });
+              };
+              // Determine if we're using on-device mode
+              const isOnDevice =
+                modeOverride === "local-only" ||
+                (!modeOverride && context === "admin");
+              // Check if chunking is needed (both on-device quota and AWS Translate limit)
+              if (shouldChunkInput(text!.trim(), "translate", isOnDevice)) {
+                return await chunkedTranslate(
+                  text!.trim(),
+                  args,
+                  featureOptions,
+                  isOnDevice,
+                );
+              }
+              // Normal single-pass translation
+              const out = await translate(args, featureOptions);
               return out.result;
             });
             setGenerated((res as never) ?? "");
@@ -652,13 +723,32 @@ const AiFeatureBase: FC<AiFeatureProps & AiKitShellInjectedProps> = (props) => {
                 length: length as RewriterLength,
                 outputLanguage: outLang as RewriteArgs["outputLanguage"],
               };
-              const out = await rewrite(args, {
+              const featureOptions: FeatureOptions = {
                 signal,
                 onStatus,
                 context,
                 modeOverride,
                 onDeviceTimeoutOverride: onDeviceTimeout,
-              });
+              };
+              // Determine if we're using on-device mode
+              const isOnDevice =
+                modeOverride === "local-only" ||
+                (!modeOverride && context === "admin");
+              // Check if chunking is needed
+              if (shouldChunkInput(text!.trim(), "rewrite", isOnDevice)) {
+                return await chunkedRewrite(
+                  text!.trim(),
+                  args,
+                  featureOptions,
+                  isOnDevice,
+                );
+              }
+              // Normal single-pass rewrite
+              const out = await rewrite(args, featureOptions);
               return out.result;
             });
             setGenerated((res as never) ?? "");

package/src/ai-feature/chunked-features.ts ADDED Viewed

@@ -0,0 +1,254 @@
+/**
+ * Chunked versions of AI features for handling large inputs
+ *
+ * These wrappers split large inputs into smaller chunks, process them
+ * sequentially, and combine the results.
+ */
+import type {
+  AiKitStatusEvent,
+  FeatureOptions,
+  SummarizeArgs,
+  SummarizeResult,
+  TranslateArgs,
+  TranslateResult,
+  RewriteArgs,
+  RewriteResult,
+} from "@smart-cloud/ai-kit-core";
+import { summarize, translate, rewrite } from "@smart-cloud/ai-kit-core";
+import {
+  splitTextIntoChunks,
+  getChunkSize,
+  estimateTokenCount,
+} from "./chunking-utils";
+/**
+ * Summarize a text that is too large for a single summarizer call.
+ *
+ * Strategy:
+ * 1. Split `text` into chunks sized for the current mode
+ * 2. Summarize each chunk sequentially
+ * 3. If the joined chunk summaries are still too large, run this function
+ *    again on them (at most two additional levels)
+ * 4. Otherwise summarize the joined summaries once more and return that result
+ *
+ * @param text - Text to summarize. This value (not `args.text`) is what gets
+ *   split and sent, so recursive passes operate on the combined summaries.
+ * @param args - Summarizer arguments. `text` is overridden on every call and
+ *   `length` is capped at "medium" for the combining passes.
+ * @param options - Forwarded to every `summarize` call; `onStatus` is wrapped
+ *   so the reported message and progress reflect the chunking.
+ * @param isOnDevice - Selects the chunk size via `getChunkSize`.
+ * @param recursionLevel - Internal depth counter; callers normally omit it.
+ * @returns The result of the last `summarize` call.
+ * @throws Error when the text still needs splitting at recursion level 2.
+ */
+export async function chunkedSummarize(
+  text: string,
+  args: SummarizeArgs,
+  options: FeatureOptions,
+  isOnDevice: boolean,
+  recursionLevel: number = 0,
+): Promise<SummarizeResult> {
+  const maxChunkSize = getChunkSize("summarize", isOnDevice);
+  const chunks = splitTextIntoChunks(text, maxChunkSize);
+  if (chunks.length === 1) {
+    // No chunking needed. Summarize the text we were handed: on a recursive
+    // pass `args.text` still holds the original, oversized input.
+    return await summarize({ ...args, text }, options);
+  }
+  // Prevent infinite recursion (max 2 levels)
+  if (recursionLevel >= 2) {
+    throw new Error(
+      "Text is too large to summarize. Please try using backend mode or reduce the input size.",
+    );
+  }
+  // Phase 1: Summarize each chunk
+  const chunkSummaries: string[] = [];
+  for (let i = 0; i < chunks.length; i++) {
+    const chunkResult = await summarize(
+      {
+        ...args,
+        text: chunks[i].text,
+      },
+      {
+        ...options,
+        onStatus: (e: AiKitStatusEvent) => {
+          if (options.onStatus) {
+            // Scale the per-call progress into this chunk's slice of the run
+            const baseProgress =
+              typeof e.progress === "number" ? e.progress : 0;
+            const chunkProgress = (i + baseProgress) / chunks.length;
+            options.onStatus({
+              ...e,
+              message:
+                recursionLevel === 0
+                  ? `Summarizing part ${i + 1}/${chunks.length}...`
+                  : `Combining summaries (${i + 1}/${chunks.length})...`,
+              progress: chunkProgress,
+            });
+          }
+        },
+      },
+    );
+    chunkSummaries.push(chunkResult.result);
+  }
+  // Phase 2: Combine summaries
+  const combinedSummaries = chunkSummaries.join("\n\n");
+  // Combining passes never ask for more than a medium-length summary
+  const combineArgs: SummarizeArgs = {
+    ...args,
+    text: combinedSummaries,
+    length: args.length === "short" ? "short" : "medium",
+  };
+  // Check if we need another round of summarization
+  if (estimateTokenCount(combinedSummaries) > maxChunkSize / 3.5) {
+    // Pass `options` through untouched: the nested call builds its own
+    // "Combining summaries (i/n)..." and "Creating final summary..." messages,
+    // which an extra wrapper here would overwrite.
+    return await chunkedSummarize(
+      combinedSummaries,
+      combineArgs,
+      options,
+      isOnDevice,
+      recursionLevel + 1,
+    );
+  }
+  // Final summarization
+  return await summarize(combineArgs, {
+    ...options,
+    onStatus: (e: AiKitStatusEvent) => {
+      if (options.onStatus) {
+        options.onStatus({
+          ...e,
+          message: "Creating final summary...",
+        });
+      }
+    },
+  });
+}
+/**
+ * Translate a text that exceeds the single-call limit, piece by piece.
+ *
+ * Strategy:
+ * 1. Split `text` into chunks (respecting AWS Translate 10k char limit)
+ * 2. Translate each chunk sequentially
+ * 3. Re-assemble the translations using the whitespace that separated the
+ *    chunks in the source text, so a split inside a paragraph does not turn
+ *    into a paragraph break in the output
+ *
+ * @param text - Text to translate; this value is what gets split and sent.
+ * @param args - Translator arguments; `text` is overridden on every call.
+ * @param options - Forwarded to every `translate` call; `onStatus` is wrapped
+ *   so the reported message and progress reflect the chunking.
+ * @param isOnDevice - Selects the chunk size via `getChunkSize`.
+ * @returns An object whose `result` is the re-assembled translation, or the
+ *   plain `translate` result when only one chunk is produced.
+ */
+export async function chunkedTranslate(
+  text: string,
+  args: TranslateArgs,
+  options: FeatureOptions,
+  isOnDevice: boolean,
+): Promise<TranslateResult> {
+  const maxChunkSize = getChunkSize("translate", isOnDevice);
+  const chunks = splitTextIntoChunks(text, maxChunkSize);
+  if (chunks.length === 1) {
+    // No chunking needed: translate exactly the text we were given
+    return await translate({ ...args, text }, options);
+  }
+  // Translate each chunk sequentially
+  let result = "";
+  let previousContentEnd = 0;
+  for (let i = 0; i < chunks.length; i++) {
+    const chunk = chunks[i];
+    // `chunk.text` is the trimmed form of text[start, end); locate it inside
+    // that span to recover the whitespace the splitter dropped.
+    const contentStart =
+      chunk.start + text.substring(chunk.start, chunk.end).indexOf(chunk.text);
+    if (i > 0) {
+      // Original separator between the previous chunk's content and this one
+      result += text.substring(previousContentEnd, contentStart);
+    }
+    previousContentEnd = contentStart + chunk.text.length;
+    const chunkResult = await translate(
+      {
+        ...args,
+        text: chunk.text,
+      },
+      {
+        ...options,
+        onStatus: (e: AiKitStatusEvent) => {
+          if (options.onStatus) {
+            const baseProgress =
+              typeof e.progress === "number" ? e.progress : 0;
+            const chunkProgress = (i + baseProgress) / chunks.length;
+            options.onStatus({
+              ...e,
+              message: `Translating part ${i + 1}/${chunks.length}...`,
+              progress: chunkProgress,
+            });
+          }
+        },
+      },
+    );
+    result += chunkResult.result;
+  }
+  return { result };
+}
+/**
+ * Rewrite a text that exceeds the single-call limit, piece by piece.
+ *
+ * Strategy:
+ * 1. Split `text` into chunks
+ * 2. Rewrite each chunk sequentially
+ * 3. Re-assemble the rewritten chunks using the whitespace that separated
+ *    them in the source text, so a split inside a paragraph does not turn
+ *    into a paragraph break in the output
+ *
+ * @param text - Text to rewrite; this value is what gets split and sent.
+ * @param args - Rewriter arguments; `text` is overridden on every call.
+ * @param options - Forwarded to every `rewrite` call; `onStatus` is wrapped
+ *   so the reported message and progress reflect the chunking.
+ * @param isOnDevice - Selects the chunk size via `getChunkSize`.
+ * @returns An object whose `result` is the re-assembled rewrite, or the plain
+ *   `rewrite` result when only one chunk is produced.
+ */
+export async function chunkedRewrite(
+  text: string,
+  args: RewriteArgs,
+  options: FeatureOptions,
+  isOnDevice: boolean,
+): Promise<RewriteResult> {
+  const maxChunkSize = getChunkSize("rewrite", isOnDevice);
+  const chunks = splitTextIntoChunks(text, maxChunkSize);
+  if (chunks.length === 1) {
+    // No chunking needed: rewrite exactly the text we were given
+    return await rewrite({ ...args, text }, options);
+  }
+  // Rewrite each chunk sequentially
+  let result = "";
+  let previousContentEnd = 0;
+  for (let i = 0; i < chunks.length; i++) {
+    const chunk = chunks[i];
+    // `chunk.text` is the trimmed form of text[start, end); locate it inside
+    // that span to recover the whitespace the splitter dropped.
+    const contentStart =
+      chunk.start + text.substring(chunk.start, chunk.end).indexOf(chunk.text);
+    if (i > 0) {
+      // Original separator between the previous chunk's content and this one
+      result += text.substring(previousContentEnd, contentStart);
+    }
+    previousContentEnd = contentStart + chunk.text.length;
+    const chunkResult = await rewrite(
+      {
+        ...args,
+        text: chunk.text,
+      },
+      {
+        ...options,
+        onStatus: (e: AiKitStatusEvent) => {
+          if (options.onStatus) {
+            const baseProgress =
+              typeof e.progress === "number" ? e.progress : 0;
+            const chunkProgress = (i + baseProgress) / chunks.length;
+            options.onStatus({
+              ...e,
+              message: `Rewriting part ${i + 1}/${chunks.length}...`,
+              progress: chunkProgress,
+            });
+          }
+        },
+      },
+    );
+    result += chunkResult.result;
+  }
+  return { result };
+}

package/src/ai-feature/chunking-utils.ts ADDED Viewed

@@ -0,0 +1,211 @@
+/**
+ * Text chunking utilities for handling large inputs in AI features
+ *
+ * Chunking is needed for:
+ * - On-device models with token quotas (~8000 tokens)
+ * - AWS Translate backend (10,000 character limit)
+ */
+/**
+ * One piece of a larger text, as produced by `splitTextIntoChunks`.
+ */
+export interface TextChunk {
+  /** Chunk content with leading and trailing whitespace trimmed. */
+  text: string;
+  /** Offset in the source text where the untrimmed span begins (inclusive). */
+  start: number;
+  /** Offset in the source text where the untrimmed span ends (exclusive). */
+  end: number;
+}
+/**
+ * Rough token estimate for a piece of text.
+ *
+ * Heuristic: about 3.5 characters per token (tuned for Hungarian text),
+ * rounded up to the next whole token.
+ */
+export function estimateTokenCount(text: string): number {
+  const charsPerToken = 3.5;
+  const charCount = text.length;
+  return Math.ceil(charCount / charsPerToken);
+}
+/**
+ * Decide whether an input is large enough that it must be split before
+ * being sent to the given feature.
+ *
+ * @param text - The input text.
+ * @param mode - The feature that will process the text.
+ * @param isOnDevice - Whether the on-device token quotas apply.
+ * @returns `true` when the text should be chunked.
+ */
+export function shouldChunkInput(
+  text: string,
+  mode: "summarize" | "translate" | "rewrite" | "proofread",
+  isOnDevice: boolean,
+): boolean {
+  if (!isOnDevice) {
+    // Backend: only AWS Translate has a character limit (10,000 characters);
+    // chunk from 9,000 characters upward for safety. Other backends can
+    // handle large inputs.
+    return mode === "translate" && text.length > 9000;
+  }
+  // On-device models have per-feature token quotas
+  const quotaByMode = {
+    summarize: 8000,
+    translate: 8000,
+    rewrite: 8000,
+    proofread: 10000, // Proofreader has higher quota
+  };
+  // Keep 20% of the quota free as a buffer for the output
+  const tokenBudget = (quotaByMode[mode] || 8000) * 0.8;
+  return estimateTokenCount(text) > tokenBudget;
+}
+/**
+ * Scan backwards from `end` for the closest sentence ending inside
+ * [start, end).
+ *
+ * A sentence ending is `.`, `!` or `?` that is followed by a space or line
+ * break, or that is the very last character of the text.
+ *
+ * @returns The index just past the punctuation mark, or -1 when no ending is
+ *   found or the one found is not in the latter half of the range.
+ */
+function findLastSentenceBoundary(
+  text: string,
+  start: number,
+  end: number,
+): number {
+  const isTerminator = (c: string): boolean =>
+    c === "." || c === "!" || c === "?";
+  const isBreak = (c: string): boolean =>
+    c === " " || c === "\n" || c === "\r";
+  const lastIndex = text.length - 1;
+  let boundary = -1;
+  let idx = end - 1;
+  while (idx >= start) {
+    const followedByBreak =
+      idx === lastIndex || isBreak(text.charAt(idx + 1));
+    if (isTerminator(text.charAt(idx)) && followedByBreak) {
+      boundary = idx + 1;
+      break;
+    }
+    idx -= 1;
+  }
+  // A boundary in the first half would produce an undersized chunk; reject it
+  const midpoint = start + (end - start) * 0.5;
+  if (boundary > midpoint) {
+    return boundary;
+  }
+  return -1;
+}
+/**
+ * Scan backwards from `end` for the closest clause marker inside
+ * [start, end).
+ *
+ * A clause marker is `,`, `;` or `:` that is followed by a space or line
+ * break.
+ *
+ * @returns The index just past the marker, or -1 when no marker is found or
+ *   the one found is not in the latter half of the range.
+ */
+function findLastClauseBoundary(
+  text: string,
+  start: number,
+  end: number,
+): number {
+  const isMarker = (c: string): boolean =>
+    c === "," || c === ";" || c === ":";
+  const isBreak = (c: string): boolean =>
+    c === " " || c === "\n" || c === "\r";
+  let boundary = -1;
+  let idx = end - 1;
+  while (idx >= start && boundary === -1) {
+    if (isMarker(text.charAt(idx)) && isBreak(text.charAt(idx + 1))) {
+      boundary = idx + 1;
+    }
+    idx -= 1;
+  }
+  // A boundary in the first half would produce an undersized chunk; reject it
+  const midpoint = start + (end - start) * 0.5;
+  if (boundary > midpoint) {
+    return boundary;
+  }
+  return -1;
+}
+/**
+ * Split text into chunks at intelligent boundaries
+ *
+ * Priority order for splitting:
+ * 1. Paragraph breaks (\n\n)
+ * 2. Sentence endings (. ! ?)
+ * 3. Clause markers (, ; :)
+ * 4. Word boundaries (space)
+ *
+ * A boundary is only used when it lies in the latter half of the candidate
+ * chunk; if none qualifies the text is cut at `maxCharsPerChunk`.
+ *
+ * @param text - The text to split.
+ * @param maxCharsPerChunk - Upper bound for a chunk, in characters. Must be
+ *   greater than zero.
+ * @returns The chunks in source order. Each `text` is trimmed, `start`/`end`
+ *   delimit the untrimmed span, and whitespace-only spans are omitted.
+ * @throws RangeError when `maxCharsPerChunk` is NaN, zero or negative (the
+ *   scan position could never advance).
+ */
+export function splitTextIntoChunks(
+  text: string,
+  maxCharsPerChunk: number,
+): TextChunk[] {
+  if (Number.isNaN(maxCharsPerChunk) || maxCharsPerChunk <= 0) {
+    throw new RangeError(
+      `maxCharsPerChunk must be a positive number, got ${maxCharsPerChunk}`,
+    );
+  }
+  const chunks: TextChunk[] = [];
+  let currentPos = 0;
+  while (currentPos < text.length) {
+    let chunkEnd = Math.min(currentPos + maxCharsPerChunk, text.length);
+    if (chunkEnd < text.length) {
+      // Try to split at paragraph break
+      const paragraphBreakPos = text.lastIndexOf("\n\n", chunkEnd);
+      if (paragraphBreakPos > currentPos + maxCharsPerChunk * 0.5) {
+        // Keep the break itself with the chunk that precedes it
+        chunkEnd = paragraphBreakPos + 2;
+      } else {
+        // Try to split at sentence boundary
+        const sentenceEnd = findLastSentenceBoundary(
+          text,
+          currentPos,
+          chunkEnd,
+        );
+        if (sentenceEnd > 0) {
+          chunkEnd = sentenceEnd;
+        } else {
+          // Try to split at clause boundary
+          const clauseEnd = findLastClauseBoundary(text, currentPos, chunkEnd);
+          if (clauseEnd > 0) {
+            chunkEnd = clauseEnd;
+          } else {
+            // Last resort: split at word boundary
+            const wordEnd = text.lastIndexOf(" ", chunkEnd);
+            if (wordEnd > currentPos + maxCharsPerChunk * 0.5) {
+              chunkEnd = wordEnd + 1;
+            }
+            // If no good boundary found, just cut at maxCharsPerChunk
+          }
+        }
+      }
+    }
+    const chunkText = text.substring(currentPos, chunkEnd).trim();
+    if (chunkText.length > 0) {
+      chunks.push({
+        text: chunkText,
+        start: currentPos,
+        end: chunkEnd,
+      });
+    }
+    currentPos = chunkEnd;
+  }
+  return chunks;
+}
+/**
+ * Maximum chunk size, in characters, for the given feature and execution
+ * mode.
+ *
+ * @param mode - The feature that will process the chunks.
+ * @param isOnDevice - Whether the on-device token quotas apply.
+ * @returns The character budget for a single chunk.
+ */
+export function getChunkSize(
+  mode: "summarize" | "translate" | "rewrite" | "proofread",
+  isOnDevice: boolean,
+): number {
+  if (!isOnDevice) {
+    // Backend: AWS Translate has a 10,000 char limit, use 9,000 for safety.
+    // Other modes should not reach here if shouldChunkInput is used
+    // correctly; 10,000 is the fallback.
+    return mode === "translate" ? 9000 : 10000;
+  }
+  // On-device: chunk size is derived from the per-feature token quota
+  const tokenQuotas = {
+    summarize: 8000,
+    translate: 8000,
+    rewrite: 8000,
+    proofread: 10000,
+  };
+  // Use 80% of the quota for safety
+  const usableTokens = (tokenQuotas[mode] || 8000) * 0.8;
+  // Convert tokens to chars (1 token ≈ 3.5 chars)
+  return Math.floor(usableTokens * 3.5);
+}

package/tsup.config.ts CHANGED Viewed

@@ -2,7 +2,8 @@ import { defineConfig } from "tsup";
 export default defineConfig({
   // Copy non-hashed global CSS so consumers can import it (like Mantine styles)
-  onSuccess: "node -e \"const fs=require('fs'); const path=require('path'); fs.mkdirSync('dist',{recursive:true}); fs.copyFileSync(path.join('src','styles','ai-kit-ui.css'), path.join('dist','ai-kit-ui.css'));\"",
+  onSuccess:
+    "node -e \"const fs=require('fs'); const path=require('path'); fs.mkdirSync('dist',{recursive:true}); fs.copyFileSync(path.join('src','styles','ai-kit-ui.css'), path.join('dist','ai-kit-ui.css'));\"",
   entry: ["src/index.tsx"],
   format: ["cjs", "esm"],