npm - @botpress/zai - Versions diffs - 2.1.19 → 2.2.0 - Mend

@botpress/zai 2.1.19 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/CLAUDE.md +696 -0
package/README.md +28 -2
package/dist/index.d.ts +39 -18
package/dist/index.js +1 -0
package/dist/operations/errors.js +112 -8
package/dist/operations/extract.js +20 -12
package/dist/operations/filter.js +3 -1
package/dist/operations/group.js +278 -0
package/dist/operations/label.js +3 -1
package/dist/operations/summarize.js +3 -1
package/e2e/data/cache.jsonl +219 -0
package/package.json +4 -3
package/src/index.ts +1 -0
package/src/operations/errors.ts +96 -1
package/src/operations/extract.ts +21 -11
package/src/operations/filter.ts +3 -1
package/src/operations/group.ts +421 -0
package/src/operations/label.ts +3 -1
package/src/operations/summarize.ts +3 -2
package/src/zai.ts +7 -9

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@botpress/zai",
   "description": "Zui AI (zai) – An LLM utility library written on top of Zui and the Botpress API",
-  "version": "2.1.19",
+  "version": "2.2.0",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",
   "exports": {
@@ -35,7 +35,8 @@
     "@botpress/cognitive": "0.1.50",
     "json5": "^2.2.3",
     "jsonrepair": "^3.10.0",
-    "lodash-es": "^4.17.21"
+    "lodash-es": "^4.17.21",
+    "p-limit": "^7.2.0"
   },
   "devDependencies": {
     "@botpress/client": "workspace:^",
@@ -53,7 +54,7 @@
   },
   "peerDependencies": {
     "@bpinternal/thicktoken": "^1.0.0",
-    "@bpinternal/zui": "1.2.1"
+    "@bpinternal/zui": "^1.2.2"
   },
   "engines": {
     "node": ">=18.0.0"

package/src/index.ts CHANGED Viewed

@@ -7,5 +7,6 @@ import './operations/check'
 import './operations/filter'
 import './operations/extract'
 import './operations/label'
+import './operations/group'
 export { Zai }

package/src/operations/errors.ts CHANGED Viewed

@@ -1,9 +1,104 @@
+import { ZodError } from '@bpinternal/zui'
 export class JsonParsingError extends Error { // Error whose message embeds the offending JSON plus a formatted description of the underlying error
   public constructor(
     public json: unknown,
     public error: Error
   ) {
-    const message = `Error parsing JSON:\n\n---JSON---\n${json}\n\n---Error---\n\n ${error}`
+    const message = JsonParsingError._formatError(json, error)
     super(message)
   }
+  private static _formatError(json: unknown, error: Error): string { // Builds the message: JSON dump, then per-issue Zod details or a generic invalid-JSON note
+    let errorMessage = 'Error parsing JSON:\n\n'
+    errorMessage += `---JSON---\n${json}\n\n` // NOTE(review): json is unknown and is stringified implicitly, so a non-string object renders as "[object Object]"
+    if (error instanceof ZodError) {
+      errorMessage += '---Validation Errors---\n\n'
+      errorMessage += JsonParsingError._formatZodError(error)
+    } else {
+      errorMessage += '---Error---\n\n'
+      errorMessage += 'The JSON provided is not valid JSON.\n'
+      errorMessage += `Details: ${error.message}\n`
+    }
+    return errorMessage
+  }
+  private static _formatZodError(zodError: ZodError): string { // Renders each Zod issue as a numbered "Field / Problem / Message" entry
+    const issues = zodError.issues
+    if (issues.length === 0) {
+      return 'Unknown validation error\n'
+    }
+    let message = ''
+    for (let i = 0; i < issues.length; i++) {
+      const issue = issues[i]
+      const path = issue.path.length > 0 ? issue.path.join('.') : 'root' // an empty path is reported as "root"
+      message += `${i + 1}. Field: "${path}"\n`
+      switch (issue.code) {
+        case 'invalid_type':
+          message += `   Problem: Expected ${issue.expected}, but received ${issue.received}\n`
+          message += `   Message: ${issue.message}\n`
+          break
+        case 'invalid_string':
+          if ('validation' in issue) {
+            message += `   Problem: Invalid ${issue.validation} format\n`
+          }
+          message += `   Message: ${issue.message}\n`
+          break
+        case 'too_small':
+          if (issue.type === 'string') {
+            if (issue.exact) {
+              message += `   Problem: String must be exactly ${issue.minimum} characters\n`
+            } else {
+              message += `   Problem: String must be at least ${issue.minimum} characters\n`
+            }
+          } else if (issue.type === 'number') {
+            message += `   Problem: Number must be ${issue.inclusive ? 'at least' : 'greater than'} ${issue.minimum}\n`
+          } else if (issue.type === 'array') {
+            message += `   Problem: Array must contain ${issue.inclusive ? 'at least' : 'more than'} ${issue.minimum} items\n`
+          }
+          message += `   Message: ${issue.message}\n` // issue types other than string/number/array get only this Message line
+          break
+        case 'too_big':
+          if (issue.type === 'string') {
+            if (issue.exact) {
+              message += `   Problem: String must be exactly ${issue.maximum} characters\n`
+            } else {
+              message += `   Problem: String must be at most ${issue.maximum} characters\n`
+            }
+          } else if (issue.type === 'number') {
+            message += `   Problem: Number must be ${issue.inclusive ? 'at most' : 'less than'} ${issue.maximum}\n`
+          } else if (issue.type === 'array') {
+            message += `   Problem: Array must contain ${issue.inclusive ? 'at most' : 'fewer than'} ${issue.maximum} items\n`
+          }
+          message += `   Message: ${issue.message}\n` // issue types other than string/number/array get only this Message line
+          break
+        case 'invalid_enum_value':
+          message += `   Problem: Invalid value "${issue.received}"\n`
+          message += `   Allowed values: ${issue.options.map((o: any) => `"${o}"`).join(', ')}\n`
+          message += `   Message: ${issue.message}\n`
+          break
+        case 'invalid_literal':
+          message += `   Problem: Expected the literal value "${issue.expected}", but received "${issue.received}"\n`
+          message += `   Message: ${issue.message}\n`
+          break
+        case 'invalid_union':
+          message += "   Problem: Value doesn't match any of the expected formats\n"
+          message += `   Message: ${issue.message}\n`
+          break
+        default: // any other issue code: report the issue's own message as the problem
+          message += `   Problem: ${issue.message}\n`
+      }
+      if (i < issues.length - 1) { // blank line between entries, none after the last
+        message += '\n'
+      }
+    }
+    return message
+  }
 }

package/src/operations/extract.ts CHANGED Viewed

@@ -1,10 +1,11 @@
 // eslint-disable consistent-type-definitions
-import { z, ZodObject } from '@bpinternal/zui'
+import { z, ZodObject, transforms } from '@bpinternal/zui'
 import JSON5 from 'json5'
 import { jsonrepair } from 'jsonrepair'
 import { chunk, isArray } from 'lodash-es'
+import pLimit from 'p-limit'
 import { ZaiContext } from '../context'
 import { Response } from '../response'
 import { getTokenizer } from '../tokenizer'
@@ -48,6 +49,7 @@ declare module '@botpress/zai' {
 const START = '■json_start■'
 const END = '■json_end■'
 const NO_MORE = '■NO_MORE_ELEMENT■'
+const ZERO_ELEMENTS = '■ZERO_ELEMENTS■'
 const extract = async <S extends OfType<AnyObjectOrArray>>(
   input: unknown,
@@ -56,7 +58,9 @@ const extract = async <S extends OfType<AnyObjectOrArray>>(
   ctx: ZaiContext
 ): Promise<S['_output']> => {
   ctx.controller.signal.throwIfAborted()
-  let schema = _schema as any as z.ZodType
+  let schema = transforms.fromJSONSchema(transforms.toJSONSchema(_schema as any as z.ZodType))
   const options = Options.parse(_options ?? {})
   const tokenizer = await getTokenizer()
   const model = await ctx.getModel()
@@ -110,18 +114,21 @@ const extract = async <S extends OfType<AnyObjectOrArray>>(
   const inputAsString = stringify(input)
   if (tokenizer.count(inputAsString) > options.chunkLength) {
+    const limit = pLimit(10) // Limit to 10 concurrent extraction operations
     const tokens = tokenizer.split(inputAsString)
     const chunks = chunk(tokens, options.chunkLength).map((x) => x.join(''))
     const all = await Promise.allSettled(
       chunks.map((chunk) =>
-        extract(
-          chunk,
-          originalSchema,
-          {
-            ...options,
-            strict: false, // We don't want to fail on strict mode for sub-chunks
-          },
-          ctx
+        limit(() =>
+          extract(
+            chunk,
+            originalSchema,
+            {
+              ...options,
+              strict: false, // We don't want to fail on strict mode for sub-chunks
+            },
+            ctx
+          )
         )
       )
     ).then((results) =>
@@ -162,8 +169,11 @@ Merge it back into a final result.`.trim(),
     instructions.push('You may have multiple elements, or zero elements in the input.')
     instructions.push('You must extract each element separately.')
     instructions.push(`Each element must be a JSON object with exactly the format: ${START}${shape}${END}`)
+    instructions.push(`If there are no elements to extract, respond with ${ZERO_ELEMENTS}.`)
     instructions.push(`When you are done extracting all elements, type "${NO_MORE}" to finish.`)
-    instructions.push(`For example, if you have zero elements, the output should look like this: ${NO_MORE}`)
+    instructions.push(
+      `For example, if you have zero elements, the output should look like this: ${ZERO_ELEMENTS}${NO_MORE}`
+    )
     instructions.push(
       `For example, if you have two elements, the output should look like this: ${START}${abbv}${END}${START}${abbv}${END}${NO_MORE}`
     )

package/src/operations/filter.ts CHANGED Viewed

@@ -2,6 +2,7 @@
 import { z } from '@bpinternal/zui'
 import { clamp } from 'lodash-es'
+import pLimit from 'p-limit'
 import { ZaiContext } from '../context'
 import { Response } from '../response'
 import { getTokenizer } from '../tokenizer'
@@ -259,7 +260,8 @@ The condition is: "${condition}"
     return partial
   }
-  const filteredChunks = await Promise.all(chunks.map(filterChunk))
+  const limit = pLimit(10) // Limit to 10 concurrent filtering operations
+  const filteredChunks = await Promise.all(chunks.map((chunk) => limit(() => filterChunk(chunk))))
   return filteredChunks.flat()
 }

package/src/operations/group.ts ADDED Viewed

@@ -0,0 +1,421 @@
+// eslint-disable consistent-type-definitions
+import { z } from '@bpinternal/zui'
+import { clamp } from 'lodash-es'
+import pLimit from 'p-limit'
+import { ZaiContext } from '../context'
+import { Response } from '../response'
+import { getTokenizer } from '../tokenizer'
+import { stringify } from '../utils'
+import { Zai } from '../zai'
+import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from './constants'
+export type Group<T> = { // One resulting group: an id, a display label, and the input elements placed in it
+  id: string
+  label: string
+  elements: T[]
+}
+type InitialGroup = { // Caller-supplied group accepted through Options.initialGroups
+  id: string
+  label: string
+  elements?: unknown[]
+}
+const _InitialGroup = z.object({ // Runtime validator for InitialGroup
+  id: z.string().min(1).max(100), // 1 to 100 characters
+  label: z.string().min(1).max(250), // 1 to 250 characters
+  elements: z.array(z.any()).optional().default([]), // defaults to an empty array when omitted
+})
+export type Options = { // Caller-facing options for zai.group; every field is optional
+  instructions?: string // free-form guidance inserted into the system prompt
+  tokensPerElement?: number // max tokens kept per element when it is counted and shown to the model
+  chunkLength?: number // NOTE(review): validated below but not read by group() in this file — confirm whether it should drive chunking
+  initialGroups?: Array<InitialGroup> // groups registered before any element is processed
+}
+const _Options = z.object({ // Runtime validator for Options; fills in the defaults listed below
+  instructions: z.string().optional(),
+  tokensPerElement: z.number().min(1).max(100_000).optional().default(250),
+  chunkLength: z.number().min(100).max(100_000).optional().default(16_000),
+  initialGroups: z.array(_InitialGroup).optional().default([]),
+})
+declare module '@botpress/zai' { // Module augmentation: declares group() on the Zai interface
+  interface Zai {
+    group<T>(input: Array<T>, options?: Options): Response<Array<Group<T>>, Record<string, T[]>>
+  }
+}
+const END = '■END■' // Passed as the generation stop sequence and appended to both prompts
+// Internal record kept per group while grouping
+type GroupInfo = {
+  id: string
+  label: string
+  normalizedLabel: string // label after normalizeLabel(); the key used to match labels to groups
+}
+/**
+ * Reduces a group label to the canonical key used to decide whether two labels
+ * name the same group.
+ *
+ * Steps, in order: trim, lowercase, drop a leading "group" / "new group" / "new"
+ * that is followed by "-" or ":", then drop such a leading word that is followed
+ * by whitespace, and trim again.
+ *
+ * @param label - Raw label as supplied by the caller or returned by the model.
+ * @returns The normalized label (may be empty).
+ */
+const normalizeLabel = (label: string): string => {
+  const lowered = label.trim().toLowerCase()
+  // Prefix followed by a "-" or ":" delimiter, e.g. "group: fruits"
+  const withoutDelimitedPrefix = lowered.replace(/^(group|new group|new)\s*[-:]\s*/i, '')
+  // Prefix followed only by whitespace, e.g. "new vehicles"
+  const withoutSpacedPrefix = withoutDelimitedPrefix.replace(/^(group|new group|new)\s+/i, '')
+  return withoutSpacedPrefix.trim()
+}
+const group = async <T>(input: Array<T>, _options: Options | undefined, ctx: ZaiContext): Promise<Array<Group<T>>> => { // Groups input under model-chosen labels; resolves to the non-empty groups only
+  ctx.controller.signal.throwIfAborted() // presumably a standard AbortSignal, i.e. throws if already aborted — confirm against ZaiContext
+  const options = _Options.parse(_options ?? {}) // validates and applies defaults
+  const tokenizer = await getTokenizer()
+  const model = await ctx.getModel()
+  if (input.length === 0) { // nothing to group
+    return []
+  }
+  // Bookkeeping shared by all phases below
+  const groups = new Map<string, GroupInfo>() // groupId -> GroupInfo
+  const groupElements = new Map<string, Set<number>>() // groupId -> Set of element indices
+  const elementGroups = new Map<number, Set<string>>() // elementIndex -> Set of groupIds the element was assigned to
+  const labelToGroupId = new Map<string, string>() // normalized label -> groupId
+  let groupIdCounter = 0 // suffix for generated ids: group_0, group_1, ...
+  // Seed the maps with caller-provided groups; note that ig.elements is not read here
+  options.initialGroups.forEach((ig) => {
+    const normalized = normalizeLabel(ig.label)
+    groups.set(ig.id, { id: ig.id, label: ig.label, normalizedLabel: normalized })
+    groupElements.set(ig.id, new Set())
+    labelToGroupId.set(normalized, ig.id) // a later initial group with the same normalized label overwrites this mapping
+  })
+  // Pair each element with its original index and the string form used for prompts and token counting
+  const elements = input.map((element, idx) => ({
+    element,
+    index: idx,
+    stringified: stringify(element, false),
+  }))
+  // Token budget: model input limit minus both prompt buffers, minus the instructions share; the rest is split 40/60 between group labels and elements
+  const TOKENS_TOTAL_MAX = model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER
+  const TOKENS_INSTRUCTIONS_MAX = options.instructions
+    ? clamp(tokenizer.count(options.instructions), 100, TOKENS_TOTAL_MAX * 0.2)
+    : 0
+  const TOKENS_AVAILABLE = TOKENS_TOTAL_MAX - TOKENS_INSTRUCTIONS_MAX
+  const TOKENS_FOR_GROUPS_MAX = Math.floor(TOKENS_AVAILABLE * 0.4)
+  const TOKENS_FOR_ELEMENTS_MAX = Math.floor(TOKENS_AVAILABLE * 0.6)
+  // Chunk elements greedily: close the current chunk when the next element would exceed the element token budget or the chunk already holds MAX_ELEMENTS_PER_CHUNK items
+  const MAX_ELEMENTS_PER_CHUNK = 50
+  const elementChunks: number[][] = [] // Array of element indices
+  let currentChunk: number[] = []
+  let currentTokens = 0
+  for (const elem of elements) {
+    const truncated = tokenizer.truncate(elem.stringified, options.tokensPerElement)
+    const elemTokens = tokenizer.count(truncated)
+    if (
+      (currentTokens + elemTokens > TOKENS_FOR_ELEMENTS_MAX || currentChunk.length >= MAX_ELEMENTS_PER_CHUNK) &&
+      currentChunk.length > 0 // never emit an empty chunk; an oversized element still gets a chunk of its own
+    ) {
+      elementChunks.push(currentChunk)
+      currentChunk = []
+      currentTokens = 0
+    }
+    currentChunk.push(elem.index)
+    currentTokens += elemTokens
+  }
+  if (currentChunk.length > 0) {
+    elementChunks.push(currentChunk)
+  }
+  // Splits the current group ids into chunks whose label token cost fits TOKENS_FOR_GROUPS_MAX; yields [[]] when no groups exist so callers still make one pass
+  const getGroupChunks = (): string[][] => {
+    const allGroupIds = Array.from(groups.keys())
+    if (allGroupIds.length === 0) return [[]]
+    const chunks: string[][] = []
+    let currentChunk: string[] = []
+    let currentTokens = 0
+    for (const groupId of allGroupIds) {
+      const group = groups.get(groupId)!
+      const groupTokens = tokenizer.count(`${group.label}`) + 10 // +10: fixed per-group allowance, presumably for list formatting — TODO confirm
+      if (currentTokens + groupTokens > TOKENS_FOR_GROUPS_MAX && currentChunk.length > 0) {
+        chunks.push(currentChunk)
+        currentChunk = []
+        currentTokens = 0
+      }
+      currentChunk.push(groupId)
+      currentTokens += groupTokens
+    }
+    if (currentChunk.length > 0) {
+      chunks.push(currentChunk)
+    }
+    return chunks.length > 0 ? chunks : [[]]
+  }
+  // Asks the model to label the given elements (listing the given existing group labels, if any) and parses the reply into { elementIndex, label } pairs
+  const processChunk = async (
+    elementIndices: number[],
+    groupIds: string[]
+  ): Promise<Array<{ elementIndex: number; label: string }>> => {
+    const elementsText = elementIndices
+      .map((idx, i) => { // idx indexes into elements; i is the chunk-local position shown to the model
+        const elem = elements[idx]
+        const truncated = tokenizer.truncate(elem.stringified, options.tokensPerElement)
+        return `■${i}: ${truncated}■`
+      })
+      .join('\n')
+    const groupsList = groupIds.map((gid) => groups.get(gid)!.label)
+    const groupsText =
+      groupsList.length > 0
+        ? `**Existing Groups (prefer reusing these):**\n${groupsList.map((l) => `- ${l}`).join('\n')}\n\n`
+        : ''
+    const systemPrompt = `You are grouping elements into cohesive groups.
+${options.instructions ? `**Instructions:** ${options.instructions}\n` : '**Instructions:** Group similar elements together.'}
+**Important:**
+- Each element gets exactly ONE group label
+- Use EXACT SAME label for similar items (case-sensitive)
+- Create new descriptive labels when needed
+**Output Format:**
+One line per element:
+■0:Group Label■
+■1:Group Label■
+${END}`.trim()
+    const userPrompt = `${groupsText}**Elements (■0 to ■${elementIndices.length - 1}):**
+${elementsText}
+**Task:** For each element, output one line with its group label.
+${END}`.trim()
+    const { extracted } = await ctx.generateContent({
+      systemPrompt,
+      stopSequences: [END],
+      messages: [{ type: 'text', role: 'user', content: userPrompt }],
+      transform: (text) => { // parse every "■<n>:<label>■" occurrence; out-of-range positions and blank labels are skipped
+        const assignments: Array<{ elementIndex: number; label: string }> = []
+        const regex = /■(\d+):([^■]+)■/g
+        let match: RegExpExecArray | null
+        while ((match = regex.exec(text)) !== null) {
+          const idx = parseInt(match[1] ?? '', 10)
+          if (isNaN(idx) || idx < 0 || idx >= elementIndices.length) continue
+          const label = (match[2] ?? '').trim()
+          if (!label) continue
+          assignments.push({
+            elementIndex: elementIndices[idx], // map the chunk-local position back to the global element index
+            label: label.slice(0, 250), // cap at 250 characters, the same maximum _InitialGroup allows for labels
+          })
+        }
+        return assignments
+      },
+    })
+    return extracted
+  }
+  // Phase 1: Process all element chunks against current groups IN PARALLEL (groups is not mutated until every call has finished, so each chunk only sees the initial groups)
+  const elementLimit = pLimit(10) // Separate limiter for element chunks
+  const groupLimit = pLimit(10) // Separate limiter for group chunks
+  // Collect all assignments from parallel processing
+  const allChunkResults = await Promise.all(
+    elementChunks.map((elementChunk) =>
+      elementLimit(async () => {
+        const groupChunks = getGroupChunks()
+        const allAssignments = await Promise.all(
+          groupChunks.map((groupChunk) => groupLimit(() => processChunk(elementChunk, groupChunk)))
+        )
+        return allAssignments.flat()
+      })
+    )
+  )
+  // Process all assignments sequentially to avoid race conditions
+  for (const assignments of allChunkResults) {
+    for (const { elementIndex, label } of assignments) {
+      const normalized = normalizeLabel(label)
+      let groupId = labelToGroupId.get(normalized)
+      if (!groupId) {
+        // Create new group (its display label is the first raw label seen for this normalized form)
+        groupId = `group_${groupIdCounter++}`
+        groups.set(groupId, { id: groupId, label, normalizedLabel: normalized })
+        groupElements.set(groupId, new Set())
+        labelToGroupId.set(normalized, groupId)
+      }
+      // Add element to group
+      groupElements.get(groupId)!.add(elementIndex)
+      // Record the assignment on the element side as well
+      if (!elementGroups.has(elementIndex)) {
+        elementGroups.set(elementIndex, new Set())
+      }
+      elementGroups.get(elementIndex)!.add(groupId)
+    }
+  }
+  // Phase 2: re-run every element that is not assigned to all known groups, this time against the full group list
+  const allGroupIds = Array.from(groups.keys())
+  if (allGroupIds.length > 0) {
+    const elementsNeedingReview: number[] = []
+    for (const elem of elements) {
+      const seenGroups = elementGroups.get(elem.index) ?? new Set() // NOTE(review): elementGroups only records assigned groups, so "seen" here means "assigned to"
+      const unseenCount = allGroupIds.filter((gid) => !seenGroups.has(gid)).length
+      if (unseenCount > 0) {
+        elementsNeedingReview.push(elem.index)
+      }
+    }
+    if (elementsNeedingReview.length > 0) {
+      // Chunk elements needing review (same greedy rule as the initial element chunking)
+      const reviewChunks: number[][] = []
+      let reviewChunk: number[] = []
+      let reviewTokens = 0
+      for (const elemIdx of elementsNeedingReview) {
+        const elem = elements[elemIdx]
+        const truncated = tokenizer.truncate(elem.stringified, options.tokensPerElement)
+        const elemTokens = tokenizer.count(truncated)
+        const shouldStartNewChunk =
+          (reviewTokens + elemTokens > TOKENS_FOR_ELEMENTS_MAX || reviewChunk.length >= MAX_ELEMENTS_PER_CHUNK) &&
+          reviewChunk.length > 0
+        if (shouldStartNewChunk) {
+          reviewChunks.push(reviewChunk)
+          reviewChunk = []
+          reviewTokens = 0
+        }
+        reviewChunk.push(elemIdx)
+        reviewTokens += elemTokens
+      }
+      if (reviewChunk.length > 0) {
+        reviewChunks.push(reviewChunk)
+      }
+      // Process review chunks IN PARALLEL
+      const reviewResults = await Promise.all(
+        reviewChunks.map((chunk) =>
+          elementLimit(async () => {
+            const groupChunks = getGroupChunks()
+            const allAssignments = await Promise.all(
+              groupChunks.map((groupChunk) => groupLimit(() => processChunk(chunk, groupChunk)))
+            )
+            return allAssignments.flat()
+          })
+        )
+      )
+      // Apply review results sequentially; unlike Phase 1, a label that matches no existing group is ignored instead of creating one
+      const updateElementGroupAssignment = (elementIndex: number, label: string) => {
+        const normalized = normalizeLabel(label)
+        const groupId = labelToGroupId.get(normalized)
+        if (!groupId) return
+        // Add the element to the matched group
+        groupElements.get(groupId)!.add(elementIndex)
+        // Initialize element groups if needed
+        const elemGroups = elementGroups.get(elementIndex) ?? new Set()
+        if (!elementGroups.has(elementIndex)) {
+          elementGroups.set(elementIndex, elemGroups)
+        }
+        elemGroups.add(groupId)
+      }
+      for (const assignments of reviewResults) {
+        for (const { elementIndex, label } of assignments) {
+          updateElementGroupAssignment(elementIndex, label)
+        }
+      }
+    }
+  }
+  // Phase 3: Resolve conflicts (elements in multiple groups)
+  for (const [elementIndex, groupSet] of elementGroups.entries()) {
+    if (groupSet.size > 1) {
+      // Element is in multiple groups: keep a single assignment, the first group recorded for it (no frequency counting is done)
+      const groupIds = Array.from(groupSet)
+      // Remove from all groups
+      for (const gid of groupIds) {
+        groupElements.get(gid)?.delete(elementIndex)
+      }
+      // Re-assign to first group in the Set's insertion order (or could use LLM to decide)
+      const finalGroupId = groupIds[0]
+      groupElements.get(finalGroupId)!.add(elementIndex)
+    }
+  }
+  // Build final result: one entry per group holding at least one element; an element that never received a parsed assignment appears in no group
+  const result: Array<Group<T>> = []
+  for (const [groupId, elementIndices] of groupElements.entries()) {
+    if (elementIndices.size > 0) {
+      const groupInfo = groups.get(groupId)!
+      result.push({
+        id: groupInfo.id,
+        label: groupInfo.label,
+        elements: Array.from(elementIndices).map((idx) => elements[idx].element),
+      })
+    }
+  }
+  return result
+}
+/**
+ * Public entry point for `zai.group(input, options)`.
+ *
+ * Creates a `ZaiContext` for a `zai.group` task and returns a `Response` wrapping the
+ * pending `group(...)` call. The third constructor argument converts the array of groups
+ * into a plain object keyed by label (presumably the Response's simplified value —
+ * confirm against `Response`).
+ *
+ * @param input - Elements to group.
+ * @param _options - Optional grouping options; they are validated inside `group`.
+ * @returns A `Response` over the group list and the label-keyed object of elements.
+ */
+Zai.prototype.group = function <T>(
+  this: Zai,
+  input: Array<T>,
+  _options?: Options
+): Response<Array<Group<T>>, Record<string, T[]>> {
+  const context = new ZaiContext({
+    client: this.client,
+    modelId: this.Model,
+    taskId: this.taskId,
+    taskType: 'zai.group',
+    adapter: this.adapter,
+  })
+  return new Response<Array<Group<T>>, Record<string, T[]>>(context, group(input, _options, context), (result) => {
+    // Labels come from the model or the caller, so they can equal names inherited from
+    // Object.prototype ("constructor", "toString", "__proto__"). Indexing a plain object with
+    // such a label finds the inherited member instead of undefined, and the following push
+    // throws. Collect into a Map and convert at the end: Object.fromEntries defines every
+    // key as an own property.
+    const byLabel = new Map<string, T[]>()
+    for (const { label, elements } of result) {
+      const bucket = byLabel.get(label)
+      if (bucket === undefined) {
+        byLabel.set(label, elements.slice())
+      } else {
+        // Append one by one: spreading a very large array into push() can exceed the engine's argument limit
+        for (const element of elements) {
+          bucket.push(element)
+        }
+      }
+    }
+    return Object.fromEntries(byLabel)
+  })
+}

package/src/operations/label.ts CHANGED Viewed

@@ -2,6 +2,7 @@
 import { z } from '@bpinternal/zui'
 import { chunk, clamp } from 'lodash-es'
+import pLimit from 'p-limit'
 import { ZaiContext } from '../context'
 import { Response } from '../response'
 import { getTokenizer } from '../tokenizer'
@@ -162,9 +163,10 @@ const label = async <T extends string>(
   const inputAsString = stringify(input)
   if (tokenizer.count(inputAsString) > CHUNK_INPUT_MAX_TOKENS) {
+    const limit = pLimit(10) // Limit to 10 concurrent labeling operations
     const tokens = tokenizer.split(inputAsString)
     const chunks = chunk(tokens, CHUNK_INPUT_MAX_TOKENS).map((x) => x.join(''))
-    const allLabels = await Promise.all(chunks.map((chunk) => label(chunk, _labels, _options, ctx)))
+    const allLabels = await Promise.all(chunks.map((chunk) => limit(() => label(chunk, _labels, _options, ctx))))
     // Merge all the labels together (those who are true will remain true)
     return allLabels.reduce((acc, x) => {

package/src/operations/summarize.ts CHANGED Viewed

@@ -2,6 +2,7 @@
 import { z } from '@bpinternal/zui'
 import { chunk } from 'lodash-es'
+import pLimit from 'p-limit'
 import { ZaiContext } from '../context'
 import { Response } from '../response'
@@ -115,9 +116,9 @@ ${newText}
   const chunkSize = Math.ceil(tokens.length / (parts * N))
   if (useMergeSort) {
-    // TODO: use pLimit here to not have too many chunks
+    const limit = pLimit(10) // Limit to 10 concurrent summarization operations
     const chunks = chunk(tokens, chunkSize).map((x) => x.join(''))
-    const allSummaries = (await Promise.allSettled(chunks.map((chunk) => summarize(chunk, options, ctx))))
+    const allSummaries = (await Promise.allSettled(chunks.map((chunk) => limit(() => summarize(chunk, options, ctx)))))
       .filter((x) => x.status === 'fulfilled')
       .map((x) => x.value)
     return summarize(allSummaries.join('\n\n============\n\n'), options, ctx)