npm - @jackchen_me/open-multi-agent - Versions diffs - 0.2.0 → 1.0.0 - Mend

@jackchen_me/open-multi-agent 0.2.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104) hide show

package/.github/workflows/ci.yml +1 -1
package/CLAUDE.md +11 -3
package/README.md +87 -20
package/README_zh.md +85 -25
package/dist/agent/agent.d.ts +15 -1
package/dist/agent/agent.d.ts.map +1 -1
package/dist/agent/agent.js +144 -10
package/dist/agent/agent.js.map +1 -1
package/dist/agent/loop-detector.d.ts +39 -0
package/dist/agent/loop-detector.d.ts.map +1 -0
package/dist/agent/loop-detector.js +122 -0
package/dist/agent/loop-detector.js.map +1 -0
package/dist/agent/pool.d.ts +2 -1
package/dist/agent/pool.d.ts.map +1 -1
package/dist/agent/pool.js +4 -2
package/dist/agent/pool.js.map +1 -1
package/dist/agent/runner.d.ts +23 -1
package/dist/agent/runner.d.ts.map +1 -1
package/dist/agent/runner.js +113 -12
package/dist/agent/runner.js.map +1 -1
package/dist/index.d.ts +3 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +2 -0
package/dist/index.js.map +1 -1
package/dist/llm/adapter.d.ts +4 -1
package/dist/llm/adapter.d.ts.map +1 -1
package/dist/llm/adapter.js +11 -0
package/dist/llm/adapter.js.map +1 -1
package/dist/llm/copilot.d.ts.map +1 -1
package/dist/llm/copilot.js +2 -1
package/dist/llm/copilot.js.map +1 -1
package/dist/llm/gemini.d.ts +65 -0
package/dist/llm/gemini.d.ts.map +1 -0
package/dist/llm/gemini.js +317 -0
package/dist/llm/gemini.js.map +1 -0
package/dist/llm/grok.d.ts +21 -0
package/dist/llm/grok.d.ts.map +1 -0
package/dist/llm/grok.js +24 -0
package/dist/llm/grok.js.map +1 -0
package/dist/llm/openai-common.d.ts +8 -1
package/dist/llm/openai-common.d.ts.map +1 -1
package/dist/llm/openai-common.js +35 -2
package/dist/llm/openai-common.js.map +1 -1
package/dist/llm/openai.d.ts +1 -1
package/dist/llm/openai.d.ts.map +1 -1
package/dist/llm/openai.js +20 -2
package/dist/llm/openai.js.map +1 -1
package/dist/orchestrator/orchestrator.d.ts.map +1 -1
package/dist/orchestrator/orchestrator.js +89 -9
package/dist/orchestrator/orchestrator.js.map +1 -1
package/dist/task/queue.d.ts +31 -2
package/dist/task/queue.d.ts.map +1 -1
package/dist/task/queue.js +69 -2
package/dist/task/queue.js.map +1 -1
package/dist/tool/text-tool-extractor.d.ts +32 -0
package/dist/tool/text-tool-extractor.d.ts.map +1 -0
package/dist/tool/text-tool-extractor.js +187 -0
package/dist/tool/text-tool-extractor.js.map +1 -0
package/dist/types.d.ts +139 -7
package/dist/types.d.ts.map +1 -1
package/dist/utils/trace.d.ts +12 -0
package/dist/utils/trace.d.ts.map +1 -0
package/dist/utils/trace.js +30 -0
package/dist/utils/trace.js.map +1 -0
package/examples/06-local-model.ts +1 -0
package/examples/08-gemma4-local.ts +76 -87
package/examples/09-structured-output.ts +73 -0
package/examples/10-task-retry.ts +132 -0
package/examples/11-trace-observability.ts +133 -0
package/examples/12-grok.ts +154 -0
package/examples/13-gemini.ts +48 -0
package/package.json +11 -1
package/src/agent/agent.ts +159 -10
package/src/agent/loop-detector.ts +137 -0
package/src/agent/pool.ts +9 -2
package/src/agent/runner.ts +148 -19
package/src/index.ts +15 -0
package/src/llm/adapter.ts +12 -1
package/src/llm/copilot.ts +2 -1
package/src/llm/gemini.ts +378 -0
package/src/llm/grok.ts +29 -0
package/src/llm/openai-common.ts +41 -2
package/src/llm/openai.ts +23 -3
package/src/orchestrator/orchestrator.ts +105 -11
package/src/task/queue.ts +73 -3
package/src/tool/text-tool-extractor.ts +219 -0
package/src/types.ts +157 -6
package/src/utils/trace.ts +34 -0
package/tests/agent-hooks.test.ts +473 -0
package/tests/agent-pool.test.ts +212 -0
package/tests/approval.test.ts +464 -0
package/tests/built-in-tools.test.ts +393 -0
package/tests/gemini-adapter.test.ts +97 -0
package/tests/grok-adapter.test.ts +74 -0
package/tests/llm-adapters.test.ts +357 -0
package/tests/loop-detection.test.ts +456 -0
package/tests/openai-fallback.test.ts +159 -0
package/tests/orchestrator.test.ts +281 -0
package/tests/scheduler.test.ts +221 -0
package/tests/team-messaging.test.ts +329 -0
package/tests/text-tool-extractor.test.ts +170 -0
package/tests/trace.test.ts +453 -0
package/vitest.config.ts +9 -0
package/examples/09-gemma4-auto-orchestration.ts +0 -162

package/src/llm/gemini.ts ADDED Viewed

@@ -0,0 +1,378 @@
+/**
+ * @fileoverview Google Gemini adapter implementing {@link LLMAdapter}.
+ *
+ * Built for `@google/genai` (the unified Google Gen AI SDK, v1.x), NOT the
+ * legacy `@google/generative-ai` package.
+ *
+ * Converts between the framework's internal {@link ContentBlock} types and the
+ * `@google/genai` SDK's wire format, handling tool definitions, system prompts,
+ * and both batch and streaming response paths.
+ *
+ * API key resolution order:
+ *   1. `apiKey` constructor argument
+ *   2. `GEMINI_API_KEY` environment variable
+ *   3. `GOOGLE_API_KEY` environment variable
+ *
+ * @example
+ * ```ts
+ * import { GeminiAdapter } from './gemini.js'
+ *
+ * const adapter = new GeminiAdapter()
+ * const response = await adapter.chat(messages, {
+ *   model: 'gemini-2.5-flash',
+ *   maxTokens: 1024,
+ * })
+ * ```
+ */
+import {
+  GoogleGenAI,
+  FunctionCallingConfigMode,
+  type Content,
+  type FunctionDeclaration,
+  type GenerateContentConfig,
+  type GenerateContentResponse,
+  type Part,
+  type Tool as GeminiTool,
+} from '@google/genai'
+import type {
+  ContentBlock,
+  LLMAdapter,
+  LLMChatOptions,
+  LLMMessage,
+  LLMResponse,
+  LLMStreamOptions,
+  LLMToolDef,
+  StreamEvent,
+  ToolUseBlock,
+} from '../types.js'
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+/**
+ * Translate a framework conversation role into the role name Gemini expects.
+ *
+ * The framework labels the model's turns `"assistant"`, whereas Gemini labels
+ * them `"model"`. Anything that is not `"assistant"` is sent as `"user"`.
+ *
+ * @param role - Framework role of the message being converted.
+ * @returns `"model"` for assistant messages, `"user"` otherwise.
+ */
+function toGeminiRole(role: 'user' | 'assistant'): string {
+  if (role === 'assistant') {
+    return 'model'
+  }
+  return 'user'
+}
+/**
+ * Translate framework messages into the {@link Content}[] shape Gemini expects.
+ *
+ * Mapping per content block:
+ * - `text`        → `{ text }`
+ * - `tool_use`    → `{ functionCall: { id, name, args } }`
+ * - `tool_result` → `{ functionResponse: { id, name, response } }`
+ * - `image`       → `{ inlineData: { mimeType, data } }`
+ *
+ * Gemini's `functionResponse` carries the function *name*, but a framework
+ * `tool_result` block only carries the `tool_use_id`. An id → name index is
+ * therefore built from every `tool_use` block in the conversation first; when
+ * no matching call is found, the id itself is used as the name.
+ *
+ * @param messages - Conversation history in framework format.
+ * @returns One Gemini `Content` entry per input message, in the same order.
+ * @throws Error if a block has a type this adapter does not handle.
+ */
+function toGeminiContents(messages: LLMMessage[]): Content[] {
+  // Pass 1: index every tool call by id so tool results can look up their name.
+  const nameByCallId = new Map<string, string>()
+  for (const message of messages) {
+    for (const block of message.content) {
+      if (block.type !== 'tool_use') continue
+      nameByCallId.set(block.id, block.name)
+    }
+  }
+  // Pass 2: convert each message, block by block.
+  const contents: Content[] = []
+  for (const message of messages) {
+    const parts: Part[] = []
+    for (const block of message.content) {
+      if (block.type === 'text') {
+        parts.push({ text: block.text })
+      } else if (block.type === 'tool_use') {
+        parts.push({
+          functionCall: { id: block.id, name: block.name, args: block.input },
+        })
+      } else if (block.type === 'tool_result') {
+        const resolvedName = nameByCallId.get(block.tool_use_id) ?? block.tool_use_id
+        // Non-string results are serialised so the response payload is always text.
+        const payload =
+          typeof block.content === 'string'
+            ? block.content
+            : JSON.stringify(block.content)
+        parts.push({
+          functionResponse: {
+            id: block.tool_use_id,
+            name: resolvedName,
+            response: {
+              content: payload,
+              isError: block.is_error ?? false,
+            },
+          },
+        })
+      } else if (block.type === 'image') {
+        parts.push({
+          inlineData: {
+            mimeType: block.source.media_type,
+            data: block.source.data,
+          },
+        })
+      } else {
+        // Compile-time exhaustiveness check; at runtime reject unknown blocks loudly.
+        const _exhaustive: never = block
+        throw new Error(`Unhandled content block type: ${JSON.stringify(_exhaustive)}`)
+      }
+    }
+    contents.push({ role: toGeminiRole(message.role), parts })
+  }
+  return contents
+}
+/**
+ * Build the Gemini `tools` config array from framework {@link LLMToolDef}s.
+ *
+ * Each tool becomes a function declaration whose JSON schema is passed through
+ * `parametersJsonSchema` (rather than `parameters` or `input_schema`). Every
+ * declaration is placed under one single tool entry.
+ *
+ * @param tools - Framework tool definitions to expose to the model.
+ * @returns A one-element array holding all function declarations.
+ */
+function toGeminiTools(tools: readonly LLMToolDef[]): GeminiTool[] {
+  const declarations: FunctionDeclaration[] = []
+  for (const { name, description, inputSchema } of tools) {
+    declarations.push({
+      name,
+      description,
+      parametersJsonSchema: inputSchema as Record<string, unknown>,
+    })
+  }
+  return [{ functionDeclarations: declarations }]
+}
+/**
+ * Build the {@link GenerateContentConfig} shared by chat() and stream().
+ *
+ * - `maxOutputTokens` defaults to 4096 when `options.maxTokens` is not set.
+ * - `temperature` and `systemPrompt` are forwarded as-is (possibly undefined).
+ * - `tools` / `toolConfig` are only included when at least one tool is
+ *   supplied. An empty tool list is treated exactly like "no tools" so that no
+ *   empty `functionDeclarations` entry or function-calling config is sent when
+ *   there is nothing for the model to call.
+ *
+ * @param options - Chat or stream options supplied by the caller.
+ * @returns The request config to pass to the `@google/genai` SDK.
+ */
+function buildConfig(
+  options: LLMChatOptions | LLMStreamOptions,
+): GenerateContentConfig {
+  // Missing or zero-length tool lists both collapse to undefined.
+  const activeTools = options.tools?.length ? options.tools : undefined
+  return {
+    maxOutputTokens: options.maxTokens ?? 4096,
+    temperature: options.temperature,
+    systemInstruction: options.systemPrompt,
+    tools: activeTools ? toGeminiTools(activeTools) : undefined,
+    toolConfig: activeTools
+      ? { functionCallingConfig: { mode: FunctionCallingConfigMode.AUTO } }
+      : undefined,
+  }
+}
+/**
+ * Fabricate a pseudo-random ID string of the form `gemini-<millis>-<suffix>`.
+ *
+ * The ID combines the current timestamp with a short base-36 random suffix, so
+ * every call yields a fresh value; it is not derived from its inputs and is
+ * not cryptographically strong. It is used for response IDs and for tool use
+ * blocks when Gemini does not supply a call ID, so the framework's
+ * {@link ToolUseBlock} contract is still satisfied.
+ *
+ * @returns A newly generated identifier.
+ */
+function generateId(): string {
+  const timestamp = Date.now()
+  const randomSuffix = Math.random().toString(36).slice(2, 9)
+  return `gemini-${timestamp}-${randomSuffix}`
+}
+/**
+ * Return the ID of the function call carried by a Gemini part, generating one
+ * when the part has no function call or the call has no ID.
+ *
+ * The `id` field is read through a conservative cast because it may be absent
+ * from the response.
+ *
+ * @param part - Gemini response part, expected to hold a `functionCall`.
+ * @returns The call's own ID, or a freshly generated fallback ID.
+ */
+function getFunctionCallId(part: Part): string {
+  const call = part.functionCall as { id?: string } | undefined
+  const suppliedId = call?.id
+  return suppliedId ?? generateId()
+}
+/**
+ * Translate a Gemini {@link GenerateContentResponse} into a framework
+ * {@link LLMResponse}.
+ *
+ * Only the first candidate is read. Non-empty text parts become `text` blocks
+ * and function-call parts become `tool_use` blocks; every other part type is
+ * dropped.
+ *
+ * Stop reason mapping:
+ * - finish reason `MAX_TOKENS`          → `max_tokens`
+ * - otherwise, any `tool_use` block     → `tool_use`
+ * - otherwise                           → `end_turn`
+ *
+ * @param response - Raw SDK response.
+ * @param id       - ID to assign to the framework response.
+ * @param model    - Model name to report on the framework response.
+ * @returns The converted response; token counts default to 0 when absent.
+ */
+function fromGeminiResponse(
+  response: GenerateContentResponse,
+  id: string,
+  model: string,
+): LLMResponse {
+  const firstCandidate = response.candidates?.[0]
+  const parts = firstCandidate?.content?.parts ?? []
+  const blocks: ContentBlock[] = []
+  for (const part of parts) {
+    if (part.text !== undefined && part.text !== '') {
+      blocks.push({ type: 'text', text: part.text })
+      continue
+    }
+    if (part.functionCall === undefined) {
+      // inlineData echoes and other part types are silently ignored.
+      continue
+    }
+    blocks.push({
+      type: 'tool_use',
+      id: getFunctionCallId(part),
+      name: part.functionCall.name ?? '',
+      input: (part.functionCall.args ?? {}) as Record<string, unknown>,
+    })
+  }
+  // Gemini may report STOP even when it returned function calls, so the
+  // presence of a tool_use block decides the stop reason in that case.
+  const finishReason = firstCandidate?.finishReason as string | undefined
+  const requestedTool = blocks.some((block) => block.type === 'tool_use')
+  const stop_reason: LLMResponse['stop_reason'] =
+    finishReason === 'MAX_TOKENS'
+      ? 'max_tokens'
+      : requestedTool
+        ? 'tool_use'
+        : 'end_turn'
+  const tokenUsage = response.usageMetadata
+  return {
+    id,
+    content: blocks,
+    model,
+    stop_reason,
+    usage: {
+      input_tokens: tokenUsage?.promptTokenCount ?? 0,
+      output_tokens: tokenUsage?.candidatesTokenCount ?? 0,
+    },
+  }
+}
+// ---------------------------------------------------------------------------
+// Adapter implementation
+// ---------------------------------------------------------------------------
+/**
+ * LLM adapter backed by the Google Gemini API via `@google/genai`.
+ *
+ * The instance holds only the SDK client; all per-request state lives in local
+ * variables of `chat()` / `stream()`, so a single instance may be shared
+ * across concurrent agent runs.
+ */
+export class GeminiAdapter implements LLMAdapter {
+  readonly name = 'gemini'
+  readonly #client: GoogleGenAI
+  /**
+   * @param apiKey - Gemini API key. When omitted, `GEMINI_API_KEY` and then
+   *                 `GOOGLE_API_KEY` are read from the environment.
+   */
+  constructor(apiKey?: string) {
+    this.#client = new GoogleGenAI({
+      apiKey: apiKey ?? process.env['GEMINI_API_KEY'] ?? process.env['GOOGLE_API_KEY'],
+    })
+  }
+  // -------------------------------------------------------------------------
+  // chat()
+  // -------------------------------------------------------------------------
+  /**
+   * Send a non-streaming chat request and return the complete
+   * {@link LLMResponse}.
+   *
+   * Uses `ai.models.generateContent()` with the full conversation as `contents`.
+   *
+   * @param messages - Conversation history in framework format.
+   * @param options  - Model name, generation limits, system prompt and tools.
+   * @returns The converted response; SDK errors propagate to the caller.
+   */
+  async chat(messages: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
+    const id = generateId()
+    const contents = toGeminiContents(messages)
+    const response = await this.#client.models.generateContent({
+      model: options.model,
+      contents,
+      config: buildConfig(options),
+    })
+    return fromGeminiResponse(response, id, options.model)
+  }
+  // -------------------------------------------------------------------------
+  // stream()
+  // -------------------------------------------------------------------------
+  /**
+   * Send a streaming chat request and yield {@link StreamEvent}s as chunks
+   * arrive from `ai.models.generateContentStream()`.
+   *
+   * Content and token counts are accumulated while streaming so that the
+   * terminal `done` event carries a complete {@link LLMResponse}. Consecutive
+   * text deltas are coalesced into a single `text` block in that final
+   * response (instead of one block per chunk), matching the shape `chat()`
+   * produces; the incremental `text` events themselves are unchanged.
+   *
+   * Sequence of events:
+   * - Zero or more `text` events with incremental deltas
+   * - Zero or more `tool_use` events (one per function call part)
+   * - Exactly one terminal event: `done` or `error`
+   *
+   * @param messages - Conversation history in framework format.
+   * @param options  - Model name, generation limits, system prompt and tools.
+   * @returns An async iterable of stream events; errors are reported as an
+   *          `error` event rather than thrown.
+   */
+  async *stream(
+    messages: LLMMessage[],
+    options: LLMStreamOptions,
+  ): AsyncIterable<StreamEvent> {
+    const id = generateId()
+    const contents = toGeminiContents(messages)
+    try {
+      const streamResponse = await this.#client.models.generateContentStream({
+        model: options.model,
+        contents,
+        config: buildConfig(options),
+      })
+      // Accumulators for building the done payload.
+      const accumulatedContent: ContentBlock[] = []
+      let inputTokens = 0
+      let outputTokens = 0
+      let lastFinishReason: string | undefined
+      for await (const chunk of streamResponse) {
+        const candidate = chunk.candidates?.[0]
+        // Keep the most recent token counts; earlier values survive when a
+        // later chunk omits a field.
+        if (chunk.usageMetadata) {
+          inputTokens = chunk.usageMetadata.promptTokenCount ?? inputTokens
+          outputTokens = chunk.usageMetadata.candidatesTokenCount ?? outputTokens
+        }
+        if (candidate?.finishReason) {
+          lastFinishReason = candidate.finishReason as string
+        }
+        for (const part of candidate?.content?.parts ?? []) {
+          if (part.text) {
+            // Merge into the previous block when it is also text, so the final
+            // response does not contain one fragment per streamed chunk. A
+            // tool_use block in between starts a new text block, preserving order.
+            const lastIndex = accumulatedContent.length - 1
+            const lastBlock = accumulatedContent[lastIndex]
+            if (lastBlock !== undefined && lastBlock.type === 'text') {
+              accumulatedContent[lastIndex] = {
+                type: 'text',
+                text: lastBlock.text + part.text,
+              }
+            } else {
+              accumulatedContent.push({ type: 'text', text: part.text })
+            }
+            yield { type: 'text', data: part.text } satisfies StreamEvent
+          } else if (part.functionCall) {
+            const toolId = getFunctionCallId(part)
+            const toolUseBlock: ToolUseBlock = {
+              type: 'tool_use',
+              id: toolId,
+              name: part.functionCall.name ?? '',
+              input: (part.functionCall.args ?? {}) as Record<string, unknown>,
+            }
+            accumulatedContent.push(toolUseBlock)
+            yield { type: 'tool_use', data: toolUseBlock } satisfies StreamEvent
+          }
+        }
+      }
+      // Determine stop_reason from the accumulated response: MAX_TOKENS wins,
+      // then the presence of any tool call, otherwise a normal end of turn.
+      const hasToolUse = accumulatedContent.some((b) => b.type === 'tool_use')
+      let stop_reason: LLMResponse['stop_reason'] = 'end_turn'
+      if (lastFinishReason === 'MAX_TOKENS') {
+        stop_reason = 'max_tokens'
+      } else if (hasToolUse) {
+        stop_reason = 'tool_use'
+      }
+      const finalResponse: LLMResponse = {
+        id,
+        content: accumulatedContent,
+        model: options.model,
+        stop_reason,
+        usage: { input_tokens: inputTokens, output_tokens: outputTokens },
+      }
+      yield { type: 'done', data: finalResponse } satisfies StreamEvent
+    } catch (err) {
+      // Normalise non-Error throwables so consumers always receive an Error.
+      const error = err instanceof Error ? err : new Error(String(err))
+      yield { type: 'error', data: error } satisfies StreamEvent
+    }
+  }
+}

package/src/llm/grok.ts ADDED Viewed

@@ -0,0 +1,29 @@
+/**
+ * @fileoverview Grok (xAI) adapter.
+ *
+ * Thin wrapper around OpenAIAdapter that hard-codes the official xAI endpoint
+ * and XAI_API_KEY environment variable fallback.
+ */
+import { OpenAIAdapter } from './openai.js'
+/**
+ * Adapter for xAI's Grok models, implemented as a preconfigured
+ * {@link OpenAIAdapter}.
+ *
+ * It only overrides the adapter name and the constructor defaults; all request
+ * handling is inherited from the base class.
+ *
+ * Usage:
+ *   provider: 'grok'
+ *   model: 'grok-4' (or any current Grok model name)
+ */
+export class GrokAdapter extends OpenAIAdapter {
+  readonly name = 'grok'
+  /**
+   * @param apiKey  - xAI API key; falls back to the `XAI_API_KEY` environment
+   *                  variable when omitted.
+   * @param baseURL - Endpoint override (e.g. for a proxy); falls back to the
+   *                  official xAI endpoint when omitted.
+   */
+  constructor(apiKey?: string, baseURL?: string) {
+    // Explicit arguments take precedence over the environment and the default endpoint.
+    super(apiKey ?? process.env['XAI_API_KEY'], baseURL ?? 'https://api.x.ai/v1')
+  }
+}

package/src/llm/openai-common.ts CHANGED Viewed

@@ -25,6 +25,7 @@ import type {
   TextBlock,
   ToolUseBlock,
 } from '../types.js'
+import { extractToolCallsFromText } from '../tool/text-tool-extractor.js'
 // ---------------------------------------------------------------------------
 // Framework → OpenAI
@@ -166,8 +167,18 @@ function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessa
  *
  * Takes only the first choice (index 0), consistent with how the framework
  * is designed for single-output agents.
+ *
+ * @param completion      - The raw OpenAI completion.
+ * @param knownToolNames  - Optional whitelist of tool names. When the model
+ *                          returns no `tool_calls` but the text contains JSON
+ *                          that looks like a tool call, the fallback extractor
+ *                          uses this list to validate matches. Pass the names
+ *                          of tools sent in the request for best results.
  */
-export function fromOpenAICompletion(completion: ChatCompletion): LLMResponse {
+export function fromOpenAICompletion(
+  completion: ChatCompletion,
+  knownToolNames?: string[],
+): LLMResponse {
   const choice = completion.choices[0]
   if (choice === undefined) {
     throw new Error('OpenAI returned a completion with no choices')
@@ -201,7 +212,35 @@ export function fromOpenAICompletion(completion: ChatCompletion): LLMResponse {
     content.push(toolUseBlock)
   }
-  const stopReason = normalizeFinishReason(choice.finish_reason ?? 'stop')
+  // ---------------------------------------------------------------------------
+  // Fallback: extract tool calls from text when native tool_calls is empty.
+  //
+  // Some local models (Ollama thinking models, misconfigured vLLM) return tool
+  // calls as plain text instead of using the tool_calls wire format.  When we
+  // have text but no tool_calls, try to extract them from the text.
+  // ---------------------------------------------------------------------------
+  const hasNativeToolCalls = (message.tool_calls ?? []).length > 0
+  if (
+    !hasNativeToolCalls &&
+    knownToolNames !== undefined &&
+    knownToolNames.length > 0 &&
+    message.content !== null &&
+    message.content !== undefined &&
+    message.content.length > 0
+  ) {
+    const extracted = extractToolCallsFromText(message.content, knownToolNames)
+    if (extracted.length > 0) {
+      content.push(...extracted)
+    }
+  }
+  const hasToolUseBlocks = content.some(b => b.type === 'tool_use')
+  const rawStopReason = choice.finish_reason ?? 'stop'
+  // If we extracted tool calls from text but the finish_reason was 'stop',
+  // correct it to 'tool_use' so the agent runner continues the loop.
+  const stopReason = hasToolUseBlocks && rawStopReason === 'stop'
+    ? 'tool_use'
+    : normalizeFinishReason(rawStopReason)
   return {
     id: completion.id,

package/src/llm/openai.ts CHANGED Viewed

@@ -54,6 +54,7 @@ import {
   normalizeFinishReason,
   buildOpenAIMessageList,
 } from './openai-common.js'
+import { extractToolCallsFromText } from '../tool/text-tool-extractor.js'
 // ---------------------------------------------------------------------------
 // Adapter implementation
@@ -65,7 +66,7 @@ import {
  * Thread-safe — a single instance may be shared across concurrent agent runs.
  */
 export class OpenAIAdapter implements LLMAdapter {
-  readonly name = 'openai'
+  readonly name: string = 'openai'
   readonly #client: OpenAI
@@ -104,7 +105,8 @@ export class OpenAIAdapter implements LLMAdapter {
       },
     )
-    return fromOpenAICompletion(completion)
+    const toolNames = options.tools?.map(t => t.name)
+    return fromOpenAICompletion(completion, toolNames)
   }
   // -------------------------------------------------------------------------
@@ -241,11 +243,29 @@ export class OpenAIAdapter implements LLMAdapter {
       }
       doneContent.push(...finalToolUseBlocks)
+      // Fallback: extract tool calls from text when streaming produced no
+      // native tool_calls (same logic as fromOpenAICompletion).
+      if (finalToolUseBlocks.length === 0 && fullText.length > 0 && options.tools) {
+        const toolNames = options.tools.map(t => t.name)
+        const extracted = extractToolCallsFromText(fullText, toolNames)
+        if (extracted.length > 0) {
+          doneContent.push(...extracted)
+          for (const block of extracted) {
+            yield { type: 'tool_use', data: block } satisfies StreamEvent
+          }
+        }
+      }
+      const hasToolUseBlocks = doneContent.some(b => b.type === 'tool_use')
+      const resolvedStopReason = hasToolUseBlocks && finalFinishReason === 'stop'
+        ? 'tool_use'
+        : normalizeFinishReason(finalFinishReason)
       const finalResponse: LLMResponse = {
         id: completionId,
         content: doneContent,
         model: completionModel,
-        stop_reason: normalizeFinishReason(finalFinishReason),
+        stop_reason: resolvedStopReason,
         usage: { input_tokens: inputTokens, output_tokens: outputTokens },
       }