npm - @open-mercato/ai-assistant - Versions diffs - 0.6.2-develop.3406.1.2b403f40da → 0.6.2-develop.3446.1.bd060c6017 - Mend

@open-mercato/ai-assistant 0.6.2-develop.3406.1.2b403f40da → 0.6.2-develop.3446.1.bd060c6017

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

package/src/modules/ai_assistant/lib/task-plan-labels.ts ADDED Viewed

@@ -0,0 +1,112 @@
+/**
+ * Safe, user-visible task-plan labels for AI chat.
+ *
+ * These helpers intentionally reject text that looks like private reasoning.
+ * Task plans are UI copy for operators, not a channel for model scratchpads.
+ */
+/** Canonical dotted name of the reserved task-plan tool; `isTaskPlanToolName` compares against it. */
+export const TASK_PLAN_TOOL_NAME = 'meta.update_task_plan'
+/** Double-underscore spelling of the same tool name (`normalizeTaskPlanToolName` maps `__` to `.`). */
+export const TASK_PLAN_TOOL_NAME_SDK = 'meta__update_task_plan'
+/** Number of raw tasks `sanitizeAgentTaskPlanInput` inspects; entries past this index are ignored. */
+export const TASK_PLAN_MAX_TASKS = 8
+/** Length cap applied to a sanitized task label. */
+export const TASK_PLAN_LABEL_MAX_CHARS = 80
+/** Length cap applied to a sanitized task detail line. */
+export const TASK_PLAN_DETAIL_MAX_CHARS = 160
+/** Length cap applied to a sanitized task id. */
+export const TASK_PLAN_ID_MAX_CHARS = 80
+/** Length cap applied to a normalized tool name. */
+export const TASK_PLAN_TOOL_NAME_MAX_CHARS = 160
+/** One task row as produced by `sanitizeAgentTaskPlanInput`. */
+export interface SanitizedAgentTaskPlanInputTask {
+  /** Sanitized id; omitted when the raw id was missing or empty after cleaning. */
+  id?: string
+  /** Sanitized label; a task without a usable label is dropped instead of emitted. */
+  label: string
+  /** Sanitized secondary text; omitted when missing or rejected. */
+  detail?: string
+  /** Normalized (dotted) tool name; omitted when missing or empty after cleaning. */
+  toolName?: string
+}
+/** Result of `sanitizeAgentTaskPlanInput`; `tasks` is empty when nothing usable was supplied. */
+export interface SanitizedAgentTaskPlanInput {
+  tasks: SanitizedAgentTaskPlanInputTask[]
+}
+/**
+ * Newline-joined instruction block that tells the model how to use the
+ * task-plan tool. The text is runtime prompt content, so every character is
+ * significant.
+ *
+ * NOTE(review): the prompt asks for 2-5 steps while `TASK_PLAN_MAX_TASKS`
+ * accepts up to 8 — confirm the gap is intentional headroom.
+ */
+export const TASK_PLAN_RUNTIME_PROMPT_SECTION = [
+  'TASK PLAN (RUNTIME)',
+  'For every tool-using turn, first call `meta.update_task_plan` with 2-5 concise user-visible steps. Then call the domain/search/attachment/mutation tools.',
+  'Task labels are visible progress UI. Never include hidden reasoning, chain-of-thought, scratchpad notes, or XML thinking tags.',
+  'When a planned step maps to a known tool, include `toolName` so the chat can advance that row from pending to running to done.',
+  'Skip `meta.update_task_plan` for pure capability, example-question, or how-can-you-help prompts where no data tool is needed.',
+].join('\n')
+/**
+ * Case-insensitive patterns that mark text as model scratchpad rather than
+ * operator-facing copy. None of them carries the `g` flag, so `.test()` keeps
+ * no `lastIndex` state between calls.
+ */
+const HIDDEN_REASONING_PATTERNS: readonly RegExp[] = [
+  /\bchain[-\s]?of[-\s]?thought\b/i,
+  /\binternal\s+(?:reasoning|thoughts?)\b/i,
+  /\bprivate\s+(?:reasoning|thoughts?)\b/i,
+  /\bhidden\s+(?:reasoning|thoughts?)\b/i,
+  // `\s*` also matches zero characters, so this covers both "scratch pad" and "scratchpad".
+  /\bscratch\s*pad\b/i,
+  /\b(?:my\s+)?reasoning\s*:/i,
+  // Opening or closing XML-style thinking tags.
+  /<\/?\s*(?:thinking|thought|reasoning|scratchpad)\b/i,
+]
+/** C0 control characters plus DEL; replaced by a space before whitespace is collapsed. */
+const CONTROL_CHARS = /[\u0000-\u001f\u007f]/g
+/** Any run of whitespace; collapsed to a single space. */
+const WHITESPACE = /\s+/g
+/**
+ * Reduce an arbitrary value to a canonical dotted tool name.
+ *
+ * Non-strings yield `undefined`. Strings are trimmed, every `__` becomes `.`,
+ * characters outside `[a-zA-Z0-9._:-]` are removed, and the result is capped
+ * at `TASK_PLAN_TOOL_NAME_MAX_CHARS`. An empty result yields `undefined`.
+ */
+export function normalizeTaskPlanToolName(toolName: unknown): string | undefined {
+  if (typeof toolName !== 'string') return undefined
+  const cleaned = toolName
+    .trim()
+    .replace(/__/g, '.')
+    .replace(/[^a-zA-Z0-9._:-]/g, '')
+    .slice(0, TASK_PLAN_TOOL_NAME_MAX_CHARS)
+  return cleaned === '' ? undefined : cleaned
+}
+/** True when `toolName`, once normalized, is exactly the reserved task-plan tool name. */
+export function isTaskPlanToolName(toolName: unknown): boolean {
+  const canonical = normalizeTaskPlanToolName(toolName)
+  return canonical === TASK_PLAN_TOOL_NAME
+}
+/** True when `value` matches at least one entry of `HIDDEN_REASONING_PATTERNS`. */
+export function looksLikeHiddenReasoning(value: string): boolean {
+  for (const pattern of HIDDEN_REASONING_PATTERNS) {
+    if (pattern.test(value)) return true
+  }
+  return false
+}
+/**
+ * Turn untrusted text into a single-line, length-capped UI string.
+ *
+ * Control characters become spaces, whitespace runs collapse to one space and
+ * the result is trimmed. Text that looks like hidden reasoning is rejected
+ * outright rather than partially redacted.
+ *
+ * @param value raw value; anything that is not a string is rejected
+ * @param maxChars cap on the returned string's `length` (UTF-16 code units)
+ * @returns the cleaned text, or `null` when nothing usable remains
+ */
+export function sanitizeTaskPlanText(
+  value: unknown,
+  maxChars: number,
+): string | null {
+  if (typeof value !== 'string') return null
+  const normalized = value.replace(CONTROL_CHARS, ' ').replace(WHITESPACE, ' ').trim()
+  if (!normalized) return null
+  if (looksLikeHiddenReasoning(normalized)) return null
+  let truncated = normalized.slice(0, maxChars)
+  if (truncated.length < normalized.length) {
+    // `slice` counts UTF-16 code units, so the cut may land between the two
+    // halves of a surrogate pair; drop a dangling high surrogate so the UI
+    // never renders a replacement glyph.
+    const lastUnit = truncated.charCodeAt(truncated.length - 1)
+    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+      truncated = truncated.slice(0, -1)
+    }
+    // The cut may also land right after a word, leaving a trailing space.
+    truncated = truncated.trimEnd()
+  }
+  return truncated.length > 0 ? truncated : null
+}
+/**
+ * Reduce an arbitrary value to an id-safe token.
+ *
+ * Non-strings yield `undefined`. Strings are trimmed, whitespace runs become
+ * `-`, characters outside `[a-zA-Z0-9._:-]` are removed, and the result is
+ * capped at `TASK_PLAN_ID_MAX_CHARS`. An empty result yields `undefined`.
+ */
+export function sanitizeTaskPlanId(value: unknown): string | undefined {
+  if (typeof value !== 'string') return undefined
+  const dashed = value.trim().replace(/\s+/g, '-')
+  const allowedOnly = dashed.replace(/[^a-zA-Z0-9._:-]/g, '')
+  const bounded = allowedOnly.slice(0, TASK_PLAN_ID_MAX_CHARS)
+  return bounded === '' ? undefined : bounded
+}
+/**
+ * Validate and clean the raw input of the task-plan tool.
+ *
+ * Only the first `TASK_PLAN_MAX_TASKS` raw entries are inspected. An entry is
+ * kept only when it is an object whose `label` survives `sanitizeTaskPlanText`;
+ * `id`, `detail` and `toolName` are attached only when they survive their own
+ * sanitizers. Anything that is not an object with an array `tasks` property
+ * yields an empty task list.
+ */
+export function sanitizeAgentTaskPlanInput(input: unknown): SanitizedAgentTaskPlanInput {
+  const candidates =
+    input && typeof input === 'object' ? (input as { tasks?: unknown }).tasks : undefined
+  const accepted: SanitizedAgentTaskPlanInputTask[] = []
+  if (!Array.isArray(candidates)) return { tasks: accepted }
+  const limit = Math.min(candidates.length, TASK_PLAN_MAX_TASKS)
+  for (let index = 0; index < limit; index += 1) {
+    const candidate: unknown = candidates[index]
+    if (!candidate || typeof candidate !== 'object') continue
+    const fields = candidate as Record<string, unknown>
+    const label = sanitizeTaskPlanText(fields.label, TASK_PLAN_LABEL_MAX_CHARS)
+    if (!label) continue
+    const detail = sanitizeTaskPlanText(fields.detail, TASK_PLAN_DETAIL_MAX_CHARS)
+    const id = sanitizeTaskPlanId(fields.id)
+    const toolName = normalizeTaskPlanToolName(fields.toolName)
+    // Keys are added in the order id, label, detail, toolName.
+    const task: SanitizedAgentTaskPlanInputTask = id ? { id, label } : { label }
+    if (detail) task.detail = detail
+    if (toolName) task.toolName = toolName
+    accepted.push(task)
+  }
+  return { tasks: accepted }
+}

package/src/modules/ai_assistant/lib/task-plan-stream.ts ADDED Viewed

@@ -0,0 +1,463 @@
+/**
+ * Visible AI chat agent task plan — server-side SSE injector.
+ *
+ * Spec: `.ai/specs/2026-05-13-ai-chat-visible-task-plan.md`.
+ *
+ * Wraps a streaming `Response` produced by `streamText().toUIMessageStreamResponse()`
+ * (or the equivalent `ToolLoopAgent.stream(...).toUIMessageStreamResponse()`)
+ * and interleaves additive `data-agent-task-plan` / `data-agent-task-update`
+ * SSE chunks alongside the AI SDK tool lifecycle chunks. The original chunks
+ * are passed through unchanged so existing clients that ignore unknown chunk
+ * types continue to work.
+ *
+ * The injector derives task labels and states from the SDK tool lifecycle:
+ *   - `tool-input-start`         → create/update task with state `running`
+ *   - `tool-input-available`     → keep task `running` (label may be refined)
+ *   - `tool-output-available`    → mark task `done`
+ *   - `tool-output-error`        → mark task `failed`
+ *   - `tool-input-error`         → mark task `failed`
+ *
+ * Agent-authored labels flow through the reserved non-mutation
+ * `meta.update_task_plan` tool. Its input is sanitized before the plan reaches
+ * the client; the raw meta-tool call is still passed through for older clients
+ * but the visible plan uses only the safe labels.
+ */
+import {
+  TASK_PLAN_LABEL_MAX_CHARS,
+  isTaskPlanToolName,
+  normalizeTaskPlanToolName,
+  sanitizeAgentTaskPlanInput,
+} from './task-plan-labels'
+/** Module-level UTF-8 encoder used when writing SSE text to the output stream. */
+const SSE_ENCODER = new TextEncoder()
+/**
+ * Module-level UTF-8 decoder.
+ * NOTE(review): a `TextDecoder` keeps partial-sequence state between
+ * `decode(chunk, { stream: true })` calls, so one instance should not be fed
+ * by more than one stream at a time.
+ */
+const SSE_DECODER = new TextDecoder()
+/**
+ * Mirrors the client-side `AiAgentTaskSnapshot` so server and client agree on
+ * the wire format. Kept locally in this module to avoid pulling a UI package
+ * dependency into the runtime — the shape is small and only ever serialized
+ * to JSON for the SSE chunks below.
+ */
+export interface ServerTaskSnapshot {
+  /** Task id: an agent-plan id, or the tool call id for runtime-derived rows. */
+  id: string
+  /** Operator-facing label. */
+  label: string
+  /** Lifecycle state; `done`, `failed` and `skipped` are terminal (see `TERMINAL_STATES`). */
+  state: 'pending' | 'running' | 'done' | 'failed' | 'skipped'
+  /** Optional secondary line; runtime-derived rows carry the normalized tool name here. */
+  detail?: string
+  /** `agent` for rows from the task-plan tool, `runtime` for rows derived from tool lifecycle chunks. */
+  source: 'runtime' | 'agent'
+  /** Id of the tool call currently driving this row, when there is one. */
+  toolCallId?: string
+}
+/** States that `TaskPlanAccumulator` never transitions out of once reached. */
+const TERMINAL_STATES: ReadonlySet<ServerTaskSnapshot['state']> = new Set([
+  'done',
+  'failed',
+  'skipped',
+])
+/** Local alias so runtime-derived labels share the cap used for agent-authored labels. */
+const TASK_LABEL_MAX_CHARS = TASK_PLAN_LABEL_MAX_CHARS
+/**
+ * Convert a raw tool name (e.g. `customers__list_people` or
+ * `customers.list_people`) to a compact operator-facing label
+ * (e.g. `Customers · List people`).
+ *
+ * `__` is treated as `.`. The label is built from the first and last
+ * dot-separated segments; underscores become spaces and only the first
+ * letter of each part is upper-cased. Intermediate segments are not shown.
+ *
+ * @param toolName tool name as reported by the stream; may be missing
+ * @returns a label of at most `TASK_LABEL_MAX_CHARS` characters; `'Tool call'`
+ *   when no name is available
+ */
+export function deriveTaskLabel(toolName: string | undefined): string {
+  if (typeof toolName !== 'string' || toolName.length === 0) {
+    return 'Tool call'
+  }
+  const display = toolName.replace(/__/g, '.')
+  const segments = display.split('.')
+  const lastSegment = segments[segments.length - 1] ?? display
+  const humanized = lastSegment.replace(/_/g, ' ').trim()
+  // Nothing readable in the trailing segment (e.g. `customers.`): show the raw dotted name.
+  if (humanized.length === 0) return display.slice(0, TASK_LABEL_MAX_CHARS)
+  const titled = humanized.charAt(0).toUpperCase() + humanized.slice(1)
+  const moduleSegment = segments.length > 1 ? segments[0] ?? '' : ''
+  // No module prefix, or an empty one (name started with a separator): avoid
+  // emitting a label that begins with a dangling " · ".
+  if (moduleSegment.length === 0) {
+    return titled.slice(0, TASK_LABEL_MAX_CHARS)
+  }
+  const moduleLabel = moduleSegment.charAt(0).toUpperCase() + moduleSegment.slice(1).replace(/_/g, ' ')
+  const combined = `${moduleLabel} · ${titled}`
+  return combined.slice(0, TASK_LABEL_MAX_CHARS)
+}
+/** A task snapshot plus a flag recording whether it has been included in an emitted event. */
+type AccumulatorEntry = {
+  snapshot: ServerTaskSnapshot
+  emitted: boolean
+}
+/**
+ * Loosely-typed view of a parsed stream chunk. Every field is `unknown`
+ * because the value comes straight from `JSON.parse`; consumers narrow each
+ * field before use.
+ */
+type ToolChunk = {
+  type?: unknown
+  toolCallId?: unknown
+  toolName?: unknown
+  input?: unknown
+}
+/**
+ * Encapsulates the per-turn task-plan state. Exposed for unit tests so the
+ * derivation logic can be exercised without standing up a full SSE pipeline.
+ *
+ * Feed every parsed stream chunk to `handleToolChunk`; it returns the extra
+ * SSE events (already formatted) that should be interleaved with the stream.
+ */
+export class TaskPlanAccumulator {
+  /** Task entries keyed by task id; Map insertion order is the order used in full snapshots. */
+  private readonly tasks = new Map<string, AccumulatorEntry>()
+  /** Tool call id → id of the task row that call drives. */
+  private readonly toolCallToTaskId = new Map<string, string>()
+  /** Task id → normalized tool name the agent attached to that planned step. */
+  private readonly taskToolNames = new Map<string, string>()
+  /** Tool call ids that belong to the task-plan tool itself; they never get a row. */
+  private readonly internalToolCallIds = new Set<string>()
+  /** True once a `data-agent-task-plan` snapshot has been produced for the current task list. */
+  private snapshotEmitted = false
+  /** True after a non-empty agent-authored plan has replaced the task list. */
+  private hasAgentAuthoredPlan = false
+  constructor(public readonly planId: string) {}
+  /**
+   * Create the task `id` or merge `patch` into it. A task already in a
+   * terminal state keeps that state; undefined patch fields keep the current
+   * value.
+   */
+  private upsert(
+    id: string,
+    patch: Partial<ServerTaskSnapshot> & { label?: string; toolCallId?: string },
+  ): ServerTaskSnapshot {
+    const existing = this.tasks.get(id)
+    if (!existing) {
+      const created: ServerTaskSnapshot = {
+        id,
+        label: patch.label ?? 'Tool call',
+        state: patch.state ?? 'running',
+        source: patch.source ?? 'runtime',
+        detail: patch.detail,
+        toolCallId: patch.toolCallId,
+      }
+      this.tasks.set(id, { snapshot: created, emitted: false })
+      return created
+    }
+    const current = existing.snapshot
+    const nextState = TERMINAL_STATES.has(current.state) ? current.state : patch.state ?? current.state
+    const merged: ServerTaskSnapshot = {
+      id: current.id,
+      label: patch.label ?? current.label,
+      state: nextState,
+      source: patch.source ?? current.source,
+      detail: patch.detail ?? current.detail,
+      toolCallId: patch.toolCallId ?? current.toolCallId,
+    }
+    this.tasks.set(id, { snapshot: merged, emitted: existing.emitted })
+    return merged
+  }
+  /** Return `baseId`, or `baseId-2`, `baseId-3`, … until the id is not already a task id. */
+  private makeUniqueTaskId(baseId: string): string {
+    let candidate = baseId
+    let suffix = 2
+    while (this.tasks.has(candidate)) {
+      candidate = `${baseId}-${suffix}`
+      suffix += 1
+    }
+    return candidate
+  }
+  /** Emit one `data-agent-task-plan` event with every current task and mark them all emitted. */
+  private emitFullSnapshot(): string[] {
+    if (this.tasks.size === 0) return []
+    this.snapshotEmitted = true
+    const initialTasks = Array.from(this.tasks.values()).map((e) => e.snapshot)
+    for (const e of this.tasks.values()) e.emitted = true
+    return [
+      formatSseEvent({
+        type: 'data-agent-task-plan',
+        planId: this.planId,
+        tasks: initialTasks,
+      }),
+    ]
+  }
+  /**
+   * Replace the whole task list with the sanitized agent-authored plan and
+   * emit it as a fresh snapshot. An input that sanitizes to zero tasks is
+   * ignored so an unusable plan never wipes existing rows.
+   *
+   * NOTE(review): `toolCallToTaskId` is cleared too, so a tool call that is
+   * still in flight is re-resolved through `findPlannedTaskId` when its later
+   * chunks arrive — confirm that is intended.
+   */
+  private handleAgentAuthoredPlan(input: unknown): string[] {
+    const plan = sanitizeAgentTaskPlanInput(input)
+    if (plan.tasks.length === 0) return []
+    this.tasks.clear()
+    this.toolCallToTaskId.clear()
+    this.taskToolNames.clear()
+    this.snapshotEmitted = false
+    this.hasAgentAuthoredPlan = true
+    plan.tasks.forEach((task, index) => {
+      const id = this.makeUniqueTaskId(task.id ?? `agent-plan-${index + 1}`)
+      const snapshot: ServerTaskSnapshot = {
+        id,
+        label: task.label,
+        state: 'pending',
+        source: 'agent',
+        detail: task.detail,
+      }
+      this.tasks.set(id, { snapshot, emitted: false })
+      if (task.toolName) {
+        this.taskToolNames.set(id, task.toolName)
+      }
+    })
+    return this.emitFullSnapshot()
+  }
+  /**
+   * Map a tool call to a task id and remember the mapping. Preference order:
+   * an existing mapping, a matching planned task, then the tool call id
+   * itself (which becomes a new runtime row).
+   */
+  private resolveTaskIdForToolCall(toolCallId: string, toolName: string | undefined): string {
+    const existing = this.toolCallToTaskId.get(toolCallId)
+    if (existing) return existing
+    const plannedTaskId = this.findPlannedTaskId(toolName)
+    if (plannedTaskId) {
+      this.toolCallToTaskId.set(toolCallId, plannedTaskId)
+      return plannedTaskId
+    }
+    this.toolCallToTaskId.set(toolCallId, toolCallId)
+    return toolCallId
+  }
+  /**
+   * Pick the agent-planned task a tool call should drive, or `null` when no
+   * agent plan exists or nothing fits. Search order, first hit wins:
+   *   1. pending task whose planned tool name equals `toolName`
+   *   2. any non-terminal task whose planned tool name equals `toolName`
+   *   3. pending task with no planned tool name
+   *   4. any non-terminal task with no planned tool name
+   */
+  private findPlannedTaskId(toolName: string | undefined): string | null {
+    if (!this.hasAgentAuthoredPlan) return null
+    const entries = Array.from(this.tasks.entries())
+    const isAvailable = (entry: AccumulatorEntry) => !TERMINAL_STATES.has(entry.snapshot.state)
+    if (toolName) {
+      const exactPending = entries.find(([id, entry]) => {
+        return entry.snapshot.state === 'pending' && this.taskToolNames.get(id) === toolName
+      })
+      if (exactPending) return exactPending[0]
+      const exactAvailable = entries.find(([id, entry]) => {
+        return isAvailable(entry) && this.taskToolNames.get(id) === toolName
+      })
+      if (exactAvailable) return exactAvailable[0]
+    }
+    const genericPending = entries.find(([id, entry]) => {
+      return entry.snapshot.state === 'pending' && !this.taskToolNames.has(id)
+    })
+    if (genericPending) return genericPending[0]
+    const genericAvailable = entries.find(([id, entry]) => {
+      return isAvailable(entry) && !this.taskToolNames.has(id)
+    })
+    return genericAvailable?.[0] ?? null
+  }
+  /** Current snapshot for `taskId`, if the task exists. */
+  private existingSnapshot(taskId: string): ServerTaskSnapshot | undefined {
+    return this.tasks.get(taskId)?.snapshot
+  }
+  /**
+   * Apply a tool lifecycle chunk. Returns the SSE event lines (already
+   * `data: ...\n\n`-formatted) that should be injected alongside the original
+   * chunk; the caller decides whether they go before or after it.
+   */
+  handleToolChunk(chunk: ToolChunk): string[] {
+    if (!chunk || typeof chunk.type !== 'string') return []
+    const toolCallId = typeof chunk.toolCallId === 'string' ? chunk.toolCallId : null
+    const toolName = normalizeTaskPlanToolName(chunk.toolName)
+    if (isTaskPlanToolName(toolName)) {
+      // Remember the call id so later chunks for the same call that do not
+      // carry a tool name are still recognised as internal and skipped.
+      if (toolCallId) this.internalToolCallIds.add(toolCallId)
+      if (chunk.type === 'tool-input-available') {
+        return this.handleAgentAuthoredPlan(chunk.input)
+      }
+      return []
+    }
+    if (!toolCallId) return []
+    if (this.internalToolCallIds.has(toolCallId)) return []
+    const taskId = this.resolveTaskIdForToolCall(toolCallId, toolName)
+    const existing = this.existingSnapshot(taskId)
+    const source = existing?.source ?? 'runtime'
+    let nextSnapshot: ServerTaskSnapshot
+    switch (chunk.type) {
+      case 'tool-input-start':
+      case 'tool-input-available':
+        // Both input chunks move the row to `running`. Runtime-derived rows
+        // take (or refine) their label and detail from the tool name;
+        // agent-authored rows keep the safe label the model supplied through
+        // `meta.update_task_plan`.
+        nextSnapshot = this.upsert(taskId, {
+          label: source === 'agent' ? existing?.label : deriveTaskLabel(toolName),
+          state: 'running',
+          source,
+          toolCallId,
+          detail: source === 'agent' ? existing?.detail : toolName,
+        })
+        break
+      case 'tool-output-available':
+        nextSnapshot = this.upsert(taskId, {
+          state: 'done',
+          source,
+          toolCallId,
+        })
+        break
+      case 'tool-output-error':
+      case 'tool-input-error':
+        nextSnapshot = this.upsert(taskId, {
+          state: 'failed',
+          source,
+          toolCallId,
+        })
+        break
+      default:
+        return []
+    }
+    return this.emitForSnapshot(taskId, nextSnapshot)
+  }
+  /**
+   * Produce the event for a changed task: the full plan snapshot when none
+   * has been sent yet, otherwise a single `data-agent-task-update`. A task the
+   * client has not seen before is also sent as an update — clients merge by id.
+   */
+  private emitForSnapshot(id: string, snapshot: ServerTaskSnapshot): string[] {
+    const entry = this.tasks.get(id)
+    if (!entry) return []
+    if (!this.snapshotEmitted) {
+      return this.emitFullSnapshot()
+    }
+    entry.emitted = true
+    return [
+      formatSseEvent({
+        type: 'data-agent-task-update',
+        planId: this.planId,
+        task: snapshot,
+      }),
+    ]
+  }
+}
+/** Serialize `payload` as one SSE event: a single `data:` line holding its JSON, then a blank line. */
+export function formatSseEvent(payload: Record<string, unknown>): string {
+  const json = JSON.stringify(payload)
+  return 'data: ' + json + '\n\n'
+}
+/**
+ * Wrap a streaming `Response` and interleave `data-agent-task-plan` /
+ * `data-agent-task-update` SSE chunks. Original events are forwarded
+ * unchanged; the body is only parsed at event boundaries to decide when to
+ * inject extra chunks.
+ *
+ * @param baseResponse upstream SSE response; its status and headers are reused
+ * @param planId identifier stamped on every injected task-plan event
+ * @returns a new `Response` whose body is the upstream stream plus injected events
+ */
+export function injectTaskPlanIntoStream(
+  baseResponse: Response,
+  planId: string,
+): Response {
+  const { readable, writable } = new TransformStream<Uint8Array, Uint8Array>()
+  const writer = writable.getWriter()
+  const accumulator = new TaskPlanAccumulator(planId)
+  // One decoder per response: in `{ stream: true }` mode a TextDecoder buffers
+  // incomplete multi-byte sequences between calls, so sharing an instance
+  // across concurrent responses would mix bytes from different streams.
+  const decoder = new TextDecoder()
+  async function pump(): Promise<void> {
+    if (!baseResponse.body) {
+      await writer.close().catch(() => undefined)
+      return
+    }
+    const reader = baseResponse.body.getReader()
+    let textBuffer = ''
+    try {
+      for (;;) {
+        const { value, done } = await reader.read()
+        if (done) break
+        if (!value) continue
+        textBuffer += decoder.decode(value, { stream: true })
+        textBuffer = await flushBuffer(textBuffer, accumulator, writer)
+      }
+      // Flush whatever the decoder is still holding back.
+      textBuffer += decoder.decode()
+      if (textBuffer.length > 0) {
+        // Best-effort flush of any trailing bytes (the AI SDK always
+        // terminates events with `\n\n` so this path is rare).
+        await writer.write(SSE_ENCODER.encode(textBuffer))
+      }
+    } catch {
+      // Either the upstream read failed or the downstream consumer went away.
+      // The error is not propagated — that would corrupt the SSE stream — but
+      // the upstream body is cancelled so it stops producing data nobody will
+      // read. Cancelling an already-failed stream rejects; that is ignored.
+      await reader.cancel().catch(() => undefined)
+    } finally {
+      reader.releaseLock()
+      await writer.close().catch(() => undefined)
+    }
+  }
+  void pump()
+  return new Response(readable, {
+    status: baseResponse.status,
+    headers: baseResponse.headers,
+  })
+}
+/**
+ * Forward every complete SSE event (text up to and including a `\n\n`) found
+ * in `buffer`, writing any injected events before and after it as decided by
+ * `inspectEventBlock`.
+ *
+ * @returns the unterminated remainder, to be prepended to the next chunk
+ */
+async function flushBuffer(
+  buffer: string,
+  accumulator: TaskPlanAccumulator,
+  writer: WritableStreamDefaultWriter<Uint8Array>,
+): Promise<string> {
+  let remaining = buffer
+  let cut = remaining.indexOf('\n\n')
+  while (cut !== -1) {
+    const end = cut + 2
+    const block = remaining.slice(0, end)
+    remaining = remaining.slice(end)
+    const { before, after } = inspectEventBlock(block, accumulator)
+    // Writes stay strictly sequential so the wire order is before → original → after.
+    for (const piece of [...before, block, ...after]) {
+      await writer.write(SSE_ENCODER.encode(piece))
+    }
+    cut = remaining.indexOf('\n\n')
+  }
+  return remaining
+}
+/** Extra SSE events to write around one original event. */
+interface InjectedLines {
+  before: string[]
+  after: string[]
+}
+/**
+ * Parse one SSE event block and ask the accumulator which task-plan events it
+ * triggers. Blocks without a data payload, the `[DONE]` sentinel, non-JSON
+ * payloads and chunks without a string `type` inject nothing.
+ */
+function inspectEventBlock(
+  eventBlock: string,
+  accumulator: TaskPlanAccumulator,
+): InjectedLines {
+  const nothing = (): InjectedLines => ({ before: [], after: [] })
+  const payload = extractDataPayload(eventBlock)
+  if (payload === null || payload === '' || payload === '[DONE]') return nothing()
+  let chunk: ToolChunk | null
+  try {
+    chunk = JSON.parse(payload)
+  } catch {
+    return nothing()
+  }
+  if (!chunk || typeof chunk.type !== 'string') return nothing()
+  const chunkType = chunk.type
+  const events = accumulator.handleToolChunk(chunk)
+  if (events.length === 0) return nothing()
+  // Input chunks get the plan event BEFORE the original, so the row appears
+  // as the tool starts. Output / error chunks get it AFTER, so the row only
+  // updates once the tool result is visible in the existing tool-call row.
+  const placeBefore = chunkType === 'tool-input-start' || chunkType === 'tool-input-available'
+  return placeBefore ? { before: events, after: [] } : { before: [], after: events }
+}
+/**
+ * Collect the `data:` field of one SSE event block. One optional space after
+ * the colon is stripped and multiple data lines are joined with `\n`.
+ *
+ * @returns the joined payload, or `null` when the block has no `data:` line
+ */
+function extractDataPayload(eventBlock: string): string | null {
+  const parts: string[] = []
+  for (const line of eventBlock.split('\n')) {
+    if (!line.startsWith('data:')) continue
+    parts.push(line.charAt(5) === ' ' ? line.slice(6) : line.slice(5))
+  }
+  return parts.length > 0 ? parts.join('\n') : null
+}

package/src/modules/ai_assistant/lib/tool-test-fixtures.ts CHANGED Viewed

@@ -44,6 +44,9 @@ export const toolFixtures: Record<string, ToolFixture> = {
   'meta.describe_agent': f({
     input: { agentId: 'customers.account_assistant' },
   }),
+  'meta.update_task_plan': f({
+    input: { tasks: [{ label: 'Search records', toolName: 'customers.list_people' }] },
+  }),
   'attachments.list_record_attachments': f({
     input: { entityType: 'customers:person', recordId: '00000000-0000-0000-0000-000000000000', limit: 5 },
     note: 'Empty result is a valid response; we only assert shape.',

package/src/modules/ai_assistant/lib/types.ts CHANGED Viewed

@@ -54,6 +54,8 @@ export interface AiToolLoadBeforeRecord {
   label: string
   recordVersion: string | null
   before: Record<string, unknown>
+  after?: Record<string, unknown>
+  display?: AiToolFieldDiffDisplayHints
 }
 /**
@@ -67,6 +69,20 @@ export interface AiToolLoadBeforeSingleRecord {
   entityType: string
   recordVersion: string | null
   before: Record<string, unknown>
+  after?: Record<string, unknown>
+  display?: AiToolFieldDiffDisplayHints
+}
+/**
+ * Optional display hints for mutation-preview diffs. Raw `before` / `after`
+ * values remain persisted for execution and stale checks; these labels are
+ * only for operator-facing cards, e.g. showing a pipeline stage name instead
+ * of its UUID.
+ */
+export interface AiToolFieldDiffDisplayHints {
+  /** Presumably maps a field key to its human-readable label — TODO confirm against the card renderer. */
+  fieldLabels?: Record<string, string>
+  /** Presumably per-field display values shown in place of the raw `before` values — TODO confirm. */
+  before?: Record<string, unknown>
+  /** Presumably per-field display values shown in place of the raw `after` values — TODO confirm. */
+  after?: Record<string, unknown>
 }
 /**