npm - ummaya - Versions diffs - 0.2.2 → 0.2.3 - Mend

ummaya 0.2.2 → 0.2.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7)

package/npm-shrinkwrap.json +2 -2
package/package.json +1 -1
package/pyproject.toml +2 -2
package/src/ummaya/ipc/stdio.py +47 -21
package/tui/package.json +1 -1
package/tui/src/services/api/claude.ts +6 -7
package/uv.lock +1 -1

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "ummaya",
-  "version": "0.2.2",
+  "version": "0.2.3",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "ummaya",
-      "version": "0.2.2",
+      "version": "0.2.3",
       "license": "Apache-2.0",
       "dependencies": {
         "@alcalzone/ansi-tokenize": "^0.3.0",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ummaya",
-  "version": "0.2.2",
+  "version": "0.2.3",
   "description": "Conversational multi-agent harness for Korean public-service channels",
   "license": "Apache-2.0",
   "type": "module",

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "ummaya"
-version = "0.2.2"
+version = "0.2.3"
 description = "Conversational multi-agent platform for Korean public APIs"
 readme = "README.md"
 license = "Apache-2.0"
@@ -314,7 +314,7 @@ min_confidence = 80
 [tool.commitizen]
 name = "cz_conventional_commits"
-version = "0.2.2"
+version = "0.2.3"
 tag_format = "v$version"
 # PyTorch CPU-only wheel for Docker image size discipline (SC-1: ≤ 2 GB).

package/src/ummaya/ipc/stdio.py CHANGED Viewed

@@ -199,7 +199,8 @@ def _should_append_tui_tool_to_llm_tools(
     """Return whether a TUI-sent tool should remain in the model tool list."""
     if tui_name and tui_name in backend_tool_names:
         return False
-    return not (has_concrete_backend_tools and tui_name in _ROOT_PRIMITIVE_TOOL_IDS)
+    _ = has_concrete_backend_tools
+    return True
 _VERIFY_QUERY_REQUIREMENTS: Final[tuple[tuple[tuple[str, ...], dict[str, str]], ...]] = (
@@ -5953,8 +5954,9 @@ async def run(  # noqa: C901
         # UMMAYA now follows that shape: BM25/dense retrieval selects a small
         # turn-local set of concrete adapter tools, and each selected
         # GovAPITool is exported directly as an OpenAI-compatible function.
-        # The root primitives remain internal dispatcher families and legacy
-        # transcript compatibility names, not the model-facing tool surface.
+        # Keep the root primitives alongside that set to preserve the 0.2.1
+        # CC-style loop contract: the model can paint progress prose, then call
+        # a primitive dispatcher with a concrete adapter in `tool_id`.
         registry = cast("Any", _ensure_tool_registry())
         backend_tools_raw = [
             t.to_openai_tool() for t in _select_concrete_adapter_tools_for_turn(latest_user_utt)
@@ -6172,24 +6174,45 @@ async def run(  # noqa: C901
         for _turn in range(_AGENTIC_LOOP_MAX_TURNS):
             message_id = str(uuid.uuid4())
             assistant_text_chunks: list[str] = []
-            # Epic #2766 issue B — render-order fix. K-EXAONE emits the
-            # assistant's prose preamble ("내과 병원을 검색해 보겠습니다.")
-            # BEFORE the structured ``tool_call_delta`` events arrive in the
-            # SAME turn. If we forward those prose chunks immediately, the
-            # citizen sees ``assistant text → tool_call → result``, the
-            # opposite of CC's canonical ``tool_call → result → assistant
-            # text`` order. The fix: buffer prose chunks for this turn; emit
-            # them as a single AssistantChunkFrame ONLY after we know whether
-            # this turn invoked tools. When tools are invoked we suppress the
-            # preamble entirely — the next turn produces the real answer
-            # after the tool result is appended to context. When no tools
-            # are invoked we flush the buffer as a single chunk so the prose
-            # still reaches the citizen.
+            # CC stream order: K-EXAONE may emit a visible progress sentence
+            # before the structured ``tool_call_delta`` in the same assistant
+            # turn. Claude Code commits that text block before opening the
+            # following tool_use block, so the TUI can paint
+            # ``assistant text → tool_call``. Buffer here only so textual
+            # ``<tool_call>`` markers can be stripped accurately across chunk
+            # boundaries; when a real ToolCallFrame is emitted below, flush the
+            # cleaned visible text immediately before the tool frame.
             buffered_visible: list[str] = []
             tool_call_buf: dict[int, dict[str, str]] = {}
             stream_error: Exception | None = None
             stream_gate = StreamGate()
+            async def _emit_buffered_visible_before_tool(current_message_id: str) -> None:
+                """Emit same-turn visible prose before opening a tool_use block."""
+                nonlocal buffered_visible
+                if not buffered_visible:
+                    return
+                from ummaya.llm.tool_call_parser import (  # noqa: PLC0415
+                    strip_leaked_thinking_markers,
+                )
+                merged_prose = strip_leaked_thinking_markers("".join(buffered_visible))
+                buffered_visible = []
+                if not merged_prose.strip():
+                    return
+                await write_frame(
+                    AssistantChunkFrame(
+                        session_id=frame.session_id,
+                        correlation_id=frame.correlation_id,
+                        role="llm",
+                        ts=_utcnow(),
+                        kind="assistant_chunk",
+                        message_id=current_message_id,
+                        delta=merged_prose,
+                        done=False,
+                    )
+                )
             def _append_tool_routing_observation(reason: str, message: str) -> None:
                 """Add an internal routing repair instruction for the next model turn."""
                 llm_messages.append(
@@ -6796,11 +6819,10 @@ async def run(  # noqa: C901
                     )
                 )
                 return
-            # Tool calls present → suppress the prose preamble entirely.
-            # The next agentic-loop turn will produce the real answer after
-            # appending tool_result to context. CC-style ordering preserved:
-            # `tool_call → tool_result → final assistant prose`.
-            buffered_visible.clear()
+            # Tool calls present. Preserve any same-turn progress prose by
+            # emitting it immediately before the ToolCallFrame below; do not
+            # send a done=True chunk because this provider call must still stop
+            # at assistant(tool_use), not at an assistant final answer.
             # ---- T027/T029 — emit tool_call frames + register Futures -----
             issued_calls: list[tuple[str, str]] = []  # (call_id, name)
@@ -6963,6 +6985,7 @@ async def run(  # noqa: C901
                         ToolResultFrame,
                     )
+                    await _emit_buffered_visible_before_tool(message_id)
                     await write_frame(
                         ToolCallFrame(
                             session_id=frame.session_id,
@@ -7146,6 +7169,7 @@ async def run(  # noqa: C901
                         ToolResultFrame,
                     )
+                    await _emit_buffered_visible_before_tool(message_id)
                     await write_frame(
                         ToolCallFrame(
                             session_id=frame.session_id,
@@ -7301,6 +7325,7 @@ async def run(  # noqa: C901
                         ToolResultFrame,
                     )
+                    await _emit_buffered_visible_before_tool(message_id)
                     await write_frame(
                         ToolCallFrame(
                             session_id=frame.session_id,
@@ -7378,6 +7403,7 @@ async def run(  # noqa: C901
                     )
                     continue
+                await _emit_buffered_visible_before_tool(message_id)
                 await write_frame(
                     ToolCallFrame(
                         session_id=frame.session_id,

package/tui/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ummaya",
-  "version": "0.2.2",
+  "version": "0.2.3",
   "private": true,
   "type": "module",
   "engines": {

package/tui/src/services/api/claude.ts CHANGED Viewed

@@ -216,7 +216,6 @@ import {
   TOOL_SEARCH_TOOL_NAME,
 } from '../../tools/ToolSearchTool/prompt.js'
 import {
-  isRootPrimitiveToolName,
   selectTopKAdapterToolNamesForQuery,
 } from '../../tools/AdapterTool/AdapterTool.js'
 import { count } from '../../utils/array.js'
@@ -1227,9 +1226,10 @@ async function* queryModel(
     const discoveredToolNames = extractDiscoveredToolNames(messages)
     filteredTools = tools.filter(tool => {
-      if (turnLocalAdapterToolNames.size > 0 && isRootPrimitiveToolName(tool.name)) {
-        return false
-      }
+      // 0.2.1 exposed the lightweight root primitives together with concrete
+      // adapter schemas. Keep that surface so K-EXAONE preserves CC-style
+      // prose→tool→prose loop painting, while still limiting concrete adapter
+      // schemas to the turn-local top-k set.
       if (turnLocalAdapterToolNames.has(tool.name)) return true
       // Always include non-deferred tools
       if (!deferredToolNames.has(tool.name)) return true
@@ -1241,9 +1241,8 @@ async function* queryModel(
   } else {
     filteredTools = tools.filter(t => {
       if (toolMatchesName(t, TOOL_SEARCH_TOOL_NAME)) return false
-      if (turnLocalAdapterToolNames.size > 0 && isRootPrimitiveToolName(t.name)) {
-        return false
-      }
+      // Keep non-deferred root primitives even when concrete top-k adapter
+      // schemas are available; this matches the released 0.2.1 loop surface.
       if (isDeferredTool(t)) return turnLocalAdapterToolNames.has(t.name)
       return true
     })

package/uv.lock CHANGED Viewed

@@ -2725,7 +2725,7 @@ wheels = [
 [[package]]
 name = "ummaya"
-version = "0.2.2"
+version = "0.2.3"
 source = { editable = "." }
 dependencies = [
     { name = "httpx" },