ummaya 0.2.2 → 0.2.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,12 @@
1
1
  {
2
2
  "name": "ummaya",
3
- "version": "0.2.2",
3
+ "version": "0.2.3",
4
4
  "lockfileVersion": 3,
5
5
  "requires": true,
6
6
  "packages": {
7
7
  "": {
8
8
  "name": "ummaya",
9
- "version": "0.2.2",
9
+ "version": "0.2.3",
10
10
  "license": "Apache-2.0",
11
11
  "dependencies": {
12
12
  "@alcalzone/ansi-tokenize": "^0.3.0",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ummaya",
3
- "version": "0.2.2",
3
+ "version": "0.2.3",
4
4
  "description": "Conversational multi-agent harness for Korean public-service channels",
5
5
  "license": "Apache-2.0",
6
6
  "type": "module",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "ummaya"
3
- version = "0.2.2"
3
+ version = "0.2.3"
4
4
  description = "Conversational multi-agent platform for Korean public APIs"
5
5
  readme = "README.md"
6
6
  license = "Apache-2.0"
@@ -314,7 +314,7 @@ min_confidence = 80
314
314
 
315
315
  [tool.commitizen]
316
316
  name = "cz_conventional_commits"
317
- version = "0.2.2"
317
+ version = "0.2.3"
318
318
  tag_format = "v$version"
319
319
 
320
320
  # PyTorch CPU-only wheel for Docker image size discipline (SC-1: ≤ 2 GB).
@@ -199,7 +199,8 @@ def _should_append_tui_tool_to_llm_tools(
199
199
  """Return whether a TUI-sent tool should remain in the model tool list."""
200
200
  if tui_name and tui_name in backend_tool_names:
201
201
  return False
202
- return not (has_concrete_backend_tools and tui_name in _ROOT_PRIMITIVE_TOOL_IDS)
202
+ _ = has_concrete_backend_tools
203
+ return True
203
204
 
204
205
 
205
206
  _VERIFY_QUERY_REQUIREMENTS: Final[tuple[tuple[tuple[str, ...], dict[str, str]], ...]] = (
@@ -5953,8 +5954,9 @@ async def run( # noqa: C901
5953
5954
  # UMMAYA now follows that shape: BM25/dense retrieval selects a small
5954
5955
  # turn-local set of concrete adapter tools, and each selected
5955
5956
  # GovAPITool is exported directly as an OpenAI-compatible function.
5956
- # The root primitives remain internal dispatcher families and legacy
5957
- # transcript compatibility names, not the model-facing tool surface.
5957
+ # Keep the root primitives alongside that set to preserve the 0.2.1
5958
+ # CC-style loop contract: the model can paint progress prose, then call
5959
+ # a primitive dispatcher with a concrete adapter in `tool_id`.
5958
5960
  registry = cast("Any", _ensure_tool_registry())
5959
5961
  backend_tools_raw = [
5960
5962
  t.to_openai_tool() for t in _select_concrete_adapter_tools_for_turn(latest_user_utt)
@@ -6172,24 +6174,45 @@ async def run( # noqa: C901
6172
6174
  for _turn in range(_AGENTIC_LOOP_MAX_TURNS):
6173
6175
  message_id = str(uuid.uuid4())
6174
6176
  assistant_text_chunks: list[str] = []
6175
- # Epic #2766 issue B — render-order fix. K-EXAONE emits the
6176
- # assistant's prose preamble ("내과 병원을 검색해 보겠습니다.")
6177
- # BEFORE the structured ``tool_call_delta`` events arrive in the
6178
- # SAME turn. If we forward those prose chunks immediately, the
6179
- # citizen sees ``assistant text → tool_call result``, the
6180
- # opposite of CC's canonical ``tool_call result → assistant
6181
- # text`` order. The fix: buffer prose chunks for this turn; emit
6182
- # them as a single AssistantChunkFrame ONLY after we know whether
6183
- # this turn invoked tools. When tools are invoked we suppress the
6184
- # preamble entirely — the next turn produces the real answer
6185
- # after the tool result is appended to context. When no tools
6186
- # are invoked we flush the buffer as a single chunk so the prose
6187
- # still reaches the citizen.
6177
+ # CC stream order: K-EXAONE may emit a visible progress sentence
6178
+ # before the structured ``tool_call_delta`` in the same assistant
6179
+ # turn. Claude Code commits that text block before opening the
6180
+ # following tool_use block, so the TUI can paint
6181
+ # ``assistant text → tool_call``. Buffer here only so textual
6182
+ # ``<tool_call>`` markers can be stripped accurately across chunk
6183
+ # boundaries; when a real ToolCallFrame is emitted below, flush the
6184
+ # cleaned visible text immediately before the tool frame.
6188
6185
  buffered_visible: list[str] = []
6189
6186
  tool_call_buf: dict[int, dict[str, str]] = {}
6190
6187
  stream_error: Exception | None = None
6191
6188
  stream_gate = StreamGate()
6192
6189
 
6190
+ async def _emit_buffered_visible_before_tool(current_message_id: str) -> None:
6191
+ """Emit same-turn visible prose before opening a tool_use block."""
6192
+ nonlocal buffered_visible
6193
+ if not buffered_visible:
6194
+ return
6195
+ from ummaya.llm.tool_call_parser import ( # noqa: PLC0415
6196
+ strip_leaked_thinking_markers,
6197
+ )
6198
+
6199
+ merged_prose = strip_leaked_thinking_markers("".join(buffered_visible))
6200
+ buffered_visible = []
6201
+ if not merged_prose.strip():
6202
+ return
6203
+ await write_frame(
6204
+ AssistantChunkFrame(
6205
+ session_id=frame.session_id,
6206
+ correlation_id=frame.correlation_id,
6207
+ role="llm",
6208
+ ts=_utcnow(),
6209
+ kind="assistant_chunk",
6210
+ message_id=current_message_id,
6211
+ delta=merged_prose,
6212
+ done=False,
6213
+ )
6214
+ )
6215
+
6193
6216
  def _append_tool_routing_observation(reason: str, message: str) -> None:
6194
6217
  """Add an internal routing repair instruction for the next model turn."""
6195
6218
  llm_messages.append(
@@ -6796,11 +6819,10 @@ async def run( # noqa: C901
6796
6819
  )
6797
6820
  )
6798
6821
  return
6799
- # Tool calls present → suppress the prose preamble entirely.
6800
- # The next agentic-loop turn will produce the real answer after
6801
- # appending tool_result to context. CC-style ordering preserved:
6802
- # `tool_call → tool_result → final assistant prose`.
6803
- buffered_visible.clear()
6822
+ # Tool calls present. Preserve any same-turn progress prose by
6823
+ # emitting it immediately before the ToolCallFrame below; do not
6824
+ # send a done=True chunk because this provider call must still stop
6825
+ # at assistant(tool_use), not at an assistant final answer.
6804
6826
 
6805
6827
  # ---- T027/T029 — emit tool_call frames + register Futures -----
6806
6828
  issued_calls: list[tuple[str, str]] = [] # (call_id, name)
@@ -6963,6 +6985,7 @@ async def run( # noqa: C901
6963
6985
  ToolResultFrame,
6964
6986
  )
6965
6987
 
6988
+ await _emit_buffered_visible_before_tool(message_id)
6966
6989
  await write_frame(
6967
6990
  ToolCallFrame(
6968
6991
  session_id=frame.session_id,
@@ -7146,6 +7169,7 @@ async def run( # noqa: C901
7146
7169
  ToolResultFrame,
7147
7170
  )
7148
7171
 
7172
+ await _emit_buffered_visible_before_tool(message_id)
7149
7173
  await write_frame(
7150
7174
  ToolCallFrame(
7151
7175
  session_id=frame.session_id,
@@ -7301,6 +7325,7 @@ async def run( # noqa: C901
7301
7325
  ToolResultFrame,
7302
7326
  )
7303
7327
 
7328
+ await _emit_buffered_visible_before_tool(message_id)
7304
7329
  await write_frame(
7305
7330
  ToolCallFrame(
7306
7331
  session_id=frame.session_id,
@@ -7378,6 +7403,7 @@ async def run( # noqa: C901
7378
7403
  )
7379
7404
  continue
7380
7405
 
7406
+ await _emit_buffered_visible_before_tool(message_id)
7381
7407
  await write_frame(
7382
7408
  ToolCallFrame(
7383
7409
  session_id=frame.session_id,
package/tui/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ummaya",
3
- "version": "0.2.2",
3
+ "version": "0.2.3",
4
4
  "private": true,
5
5
  "type": "module",
6
6
  "engines": {
@@ -216,7 +216,6 @@ import {
216
216
  TOOL_SEARCH_TOOL_NAME,
217
217
  } from '../../tools/ToolSearchTool/prompt.js'
218
218
  import {
219
- isRootPrimitiveToolName,
220
219
  selectTopKAdapterToolNamesForQuery,
221
220
  } from '../../tools/AdapterTool/AdapterTool.js'
222
221
  import { count } from '../../utils/array.js'
@@ -1227,9 +1226,10 @@ async function* queryModel(
1227
1226
  const discoveredToolNames = extractDiscoveredToolNames(messages)
1228
1227
 
1229
1228
  filteredTools = tools.filter(tool => {
1230
- if (turnLocalAdapterToolNames.size > 0 && isRootPrimitiveToolName(tool.name)) {
1231
- return false
1232
- }
1229
+ // 0.2.1 exposed the lightweight root primitives together with concrete
1230
+ // adapter schemas. Keep that surface so K-EXAONE preserves CC-style
1231
+ // prose→tool→prose loop painting, while still limiting concrete adapter
1232
+ // schemas to the turn-local top-k set.
1233
1233
  if (turnLocalAdapterToolNames.has(tool.name)) return true
1234
1234
  // Always include non-deferred tools
1235
1235
  if (!deferredToolNames.has(tool.name)) return true
@@ -1241,9 +1241,8 @@ async function* queryModel(
1241
1241
  } else {
1242
1242
  filteredTools = tools.filter(t => {
1243
1243
  if (toolMatchesName(t, TOOL_SEARCH_TOOL_NAME)) return false
1244
- if (turnLocalAdapterToolNames.size > 0 && isRootPrimitiveToolName(t.name)) {
1245
- return false
1246
- }
1244
+ // Keep non-deferred root primitives even when concrete top-k adapter
1245
+ // schemas are available; this matches the released 0.2.1 loop surface.
1247
1246
  if (isDeferredTool(t)) return turnLocalAdapterToolNames.has(t.name)
1248
1247
  return true
1249
1248
  })
package/uv.lock CHANGED
@@ -2725,7 +2725,7 @@ wheels = [
2725
2725
 
2726
2726
  [[package]]
2727
2727
  name = "ummaya"
2728
- version = "0.2.2"
2728
+ version = "0.2.3"
2729
2729
  source = { editable = "." }
2730
2730
  dependencies = [
2731
2731
  { name = "httpx" },