npm - synapse-orch-ai - Version diff - 1.5.5 → 1.5.6 - Mend

synapse-orch-ai 1.5.5 → 1.5.6

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

package/backend/core/llm_providers.py CHANGED Viewed

@@ -663,15 +663,11 @@ async def call_openai(model, messages, api_key, tools=None, images=None):
             # Handle tool_calls response
             choice = data["choices"][0]
             msg = choice.get("message", {})
+            text = _openai_compat_extract(msg)
             if msg.get("tool_calls"):
-                tc = msg["tool_calls"][0]
-                text = json.dumps({
-                    "tool": tc["function"]["name"],
-                    "arguments": json.loads(tc["function"].get("arguments", "{}"))
-                })
                 return text, input_tokens, output_tokens
             print(f"DEBUG: ✅ OpenAI call complete (attempt {attempt})", flush=True)
-            return msg.get("content", ""), input_tokens, output_tokens
+            return text, input_tokens, output_tokens
         except httpx.TimeoutException:
             last_error = f"Request timed out ({OPENAI_TIMEOUT}s)"
             print(f"DEBUG: ⏱️ OpenAI timeout on attempt {attempt}/{MAX_RETRIES}. Retrying in {backoff}s...", flush=True)
@@ -723,22 +719,56 @@ def _convert_tools_for_anthropic(ollama_tools: list[dict] | None) -> list[dict]
     return tools if tools else None
+def _openai_compat_extract(msg: dict) -> str:
+    """Normalize an OpenAI-compatible chat-completion message into the
+    `{"tool": ..., "arguments": ...}` shape (or plain text).
+    When the model emits BOTH content text and tool_calls in the same response
+    (newer reasoning models, some Claude-compat APIs), the text is preserved as
+    a `[REASONING]...[/REASONING]` preamble so the ReAct loop can project it
+    into the llm_reasoning event for the UI and orchestration log.
+    """
+    tool_calls = msg.get("tool_calls") or []
+    if tool_calls:
+        tc = tool_calls[0]
+        call_json = json.dumps({
+            "tool": tc["function"]["name"],
+            "arguments": json.loads(tc["function"].get("arguments") or "{}"),
+        })
+        reasoning = (msg.get("content") or "").strip()
+        if reasoning:
+            return f"[REASONING]\n{reasoning}\n[/REASONING]\n{call_json}"
+        return call_json
+    return msg.get("content", "") or ""
 def _extract_anthropic_response(response) -> str:
-    """Extract text or tool call from an Anthropic SDK response.
+    """Extract text and/or tool call from an Anthropic SDK response.
-    Checks for tool_use content blocks first (native tool calling),
-    then falls back to text blocks.
+    When the model emits a text block before a tool_use block (Claude does this
+    routinely), preserve the text as a [REASONING]...[/REASONING] preamble so
+    the downstream ReAct loop can project it into the llm_reasoning event.
+    Without this, native tool calling silently drops the model's pre-call
+    reasoning.
     """
     if not response.content:
         return "Error: Empty Anthropic response."
-    # Check for tool_use blocks first (native tool calling)
+    text_parts: list[str] = []
+    tool_use_call: dict | None = None
     for block in response.content:
-        if block.type == "tool_use":
-            return json.dumps({"tool": block.name, "arguments": block.input or {}})
+        if block.type == "tool_use" and tool_use_call is None:
+            tool_use_call = {"tool": block.name, "arguments": block.input or {}}
+        elif block.type == "text" and block.text:
+            text_parts.append(block.text)
+    if tool_use_call is not None:
+        call_json = json.dumps(tool_use_call)
+        reasoning = "\n".join(t for t in text_parts if t and t.strip()).strip()
+        if reasoning:
+            return f"[REASONING]\n{reasoning}\n[/REASONING]\n{call_json}"
+        return call_json
-    # Collect text blocks
-    text_parts = [block.text for block in response.content if block.type == "text" and block.text]
     if text_parts:
         return "\n".join(text_parts)
@@ -943,7 +973,14 @@ def _convert_messages_for_gemini(messages: list[dict], images: list[str] | None
 def _extract_gemini_response(response) -> str:
-    """Extract text or function call from a Gemini response."""
+    """Extract text and/or function call from a Gemini response.
+    When the model emits BOTH text and a function_call in the same response,
+    the text is preserved as a [REASONING]...[/REASONING] preamble so the
+    downstream ReAct loop can project it into the llm_reasoning event for the
+    chat UI and orchestration log. Without this, native function calling
+    silently drops the model's pre-call reasoning.
+    """
     if not response.candidates:
         return "Error: No response candidates from Gemini."
@@ -956,23 +993,27 @@ def _extract_gemini_response(response) -> str:
         reason = candidate.finish_reason.name if candidate.finish_reason else "UNKNOWN"
         return f"Error: Empty Gemini response. Finish Reason: {reason}"
-    # Check for function calls first (native tool calling)
-    function_calls = []
+    # Walk parts in order, collecting text (reasoning) and function calls.
+    text_parts: list[str] = []
+    function_calls: list[dict] = []
     for p in candidate.content.parts:
         if p.function_call:
             fc = p.function_call
             args = dict(fc.args) if fc.args else {}
             function_calls.append({"tool": fc.name, "arguments": args})
+        elif p.text:
+            text_parts.append(p.text)
     if function_calls:
-        # Return the first function call (ReAct loop processes one at a time)
         if len(function_calls) > 1:
             names = [fc["tool"] for fc in function_calls]
             print(f"DEBUG: ⚠️ Gemini returned {len(function_calls)} function calls: {names}. Using first: {names[0]}")
-        return json.dumps(function_calls[0])
+        call_json = json.dumps(function_calls[0])
+        reasoning = "\n".join(t for t in text_parts if t and t.strip()).strip()
+        if reasoning:
+            return f"[REASONING]\n{reasoning}\n[/REASONING]\n{call_json}"
+        return call_json
-    # Collect text parts
-    text_parts = [p.text for p in candidate.content.parts if p.text]
     if text_parts:
         return "\n".join(text_parts)
@@ -1141,15 +1182,11 @@ async def call_grok(model, messages, system, api_key, tools=None, images=None):
             output_tokens = usage.get("completion_tokens", 0)
             choice = data["choices"][0]
             msg = choice.get("message", {})
+            text = _openai_compat_extract(msg)
             if msg.get("tool_calls"):
-                tc = msg["tool_calls"][0]
-                text = json.dumps({
-                    "tool": tc["function"]["name"],
-                    "arguments": json.loads(tc["function"].get("arguments", "{}"))
-                })
                 return text, input_tokens, output_tokens
             print(f"DEBUG: ✅ Grok call complete (attempt {attempt})", flush=True)
-            return msg.get("content", ""), input_tokens, output_tokens
+            return text, input_tokens, output_tokens
         except httpx.TimeoutException:
             last_error = f"Request timed out ({GROK_TIMEOUT}s)"
             print(f"DEBUG: ⏱️ Grok timeout on attempt {attempt}/{MAX_RETRIES}. Retrying in {backoff}s...", flush=True)
@@ -1233,15 +1270,11 @@ async def call_deepseek(model, messages, system, api_key, tools=None, images=Non
             output_tokens = usage.get("completion_tokens", 0)
             choice = data["choices"][0]
             msg = choice.get("message", {})
+            text = _openai_compat_extract(msg)
             if msg.get("tool_calls"):
-                tc = msg["tool_calls"][0]
-                text = json.dumps({
-                    "tool": tc["function"]["name"],
-                    "arguments": json.loads(tc["function"].get("arguments", "{}"))
-                })
                 return text, input_tokens, output_tokens
             print(f"DEBUG: ✅ DeepSeek call complete (attempt {attempt})", flush=True)
-            return msg.get("content", ""), input_tokens, output_tokens
+            return text, input_tokens, output_tokens
         except httpx.TimeoutException:
             last_error = f"Request timed out ({DEEPSEEK_TIMEOUT}s)"
             print(f"DEBUG: ⏱️ DeepSeek timeout on attempt {attempt}/{MAX_RETRIES}. Retrying in {backoff}s...", flush=True)
@@ -1346,15 +1379,11 @@ async def call_v1_compatible(model, messages, system, base_url, api_key, tools=N
             output_tokens = usage.get("completion_tokens", 0)
             choice = data["choices"][0]
             msg = choice.get("message", {})
+            text = _openai_compat_extract(msg)
             if msg.get("tool_calls"):
-                tc = msg["tool_calls"][0]
-                text = json.dumps({
-                    "tool": tc["function"]["name"],
-                    "arguments": json.loads(tc["function"].get("arguments", "{}"))
-                })
                 return text, input_tokens, output_tokens
             print(f"DEBUG: ✅ V1-compatible call complete (attempt {attempt})", flush=True)
-            return msg.get("content", ""), input_tokens, output_tokens
+            return text, input_tokens, output_tokens
         except httpx.TimeoutException:
             last_error = f"Request timed out ({V1_TIMEOUT}s)"
             print(f"DEBUG: ⏱️ V1-compatible timeout on attempt {attempt}/{MAX_RETRIES}. Retrying in {backoff}s...", flush=True)
@@ -1848,13 +1877,21 @@ async def generate_response(
                     # Check for native tool calls
                     if "tool_calls" in msg and msg["tool_calls"]:
-                        # Convert Ollama native tool call to our internal JSON format
+                        # Convert Ollama native tool call to our internal JSON format.
+                        # Preserve any text content the model emitted alongside the
+                        # call as a [REASONING] preamble (some Ollama models emit
+                        # both — qwen3, deepseek-r1 routinely do).
                         tc = msg["tool_calls"][0]
                         print(f"DEBUG: Native Tool Call received: {tc['function']['name']}", flush=True)
-                        result_text = json.dumps({
+                        call_json = json.dumps({
                             "tool": tc["function"]["name"],
                             "arguments": tc["function"]["arguments"]
                         })
+                        reasoning = (msg.get("content") or "").strip()
+                        if reasoning:
+                            result_text = f"[REASONING]\n{reasoning}\n[/REASONING]\n{call_json}"
+                        else:
+                            result_text = call_json
                     else:
                         result_text = msg.get("content", "")

package/backend/core/orchestration/context.py CHANGED Viewed

@@ -564,7 +564,17 @@ def _format_context_value(
     label: str,
     max_chars: int = 5000,
 ) -> str:
-    """Format a single shared_state value for inclusion in the prompt."""
+    """Format a single shared_state value for inclusion in the prompt.
+    Format:
+        ### <key>
+        Source: <producer agent or upstream description>
+        <value>
+    The bare key (no brackets) is used as the header so the heading cannot be
+    visually confused with `{state.<key>}` template placeholders. The Source
+    line carries provenance (which agent / step produced this value).
+    """
     # List values from loop/parallel accumulation
     if isinstance(val, list) and val and isinstance(val[0], dict) and "result" in val[0]:
         parts = []
@@ -575,16 +585,17 @@ def _format_context_value(
             if len(result_str) > max_chars:
                 from .summarizer import smart_truncate
                 result_str = smart_truncate(result_str, max_chars)
-            iter_label = f" (Iteration {iteration})" if iteration else ""
-            source = f"{agent} → {key}{iter_label}" if agent else f"{key}{iter_label}"
-            parts.append(f"### [{source}]\n{result_str}")
+            iter_suffix = f" (iteration {iteration})" if iteration else ""
+            source = f"{agent} → {key}" if agent else f"loop accumulator → {key}"
+            parts.append(f"### {key}{iter_suffix}\nSource: {source}\n{result_str}")
         return "\n\n".join(parts)
     val_str = str(val)
     if len(val_str) > max_chars:
         from .summarizer import smart_truncate
         val_str = smart_truncate(val_str, max_chars)
-    return f"### [{label}]\n{val_str}"
+    source = label if label and label != key else "shared state"
+    return f"### {key}\nSource: {source}\n{val_str}"
 def build_origin_aware_context(
@@ -713,7 +724,9 @@ def build_origin_aware_context(
     # Always include user_input unless explicitly in input_keys
     if "user_input" in run.shared_state and "user_input" not in (step.input_keys or []):
-        context_parts.append(f"### [user_input]\n{run.shared_state['user_input']}")
+        context_parts.append(
+            f"### user_input\nSource: initial input\n{run.shared_state['user_input']}"
+        )
     # Human response keys (always inject unless already listed)
     human_keys = {"human_response"}
@@ -730,7 +743,7 @@ def build_origin_aware_context(
             if len(val) > 3000:
                 from .summarizer import smart_truncate
                 val = smart_truncate(val, 3000)
-            context_parts.append(f"### [{hkey}]\n{val}")
+            context_parts.append(f"### {hkey}\nSource: human response\n{val}")
     # Explicitly declared input_keys
     for key in (step.input_keys or []):
@@ -775,7 +788,9 @@ def build_origin_aware_context(
     prompt = "\n\n---\n\n".join(sections)
     # ------------------------------------------------------------------
-    # System prompt addition — datetime + workflow graph + step position
+    # System prompt addition — workflow graph + step position
+    # (Date/time is injected separately by build_system_prompt for every
+    # agent including orchestration steps — do not duplicate it here.)
     # ------------------------------------------------------------------
     # Count completed executions per step for the graph (×N badges).
     exec_counts: dict[str, int] = {}
@@ -789,8 +804,6 @@ def build_origin_aware_context(
     graph_md = build_workflow_graph_markdown(engine.orch, step.id, exec_counts)
     sys_lines = [
-        datetime_context(),
-        "",
         graph_md,
         "",
         f"You are currently executing step **\"{step_name}\"** (execution #{transition.execution_number}).",

package/backend/core/orchestration/logger.py CHANGED Viewed

@@ -181,11 +181,19 @@ class OrchestrationLogger:
      Preview: {preview}
 """)
+        elif etype == "llm_reasoning":
+            reasoning = event.get("reasoning", "")
+            turn = event.get("turn", "")
+            self._write(f"""
+  💭 REASONING (turn {turn}):
+{self._indent(reasoning)}
+""")
         elif etype == "llm_thought":
             thought = event.get("thought", "")
             turn = event.get("turn", "")
             self._write(f"""
-  🧠 LLM THOUGHT (turn {turn}):
+  🛠️  ACTION (turn {turn}):
 {self._indent(thought)}
 """)