npm - @respan/cli - Versions diffs - 0.4.1 → 0.5.1 - Mend

@respan/cli 0.4.1 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/assets/hook.py +322 -197
package/dist/commands/integrate/claude-code.d.ts +3 -0
package/dist/commands/integrate/claude-code.js +46 -14
package/dist/commands/integrate/codex-cli.d.ts +3 -0
package/dist/commands/integrate/gemini-cli.d.ts +3 -0
package/dist/commands/integrate/opencode.d.ts +3 -0
package/dist/lib/integrate.d.ts +3 -0
package/dist/lib/integrate.js +10 -0
package/oclif.manifest.json +89 -1
package/package.json +2 -2
package/dist/assets/assets/hook.py +0 -909

package/dist/assets/hook.py CHANGED Viewed

@@ -81,6 +81,38 @@ def save_state(state: Dict[str, Any]) -> None:
         STATE_FILE.write_text(json.dumps(state, indent=2), encoding="utf-8")
+# Known config keys in respan.json that map to span fields.
+# Anything else is treated as a custom property (goes into metadata).
+KNOWN_CONFIG_KEYS = {"customer_id", "span_name", "workflow_name"}
+def load_respan_config(cwd: str) -> Dict[str, Any]:
+    """Load .claude/respan.json from the project directory.
+    Returns a dict with two keys:
+      - "fields": known span fields (customer_id, span_name, workflow_name)
+      - "properties": everything else (custom properties → metadata)
+    """
+    # Every exit path returns the same {"fields": ..., "properties": ...} shape,
+    # so callers can index both keys without checking for None.
+    config_path = Path(cwd) / ".claude" / "respan.json"
+    if not config_path.exists():
+        # No project-level config file: fall back to the empty shape.
+        return {"fields": {}, "properties": {}}
+    try:
+        raw = json.loads(config_path.read_text(encoding="utf-8"))
+        if not isinstance(raw, dict):
+            # Valid JSON whose top-level value is not an object is ignored.
+            return {"fields": {}, "properties": {}}
+        fields = {}
+        properties = {}
+        # Partition top-level keys: members of KNOWN_CONFIG_KEYS go to
+        # "fields", every other key goes to "properties". Values are copied
+        # as-is, without validation.
+        for k, v in raw.items():
+            if k in KNOWN_CONFIG_KEYS:
+                fields[k] = v
+            else:
+                properties[k] = v
+        return {"fields": fields, "properties": properties}
+    except (json.JSONDecodeError, IOError) as e:
+        # Unreadable or malformed file: report via debug() and use the empty shape.
+        # NOTE(review): a file that is not valid UTF-8 makes read_text raise
+        # UnicodeDecodeError, which this tuple does not cover — confirm that
+        # callers guard against it.
+        debug(f"Failed to load respan.json from {config_path}: {e}")
+        return {"fields": {}, "properties": {}}
 def get_content(msg: Dict[str, Any]) -> Any:
     """Extract content from a message."""
     if isinstance(msg, dict):
@@ -317,27 +349,33 @@ def create_respan_spans(
     user_msg: Dict[str, Any],
     assistant_msgs: List[Dict[str, Any]],
     tool_results: List[Dict[str, Any]],
+    config: Optional[Dict[str, Any]] = None,
 ) -> List[Dict[str, Any]]:
-    """Create Respan span logs for a single turn with all available metadata."""
+    """Create Respan span logs for a single turn with all available metadata.
+    Produces a proper span tree so that the Respan UI renders nested children:
+        Root (agent container)
+          ├── claude.chat  (generation – carries model, tokens, messages)
+          ├── Thinking 1   (generation, if extended thinking is present)
+          ├── Tool: Read   (tool, if tool use occurred)
+          └── Tool: Write  (tool, if tool use occurred)
+    """
     spans = []
-    # Extract user text and timestamp
+    # ------------------------------------------------------------------
+    # 1. Extract data from the transcript messages
+    # ------------------------------------------------------------------
     user_text = get_text_content(user_msg)
     user_timestamp = user_msg.get("timestamp")
     user_time = parse_timestamp(user_timestamp) if user_timestamp else None
-    # Extract assistant text from ALL messages in the turn (tool-using turns
-    # have multiple assistant messages: text before tool, then text after).
+    # Collect assistant text across all messages in the turn
     final_output = ""
-    first_assistant_msg = None
     if assistant_msgs:
         text_parts = [get_text_content(m) for m in assistant_msgs]
         final_output = "\n".join(p for p in text_parts if p)
-        first_assistant_msg = assistant_msgs[0]
-    # Get model, usage, and timing info from assistant messages.
-    # For tool-using turns there are multiple assistant messages (multiple API
-    # calls), so we aggregate usage and take the *last* timestamp as end time.
+    # Aggregate model, usage, timing from (possibly multiple) API calls
     model = "claude"
     usage = None
     request_id = None
@@ -360,7 +398,6 @@ def create_respan_spans(
             last_assistant_timestamp = ts
             last_assistant_time = parse_timestamp(ts)
-        # Aggregate usage across all API calls in the turn
         msg_usage = msg_obj.get("usage")
         if msg_usage:
             if usage is None:
@@ -371,187 +408,187 @@ def create_respan_spans(
                             "cache_read_input_tokens"):
                     if key in msg_usage:
                         usage[key] = usage.get(key, 0) + msg_usage[key]
-                # Keep last service_tier
                 if "service_tier" in msg_usage:
                     usage["service_tier"] = msg_usage["service_tier"]
-    # Calculate timing
-    start_time_str = user_timestamp or first_assistant_timestamp or datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
-    timestamp_str = last_assistant_timestamp or first_assistant_timestamp or datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+    # Timing
+    now_str = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+    start_time_str = user_timestamp or first_assistant_timestamp or now_str
+    timestamp_str = last_assistant_timestamp or first_assistant_timestamp or now_str
-    # Calculate latency from user message to final assistant response
     latency = None
     if user_time and last_assistant_time:
         latency = (last_assistant_time - user_time).total_seconds()
-    # Extract messages for chat span
-    prompt_messages = []
+    # Messages
+    prompt_messages: List[Dict[str, Any]] = []
     if user_text:
         prompt_messages.append({"role": "user", "content": user_text})
-    completion_message = None
+    completion_message: Optional[Dict[str, Any]] = None
     if final_output:
         completion_message = {"role": "assistant", "content": final_output}
-    # Create trace ID for this turn
+    # IDs — respan.json fields, then env var overrides
+    cfg_fields = (config or {}).get("fields", {})
+    cfg_props = (config or {}).get("properties", {})
     trace_unique_id = f"{session_id}_turn_{turn_num}"
-    # Naming: human-readable workflow + span names
-    workflow_name = "claude-code"
-    # Use first ~60 chars of user message as span name for readability
-    user_preview = (user_text[:60] + "...") if user_text and len(user_text) > 60 else (user_text or f"turn_{turn_num}")
-    root_span_name = f"Turn {turn_num}: {user_preview}"
+    workflow_name = os.environ.get("RESPAN_WORKFLOW_NAME") or cfg_fields.get("workflow_name") or "claude-code"
+    root_span_name = os.environ.get("RESPAN_SPAN_NAME") or cfg_fields.get("span_name") or "claude-code"
     thread_id = f"claudecode_{session_id}"
-    # Build metadata with additional info
-    metadata = {
-        "claude_code_turn": turn_num,
-    }
+    customer_id = os.environ.get("RESPAN_CUSTOMER_ID") or cfg_fields.get("customer_id") or ""
+    # Metadata — custom properties from respan.json, then env overrides
+    metadata: Dict[str, Any] = {"claude_code_turn": turn_num}
+    if cfg_props:
+        metadata.update(cfg_props)
     if request_id:
         metadata["request_id"] = request_id
     if stop_reason:
         metadata["stop_reason"] = stop_reason
-    # Merge user-provided metadata from env var
     env_metadata = os.environ.get("RESPAN_METADATA")
     if env_metadata:
         try:
             extra = json.loads(env_metadata)
             if isinstance(extra, dict):
                 metadata.update(extra)
-            else:
-                debug("RESPAN_METADATA is not a JSON object, skipping")
-        except json.JSONDecodeError as e:
-            debug(f"Invalid JSON in RESPAN_METADATA, skipping: {e}")
-    # Build usage object with cache details
-    usage_obj = None
+        except json.JSONDecodeError:
+            pass
+    # Usage
+    usage_fields: Dict[str, Any] = {}
     if usage:
-        usage_obj = {
-            "prompt_tokens": usage.get("input_tokens", 0),
-            "completion_tokens": usage.get("output_tokens", 0),
-        }
-        total_tokens = usage_obj["prompt_tokens"] + usage_obj["completion_tokens"]
-        if total_tokens > 0:
-            usage_obj["total_tokens"] = total_tokens
-        # Add cache details
-        prompt_tokens_details = {}
+        prompt_tokens = usage.get("input_tokens", 0)
+        completion_tokens = usage.get("output_tokens", 0)
+        usage_fields["prompt_tokens"] = prompt_tokens
+        usage_fields["completion_tokens"] = completion_tokens
+        total = prompt_tokens + completion_tokens
+        if total > 0:
+            usage_fields["total_tokens"] = total
         cache_creation = usage.get("cache_creation_input_tokens", 0)
         cache_read = usage.get("cache_read_input_tokens", 0)
+        if cache_creation > 0:
+            usage_fields["cache_creation_prompt_tokens"] = cache_creation
+        prompt_tokens_details: Dict[str, int] = {}
         if cache_creation > 0:
             prompt_tokens_details["cache_creation_tokens"] = cache_creation
-            usage_obj["cache_creation_prompt_tokens"] = cache_creation
         if cache_read > 0:
             prompt_tokens_details["cached_tokens"] = cache_read
         if prompt_tokens_details:
-            usage_obj["prompt_tokens_details"] = prompt_tokens_details
-        # Add service tier to metadata
+            usage_fields["prompt_tokens_details"] = prompt_tokens_details
         service_tier = usage.get("service_tier")
         if service_tier:
             metadata["service_tier"] = service_tier
-    # Create chat span (root)
-    chat_span_id = f"claudecode_{trace_unique_id}_chat"
-    customer_id = os.environ.get("RESPAN_CUSTOMER_ID", "claude-code")
-    chat_span = {
+    # ------------------------------------------------------------------
+    # 2. Root span – agent container (carries the model name, but no token usage)
+    # ------------------------------------------------------------------
+    root_span_id = f"claudecode_{trace_unique_id}_root"
+    root_span: Dict[str, Any] = {
         "trace_unique_id": trace_unique_id,
         "thread_identifier": thread_id,
         "customer_identifier": customer_id,
-        "span_unique_id": chat_span_id,
-        "span_parent_id": None,
+        "span_unique_id": root_span_id,
         "span_name": root_span_name,
         "span_workflow_name": workflow_name,
-        "log_type": "agent",
+        "model": model,
+        "span_path": "",
         "input": json.dumps(prompt_messages) if prompt_messages else "",
         "output": json.dumps(completion_message) if completion_message else "",
-        "prompt_messages": prompt_messages,
-        "completion_message": completion_message,
-        "model": model,
         "timestamp": timestamp_str,
         "start_time": start_time_str,
         "metadata": metadata,
     }
-    # Add usage if available
-    if usage_obj:
-        chat_span["prompt_tokens"] = usage_obj["prompt_tokens"]
-        chat_span["completion_tokens"] = usage_obj["completion_tokens"]
-        if "total_tokens" in usage_obj:
-            chat_span["total_tokens"] = usage_obj["total_tokens"]
-        if "cache_creation_prompt_tokens" in usage_obj:
-            chat_span["cache_creation_prompt_tokens"] = usage_obj["cache_creation_prompt_tokens"]
-        if "prompt_tokens_details" in usage_obj:
-            chat_span["prompt_tokens_details"] = usage_obj["prompt_tokens_details"]
-    # Add latency if calculated
     if latency is not None:
-        chat_span["latency"] = latency
-    spans.append(chat_span)
-    # Extract thinking blocks and create spans for them
-    thinking_spans = []
-    for idx, assistant_msg in enumerate(assistant_msgs):
-        if isinstance(assistant_msg, dict) and "message" in assistant_msg:
-            content = assistant_msg["message"].get("content", [])
-            if isinstance(content, list):
-                for item in content:
-                    if isinstance(item, dict) and item.get("type") == "thinking":
-                        thinking_text = item.get("thinking", "")
-                        if thinking_text:
-                            thinking_span_id = f"claudecode_{trace_unique_id}_thinking_{len(thinking_spans) + 1}"
-                            thinking_timestamp = assistant_msg.get("timestamp", timestamp_str)
-                            thinking_spans.append({
-                                "trace_unique_id": trace_unique_id,
-                                "span_unique_id": thinking_span_id,
-                                "span_parent_id": chat_span_id,
-                                "span_name": f"Thinking {len(thinking_spans) + 1}",
-                                "span_workflow_name": workflow_name,
-                                "log_type": "generation",
-                                "input": "",
-                                "output": thinking_text,
-                                "timestamp": thinking_timestamp,
-                                "start_time": thinking_timestamp,
-                            })
-    spans.extend(thinking_spans)
-    # Collect all tool calls and results with metadata
-    tool_call_map = {}
+        root_span["latency"] = latency
+    spans.append(root_span)
+    # ------------------------------------------------------------------
+    # 3. LLM generation child span (always created → every turn has ≥1 child)
+    # ------------------------------------------------------------------
+    gen_span_id = f"claudecode_{trace_unique_id}_gen"
+    gen_start = first_assistant_timestamp or start_time_str
+    gen_end = last_assistant_timestamp or timestamp_str
+    gen_latency = None
+    gen_start_dt = parse_timestamp(gen_start) if gen_start else None
+    gen_end_dt = parse_timestamp(gen_end) if gen_end else None
+    if gen_start_dt and gen_end_dt:
+        gen_latency = (gen_end_dt - gen_start_dt).total_seconds()
+    gen_span: Dict[str, Any] = {
+        "trace_unique_id": trace_unique_id,
+        "span_unique_id": gen_span_id,
+        "span_parent_id": root_span_id,
+        "span_name": "claude.chat",
+        "span_workflow_name": workflow_name,
+        "span_path": "claude_chat",
+        "model": model,
+        "provider_id": "anthropic",
+        "input": json.dumps(prompt_messages) if prompt_messages else "",
+        "output": json.dumps(completion_message) if completion_message else "",
+        "prompt_messages": prompt_messages,
+        "completion_message": completion_message,
+        "timestamp": gen_end,
+        "start_time": gen_start,
+    }
+    if gen_latency is not None:
+        gen_span["latency"] = gen_latency
+    gen_span.update(usage_fields)
+    spans.append(gen_span)
+    # ------------------------------------------------------------------
+    # 4. Thinking child spans
+    # ------------------------------------------------------------------
+    thinking_num = 0
     for assistant_msg in assistant_msgs:
-        tool_calls = get_tool_calls(assistant_msg)
-        for tool_call in tool_calls:
-            tool_name = tool_call.get("name", "unknown")
-            tool_input = tool_call.get("input", {})
+        if not (isinstance(assistant_msg, dict) and "message" in assistant_msg):
+            continue
+        content = assistant_msg["message"].get("content", [])
+        if not isinstance(content, list):
+            continue
+        for item in content:
+            if isinstance(item, dict) and item.get("type") == "thinking":
+                thinking_text = item.get("thinking", "")
+                if not thinking_text:
+                    continue
+                thinking_num += 1
+                thinking_ts = assistant_msg.get("timestamp", timestamp_str)
+                spans.append({
+                    "trace_unique_id": trace_unique_id,
+                    "span_unique_id": f"claudecode_{trace_unique_id}_thinking_{thinking_num}",
+                    "span_parent_id": root_span_id,
+                    "span_name": f"Thinking {thinking_num}",
+                    "span_workflow_name": workflow_name,
+                    "span_path": "thinking",
+                    "input": "",
+                    "output": thinking_text,
+                    "timestamp": thinking_ts,
+                    "start_time": thinking_ts,
+                })
+    # ------------------------------------------------------------------
+    # 5. Tool child spans
+    # ------------------------------------------------------------------
+    tool_call_map: Dict[str, Dict[str, Any]] = {}
+    for assistant_msg in assistant_msgs:
+        for tool_call in get_tool_calls(assistant_msg):
             tool_id = tool_call.get("id", "")
             tool_call_map[tool_id] = {
-                "name": tool_name,
-                "input": tool_input,
+                "name": tool_call.get("name", "unknown"),
+                "input": tool_call.get("input", {}),
                 "id": tool_id,
                 "timestamp": assistant_msg.get("timestamp") if isinstance(assistant_msg, dict) else None,
             }
-    # Find matching tool results with metadata
     for tr in tool_results:
         tr_content = get_content(tr)
-        tool_result_metadata = {}
-        # Extract tool result metadata
+        tool_result_metadata: Dict[str, Any] = {}
         if isinstance(tr, dict):
-            tool_use_result = tr.get("toolUseResult", {})
-            if tool_use_result:
-                if "durationMs" in tool_use_result:
-                    tool_result_metadata["duration_ms"] = tool_use_result["durationMs"]
-                if "numFiles" in tool_use_result:
-                    tool_result_metadata["num_files"] = tool_use_result["numFiles"]
-                if "filenames" in tool_use_result:
-                    tool_result_metadata["filenames"] = tool_use_result["filenames"]
-                if "truncated" in tool_use_result:
-                    tool_result_metadata["truncated"] = tool_use_result["truncated"]
+            tur = tr.get("toolUseResult") or {}
+            for src, dst in [("durationMs", "duration_ms"), ("numFiles", "num_files"),
+                             ("filenames", "filenames"), ("truncated", "truncated")]:
+                if src in tur:
+                    tool_result_metadata[dst] = tur[src]
         if isinstance(tr_content, list):
             for item in tr_content:
                 if isinstance(item, dict) and item.get("type") == "tool_result":
@@ -560,44 +597,51 @@ def create_respan_spans(
                         tool_call_map[tool_use_id]["output"] = item.get("content")
                         tool_call_map[tool_use_id]["result_metadata"] = tool_result_metadata
                         tool_call_map[tool_use_id]["result_timestamp"] = tr.get("timestamp")
-    # Create tool spans (children)
     tool_num = 0
-    for tool_id, tool_data in tool_call_map.items():
+    for tool_id, td in tool_call_map.items():
         tool_num += 1
-        tool_span_id = f"claudecode_{trace_unique_id}_tool_{tool_num}"
-        # Use tool result timestamp if available, otherwise use tool call timestamp
-        tool_timestamp = tool_data.get("result_timestamp") or tool_data.get("timestamp") or timestamp_str
-        tool_start_time = tool_data.get("timestamp") or start_time_str
-        # Format input and output for better readability
-        formatted_input = format_tool_input(tool_data['name'], tool_data["input"])
-        formatted_output = format_tool_output(tool_data['name'], tool_data.get("output"))
-        tool_span = {
+        tool_ts = td.get("result_timestamp") or td.get("timestamp") or timestamp_str
+        tool_start = td.get("timestamp") or start_time_str
+        tool_span: Dict[str, Any] = {
             "trace_unique_id": trace_unique_id,
-            "span_unique_id": tool_span_id,
-            "span_parent_id": chat_span_id,
-            "span_name": f"Tool: {tool_data['name']}",
+            "span_unique_id": f"claudecode_{trace_unique_id}_tool_{tool_num}",
+            "span_parent_id": root_span_id,
+            "span_name": f"Tool: {td['name']}",
             "span_workflow_name": workflow_name,
-            "log_type": "tool",
-            "input": formatted_input,
-            "output": formatted_output,
-            "timestamp": tool_timestamp,
-            "start_time": tool_start_time,
+            "span_path": f"tool_{td['name'].lower()}",
+            "input": format_tool_input(td["name"], td["input"]),
+            "output": format_tool_output(td["name"], td.get("output")),
+            "timestamp": tool_ts,
+            "start_time": tool_start,
         }
-        # Add tool result metadata if available
-        if tool_data.get("result_metadata"):
-            tool_span["metadata"] = tool_data["result_metadata"]
-            # Calculate latency if duration_ms is available
-            duration_ms = tool_data["result_metadata"].get("duration_ms")
+        if td.get("result_metadata"):
+            tool_span["metadata"] = td["result_metadata"]
+            duration_ms = td["result_metadata"].get("duration_ms")
             if duration_ms:
-                tool_span["latency"] = duration_ms / 1000.0  # Convert ms to seconds
+                tool_span["latency"] = duration_ms / 1000.0
         spans.append(tool_span)
+    # Add required Respan platform fields to every span.
+    # The backend expects these on all spans (per official SDK examples).
+    respan_defaults = {
+        "warnings": "",
+        "encoding_format": "float",
+        "disable_fallback": False,
+        "respan_params": {
+            "has_webhook": False,
+            "environment": os.environ.get("RESPAN_ENVIRONMENT", "prod"),
+        },
+        "field_name": "data: ",
+        "delimiter": "\n\n",
+        "disable_log": False,
+        "request_breakdown": False,
+    }
+    for span in spans:
+        for key, value in respan_defaults.items():
+            if key not in span:
+                span[key] = value
     return spans
@@ -607,36 +651,49 @@ def send_spans(
     base_url: str,
     turn_num: int,
 ) -> None:
-    """Send spans to Respan with timeout and one retry on transient errors."""
+    """Send spans to Respan as a single batch (matches official SDK behaviour).
+    The official Respan tracing SDK sends all spans for a trace in one
+    POST request to ``/v1/traces/ingest``.  We do the same here, with
+    simple retry logic for transient server errors.
+    """
     url = f"{base_url}/v1/traces/ingest"
     headers = {"Authorization": f"Bearer {api_key}"}
-    for attempt in range(2):
+    span_names = [s.get("span_name", "?") for s in spans]
+    payload_json = json.dumps(spans)
+    payload_size = len(payload_json)
+    debug(f"Sending {len(spans)} spans ({payload_size} bytes) for turn {turn_num}: {span_names}")
+    if DEBUG:
+        debug_file = LOG_FILE.parent / f"respan_spans_turn_{turn_num}.json"
+        debug_file.write_text(payload_json, encoding="utf-8")
+        debug(f"Dumped spans to {debug_file}")
+    for attempt in range(3):
         try:
             response = requests.post(url, json=spans, headers=headers, timeout=30)
             if response.status_code < 400:
-                debug(f"Sent {len(spans)} spans for turn {turn_num}")
+                resp_text = response.text[:300] if response.text else ""
+                debug(f"Sent {len(spans)} spans for turn {turn_num} "
+                      f"(attempt {attempt + 1}): {resp_text}")
                 return
             if response.status_code < 500:
-                # 4xx — not retryable
-                log("ERROR", f"Failed to send spans for turn {turn_num}: HTTP {response.status_code}")
+                log("ERROR", f"Spans rejected for turn {turn_num}: "
+                    f"HTTP {response.status_code} - {response.text[:200]}")
                 return
-            # 5xx — retryable
-            if attempt == 0:
-                debug(f"Server error {response.status_code} for turn {turn_num}, retrying...")
-                time.sleep(1)
-                continue
-            log("ERROR", f"Failed to send spans for turn {turn_num} after retry: HTTP {response.status_code}")
-        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
-            if attempt == 0:
-                debug(f"Transient error for turn {turn_num}: {e}, retrying...")
-                time.sleep(1)
-                continue
-            log("ERROR", f"Failed to send spans for turn {turn_num} after retry: {e}")
+            # 5xx — retry after short delay
+            debug(f"Server error for turn {turn_num} "
+                  f"(attempt {attempt + 1}), retrying...")
+            time.sleep(1.0)
+        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
+            time.sleep(1.0)
         except Exception as e:
             log("ERROR", f"Failed to send spans for turn {turn_num}: {e}")
             return
+    log("ERROR", f"Failed to send {len(spans)} spans for turn {turn_num} "
+        f"after 3 attempts")
 def process_transcript(
     session_id: str,
@@ -644,6 +701,7 @@ def process_transcript(
     state: Dict[str, Any],
     api_key: str,
     base_url: str,
+    config: Optional[Dict[str, Any]] = None,
 ) -> int:
     """Process a transcript file and create traces for new turns."""
     # Get previous state for this session
@@ -693,7 +751,8 @@ def process_transcript(
         turns_processed += 1
         turn_num = turn_count + turns_processed
         spans = create_respan_spans(
-            session_id, turn_num, current_user, current_assistants, current_tool_results
+            session_id, turn_num, current_user, current_assistants, current_tool_results,
+            config=config,
         )
         send_spans(spans, api_key, base_url, turn_num)
         last_committed_line = total_lines  # safe default, refined below
@@ -755,8 +814,17 @@ def process_transcript(
         current_assistants.append(merged)
     if current_user and current_assistants:
-        _commit_turn()
-        last_committed_line = total_lines
+        # Check if the turn has actual text output.  The Stop hook can fire
+        # before the final assistant text block is flushed to disk, leaving
+        # only thinking/tool_use blocks.  If no text content is found, treat
+        # the turn as incomplete so the retry logic re-reads it.
+        has_text = any(get_text_content(m) for m in current_assistants)
+        if has_text:
+            _commit_turn()
+            last_committed_line = total_lines
+        else:
+            last_committed_line = current_user_line
+            debug(f"Turn has assistant msgs but no text output yet (likely not flushed), will retry")
     else:
         # Incomplete turn — rewind so the next run re-reads from the
         # unmatched user message (or from where we left off if no user).
@@ -869,13 +937,32 @@ def main():
         debug("Tracing disabled (TRACE_TO_RESPAN != true)")
         sys.exit(0)
-    # Check for required environment variables
+    # Resolve API key: env var > ~/.config/respan/credentials.json
     api_key = os.getenv("RESPAN_API_KEY")
-    # Default: api.respan.ai | Enterprise: endpoint.respan.ai (set RESPAN_BASE_URL)
     base_url = os.getenv("RESPAN_BASE_URL", "https://api.respan.ai/api")
     if not api_key:
-        log("ERROR", "Respan API key not set (RESPAN_API_KEY)")
+        creds_file = Path.home() / ".config" / "respan" / "credentials.json"
+        if creds_file.exists():
+            try:
+                creds = json.loads(creds_file.read_text(encoding="utf-8"))
+                # Find the active profile's credential
+                config_file = Path.home() / ".config" / "respan" / "config.json"
+                profile = "default"
+                if config_file.exists():
+                    cfg = json.loads(config_file.read_text(encoding="utf-8"))
+                    profile = cfg.get("activeProfile", "default")
+                cred = creds.get(profile, {})
+                api_key = cred.get("apiKey") or cred.get("accessToken")
+                if not base_url or base_url == "https://api.respan.ai/api":
+                    base_url = cred.get("baseUrl", base_url)
+                if api_key:
+                    debug(f"Using API key from credentials.json (profile: {profile})")
+            except (json.JSONDecodeError, IOError) as e:
+                debug(f"Failed to read credentials.json: {e}")
+    if not api_key:
+        log("ERROR", "No API key found. Run: respan auth login")
         sys.exit(0)
     # Try stdin payload first, fall back to filesystem scan
@@ -894,11 +981,49 @@ def main():
     debug(f"Processing session: {session_id}")
-    # Process the transcript under file lock
+    # Load respan.json config from the project directory.
+    # Extract the project CWD from the "cwd" field of the first line, or failing that, of one of the transcript's first five lines.
+    config: Dict[str, Any] = {"fields": {}, "properties": {}}
     try:
-        with state_lock():
-            state = load_state()
-            turns = process_transcript(session_id, transcript_file, state, api_key, base_url)
+        first_line = transcript_file.read_text(encoding="utf-8").split("\n")[0]
+        if first_line:
+            first_msg = json.loads(first_line)
+            cwd = first_msg.get("cwd")
+            if not cwd:
+                # First record has no cwd (it is often a file-history-snapshot); scan the first five lines instead
+                lines = transcript_file.read_text(encoding="utf-8").split("\n")
+                for line in lines[:5]:
+                    if line.strip():
+                        msg = json.loads(line)
+                        cwd = msg.get("cwd")
+                        if cwd:
+                            break
+            if cwd:
+                config = load_respan_config(cwd)
+                debug(f"Loaded respan.json config from {cwd}: {config}")
+    except Exception as e:
+        debug(f"Failed to extract CWD or load config: {e}")
+    # Process the transcript under file lock.
+    # Make up to 3 attempts with a short delay between them — the Stop hook can fire
+    # before Claude Code finishes flushing the assistant response to
+    # the transcript file, causing an incomplete turn on the first read.
+    max_attempts = 3
+    turns = 0
+    try:
+        for attempt in range(max_attempts):
+            with state_lock():
+                state = load_state()
+                turns = process_transcript(session_id, transcript_file, state, api_key, base_url, config=config)
+            if turns > 0:
+                break
+            if attempt < max_attempts - 1:
+                delay = 0.5 * (attempt + 1)
+                debug(f"No turns processed (attempt {attempt + 1}/{max_attempts}), "
+                      f"retrying in {delay}s...")
+                time.sleep(delay)
         # Log execution time
         duration = (datetime.now() - script_start).total_seconds()