npm - @respan/cli - Versions diffs - 0.4.0 → 0.4.1 - Mend

@respan/cli 0.4.0 → 0.4.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (3)

package/dist/assets/assets/hook.py +155 -165
package/oclif.manifest.json +183 -183
package/package.json +1 -1

package/dist/assets/assets/hook.py CHANGED

@@ -318,26 +318,31 @@ def create_respan_spans(
     assistant_msgs: List[Dict[str, Any]],
     tool_results: List[Dict[str, Any]],
 ) -> List[Dict[str, Any]]:
-    """Create Respan span logs for a single turn with all available metadata."""
+    """Create Respan span logs for a single turn with all available metadata.
+    Produces a proper span tree so that the Respan UI renders nested children:
+        Root (agent container)
+          ├── claude.chat  (generation – carries model, tokens, messages)
+          ├── Thinking 1   (generation, if extended thinking is present)
+          ├── Tool: Read   (tool, if tool use occurred)
+          └── Tool: Write  (tool, if tool use occurred)
+    """
     spans = []
-    # Extract user text and timestamp
+    # ------------------------------------------------------------------
+    # 1. Extract data from the transcript messages
+    # ------------------------------------------------------------------
     user_text = get_text_content(user_msg)
     user_timestamp = user_msg.get("timestamp")
     user_time = parse_timestamp(user_timestamp) if user_timestamp else None
-    # Extract assistant text from ALL messages in the turn (tool-using turns
-    # have multiple assistant messages: text before tool, then text after).
+    # Collect assistant text across all messages in the turn
     final_output = ""
-    first_assistant_msg = None
     if assistant_msgs:
         text_parts = [get_text_content(m) for m in assistant_msgs]
         final_output = "\n".join(p for p in text_parts if p)
-        first_assistant_msg = assistant_msgs[0]
-    # Get model, usage, and timing info from assistant messages.
-    # For tool-using turns there are multiple assistant messages (multiple API
-    # calls), so we aggregate usage and take the *last* timestamp as end time.
+    # Aggregate model, usage, timing from (possibly multiple) API calls
     model = "claude"
     usage = None
     request_id = None
@@ -360,7 +365,6 @@ def create_respan_spans(
             last_assistant_timestamp = ts
             last_assistant_time = parse_timestamp(ts)
-        # Aggregate usage across all API calls in the turn
         msg_usage = msg_obj.get("usage")
         if msg_usage:
             if usage is None:
@@ -371,187 +375,185 @@ def create_respan_spans(
                             "cache_read_input_tokens"):
                     if key in msg_usage:
                         usage[key] = usage.get(key, 0) + msg_usage[key]
-                # Keep last service_tier
                 if "service_tier" in msg_usage:
                     usage["service_tier"] = msg_usage["service_tier"]
-    # Calculate timing
-    start_time_str = user_timestamp or first_assistant_timestamp or datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
-    timestamp_str = last_assistant_timestamp or first_assistant_timestamp or datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+    # Timing
+    now_str = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+    start_time_str = user_timestamp or first_assistant_timestamp or now_str
+    timestamp_str = last_assistant_timestamp or first_assistant_timestamp or now_str
-    # Calculate latency from user message to final assistant response
     latency = None
     if user_time and last_assistant_time:
         latency = (last_assistant_time - user_time).total_seconds()
-    # Extract messages for chat span
-    prompt_messages = []
+    # Messages
+    prompt_messages: List[Dict[str, Any]] = []
     if user_text:
         prompt_messages.append({"role": "user", "content": user_text})
-    completion_message = None
+    completion_message: Optional[Dict[str, Any]] = None
     if final_output:
         completion_message = {"role": "assistant", "content": final_output}
-    # Create trace ID for this turn
+    # IDs
     trace_unique_id = f"{session_id}_turn_{turn_num}"
-    # Naming: human-readable workflow + span names
     workflow_name = "claude-code"
-    # Use first ~60 chars of user message as span name for readability
     user_preview = (user_text[:60] + "...") if user_text and len(user_text) > 60 else (user_text or f"turn_{turn_num}")
     root_span_name = f"Turn {turn_num}: {user_preview}"
     thread_id = f"claudecode_{session_id}"
-    # Build metadata with additional info
-    metadata = {
-        "claude_code_turn": turn_num,
-    }
+    customer_id = os.environ.get("RESPAN_CUSTOMER_ID", "claude-code")
+    # Metadata
+    metadata: Dict[str, Any] = {"claude_code_turn": turn_num}
     if request_id:
         metadata["request_id"] = request_id
     if stop_reason:
         metadata["stop_reason"] = stop_reason
-    # Merge user-provided metadata from env var
     env_metadata = os.environ.get("RESPAN_METADATA")
     if env_metadata:
         try:
             extra = json.loads(env_metadata)
             if isinstance(extra, dict):
                 metadata.update(extra)
-            else:
-                debug("RESPAN_METADATA is not a JSON object, skipping")
-        except json.JSONDecodeError as e:
-            debug(f"Invalid JSON in RESPAN_METADATA, skipping: {e}")
-    # Build usage object with cache details
-    usage_obj = None
+        except json.JSONDecodeError:
+            pass
+    # Usage
+    usage_fields: Dict[str, Any] = {}
     if usage:
-        usage_obj = {
-            "prompt_tokens": usage.get("input_tokens", 0),
-            "completion_tokens": usage.get("output_tokens", 0),
-        }
-        total_tokens = usage_obj["prompt_tokens"] + usage_obj["completion_tokens"]
-        if total_tokens > 0:
-            usage_obj["total_tokens"] = total_tokens
-        # Add cache details
-        prompt_tokens_details = {}
+        prompt_tokens = usage.get("input_tokens", 0)
+        completion_tokens = usage.get("output_tokens", 0)
+        usage_fields["prompt_tokens"] = prompt_tokens
+        usage_fields["completion_tokens"] = completion_tokens
+        total = prompt_tokens + completion_tokens
+        if total > 0:
+            usage_fields["total_tokens"] = total
         cache_creation = usage.get("cache_creation_input_tokens", 0)
         cache_read = usage.get("cache_read_input_tokens", 0)
+        if cache_creation > 0:
+            usage_fields["cache_creation_prompt_tokens"] = cache_creation
+        prompt_tokens_details: Dict[str, int] = {}
         if cache_creation > 0:
             prompt_tokens_details["cache_creation_tokens"] = cache_creation
-            usage_obj["cache_creation_prompt_tokens"] = cache_creation
         if cache_read > 0:
             prompt_tokens_details["cached_tokens"] = cache_read
         if prompt_tokens_details:
-            usage_obj["prompt_tokens_details"] = prompt_tokens_details
-        # Add service tier to metadata
+            usage_fields["prompt_tokens_details"] = prompt_tokens_details
         service_tier = usage.get("service_tier")
         if service_tier:
             metadata["service_tier"] = service_tier
-    # Create chat span (root)
-    chat_span_id = f"claudecode_{trace_unique_id}_chat"
-    customer_id = os.environ.get("RESPAN_CUSTOMER_ID", "claude-code")
-    chat_span = {
+    # ------------------------------------------------------------------
+    # 2. Root span – pure agent container (no model / token info)
+    # ------------------------------------------------------------------
+    root_span_id = f"claudecode_{trace_unique_id}_root"
+    root_span: Dict[str, Any] = {
         "trace_unique_id": trace_unique_id,
         "thread_identifier": thread_id,
         "customer_identifier": customer_id,
-        "span_unique_id": chat_span_id,
-        "span_parent_id": None,
+        "span_unique_id": root_span_id,
         "span_name": root_span_name,
         "span_workflow_name": workflow_name,
+        "span_path": "",
         "log_type": "agent",
         "input": json.dumps(prompt_messages) if prompt_messages else "",
         "output": json.dumps(completion_message) if completion_message else "",
-        "prompt_messages": prompt_messages,
-        "completion_message": completion_message,
-        "model": model,
         "timestamp": timestamp_str,
         "start_time": start_time_str,
         "metadata": metadata,
     }
-    # Add usage if available
-    if usage_obj:
-        chat_span["prompt_tokens"] = usage_obj["prompt_tokens"]
-        chat_span["completion_tokens"] = usage_obj["completion_tokens"]
-        if "total_tokens" in usage_obj:
-            chat_span["total_tokens"] = usage_obj["total_tokens"]
-        if "cache_creation_prompt_tokens" in usage_obj:
-            chat_span["cache_creation_prompt_tokens"] = usage_obj["cache_creation_prompt_tokens"]
-        if "prompt_tokens_details" in usage_obj:
-            chat_span["prompt_tokens_details"] = usage_obj["prompt_tokens_details"]
-    # Add latency if calculated
     if latency is not None:
-        chat_span["latency"] = latency
-    spans.append(chat_span)
-    # Extract thinking blocks and create spans for them
-    thinking_spans = []
-    for idx, assistant_msg in enumerate(assistant_msgs):
-        if isinstance(assistant_msg, dict) and "message" in assistant_msg:
-            content = assistant_msg["message"].get("content", [])
-            if isinstance(content, list):
-                for item in content:
-                    if isinstance(item, dict) and item.get("type") == "thinking":
-                        thinking_text = item.get("thinking", "")
-                        if thinking_text:
-                            thinking_span_id = f"claudecode_{trace_unique_id}_thinking_{len(thinking_spans) + 1}"
-                            thinking_timestamp = assistant_msg.get("timestamp", timestamp_str)
-                            thinking_spans.append({
-                                "trace_unique_id": trace_unique_id,
-                                "span_unique_id": thinking_span_id,
-                                "span_parent_id": chat_span_id,
-                                "span_name": f"Thinking {len(thinking_spans) + 1}",
-                                "span_workflow_name": workflow_name,
-                                "log_type": "generation",
-                                "input": "",
-                                "output": thinking_text,
-                                "timestamp": thinking_timestamp,
-                                "start_time": thinking_timestamp,
-                            })
-    spans.extend(thinking_spans)
-    # Collect all tool calls and results with metadata
-    tool_call_map = {}
+        root_span["latency"] = latency
+    spans.append(root_span)
+    # ------------------------------------------------------------------
+    # 3. LLM generation child span (always created → every turn has ≥1 child)
+    # ------------------------------------------------------------------
+    gen_span_id = f"claudecode_{trace_unique_id}_gen"
+    gen_start = first_assistant_timestamp or start_time_str
+    gen_end = last_assistant_timestamp or timestamp_str
+    gen_latency = None
+    gen_start_dt = parse_timestamp(gen_start) if gen_start else None
+    gen_end_dt = parse_timestamp(gen_end) if gen_end else None
+    if gen_start_dt and gen_end_dt:
+        gen_latency = (gen_end_dt - gen_start_dt).total_seconds()
+    gen_span: Dict[str, Any] = {
+        "trace_unique_id": trace_unique_id,
+        "span_unique_id": gen_span_id,
+        "span_parent_id": root_span_id,
+        "span_name": "claude.chat",
+        "span_workflow_name": workflow_name,
+        "span_path": "claude_chat",
+        "log_type": "generation",
+        "model": model,
+        "provider_id": "anthropic",
+        "input": json.dumps(prompt_messages) if prompt_messages else "",
+        "output": json.dumps(completion_message) if completion_message else "",
+        "prompt_messages": prompt_messages,
+        "completion_message": completion_message,
+        "timestamp": gen_end,
+        "start_time": gen_start,
+    }
+    if gen_latency is not None:
+        gen_span["latency"] = gen_latency
+    gen_span.update(usage_fields)
+    spans.append(gen_span)
+    # ------------------------------------------------------------------
+    # 4. Thinking child spans
+    # ------------------------------------------------------------------
+    thinking_num = 0
+    for assistant_msg in assistant_msgs:
+        if not (isinstance(assistant_msg, dict) and "message" in assistant_msg):
+            continue
+        content = assistant_msg["message"].get("content", [])
+        if not isinstance(content, list):
+            continue
+        for item in content:
+            if isinstance(item, dict) and item.get("type") == "thinking":
+                thinking_text = item.get("thinking", "")
+                if not thinking_text:
+                    continue
+                thinking_num += 1
+                thinking_ts = assistant_msg.get("timestamp", timestamp_str)
+                spans.append({
+                    "trace_unique_id": trace_unique_id,
+                    "span_unique_id": f"claudecode_{trace_unique_id}_thinking_{thinking_num}",
+                    "span_parent_id": root_span_id,
+                    "span_name": f"Thinking {thinking_num}",
+                    "span_workflow_name": workflow_name,
+                    "span_path": "thinking",
+                    "log_type": "generation",
+                    "input": "",
+                    "output": thinking_text,
+                    "timestamp": thinking_ts,
+                    "start_time": thinking_ts,
+                })
+    # ------------------------------------------------------------------
+    # 5. Tool child spans
+    # ------------------------------------------------------------------
+    tool_call_map: Dict[str, Dict[str, Any]] = {}
     for assistant_msg in assistant_msgs:
-        tool_calls = get_tool_calls(assistant_msg)
-        for tool_call in tool_calls:
-            tool_name = tool_call.get("name", "unknown")
-            tool_input = tool_call.get("input", {})
+        for tool_call in get_tool_calls(assistant_msg):
             tool_id = tool_call.get("id", "")
             tool_call_map[tool_id] = {
-                "name": tool_name,
-                "input": tool_input,
+                "name": tool_call.get("name", "unknown"),
+                "input": tool_call.get("input", {}),
                 "id": tool_id,
                 "timestamp": assistant_msg.get("timestamp") if isinstance(assistant_msg, dict) else None,
             }
-    # Find matching tool results with metadata
     for tr in tool_results:
         tr_content = get_content(tr)
-        tool_result_metadata = {}
-        # Extract tool result metadata
+        tool_result_metadata: Dict[str, Any] = {}
         if isinstance(tr, dict):
-            tool_use_result = tr.get("toolUseResult", {})
-            if tool_use_result:
-                if "durationMs" in tool_use_result:
-                    tool_result_metadata["duration_ms"] = tool_use_result["durationMs"]
-                if "numFiles" in tool_use_result:
-                    tool_result_metadata["num_files"] = tool_use_result["numFiles"]
-                if "filenames" in tool_use_result:
-                    tool_result_metadata["filenames"] = tool_use_result["filenames"]
-                if "truncated" in tool_use_result:
-                    tool_result_metadata["truncated"] = tool_use_result["truncated"]
+            tur = tr.get("toolUseResult") or {}
+            for src, dst in [("durationMs", "duration_ms"), ("numFiles", "num_files"),
+                             ("filenames", "filenames"), ("truncated", "truncated")]:
+                if src in tur:
+                    tool_result_metadata[dst] = tur[src]
         if isinstance(tr_content, list):
             for item in tr_content:
                 if isinstance(item, dict) and item.get("type") == "tool_result":
@@ -560,44 +562,32 @@ def create_respan_spans(
                         tool_call_map[tool_use_id]["output"] = item.get("content")
                         tool_call_map[tool_use_id]["result_metadata"] = tool_result_metadata
                         tool_call_map[tool_use_id]["result_timestamp"] = tr.get("timestamp")
-    # Create tool spans (children)
     tool_num = 0
-    for tool_id, tool_data in tool_call_map.items():
+    for tool_id, td in tool_call_map.items():
         tool_num += 1
-        tool_span_id = f"claudecode_{trace_unique_id}_tool_{tool_num}"
-        # Use tool result timestamp if available, otherwise use tool call timestamp
-        tool_timestamp = tool_data.get("result_timestamp") or tool_data.get("timestamp") or timestamp_str
-        tool_start_time = tool_data.get("timestamp") or start_time_str
-        # Format input and output for better readability
-        formatted_input = format_tool_input(tool_data['name'], tool_data["input"])
-        formatted_output = format_tool_output(tool_data['name'], tool_data.get("output"))
-        tool_span = {
+        tool_ts = td.get("result_timestamp") or td.get("timestamp") or timestamp_str
+        tool_start = td.get("timestamp") or start_time_str
+        tool_span: Dict[str, Any] = {
             "trace_unique_id": trace_unique_id,
-            "span_unique_id": tool_span_id,
-            "span_parent_id": chat_span_id,
-            "span_name": f"Tool: {tool_data['name']}",
+            "span_unique_id": f"claudecode_{trace_unique_id}_tool_{tool_num}",
+            "span_parent_id": root_span_id,
+            "span_name": f"Tool: {td['name']}",
             "span_workflow_name": workflow_name,
+            "span_path": f"tool_{td['name'].lower()}",
             "log_type": "tool",
-            "input": formatted_input,
-            "output": formatted_output,
-            "timestamp": tool_timestamp,
-            "start_time": tool_start_time,
+            "input": format_tool_input(td["name"], td["input"]),
+            "output": format_tool_output(td["name"], td.get("output")),
+            "timestamp": tool_ts,
+            "start_time": tool_start,
         }
-        # Add tool result metadata if available
-        if tool_data.get("result_metadata"):
-            tool_span["metadata"] = tool_data["result_metadata"]
-            # Calculate latency if duration_ms is available
-            duration_ms = tool_data["result_metadata"].get("duration_ms")
+        if td.get("result_metadata"):
+            tool_span["metadata"] = td["result_metadata"]
+            duration_ms = td["result_metadata"].get("duration_ms")
             if duration_ms:
-                tool_span["latency"] = duration_ms / 1000.0  # Convert ms to seconds
+                tool_span["latency"] = duration_ms / 1000.0
         spans.append(tool_span)
     return spans

package/oclif.manifest.json CHANGED

@@ -428,10 +428,16 @@
         "set.js"
       ]
     },
-    "evaluators:create": {
+    "datasets:add-spans": {
       "aliases": [],
-      "args": {},
-      "description": "Create a new evaluator",
+      "args": {
+        "dataset-id": {
+          "description": "Dataset ID",
+          "name": "dataset-id",
+          "required": true
+        }
+      },
+      "description": "Add existing spans to a dataset",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -467,39 +473,18 @@
           "allowNo": false,
           "type": "boolean"
         },
-        "name": {
-          "description": "Evaluator name",
-          "name": "name",
+        "span-ids": {
+          "description": "Comma-separated span IDs",
+          "name": "span-ids",
           "required": true,
           "hasDynamicHelp": false,
           "multiple": false,
           "type": "option"
-        },
-        "type": {
-          "description": "Evaluator type",
-          "name": "type",
-          "hasDynamicHelp": false,
-          "multiple": false,
-          "type": "option"
-        },
-        "description": {
-          "description": "Evaluator description",
-          "name": "description",
-          "hasDynamicHelp": false,
-          "multiple": false,
-          "type": "option"
-        },
-        "config": {
-          "description": "Evaluator config as JSON string",
-          "name": "config",
-          "hasDynamicHelp": false,
-          "multiple": false,
-          "type": "option"
         }
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "evaluators:create",
+      "id": "datasets:add-spans",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -509,20 +494,20 @@
       "relativePath": [
         "dist",
         "commands",
-        "evaluators",
-        "create.js"
+        "datasets",
+        "add-spans.js"
       ]
     },
-    "evaluators:get": {
+    "datasets:create-span": {
       "aliases": [],
       "args": {
-        "id": {
-          "description": "Evaluator ID",
-          "name": "id",
+        "dataset-id": {
+          "description": "Dataset ID",
+          "name": "dataset-id",
           "required": true
         }
       },
-      "description": "Get a specific evaluator",
+      "description": "Create a span in a dataset",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -557,11 +542,19 @@
           "name": "verbose",
           "allowNo": false,
           "type": "boolean"
+        },
+        "body": {
+          "description": "Span body as JSON string",
+          "name": "body",
+          "required": true,
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
         }
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "evaluators:get",
+      "id": "datasets:create-span",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -571,14 +564,14 @@
       "relativePath": [
         "dist",
         "commands",
-        "evaluators",
-        "get.js"
+        "datasets",
+        "create-span.js"
       ]
     },
-    "evaluators:list": {
+    "datasets:create": {
       "aliases": [],
       "args": {},
-      "description": "List evaluators",
+      "description": "Create a new dataset",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -614,18 +607,17 @@
           "allowNo": false,
           "type": "boolean"
         },
-        "limit": {
-          "description": "Number of results per page",
-          "name": "limit",
-          "default": 20,
+        "name": {
+          "description": "Dataset name",
+          "name": "name",
+          "required": true,
           "hasDynamicHelp": false,
           "multiple": false,
           "type": "option"
         },
-        "page": {
-          "description": "Page number",
-          "name": "page",
-          "default": 1,
+        "description": {
+          "description": "Dataset description",
+          "name": "description",
           "hasDynamicHelp": false,
           "multiple": false,
           "type": "option"
@@ -633,7 +625,7 @@
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "evaluators:list",
+      "id": "datasets:create",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -643,20 +635,25 @@
       "relativePath": [
         "dist",
         "commands",
-        "evaluators",
-        "list.js"
+        "datasets",
+        "create.js"
       ]
     },
-    "evaluators:run": {
+    "datasets:get-span": {
       "aliases": [],
       "args": {
-        "id": {
-          "description": "Evaluator ID",
-          "name": "id",
+        "dataset-id": {
+          "description": "Dataset ID",
+          "name": "dataset-id",
+          "required": true
+        },
+        "span-id": {
+          "description": "Span ID",
+          "name": "span-id",
           "required": true
         }
       },
-      "description": "Run an evaluator",
+      "description": "Get a specific span from a dataset",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -691,32 +688,11 @@
           "name": "verbose",
           "allowNo": false,
           "type": "boolean"
-        },
-        "dataset-id": {
-          "description": "Dataset ID to evaluate against",
-          "name": "dataset-id",
-          "hasDynamicHelp": false,
-          "multiple": false,
-          "type": "option"
-        },
-        "log-ids": {
-          "description": "Comma-separated log/span IDs to evaluate",
-          "name": "log-ids",
-          "hasDynamicHelp": false,
-          "multiple": false,
-          "type": "option"
-        },
-        "params": {
-          "description": "Additional parameters as JSON string",
-          "name": "params",
-          "hasDynamicHelp": false,
-          "multiple": false,
-          "type": "option"
         }
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "evaluators:run",
+      "id": "datasets:get-span",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -726,20 +702,20 @@
       "relativePath": [
         "dist",
         "commands",
-        "evaluators",
-        "run.js"
+        "datasets",
+        "get-span.js"
       ]
     },
-    "evaluators:update": {
+    "datasets:get": {
       "aliases": [],
       "args": {
         "id": {
-          "description": "Evaluator ID",
+          "description": "Dataset ID",
           "name": "id",
           "required": true
         }
       },
-      "description": "Update an evaluator",
+      "description": "Get a specific dataset",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -774,32 +750,11 @@
           "name": "verbose",
           "allowNo": false,
           "type": "boolean"
-        },
-        "name": {
-          "description": "Evaluator name",
-          "name": "name",
-          "hasDynamicHelp": false,
-          "multiple": false,
-          "type": "option"
-        },
-        "description": {
-          "description": "Evaluator description",
-          "name": "description",
-          "hasDynamicHelp": false,
-          "multiple": false,
-          "type": "option"
-        },
-        "config": {
-          "description": "Evaluator config as JSON string",
-          "name": "config",
-          "hasDynamicHelp": false,
-          "multiple": false,
-          "type": "option"
         }
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "evaluators:update",
+      "id": "datasets:get",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -809,20 +764,14 @@
       "relativePath": [
         "dist",
         "commands",
-        "evaluators",
-        "update.js"
+        "datasets",
+        "get.js"
       ]
     },
-    "datasets:add-spans": {
+    "datasets:list": {
       "aliases": [],
-      "args": {
-        "dataset-id": {
-          "description": "Dataset ID",
-          "name": "dataset-id",
-          "required": true
-        }
-      },
-      "description": "Add existing spans to a dataset",
+      "args": {},
+      "description": "List datasets",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -858,10 +807,18 @@
           "allowNo": false,
           "type": "boolean"
         },
-        "span-ids": {
-          "description": "Comma-separated span IDs",
-          "name": "span-ids",
-          "required": true,
+        "limit": {
+          "description": "Number of results per page",
+          "name": "limit",
+          "default": 50,
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
+        },
+        "page": {
+          "description": "Page number",
+          "name": "page",
+          "default": 1,
           "hasDynamicHelp": false,
           "multiple": false,
           "type": "option"
@@ -869,7 +826,7 @@
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "datasets:add-spans",
+      "id": "datasets:list",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -880,10 +837,10 @@
         "dist",
         "commands",
         "datasets",
-        "add-spans.js"
+        "list.js"
       ]
     },
-    "datasets:create-span": {
+    "datasets:spans": {
       "aliases": [],
       "args": {
         "dataset-id": {
@@ -892,7 +849,7 @@
           "required": true
         }
       },
-      "description": "Create a span in a dataset",
+      "description": "List spans in a dataset",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -927,19 +884,11 @@
           "name": "verbose",
           "allowNo": false,
           "type": "boolean"
-        },
-        "body": {
-          "description": "Span body as JSON string",
-          "name": "body",
-          "required": true,
-          "hasDynamicHelp": false,
-          "multiple": false,
-          "type": "option"
         }
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "datasets:create-span",
+      "id": "datasets:spans",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -950,13 +899,19 @@
         "dist",
         "commands",
         "datasets",
-        "create-span.js"
+        "spans.js"
       ]
     },
-    "datasets:create": {
+    "datasets:update": {
       "aliases": [],
-      "args": {},
-      "description": "Create a new dataset",
+      "args": {
+        "id": {
+          "description": "Dataset ID",
+          "name": "id",
+          "required": true
+        }
+      },
+      "description": "Update a dataset",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -995,7 +950,6 @@
         "name": {
           "description": "Dataset name",
           "name": "name",
-          "required": true,
           "hasDynamicHelp": false,
           "multiple": false,
           "type": "option"
@@ -1010,7 +964,7 @@
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "datasets:create",
+      "id": "datasets:update",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -1021,24 +975,13 @@
         "dist",
         "commands",
         "datasets",
-        "create.js"
+        "update.js"
       ]
     },
-    "datasets:get-span": {
+    "evaluators:create": {
       "aliases": [],
-      "args": {
-        "dataset-id": {
-          "description": "Dataset ID",
-          "name": "dataset-id",
-          "required": true
-        },
-        "span-id": {
-          "description": "Span ID",
-          "name": "span-id",
-          "required": true
-        }
-      },
-      "description": "Get a specific span from a dataset",
+      "args": {},
+      "description": "Create a new evaluator",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -1073,11 +1016,40 @@
           "name": "verbose",
           "allowNo": false,
           "type": "boolean"
+        },
+        "name": {
+          "description": "Evaluator name",
+          "name": "name",
+          "required": true,
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
+        },
+        "type": {
+          "description": "Evaluator type",
+          "name": "type",
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
+        },
+        "description": {
+          "description": "Evaluator description",
+          "name": "description",
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
+        },
+        "config": {
+          "description": "Evaluator config as JSON string",
+          "name": "config",
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
         }
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "datasets:get-span",
+      "id": "evaluators:create",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -1087,20 +1059,20 @@
       "relativePath": [
         "dist",
         "commands",
-        "datasets",
-        "get-span.js"
+        "evaluators",
+        "create.js"
       ]
     },
-    "datasets:get": {
+    "evaluators:get": {
       "aliases": [],
       "args": {
         "id": {
-          "description": "Dataset ID",
+          "description": "Evaluator ID",
           "name": "id",
           "required": true
         }
       },
-      "description": "Get a specific dataset",
+      "description": "Get a specific evaluator",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -1139,7 +1111,7 @@
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "datasets:get",
+      "id": "evaluators:get",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -1149,14 +1121,14 @@
       "relativePath": [
         "dist",
         "commands",
-        "datasets",
+        "evaluators",
         "get.js"
       ]
     },
-    "datasets:list": {
+    "evaluators:list": {
       "aliases": [],
       "args": {},
-      "description": "List datasets",
+      "description": "List evaluators",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -1195,7 +1167,7 @@
         "limit": {
           "description": "Number of results per page",
           "name": "limit",
-          "default": 50,
+          "default": 20,
           "hasDynamicHelp": false,
           "multiple": false,
           "type": "option"
@@ -1211,7 +1183,7 @@
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "datasets:list",
+      "id": "evaluators:list",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -1221,20 +1193,20 @@
       "relativePath": [
         "dist",
         "commands",
-        "datasets",
+        "evaluators",
         "list.js"
       ]
     },
-    "datasets:spans": {
+    "evaluators:run": {
       "aliases": [],
       "args": {
-        "dataset-id": {
-          "description": "Dataset ID",
-          "name": "dataset-id",
+        "id": {
+          "description": "Evaluator ID",
+          "name": "id",
           "required": true
         }
       },
-      "description": "List spans in a dataset",
+      "description": "Run an evaluator",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -1269,11 +1241,32 @@
           "name": "verbose",
           "allowNo": false,
           "type": "boolean"
+        },
+        "dataset-id": {
+          "description": "Dataset ID to evaluate against",
+          "name": "dataset-id",
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
+        },
+        "log-ids": {
+          "description": "Comma-separated log/span IDs to evaluate",
+          "name": "log-ids",
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
+        },
+        "params": {
+          "description": "Additional parameters as JSON string",
+          "name": "params",
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
         }
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "datasets:spans",
+      "id": "evaluators:run",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -1283,20 +1276,20 @@
       "relativePath": [
         "dist",
         "commands",
-        "datasets",
-        "spans.js"
+        "evaluators",
+        "run.js"
       ]
     },
-    "datasets:update": {
+    "evaluators:update": {
       "aliases": [],
       "args": {
         "id": {
-          "description": "Dataset ID",
+          "description": "Evaluator ID",
           "name": "id",
           "required": true
         }
       },
-      "description": "Update a dataset",
+      "description": "Update an evaluator",
       "flags": {
         "api-key": {
           "description": "API key (env: RESPAN_API_KEY)",
@@ -1333,23 +1326,30 @@
           "type": "boolean"
         },
         "name": {
-          "description": "Dataset name",
+          "description": "Evaluator name",
           "name": "name",
           "hasDynamicHelp": false,
           "multiple": false,
           "type": "option"
         },
         "description": {
-          "description": "Dataset description",
+          "description": "Evaluator description",
           "name": "description",
           "hasDynamicHelp": false,
           "multiple": false,
           "type": "option"
+        },
+        "config": {
+          "description": "Evaluator config as JSON string",
+          "name": "config",
+          "hasDynamicHelp": false,
+          "multiple": false,
+          "type": "option"
         }
       },
       "hasDynamicHelp": false,
       "hiddenAliases": [],
-      "id": "datasets:update",
+      "id": "evaluators:update",
       "pluginAlias": "@respan/cli",
       "pluginName": "@respan/cli",
       "pluginType": "core",
@@ -1359,7 +1359,7 @@
       "relativePath": [
         "dist",
         "commands",
-        "datasets",
+        "evaluators",
         "update.js"
       ]
     },
@@ -3370,5 +3370,5 @@
       ]
     }
   },
-  "version": "0.4.0"
+  "version": "0.4.1"
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@respan/cli",
-  "version": "0.4.0",
+  "version": "0.4.1",
   "description": "Respan CLI - manage your LLM observability from the command line",
   "type": "module",
   "main": "dist/index.js",