RubyGems - lex-llm - Version diff - 0.4.18 → 0.5.1 - Mend

lex-llm 0.4.18 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (125)

data/spec/legion/extensions/llm/conformance/echo_translator.rb ADDED Viewed

@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+# Trivial echo translator for conformance kit self-testing.
+# Passes canonical types through unchanged, proving the shared example groups work.
+module Canonical
+  module Conformance
+    # Echo translator: identity transform for both provider and client sides.
+    # Used exclusively as a self-test to verify the conformance kit works.
+    class EchoTranslator
+      def capabilities
+        { provider: 'echo', thinking: true, streaming: true, tool_calls: true }
+      end
+      # Provider translator interface
+      def render_request(canonical_request)
+        canonical_request.to_h
+      end
+      def parse_response(wire_hash)
+        canonical::Response.from_hash(wire_hash)
+      end
+      def parse_chunk(raw_chunk)
+        canonical::Chunk.from_hash(raw_chunk)
+      end
+      # Client translator interface
+      def format_request(canonical_request)
+        canonical_request.to_h
+      end
+      def parse_request(body, _env = {})
+        canonical::Request.from_hash(body)
+      end
+      def format_response(canonical_response)
+        canonical_response.to_h
+      end
+      def format_chunk(canonical_chunk)
+        canonical_chunk.to_h
+      end
+      def format_error(error, status)
+        [status, { error: error.message, type: error.class.name }]
+      end
+      private
+      def canonical
+        Legion::Extensions::Llm::Canonical
+      end
+    end
+  end
+end

data/spec/legion/extensions/llm/conformance/echo_translator_spec.rb ADDED Viewed

@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+require 'spec_helper'
+require_relative 'conformance'
+require_relative 'echo_translator'
+RSpec.describe Canonical::Conformance::EchoTranslator do
+  # Self-test: the echo translator passes both conformance groups,
+  # proving the shared example groups work correctly.
+  it_behaves_like 'a canonical provider translator', described_class
+  it_behaves_like 'a canonical client translator', described_class
+end

data/spec/legion/extensions/llm/conformance/fixtures/canonical_empty_response.json ADDED Viewed

@@ -0,0 +1,13 @@
+{
+  "text": "",
+  "thinking": null,
+  "tool_calls": [],
+  "usage": {
+    "input_tokens": 8,
+    "output_tokens": 0
+  },
+  "stop_reason": "end_turn",
+  "model": "test-model-1",
+  "routing": {},
+  "metadata": {}
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_error_response.json ADDED Viewed

@@ -0,0 +1,19 @@
+{
+  "text": "",
+  "thinking": null,
+  "tool_calls": [],
+  "usage": {
+    "input_tokens": 0,
+    "output_tokens": 0
+  },
+  "stop_reason": "error",
+  "model": "test-model-1",
+  "routing": {},
+  "metadata": {
+    "error": {
+      "type": "invalid_request_error",
+      "message": "Model nonexistent-model-xyz-12345 not found",
+      "code": 404
+    }
+  }
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_fleet_round_trip.json ADDED Viewed

@@ -0,0 +1,81 @@
+{
+  "description": "Fleet round-trip field mapping — R6",
+  "round_trip_request": {
+    "id": "req_fleet_rt_001",
+    "messages": [
+      {
+        "role": "user",
+        "content": [{ "type": "text", "text": "Fleet round-trip test message" }]
+      }
+    ],
+    "system": "You are a test assistant.",
+    "tools": {
+      "test_tool": {
+        "name": "test_tool",
+        "description": "A test tool for round-trip validation",
+        "parameters": {
+          "type": "object",
+          "properties": { "input": { "type": "string" } },
+          "required": ["input"]
+        }
+      }
+    },
+    "params": { "max_tokens": 1024, "temperature": 0.5 },
+    "thinking": { "effort": "low", "budget": 1024 },
+    "stream": false,
+    "conversation_id": "conv_fleet_rt_001",
+    "caller": { "type": "anthropic_messages", "client_id": "test_client" },
+    "routing": { "tier": "primary", "provider": "anthropic", "model": "claude-sonnet-4-6" },
+    "metadata": { "trace_id": "trace_abc123", "span_id": "span_def456" }
+  },
+  "round_trip_response": {
+    "text": "This is a fleet round-trip test response.",
+    "thinking": { "content": "Processing fleet round-trip test.", "signature": "sig_test_001" },
+    "tool_calls": [
+      {
+        "id": "call_fleet_rt_001",
+        "exchange_id": "exch_fleet_rt_001",
+        "name": "test_tool",
+        "arguments": { "input": "round_trip_test" },
+        "source": "client",
+        "status": "pending"
+      }
+    ],
+    "usage": {
+      "input_tokens": 50,
+      "output_tokens": 30,
+      "cache_read_tokens": 10,
+      "cache_write_tokens": 5,
+      "thinking_tokens": 20
+    },
+    "stop_reason": "tool_use",
+    "model": "claude-sonnet-4-6",
+    "routing": { "tier": "primary", "provider": "anthropic", "instance": "us-east-1" },
+    "metadata": { "trace_id": "trace_abc123", "span_id": "span_def456", "latency_ms": 1250 }
+  },
+  "field_mapping": {
+    "request": {
+      "id": "string — unique request identifier",
+      "messages": "array[Message] — conversation history",
+      "system": "string | nil — system prompt",
+      "tools": "hash[name -> ToolDefinition] — available tools",
+      "params": "Params — sampling parameters",
+      "thinking": "Thinking::Config | nil — thinking configuration",
+      "stream": "boolean — streaming mode",
+      "conversation_id": "string | nil — conversation grouping",
+      "caller": "hash — client translator identity",
+      "routing": "hash — routing hints",
+      "metadata": "hash — passthrough metadata"
+    },
+    "response": {
+      "text": "string — assistant text content",
+      "thinking": "Thinking | nil — thinking block with content and signature",
+      "tool_calls": "array[ToolCall] — tool call requests",
+      "usage": "Usage — token usage breakdown",
+      "stop_reason": "symbol — why generation stopped",
+      "model": "string — resolved model identifier",
+      "routing": "hash — actual routing used",
+      "metadata": "hash — provider passthrough quirks"
+    }
+  }
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_metering_audit_events.json ADDED Viewed

@@ -0,0 +1,101 @@
+{
+  "description": "Metering/audit event schemas — G15e",
+  "schemas": {
+    "metering_event": {
+      "event_type": "metering",
+      "required_fields": {
+        "exchange_id": "string",
+        "request_id": "string",
+        "conversation_id": "string | nil",
+        "model": "string",
+        "provider": "string",
+        "usage": {
+          "input_tokens": "integer",
+          "output_tokens": "integer",
+          "cache_read_tokens": "integer | nil",
+          "cache_write_tokens": "integer | nil",
+          "thinking_tokens": "integer | nil"
+        },
+        "cost": {
+          "input_cost_usd": "float | nil",
+          "output_cost_usd": "float | nil",
+          "total_cost_usd": "float | nil"
+        },
+        "latency_ms": "integer",
+        "timestamp": "ISO8601 string"
+      }
+    },
+    "audit_event": {
+      "event_type": "audit",
+      "required_fields": {
+        "exchange_id": "string",
+        "request_id": "string",
+        "conversation_id": "string | nil",
+        "model": "string",
+        "provider": "string",
+        "caller": "hash",
+        "status": "symbol — :success, :error, :partial",
+        "stop_reason": "symbol | nil",
+        "timestamp": "ISO8601 string"
+      }
+    },
+    "tool_call_audit_event": {
+      "event_type": "tool_call_audit",
+      "required_fields": {
+        "exchange_id": "string",
+        "tool_call_id": "string",
+        "name": "string",
+        "source": "symbol — :client, :registry, :special, :extension, :mcp",
+        "status": "symbol — :pending, :running, :success, :error",
+        "arguments": "hash",
+        "timestamp": "ISO8601 string"
+      }
+    }
+  },
+  "example_events": {
+    "metering_success": {
+      "event_type": "metering",
+      "exchange_id": "exch_meter_001",
+      "request_id": "req_meter_001",
+      "conversation_id": "conv_001",
+      "model": "claude-sonnet-4-6",
+      "provider": "anthropic",
+      "usage": { "input_tokens": 150, "output_tokens": 85, "cache_read_tokens": 20 },
+      "cost": { "input_cost_usd": 0.0015, "output_cost_usd": 0.00765, "total_cost_usd": 0.00915 },
+      "latency_ms": 1250,
+      "stop_reason": "end_turn",
+      "routing": { "tier": "primary", "provider": "anthropic" },
+      "tool_calls_count": 0,
+      "timestamp": "2026-06-10T12:00:00Z"
+    },
+    "audit_success": {
+      "event_type": "audit",
+      "exchange_id": "exch_audit_001",
+      "request_id": "req_audit_001",
+      "conversation_id": "conv_001",
+      "model": "claude-sonnet-4-6",
+      "provider": "anthropic",
+      "caller": { "type": "anthropic_messages", "client_id": "test_client" },
+      "status": "success",
+      "stop_reason": "end_turn",
+      "routing": { "tier": "primary", "provider": "anthropic" },
+      "usage": { "input_tokens": 150, "output_tokens": 85 },
+      "route_attempts": [
+        { "attempt": 1, "provider": "anthropic", "model": "claude-sonnet-4-6", "status": "success" }
+      ],
+      "timestamp": "2026-06-10T12:00:00Z"
+    },
+    "tool_call_audit": {
+      "event_type": "tool_call_audit",
+      "exchange_id": "exch_tc_audit_001",
+      "tool_call_id": "call_tc_audit_001",
+      "name": "get_weather",
+      "source": "client",
+      "status": "success",
+      "arguments": { "location": "San Francisco, CA", "unit": "fahrenheit" },
+      "result": { "temperature": 68, "unit": "fahrenheit", "conditions": "partly cloudy" },
+      "duration_ms": 250,
+      "timestamp": "2026-06-10T12:00:01Z"
+    }
+  }
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_params_mapping_request.json ADDED Viewed

@@ -0,0 +1,21 @@
+{
+  "id": "req_params_mapping_001",
+  "messages": [
+    {
+      "role": "user",
+      "content": [{ "type": "text", "text": "Generate a creative story." }]
+    }
+  ],
+  "params": {
+    "max_tokens": 2048,
+    "temperature": 0.7,
+    "top_p": 0.9,
+    "top_k": 50,
+    "stop_sequences": ["[END]", "\\n\\n"],
+    "seed": 42,
+    "frequency_penalty": 0.1,
+    "presence_penalty": 0.2,
+    "response_format": { "type": "json_object" }
+  },
+  "stream": false
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_server_tool_continuation_request.json ADDED Viewed

@@ -0,0 +1,43 @@
+{
+  "description": "G24 round-trip — when a client sends back a history that contains a completed server-side exchange (Claude: server_tool_use+server_tool_result blocks; Codex: completed function_call+function_call_output items), the client translator must parse them into canonical messages losslessly so the next turn rendering still attributes the call to the assistant and its result to the tool role. Tools array is empty because the server-side exchange is already closed; this request is just continuing the conversation.",
+  "id": "req_g24_continuation_001",
+  "messages": [
+    {
+      "role": "user",
+      "content": "what legionio tools do you have available?"
+    },
+    {
+      "role": "assistant",
+      "content": "I called the legion_list_all_tools tool.",
+      "tool_calls": [
+        {
+          "id": "call_legion_001",
+          "name": "legion_list_all_tools",
+          "arguments": {"filter": "all"},
+          "source": "registry",
+          "status": "success",
+          "result": "tools: legion_list_all_tools, legion_apollo_search"
+        }
+      ]
+    },
+    {
+      "role": "tool",
+      "tool_call_id": "call_legion_001",
+      "content": "tools: legion_list_all_tools, legion_apollo_search"
+    },
+    {
+      "role": "user",
+      "content": "thanks — anything else?"
+    }
+  ],
+  "system": null,
+  "tools": null,
+  "params": null,
+  "thinking": null,
+  "stream": false,
+  "conversation_id": "conv_g24_001",
+  "routing": {},
+  "metadata": {
+    "g24": "continuation"
+  }
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_server_tool_use_response.json ADDED Viewed

@@ -0,0 +1,29 @@
+{
+  "description": "G24 — server-executed LegionIO tool. The provider returned tool_use, the executor ran the tool registry-side, and the canonical response carries both the call AND its result so client translators can surface it as a completed exchange (Claude: server_tool_use+server_tool_result, Codex: completed function_call+function_call_output non-actionable). stop_reason is end_turn because the server-side exchange is closed by the time the canonical response is built.",
+  "text": "I called the legion_list_all_tools tool and here is the list.",
+  "thinking": null,
+  "tool_calls": [
+    {
+      "id": "call_legion_001",
+      "exchange_id": "exch_legion_001",
+      "name": "legion_list_all_tools",
+      "arguments": {
+        "filter": "all"
+      },
+      "source": "registry",
+      "status": "success",
+      "result": "tools: legion_list_all_tools, legion_apollo_search, legion_runner_dispatch",
+      "duration_ms": 42
+    }
+  ],
+  "usage": {
+    "input_tokens": 50,
+    "output_tokens": 22
+  },
+  "stop_reason": "end_turn",
+  "model": "test-model-1",
+  "routing": {},
+  "metadata": {
+    "g24": true
+  }
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_simple_text_request.json ADDED Viewed

@@ -0,0 +1,13 @@
+{
+  "id": "req_simple_text_001",
+  "messages": [
+    {
+      "role": "user",
+      "content": [{ "type": "text", "text": "Hello, how are you?" }]
+    }
+  ],
+  "params": {
+    "max_tokens": 1024
+  },
+  "stream": false
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_simple_text_response.json ADDED Viewed

@@ -0,0 +1,13 @@
+{
+  "text": "I'm doing well, thank you for asking!",
+  "thinking": null,
+  "tool_calls": [],
+  "usage": {
+    "input_tokens": 12,
+    "output_tokens": 10
+  },
+  "stop_reason": "end_turn",
+  "model": "test-model-1",
+  "routing": {},
+  "metadata": {}
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_stop_reason_matrix.json ADDED Viewed

@@ -0,0 +1,36 @@
+{
+  "description": "Stop reason mapping matrix — every provider value maps to a canonical enum",
+  "canonical_stop_reasons": ["end_turn", "tool_use", "max_tokens", "stop_sequence", "content_filter", "error"],
+  "provider_mappings": {
+    "anthropic": {
+      "end_turn": "end_turn",
+      "tool_use": "tool_use",
+      "max_tokens": "max_tokens",
+      "stop_sequence": "stop_sequence",
+      "content_filter": "content_filter"
+    },
+    "openai": {
+      "stop": "end_turn",
+      "tool_calls": "tool_use",
+      "length": "max_tokens",
+      "content_filter": "content_filter"
+    },
+    "openai_responses": {
+      "completed": "end_turn",
+      "incomplete": "max_tokens",
+      "cancelled": "stop_sequence",
+      "failed": "error"
+    },
+    "vllm": {
+      "stop": "end_turn",
+      "tool_use": "tool_use",
+      "length": "max_tokens"
+    },
+    "bedrock_converse": {
+      "end_turn": "end_turn",
+      "tool_use": "tool_use",
+      "max_tokens": "max_tokens",
+      "guardrail_intervened": "content_filter"
+    }
+  }
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_accumulated_response.json ADDED Viewed

@@ -0,0 +1,20 @@
+{
+  "description": "Expected accumulated Canonical::Response after processing canonical_streaming_tool_call_chunks.json — verify accumulate(chunks) == parse(full)",
+  "text": "Let me check the weather for you.",
+  "thinking": null,
+  "tool_calls": [
+    {
+      "id": "call_def456",
+      "name": "get_weather",
+      "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"},
+      "source": "client",
+      "status": "pending"
+    }
+  ],
+  "usage": {
+    "input_tokens": 45,
+    "output_tokens": 28
+  },
+  "stop_reason": "tool_use",
+  "model": "test-model-1"
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_error_chunks.json ADDED Viewed

@@ -0,0 +1,26 @@
+{
+  "description": "Streaming error mid-chunk sequence — provider errors during stream per G5/G6",
+  "request_id": "req_stream_error_001",
+  "chunks": [
+    {
+      "request_id": "req_stream_error_001",
+      "index": 0,
+      "type": "text_delta",
+      "delta": "The weather"
+    },
+    {
+      "request_id": "req_stream_error_001",
+      "index": 1,
+      "type": "error",
+      "stop_reason": "error",
+      "metadata": {
+        "error": {
+          "type": "overloaded",
+          "message": "Upstream provider timed out",
+          "code": 504
+        },
+        "partial_text": "The weather"
+      }
+    }
+  ]
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_server_tool_chunks.json ADDED Viewed

@@ -0,0 +1,52 @@
+{
+  "description": "G24 — streaming sequence for a server-executed LegionIO tool. The server-side exchange streams as tool_call_delta(s) (with the result attached on close) followed by trailing text from round-2. Client translators must turn this into completed (non-actionable) blocks: Claude server_tool_use+server_tool_result blocks, Codex completed function_call items with results visible.",
+  "request_id": "req_stream_legion_001",
+  "chunks": [
+    {
+      "request_id": "req_stream_legion_001",
+      "index": 0,
+      "type": "tool_call_delta",
+      "tool_call": {
+        "id": "call_legion_stream_001",
+        "name": "legion_list_all_tools",
+        "arguments": {"filter": "all"},
+        "source": "registry",
+        "status": "running"
+      }
+    },
+    {
+      "request_id": "req_stream_legion_001",
+      "index": 0,
+      "type": "tool_call_delta",
+      "tool_call": {
+        "id": "call_legion_stream_001",
+        "name": "legion_list_all_tools",
+        "arguments": {"filter": "all"},
+        "source": "registry",
+        "status": "success",
+        "result": "tools: legion_list_all_tools, legion_apollo_search"
+      }
+    },
+    {
+      "request_id": "req_stream_legion_001",
+      "index": 1,
+      "type": "text_delta",
+      "delta": "I called the "
+    },
+    {
+      "request_id": "req_stream_legion_001",
+      "index": 1,
+      "type": "text_delta",
+      "delta": "legion_list_all_tools tool."
+    },
+    {
+      "request_id": "req_stream_legion_001",
+      "type": "done",
+      "stop_reason": "end_turn",
+      "usage": {
+        "input_tokens": 50,
+        "output_tokens": 25
+      }
+    }
+  ]
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_text_chunks.json ADDED Viewed

@@ -0,0 +1,33 @@
+{
+  "description": "Streaming text-only chunk sequence — sanitized from real E2E traffic",
+  "request_id": "req_stream_text_001",
+  "chunks": [
+    {
+      "request_id": "req_stream_text_001",
+      "index": 0,
+      "type": "text_delta",
+      "delta": "Hello"
+    },
+    {
+      "request_id": "req_stream_text_001",
+      "index": 1,
+      "type": "text_delta",
+      "delta": ", world!"
+    },
+    {
+      "request_id": "req_stream_text_001",
+      "index": 2,
+      "type": "text_delta",
+      "delta": " How can I help you today?"
+    },
+    {
+      "request_id": "req_stream_text_001",
+      "type": "done",
+      "stop_reason": "end_turn",
+      "usage": {
+        "input_tokens": 12,
+        "output_tokens": 10
+      }
+    }
+  ]
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_thinking_chunks.json ADDED Viewed

@@ -0,0 +1,42 @@
+{
+  "description": "Streaming thinking + text chunk sequence — sanitized from real E2E traffic",
+  "request_id": "req_stream_thinking_001",
+  "chunks": [
+    {
+      "request_id": "req_stream_thinking_001",
+      "index": 0,
+      "type": "thinking_delta",
+      "delta": "Let me think about this step by step.",
+      "signature": null
+    },
+    {
+      "request_id": "req_stream_thinking_001",
+      "index": 1,
+      "type": "thinking_delta",
+      "delta": "The key concepts are superposition and entanglement.",
+      "signature": "eyJ0eXAiOiJKV1QiLCJhbGciOiJFUzI1NiJ9"
+    },
+    {
+      "request_id": "req_stream_thinking_001",
+      "index": 2,
+      "type": "text_delta",
+      "delta": "Quantum computing uses qubits that can be in multiple states at once."
+    },
+    {
+      "request_id": "req_stream_thinking_001",
+      "index": 3,
+      "type": "text_delta",
+      "delta": " This enables parallel computation on a massive scale."
+    },
+    {
+      "request_id": "req_stream_thinking_001",
+      "type": "done",
+      "stop_reason": "end_turn",
+      "usage": {
+        "input_tokens": 15,
+        "output_tokens": 45,
+        "thinking_tokens": 80
+      }
+    }
+  ]
+}

data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_tool_call_chunks.json ADDED Viewed

@@ -0,0 +1,41 @@
+{
+  "description": "Streaming tool-call chunk sequence — multi-chunk stateful tool-loop per A7. Each tool_call_delta carries the complete current state (not raw incremental fragments); the assembler merges deltas via buffer_policy.",
+  "request_id": "req_stream_tool_001",
+  "chunks": [
+    {
+      "request_id": "req_stream_tool_001",
+      "index": 0,
+      "type": "text_delta",
+      "delta": "Let me check the weather for you."
+    },
+    {
+      "request_id": "req_stream_tool_001",
+      "index": 1,
+      "type": "tool_call_delta",
+      "tool_call": {
+        "id": "call_def456",
+        "name": "get_weather",
+        "arguments": {"location": "San Francisco, CA"}
+      }
+    },
+    {
+      "request_id": "req_stream_tool_001",
+      "index": 2,
+      "type": "tool_call_delta",
+      "tool_call": {
+        "id": "call_def456",
+        "name": "get_weather",
+        "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}
+      }
+    },
+    {
+      "request_id": "req_stream_tool_001",
+      "type": "done",
+      "stop_reason": "tool_use",
+      "usage": {
+        "input_tokens": 45,
+        "output_tokens": 28
+      }
+    }
+  ]
+}