RubyGems - llm.rb - Versions diffs - 4.9.0 → 4.11.0 - Mend

llm.rb 4.9.0 → 4.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +152 -0
data/README.md +178 -31
data/data/anthropic.json +209 -242
data/data/deepseek.json +15 -15
data/data/google.json +553 -403
data/data/openai.json +740 -535
data/data/xai.json +250 -253
data/data/zai.json +157 -90
data/lib/llm/context/deserializer.rb +2 -1
data/lib/llm/context.rb +58 -2
data/lib/llm/contract/completion.rb +7 -0
data/lib/llm/error.rb +4 -0
data/lib/llm/eventhandler.rb +7 -0
data/lib/llm/function/registry.rb +106 -0
data/lib/llm/function/task.rb +39 -0
data/lib/llm/function.rb +12 -7
data/lib/llm/mcp/transport/http/event_handler.rb +66 -0
data/lib/llm/mcp/transport/http.rb +156 -0
data/lib/llm/mcp/transport/stdio.rb +7 -0
data/lib/llm/mcp.rb +74 -30
data/lib/llm/message.rb +9 -2
data/lib/llm/provider.rb +10 -0
data/lib/llm/providers/anthropic/response_adapter/completion.rb +6 -0
data/lib/llm/providers/anthropic/stream_parser.rb +37 -4
data/lib/llm/providers/anthropic.rb +1 -1
data/lib/llm/providers/google/response_adapter/completion.rb +12 -5
data/lib/llm/providers/google/stream_parser.rb +54 -11
data/lib/llm/providers/google/utils.rb +30 -0
data/lib/llm/providers/google.rb +2 -0
data/lib/llm/providers/ollama/response_adapter/completion.rb +6 -0
data/lib/llm/providers/ollama/stream_parser.rb +10 -4
data/lib/llm/providers/ollama.rb +1 -1
data/lib/llm/providers/openai/response_adapter/completion.rb +7 -0
data/lib/llm/providers/openai/response_adapter/responds.rb +84 -10
data/lib/llm/providers/openai/responses/stream_parser.rb +63 -4
data/lib/llm/providers/openai/responses.rb +1 -1
data/lib/llm/providers/openai/stream_parser.rb +68 -4
data/lib/llm/providers/openai.rb +1 -1
data/lib/llm/schema/all_of.rb +31 -0
data/lib/llm/schema/any_of.rb +31 -0
data/lib/llm/schema/one_of.rb +31 -0
data/lib/llm/schema/parser.rb +36 -0
data/lib/llm/schema.rb +45 -8
data/lib/llm/stream/queue.rb +51 -0
data/lib/llm/stream.rb +102 -0
data/lib/llm/tool.rb +53 -47
data/lib/llm/version.rb +1 -1
data/lib/llm.rb +3 -2
data/llm.gemspec +2 -2
metadata +12 -1

data/data/zai.json CHANGED Viewed

@@ -8,19 +8,17 @@
   "name": "Z.AI",
   "doc": "https://docs.z.ai/guides/overview/pricing",
   "models": {
-    "glm-5": {
-      "id": "glm-5",
-      "name": "GLM-5",
-      "family": "glm",
+    "glm-4.7-flash": {
+      "id": "glm-4.7-flash",
+      "name": "GLM-4.7-Flash",
+      "family": "glm-flash",
       "attachment": false,
       "reasoning": true,
       "tool_call": true,
-      "interleaved": {
-        "field": "reasoning_content"
-      },
       "temperature": true,
-      "release_date": "2026-02-11",
-      "last_updated": "2026-02-11",
+      "knowledge": "2025-04",
+      "release_date": "2026-01-19",
+      "last_updated": "2026-01-19",
       "modalities": {
         "input": [
           "text"
@@ -31,58 +29,66 @@
       },
       "open_weights": true,
       "cost": {
-        "input": 1,
-        "output": 3.2,
-        "cache_read": 0.2,
+        "input": 0,
+        "output": 0,
+        "cache_read": 0,
         "cache_write": 0
       },
       "limit": {
-        "context": 204800,
+        "context": 200000,
         "output": 131072
       }
     },
-    "glm-4.5-air": {
-      "id": "glm-4.5-air",
-      "name": "GLM-4.5-Air",
-      "family": "glm-air",
-      "attachment": false,
+    "glm-5v-turbo": {
+      "id": "glm-5v-turbo",
+      "name": "glm-5v-turbo",
+      "family": "glm",
+      "attachment": true,
       "reasoning": true,
       "tool_call": true,
+      "interleaved": {
+        "field": "reasoning_content"
+      },
       "temperature": true,
-      "knowledge": "2025-04",
-      "release_date": "2025-07-28",
-      "last_updated": "2025-07-28",
+      "release_date": "2026-04-01",
+      "last_updated": "2026-04-01",
       "modalities": {
         "input": [
-          "text"
+          "text",
+          "image",
+          "video",
+          "pdf"
         ],
         "output": [
           "text"
         ]
       },
-      "open_weights": true,
+      "open_weights": false,
       "cost": {
-        "input": 0.2,
-        "output": 1.1,
-        "cache_read": 0.03,
+        "input": 1.2,
+        "output": 4,
+        "cache_read": 0.24,
         "cache_write": 0
       },
       "limit": {
-        "context": 131072,
-        "output": 98304
+        "context": 200000,
+        "output": 131072
       }
     },
-    "glm-4.5": {
-      "id": "glm-4.5",
-      "name": "GLM-4.5",
+    "glm-5-turbo": {
+      "id": "glm-5-turbo",
+      "name": "GLM-5-Turbo",
       "family": "glm",
       "attachment": false,
       "reasoning": true,
       "tool_call": true,
+      "interleaved": {
+        "field": "reasoning_content"
+      },
+      "structured_output": true,
       "temperature": true,
-      "knowledge": "2025-04",
-      "release_date": "2025-07-28",
-      "last_updated": "2025-07-28",
+      "release_date": "2026-03-16",
+      "last_updated": "2026-03-16",
       "modalities": {
         "input": [
           "text"
@@ -91,22 +97,22 @@
           "text"
         ]
       },
-      "open_weights": true,
+      "open_weights": false,
       "cost": {
-        "input": 0.6,
-        "output": 2.2,
-        "cache_read": 0.11,
+        "input": 1.2,
+        "output": 4,
+        "cache_read": 0.24,
         "cache_write": 0
       },
       "limit": {
-        "context": 131072,
-        "output": 98304
+        "context": 200000,
+        "output": 131072
       }
     },
-    "glm-4.5-flash": {
-      "id": "glm-4.5-flash",
-      "name": "GLM-4.5-Flash",
-      "family": "glm-flash",
+    "glm-4.5": {
+      "id": "glm-4.5",
+      "name": "GLM-4.5",
+      "family": "glm",
       "attachment": false,
       "reasoning": true,
       "tool_call": true,
@@ -124,9 +130,9 @@
       },
       "open_weights": true,
       "cost": {
-        "input": 0,
-        "output": 0,
-        "cache_read": 0,
+        "input": 0.6,
+        "output": 2.2,
+        "cache_read": 0.11,
         "cache_write": 0
       },
       "limit": {
@@ -134,9 +140,9 @@
         "output": 98304
       }
     },
-    "glm-4.7-flash": {
-      "id": "glm-4.7-flash",
-      "name": "GLM-4.7-Flash",
+    "glm-4.7-flashx": {
+      "id": "glm-4.7-flashx",
+      "name": "GLM-4.7-FlashX",
       "family": "glm-flash",
       "attachment": false,
       "reasoning": true,
@@ -155,9 +161,9 @@
       },
       "open_weights": true,
       "cost": {
-        "input": 0,
-        "output": 0,
-        "cache_read": 0,
+        "input": 0.07,
+        "output": 0.4,
+        "cache_read": 0.01,
         "cache_write": 0
       },
       "limit": {
@@ -196,20 +202,48 @@
         "output": 131072
       }
     },
-    "glm-4.7": {
-      "id": "glm-4.7",
-      "name": "GLM-4.7",
+    "glm-4.6v": {
+      "id": "glm-4.6v",
+      "name": "GLM-4.6V",
       "family": "glm",
-      "attachment": false,
+      "attachment": true,
       "reasoning": true,
       "tool_call": true,
-      "interleaved": {
-        "field": "reasoning_content"
+      "temperature": true,
+      "knowledge": "2025-04",
+      "release_date": "2025-12-08",
+      "last_updated": "2025-12-08",
+      "modalities": {
+        "input": [
+          "text",
+          "image",
+          "video"
+        ],
+        "output": [
+          "text"
+        ]
       },
+      "open_weights": true,
+      "cost": {
+        "input": 0.3,
+        "output": 0.9
+      },
+      "limit": {
+        "context": 128000,
+        "output": 32768
+      }
+    },
+    "glm-4.5-flash": {
+      "id": "glm-4.5-flash",
+      "name": "GLM-4.5-Flash",
+      "family": "glm-flash",
+      "attachment": false,
+      "reasoning": true,
+      "tool_call": true,
       "temperature": true,
       "knowledge": "2025-04",
-      "release_date": "2025-12-22",
-      "last_updated": "2025-12-22",
+      "release_date": "2025-07-28",
+      "last_updated": "2025-07-28",
       "modalities": {
         "input": [
           "text"
@@ -220,19 +254,19 @@
       },
       "open_weights": true,
       "cost": {
-        "input": 0.6,
-        "output": 2.2,
-        "cache_read": 0.11,
+        "input": 0,
+        "output": 0,
+        "cache_read": 0,
         "cache_write": 0
       },
       "limit": {
-        "context": 204800,
-        "output": 131072
+        "context": 131072,
+        "output": 98304
       }
     },
-    "glm-5-turbo": {
-      "id": "glm-5-turbo",
-      "name": "GLM-5-Turbo",
+    "glm-5": {
+      "id": "glm-5",
+      "name": "GLM-5",
       "family": "glm",
       "attachment": false,
       "reasoning": true,
@@ -240,10 +274,9 @@
       "interleaved": {
         "field": "reasoning_content"
       },
-      "structured_output": true,
       "temperature": true,
-      "release_date": "2026-03-16",
-      "last_updated": "2026-03-16",
+      "release_date": "2026-02-11",
+      "last_updated": "2026-02-11",
       "modalities": {
         "input": [
           "text"
@@ -252,18 +285,49 @@
           "text"
         ]
       },
-      "open_weights": false,
+      "open_weights": true,
       "cost": {
-        "input": 1.2,
-        "output": 4,
-        "cache_read": 0.24,
+        "input": 1,
+        "output": 3.2,
+        "cache_read": 0.2,
         "cache_write": 0
       },
       "limit": {
-        "context": 200000,
+        "context": 204800,
         "output": 131072
       }
     },
+    "glm-4.5-air": {
+      "id": "glm-4.5-air",
+      "name": "GLM-4.5-Air",
+      "family": "glm-air",
+      "attachment": false,
+      "reasoning": true,
+      "tool_call": true,
+      "temperature": true,
+      "knowledge": "2025-04",
+      "release_date": "2025-07-28",
+      "last_updated": "2025-07-28",
+      "modalities": {
+        "input": [
+          "text"
+        ],
+        "output": [
+          "text"
+        ]
+      },
+      "open_weights": true,
+      "cost": {
+        "input": 0.2,
+        "output": 1.1,
+        "cache_read": 0.03,
+        "cache_write": 0
+      },
+      "limit": {
+        "context": 131072,
+        "output": 98304
+      }
+    },
     "glm-4.5v": {
       "id": "glm-4.5v",
       "name": "GLM-4.5V",
@@ -295,22 +359,23 @@
         "output": 16384
       }
     },
-    "glm-4.6v": {
-      "id": "glm-4.6v",
-      "name": "GLM-4.6V",
+    "glm-4.7": {
+      "id": "glm-4.7",
+      "name": "GLM-4.7",
       "family": "glm",
-      "attachment": true,
+      "attachment": false,
       "reasoning": true,
       "tool_call": true,
+      "interleaved": {
+        "field": "reasoning_content"
+      },
       "temperature": true,
       "knowledge": "2025-04",
-      "release_date": "2025-12-08",
-      "last_updated": "2025-12-08",
+      "release_date": "2025-12-22",
+      "last_updated": "2025-12-22",
       "modalities": {
         "input": [
-          "text",
-          "image",
-          "video"
+          "text"
         ],
         "output": [
           "text"
@@ -318,12 +383,14 @@
       },
       "open_weights": true,
       "cost": {
-        "input": 0.3,
-        "output": 0.9
+        "input": 0.6,
+        "output": 2.2,
+        "cache_read": 0.11,
+        "cache_write": 0
       },
       "limit": {
-        "context": 128000,
-        "output": 32768
+        "context": 204800,
+        "output": 131072
       }
     }
   }

data/lib/llm/context/deserializer.rb CHANGED Viewed

@@ -12,7 +12,8 @@ class LLM::Context
       returns = deserialize_returns(payload["content"]) if returns.nil?
       original_tool_calls = payload["original_tool_calls"]
       usage = payload["usage"]
-      extra = {tool_calls:, original_tool_calls:, tools: @params[:tools], usage:}.compact
+      reasoning_content = payload["reasoning_content"]
+      extra = {tool_calls:, original_tool_calls:, tools: @params[:tools], usage:, reasoning_content:}.compact
       content = returns.nil? ? payload["content"] : returns
       LLM::Message.new(payload["role"], content, extra)
     end

data/lib/llm/context.rb CHANGED Viewed

@@ -42,6 +42,11 @@ module LLM
     # @return [LLM::Provider]
     attr_reader :llm
+    ##
+    # Returns the context mode
+    # @return [Symbol]
+    attr_reader :mode
     ##
     # @param [LLM::Provider] llm
     #  A provider
@@ -49,10 +54,12 @@ module LLM
     #  The parameters to maintain throughout the conversation.
     #  Any parameter the provider supports can be included and
     #  not only those listed here.
+    # @option params [Symbol] :mode Defaults to :completions
     # @option params [String] :model Defaults to the provider's default model
     # @option params [Array<LLM::Function>, nil] :tools Defaults to nil
     def initialize(llm, params = {})
       @llm = llm
+      @mode = params.delete(:mode) || :completions
       @params = {model: llm.default_model, schema: nil}.compact.merge!(params)
       @messages = LLM::Buffer.new(llm)
     end
@@ -70,6 +77,7 @@ module LLM
     #   res = ctx.talk("Hello, what is your name?")
     #   puts res.messages[0].content
     def talk(prompt, params = {})
+      return respond(prompt, params) if mode == :responses
       params = params.merge(messages: @messages.to_a)
       params = @params.merge(params)
       res = @llm.complete(prompt, params)
@@ -109,7 +117,7 @@ module LLM
     # @return [String]
     def inspect
       "#<#{self.class.name}:0x#{object_id.to_s(16)} " \
-      "@llm=#{@llm.class}, @params=#{@params.inspect}, " \
+      "@llm=#{@llm.class}, @mode=#{@mode.inspect}, @params=#{@params.inspect}, " \
       "@messages=#{@messages.inspect}>"
     end
@@ -117,10 +125,11 @@ module LLM
     # Returns an array of functions that can be called
     # @return [Array<LLM::Function>]
     def functions
+      return_ids = returns.map(&:id)
       @messages
         .select(&:assistant?)
         .flat_map do |msg|
-          fns = msg.functions.select(&:pending?)
+          fns = msg.functions.select { _1.pending? && !return_ids.include?(_1.id) }
           fns.each do |fn|
             fn.tracer = tracer
             fn.model  = msg.model
@@ -128,6 +137,53 @@ module LLM
         end.extend(LLM::Function::Array)
     end
+    ##
+    # Calls a named collection of work through the context.
+    #
+    # This currently supports `:functions`, forwarding to `functions.call`.
+    #
+    # @param [Symbol] target
+    #  The work collection to call
+    # @return [Array<LLM::Function::Return>]
+    def call(target)
+      case target
+      when :functions then functions.call
+      else raise ArgumentError, "Unknown target: #{target.inspect}. Expected :functions"
+      end
+    end
+    ##
+    # Returns tool returns accumulated in this context
+    # @return [Array<LLM::Function::Return>]
+    def returns
+      @messages
+        .select(&:tool_return?)
+        .flat_map do |msg|
+          LLM::Function::Return === msg.content ?
+            [msg.content] :
+            [*msg.content].grep(LLM::Function::Return)
+        end
+    end
+    ##
+    # Waits for queued tool work to finish.
+    #
+    # This prefers queued streamed tool work when the configured stream
+    # exposes a non-empty queue. Otherwise it falls back to waiting on
+    # the context's pending functions directly.
+    #
+    # @param [Symbol] strategy
+    #  The concurrency strategy to use
+    # @return [Array<LLM::Function::Return>]
+    def wait(strategy)
+      stream = @params[:stream]
+      if LLM::Stream === stream && !stream.queue.empty?
+        stream.wait(strategy)
+      else
+        functions.wait(strategy)
+      end
+    end
     ##
     # Returns token usage accumulated in this context
     # @note

data/lib/llm/contract/completion.rb CHANGED Viewed

@@ -50,6 +50,13 @@ module LLM::Contract
       messages.find(&:assistant?).content
     end
+    ##
+    # @return [String, nil]
+    #  Returns the reasoning content when the provider exposes it
+    def reasoning_content
+      messages.find(&:assistant?)&.reasoning_content
+    end
     ##
     # @return [Hash]
     #  Returns the LLM response after parsing it as JSON

data/lib/llm/error.rb CHANGED Viewed

@@ -55,6 +55,10 @@ module LLM
   # When stuck in a tool call loop
   ToolLoopError = Class.new(Error)
+  ##
+  # When a tool call cannot be mapped to a local tool
+  NoSuchToolError = Class.new(Error)
   ##
   # When {LLM::Registry} can't map a model
   NoSuchModelError = Class.new(Error)

data/lib/llm/eventhandler.rb CHANGED Viewed

@@ -42,5 +42,12 @@ module LLM
     # Returns a fully constructed response body
     # @return [LLM::Object]
     def body = @parser.body
+    ##
+    # Frees parser state after streaming completes.
+    # @return [void]
+    def free
+      @parser.free
+    end
   end
 end

data/lib/llm/function/registry.rb ADDED Viewed

@@ -0,0 +1,106 @@
+# frozen_string_literal: true
+class LLM::Function
+  ##
+  # The {LLM::Function::Registry LLM::Function::Registry} module provides
+  # shared registry behavior for functions and tools. {LLM::Tool.registry}
+  # stores {LLM::Tool LLM::Tool} subclasses, including dynamically created MCP
+  # tool subclasses, while {LLM::Function.registry} stores the functions
+  # derived from those tools.
+  #
+  # The registry overwrites older tool definitions with newer ones when they
+  # share the same tool name. In practice, tool identity is resolved by name,
+  # and LLMs generally do not allow two tools with the same name.
+  #
+  # Functions defined with {LLM.function} are not added to the function
+  # registry, since they may be closures bound to local state. Each registry
+  # decides how entries are keyed via {#registry_key}.
+  module Registry
+    ##
+    # @api private
+    def self.extended(klass)
+      klass.instance_variable_set(:@__registry, {})
+      klass.instance_variable_set(:@__names, {})
+      klass.instance_variable_set(:@__monitor, Monitor.new)
+    end
+    ##
+    # Returns all registered entries.
+    # @return [Array<LLM::Function, LLM::Tool>]
+    def registry
+      lock do
+        @__registry.values
+      end
+    end
+    ##
+    # Finds a registered entry by name.
+    # @param [String] name
+    # @return [LLM::Function, LLM::Tool, nil]
+    def find_by_name(name)
+      lock do
+        @__names[name.to_s] ||= @__registry.each_value.find do
+          tool_name(_1).to_s == name.to_s
+        end
+      end
+    end
+    ##
+    # Clears the registry.
+    # @return [void]
+    def clear_registry!
+      lock do
+        @__registry.clear
+        @__names.clear
+        nil
+      end
+    end
+    ##
+    # Registers an entry.
+    # @param [LLM::Function, LLM::Tool] entry
+    # @api private
+    def register(entry)
+      lock do
+        @__registry[registry_key(entry)] = entry
+        @__names[tool_name(entry).to_s] = entry if tool_name(entry)
+      end
+    end
+    ##
+    # Unregisters an entry.
+    # @param [LLM::Function, LLM::Tool] entry
+    # @api private
+    def unregister(entry)
+      lock do
+        @__registry.delete(registry_key(entry))
+        @__registry.delete(entry)
+        @__names.delete(tool_name(entry).to_s) if tool_name(entry)
+      end
+    end
+    ##
+    # Returns the storage key for an entry.
+    # @param [LLM::Function, LLM::Tool] entry
+    # @return [Class<LLM::Tool>, String, nil]
+    # @api private
+    def registry_key(entry)
+      tool_name(entry) ? entry.name : entry
+    end
+    ##
+    # Returns the tool name, or nil for tools that are not fully initialized.
+    # @param [LLM::Function, LLM::Tool] entry
+    # @return [String, nil]
+    # @api private
+    def tool_name(entry)
+      entry.respond_to?(:name) ? entry.name : nil
+    end
+    ##
+    # @api private
+    def lock(&)
+      @__monitor.synchronize(&)
+    end
+  end
+end