RubyGems - llms - Versions diffs - 0.1.0 - Mend

llms 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

checksums.yaml +7 -0
data/LICENSE +21 -0
data/README.md +160 -0
data/bin/llms-chat +6 -0
data/bin/llms-test-model-access +4 -0
data/bin/llms-test-model-image-support +4 -0
data/bin/llms-test-model-prompt-caching +4 -0
data/bin/llms-test-model-tool-use +5 -0
data/lib/llms/adapters/anthropic_message_adapter.rb +73 -0
data/lib/llms/adapters/anthropic_tool_call_adapter.rb +20 -0
data/lib/llms/adapters/base_message_adapter.rb +60 -0
data/lib/llms/adapters/google_gemini_message_adapter.rb +72 -0
data/lib/llms/adapters/google_gemini_tool_call_adapter.rb +20 -0
data/lib/llms/adapters/open_ai_compatible_message_adapter.rb +88 -0
data/lib/llms/adapters/open_ai_compatible_tool_call_adapter.rb +67 -0
data/lib/llms/adapters.rb +12 -0
data/lib/llms/apis/google_gemini_api.rb +45 -0
data/lib/llms/apis/open_ai_compatible_api.rb +54 -0
data/lib/llms/cli/base.rb +186 -0
data/lib/llms/cli/chat.rb +92 -0
data/lib/llms/cli/test_access.rb +79 -0
data/lib/llms/cli/test_image_support.rb +92 -0
data/lib/llms/cli/test_prompt_caching.rb +275 -0
data/lib/llms/cli/test_tool_use.rb +108 -0
data/lib/llms/cli.rb +12 -0
data/lib/llms/conversation.rb +100 -0
data/lib/llms/conversation_message.rb +60 -0
data/lib/llms/conversation_tool_call.rb +14 -0
data/lib/llms/conversation_tool_result.rb +15 -0
data/lib/llms/exceptions.rb +33 -0
data/lib/llms/executors/anthropic_executor.rb +247 -0
data/lib/llms/executors/base_executor.rb +144 -0
data/lib/llms/executors/google_gemini_executor.rb +212 -0
data/lib/llms/executors/hugging_face_executor.rb +17 -0
data/lib/llms/executors/open_ai_compatible_executor.rb +209 -0
data/lib/llms/executors.rb +52 -0
data/lib/llms/models/model.rb +86 -0
data/lib/llms/models/provider.rb +48 -0
data/lib/llms/models.rb +187 -0
data/lib/llms/parsers/anthropic_chat_response_stream_parser.rb +184 -0
data/lib/llms/parsers/google_gemini_chat_response_stream_parser.rb +128 -0
data/lib/llms/parsers/open_ai_compatible_chat_response_stream_parser.rb +170 -0
data/lib/llms/parsers/partial_json_parser.rb +77 -0
data/lib/llms/parsers/sse_chat_response_stream_parser.rb +72 -0
data/lib/llms/public_models.json +607 -0
data/lib/llms/stream/event_emitter.rb +48 -0
data/lib/llms/stream/events.rb +104 -0
data/lib/llms/usage/cost_calculator.rb +75 -0
data/lib/llms/usage/usage_data.rb +46 -0
data/lib/llms.rb +16 -0
metadata +243 -0

data/lib/llms/public_models.json ADDED Viewed

@@ -0,0 +1,607 @@
+{
+    "anthropic": {
+        "enabled": true,
+        "executor": "AnthropicExecutor",
+        "api_key_env_var": "ANTHROPIC_API_KEY",
+        "tools": true,
+        "vision": true,
+        "models": {
+            "claude-opus-4-20250514": {
+                "aliases": ["claude-opus-4-0"],
+                "latest": true,
+                "pricing": {
+                    "input": 15.00,
+                    "output": 75.00,
+                    "cache_write_5min": 18.75,
+                    "cache_write_1hr": 30.00,
+                    "cache_read": 1.50
+                },
+                "context_window": 200000,
+                "max_output": 32000,
+                "max_output_thinking": 32000
+            },
+            "claude-sonnet-4-20250514": {
+                "aliases": ["claude-sonnet-4-0"],
+                "latest": true,
+                "pricing": {
+                    "input": 3.00,
+                    "output": 15.00,
+                    "cache_write_5min": 3.75,
+                    "cache_write_1hr": 6.00,
+                    "cache_read": 0.30
+                },
+                "context_window": 200000,
+                "max_output": 64000,
+                "max_output_thinking": 64000
+            },
+            "claude-3-7-sonnet-20250219": {
+                "aliases": ["claude-3-7-sonnet-latest"],
+                "pricing": {
+                    "input": 3.00,
+                    "output": 15.00,
+                    "cache_write_5min": 3.75,
+                    "cache_write_1hr": 6.00,
+                    "cache_read": 0.30
+                },
+                "context_window": 200000,
+                "max_output": 8192,
+                "max_output_thinking": 64000,
+                "_note": "Include the beta header output-128k-2025-02-19 in your API request to increase the maximum output token length to 128k tokens for Claude 3.7 Sonnet."
+            },
+            "claude-3-5-sonnet-20241022": {
+                "aliases": ["claude-3-5-sonnet-latest"],
+                "pricing": {
+                    "input": 3.00,
+                    "output": 15.00,
+                    "cache_write_5min": 3.75,
+                    "cache_write_1hr": 6.00,
+                    "cache_read": 0.30
+                },
+                "context_window": 200000,
+                "max_output": 8192
+            },
+            "claude-3-5-sonnet-20240620": {
+                "pricing": {
+                    "input": 3.00,
+                    "output": 15.00,
+                    "cache_write_5min": 3.75,
+                    "cache_write_1hr": 6.00,
+                    "cache_read": 0.30
+                },
+                "context_window": 200000,
+                "max_output": 8192
+            },
+            "claude-3-5-haiku-20241022": {
+                "aliases": ["claude-3-5-haiku-latest"],
+                "latest": true,
+                "pricing": {
+                    "input": 0.8,
+                    "output": 4.00,
+                    "cache_write_5min": 1.00,
+                    "cache_write_1hr": 1.60,
+                    "cache_read": 0.08
+                },
+                "context_window": 200000,
+                "max_output": 8192
+            },
+            "claude-3-opus-20240229": {
+                "pricing": {
+                    "input": 15.00,
+                    "output": 75.00,
+                    "cache_write_5min": 18.75,
+                    "cache_write_1hr": 30.00,
+                    "cache_read": 1.50
+                },
+                "context_window": 200000,
+                "max_output": 4096
+            },
+            "claude-3-haiku-20240307": {
+                "pricing": {
+                    "input": 0.25,
+                    "output": 1.25,
+                    "cache_write_5min": 0.30,
+                    "cache_write_1hr": 0.50,
+                    "cache_read": 0.03
+                },
+                "context_window": 200000,
+                "max_output": 4096
+            }
+        }
+    },
+    "google": {
+        "enabled": true,
+        "executor": "GoogleGeminiExecutor",
+        "api_key_env_var": "GOOGLE_GEMINI_API_KEY",
+        "tools": true,
+        "vision": true,
+        "_TODO": "check all below do support vision; mark which do not support thinking",
+        "models": {
+            "gemini-2.5-pro": {
+                "latest": true,
+                "pricing": {
+                    "input": 1.25,
+                    "output": 10.00,
+                    "cache_write_1mt/hr": 4.50,
+                    "cache_read": 0.31
+                },
+                "context_window": 1048576,
+                "max_output": 65536
+            },
+            "gemini-2.5-flash": {
+                "latest": true,
+                "pricing": {
+                    "input": 0.30,
+                    "output": 1.00,
+                    "cache_write_1mt/hr": 1.00,
+                    "cache_read": 0.075
+                },
+                "context_window": 1048576,
+                "max_output": 65536
+            },
+            "gemini-2.5-flash-lite-preview-06-17": {
+                "latest": true,
+                "pricing": {
+                    "input": 0.10,
+                    "output": 0.40,
+                    "cache_write_1mt/hr": 1.00,
+                    "cache_read": 0.025
+                },
+                "context_window": 1000000,
+                "max_output": 64000
+            },
+            "gemini-2.0-flash": {
+                "pricing": {
+                    "input": 0.10,
+                    "output": 0.40,
+                    "cache_write_1mt/hr": 1.00,
+                    "cache_read": 0.025
+                },
+                "context_window": 1048576,
+                "max_output": 8192
+            },
+            "gemini-2.0-flash-lite": {
+                "pricing": {
+                    "input": 0.075,
+                    "output": 0.30
+                },
+                "context_window": 1048576,
+                "max_output": 8192
+            },
+            "gemini-1.5-pro": {
+                "pricing": {
+                    "input": 1.25,
+                    "output": 5.00,
+                    "cache_write_1mt/hr": 4.50,
+                    "cache_read": 0.3125
+                },
+                "context_window": 2097152,
+                "max_output": 8192
+            },
+            "gemini-1.5-flash": {
+                "pricing": {
+                    "input": 0.075,
+                    "output": 0.30,
+                    "cache_write_1mt/hr": 1.00,
+                    "cache_read": 0.0185
+                },
+                "context_window": 1048576,
+                "max_output": 8192
+            },
+            "gemini-1.5-flash-8b": {
+                "pricing": {
+                    "input": 0.0375,
+                    "output": 0.075,
+                    "cache_write_1mt/hr": 0.25,
+                    "cache_read": 0.01
+                },
+                "context_window": 1048576,
+                "max_output": 8192
+            }
+        }
+    },
+    "xai": {
+        "enabled": true,
+        "executor": "OpenAICompatibleExecutor",
+        "api_key_env_var": "XAI_API_KEY",
+        "base_url": "https://api.x.ai/v1",
+        "tools": true,
+        "models": {
+            "grok-4-0709": {
+                "latest": true,
+                "pricing": {
+                    "input": 3.00,
+                    "cached_input": 0.75,
+                    "output": 15.00
+                },
+                "context_window": 256000,
+                "vision": true
+            },
+            "grok-3": {
+                "pricing": {
+                    "input": 3.00,
+                    "cached_input": 0.75,
+                    "output": 15.00
+                },
+                "context_window": 131072,
+                "vision": false
+            },
+            "grok-3-mini": {
+                "latest": true,
+                "pricing": {
+                    "input": 0.30,
+                    "cached_input": 0.075,
+                    "output": 0.50
+                },
+                "context_window": 131072,
+                "vision": false
+            },
+            "grok-3-fast": {
+                "pricing": {
+                    "input": 5.00,
+                    "cached_input": 1.25,
+                    "output": 25.00
+                },
+                "context_window": 131072,
+                "vision": false
+            },
+            "grok-3-mini-fast": {
+                "latest": true,
+                "pricing": {
+                    "input": 0.60,
+                    "cached_input": 0.15,
+                    "output": 4.00
+                },
+                "context_window": 131072,
+                "vision": false
+            },
+            "grok-2-1212": {
+                "pricing": {
+                    "input": 2.00,
+                    "output": 10.00
+                },
+                "context_window": 131072,
+                "vision": false
+            },
+            "grok-2-vision-1212": {
+                "pricing": {
+                    "input": 2.00,
+                    "output": 10.00
+                },
+                "context_window": 32768,
+                "vision": true
+            }
+        }
+    },
+    "cerebras": {
+        "enabled": true,
+        "executor": "OpenAICompatibleExecutor",
+        "api_key_env_var": "CEREBRAS_API_KEY",
+        "base_url": "https://api.cerebras.ai/v1",
+        "exclude_params": ["max_tokens"],
+        "models": {
+            "llama-4-scout-17b-16e-instruct": {
+                "pricing": {
+                    "input": 0.65,
+                    "output": 0.85
+                },
+                "context_window": 8192,
+                "tools": true
+            },
+            "llama3.1-8b": {
+                "pricing": {
+                    "input": 0.10,
+                    "output": 0.10
+                },
+                "context_window": 8192,
+                "tools": true
+            },
+            "llama3.3-70b": {
+                "pricing": {
+                    "input": 0.85,
+                    "output": 1.20
+                },
+                "context_window": 8192,
+                "tools": true
+            },
+            "qwen-3-32b": {
+                "pricing": {
+                    "input": 0.40,
+                    "output": 0.80
+                },
+                "context_window": 64000,
+                "tools": true,
+                "enabled": false
+            },
+            "qwen-3-235b-a22b": {
+                "pricing": {
+                    "input": 0.60,
+                    "output": 1.20
+                },
+                "context_window": 41000,
+                "tools": true,
+                "enabled": false
+            }
+        }
+    },
+    "hyperbolic": {
+        "enabled": true,
+        "executor": "OpenAICompatibleExecutor",
+        "api_key_env_var": "HYPERBOLIC_API_KEY",
+        "base_url": "https://api.hyperbolic.xyz/v1",
+        "tools": false,
+        "models": {
+            "moonshotai/Kimi-K2-Instruct": {
+                "pricing": {
+                    "input": 2.00,
+                    "output": 2.00
+                },
+                "context_window": 131069,
+                "vision": false
+            },
+            "deepseek-ai/DeepSeek-R1-0528": {
+                "pricing": {
+                    "input": 3.00,
+                    "output": 3.00
+                },
+                "context_window": 163840,
+                "vision": false
+            },
+            "Qwen/Qwen3-235B-A22B": {
+                "pricing": {
+                    "input": 0.40,
+                    "output": 0.40
+                },
+                "context_window": 40960
+            },
+            "deepseek-ai/DeepSeek-V3-0324": {
+                "pricing": {
+                    "input": 1.25,
+                    "output": 1.25
+                },
+                "context_window": 131069,
+                "vision": false
+            },
+            "Qwen/QwQ-32B": {
+                "pricing": {
+                    "input": 0.40,
+                    "output": 0.40
+                },
+                "context_window": 131069,
+                "vision": false
+            },
+            "meta-llama/Llama-3.3-70B-Instruct": {
+                "pricing": {
+                    "input": 0.40,
+                    "output": 0.40
+                },
+                "context_window": 131069,
+                "vision": false
+            },
+            "Qwen/Qwen2.5-Coder-32B-Instruct": {
+                "pricing": {
+                    "input": 0.2,
+                    "output": 0.2
+                },
+                "context_window": 131072,
+                "vision": false
+            }
+        }
+    },
+    "groq": {
+        "enabled": true,
+        "executor": "OpenAICompatibleExecutor",
+        "api_key_env_var": "GROQ_API_KEY",
+        "base_url": "https://api.groq.com/openai/v1",
+        "models": {
+            "llama-3.3-70b-versatile": {
+                "pricing": {
+                    "input": 0.59,
+                    "output": 0.79
+                },
+                "context_window": 131072,
+                "max_tokens": 32768,
+                "tools": true
+            },
+            "llama-3.1-8b-instant": {
+                "pricing": {
+                    "input": 0.05,
+                    "output": 0.08
+                },
+                "context_window": 131072,
+                "max_tokens": 131072,
+                "tools": true
+            }
+        }
+    },
+    "together": {
+        "enabled": true,
+        "executor": "OpenAICompatibleExecutor",
+        "api_key_env_var": "TOGETHER_API_KEY",
+        "base_url": "https://api.together.xyz/v1",
+        "exclude_params": ["max_completion_tokens"],
+        "models": {
+            "moonshotai/Kimi-K2-Instruct": {
+                "pricing": {
+                    "input": 1.00,
+                    "output": 3.00
+                },
+                "context_window": 128000,
+                "quantization": "FP8",
+                "tools": true
+            },
+            "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": {
+                "pricing": {
+                    "input": 0.27,
+                    "output": 0.85
+                },
+                "context_window": 1048576,
+                "quantization": "FP8",
+                "tools": true
+            },
+            "meta-llama/Llama-4-Scout-17B-16E-Instruct": {
+                "pricing": {
+                    "input": 0.18,
+                    "output": 0.59
+                },
+                "context_window": 1048576,
+                "tools": true
+            },
+            "meta-llama/Llama-3.3-70B-Instruct-Turbo": {
+                "pricing": {
+                    "input": 0.88,
+                    "output": 0.88
+                },
+                "context_window": 131072,
+                "quantization": "FP8",
+                "tools": true
+            },
+            "meta-llama/Llama-3.2-3B-Instruct-Turbo": {
+                "pricing": {
+                    "input": 0.06,
+                    "output": 0.06
+                },
+                "context_window": 131072,
+                "tools": true
+            },
+            "Qwen/Qwen3-235B-A22B-fp8-tput": {
+                "pricing": {
+                    "input": 0.20,
+                    "output": 0.60
+                },
+                "context_window": 40960,
+                "quantization": "FP8",
+                "tools": true
+            },
+            "deepseek-ai/DeepSeek-V3": {
+                "pricing": {
+                    "input": 1.25,
+                    "output": 1.25
+                },
+	        "context_window": 163839,
+                "quantization": "FP8",
+                "tools": true
+            }
+        }
+    },
+    "fireworks": {
+        "enabled": true,
+        "executor": "OpenAICompatibleExecutor",
+        "api_key_env_var": "FIREWORKS_API_KEY",
+        "base_url": "https://api.fireworks.ai/inference/v1",
+        "exclude_params": ["max_tokens"],
+        "vision": true,
+        "models": {
+            "accounts/fireworks/models/deepseek-r1-0528": {
+                "pricing": {
+                    "input": 3.00,
+                    "output": 8.00
+                },
+                "context_window": 160000,
+                "tools": false,
+                "vision": false
+            },
+            "accounts/fireworks/models/qwen3-235b-a22b": {
+                "pricing": {
+                    "input": 0.22,
+                    "output": 0.88
+                },
+                "context_window": 128000,
+                "tools": true,
+                "vision": false
+            },
+            "accounts/fireworks/models/qwen3-30b-a3b": {
+                "pricing": {
+                    "input": 0.15,
+                    "output": 0.60
+                },
+                "context_window": 128000,
+                "tools": true,
+                "vision": false
+            },
+            "accounts/fireworks/models/llama4-maverick-instruct-basic": {
+                "pricing": {
+                    "input": 0.22,
+                    "output": 0.88
+                },
+                "context_window": 1000000,
+                "tools": true
+            },
+            "accounts/fireworks/models/llama4-scout-instruct-basic": {
+                "pricing": {
+                    "input": 0.15,
+                    "output": 0.60
+                },
+                "context_window": 10000000,
+                "tools": true
+            },
+            "accounts/fireworks/models/deepseek-v3-0324": {
+                "pricing": {
+                    "input": 0.90,
+                    "output": 0.90
+                },
+                "context_window": 160000,
+                "tools": true,
+                "vision": false
+            },
+            "accounts/fireworks/models/kimi-k2-instruct" : {
+                "pricing": {
+                    "input": 1.00,
+                    "output": 3.00
+                },
+                "context_window": 128000,
+                "tools": true,
+                "vision": false
+            }
+        }
+    },
+    "deepinfra": {
+        "enabled": false,
+        "executor": "OpenAICompatibleExecutor",
+        "api_key_env_var": "DEEPINFRA_API_KEY",
+        "base_url": "https://api.deepinfra.com/v1/openai",
+        "models": {
+            "moonshotai/Kimi-K2-Instruct": {
+                "pricing": {
+                    "input": 0.55,
+                    "output": 2.20
+                },
+                "context_window": 120000,
+                "quantization": "FP4",
+                "tools": true
+            }
+        }
+    },
+    "novita": {
+        "enabled": false,
+        "executor": "OpenAICompatibleExecutor",
+        "api_key_env_var": "NOVITA_API_KEY",
+        "base_url": "https://api.novita.ai/v3/openai",
+        "models": {
+            "moonshotai/kimi-k2-instruct": {
+                "pricing": {
+                    "input": 0.57,
+                    "output": 2.30
+                },
+                "context_window": 131072,
+                "tools": true
+            },
+            "deepseek/deepseek-v3-0324": {
+                "pricing": {
+                    "input": 0.28,
+                    "output": 1.14
+                },
+                "context_window": 163840,
+                "tools": true
+            }
+        }
+    }
+}

data/lib/llms/stream/event_emitter.rb ADDED Viewed

@@ -0,0 +1,48 @@
module LLMs
  module Stream
    # Minimal publish/subscribe hub for streaming events.
    #
    # Handlers are kept per event type and are invoked in registration order
    # with the single +data+ argument given to #emit. Only the event types
    # listed in VALID_EVENTS may be subscribed to or emitted.
    class EventEmitter
      # Every event type accepted by #on, #add_handler and #emit.
      # Frozen so the set of valid events cannot be altered at runtime.
      VALID_EVENTS = %i[
        message_started
        usage_updated
        text_delta
        thinking_delta
        tool_call_started
        tool_call_arguments_json_delta
        tool_call_arguments_updated
        tool_call_completed
        message_completed
      ].freeze

      def initialize
        # Maps event type (Symbol) => Array of callables, created lazily.
        @handlers = {}
      end

      # Registers the given block as a handler for +event_type+.
      #
      # @param event_type [Symbol] one of VALID_EVENTS
      # @yieldparam data [Object] the value passed to #emit
      # @return [EventEmitter] self, so registrations can be chained
      # @raise [ArgumentError] if the event type is unknown or no block is given
      def on(event_type, &block)
        add_handler(event_type, block)
        self
      end

      # Subscribes +obj+ to every event in VALID_EVENTS: when an event is
      # emitted, the method of the same name is invoked on +obj+ (via #send)
      # with the event data as its only argument.
      #
      # @param obj [Object] receiver of the forwarded events
      # @return [EventEmitter] self
      def connect(obj)
        VALID_EVENTS.each do |event_type|
          on(event_type) { |data| obj.send(event_type, data) }
        end
        self
      end

      # Appends +callable+ to the handler list for +event_type+.
      #
      # @param event_type [Symbol] one of VALID_EVENTS
      # @param callable [#call] invoked with the emitted data
      # @return [Array] the handlers now registered for +event_type+
      # @raise [ArgumentError] if the event type is unknown, or if +callable+
      #   does not respond to #call (e.g. #on was invoked without a block)
      def add_handler(event_type, callable)
        validate_event_type!(event_type)
        # Reject bad handlers here rather than letting a later #emit fail with
        # NoMethodError far away from the faulty registration.
        unless callable.respond_to?(:call)
          raise ArgumentError, "Handler for #{event_type} must respond to #call"
        end

        (@handlers[event_type] ||= []) << callable
      end

      # Invokes every handler registered for +event_type+, in registration
      # order, passing +data+ to each.
      #
      # @param event_type [Symbol] one of VALID_EVENTS
      # @param data [Object] payload handed to each handler
      # @return [Array, nil] the handlers that were invoked, or nil when none
      #   are registered for this event type
      # @raise [ArgumentError] if the event type is unknown
      def emit(event_type, data)
        validate_event_type!(event_type)
        @handlers[event_type]&.each { |handler| handler.call(data) }
      end

      private

      # Raises ArgumentError unless +event_type+ is listed in VALID_EVENTS.
      def validate_event_type!(event_type)
        return if VALID_EVENTS.include?(event_type)

        raise ArgumentError, "Unknown event type: #{event_type}"
      end
    end
  end
end