llm.rb 0.1.0 → 0.2.1
This diff shows the changes between publicly released versions of the package, as they appear in its public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +85 -24
- data/lib/llm/conversation.rb +62 -10
- data/lib/llm/core_ext/ostruct.rb +0 -0
- data/lib/llm/error.rb +0 -0
- data/lib/llm/file.rb +0 -0
- data/lib/llm/http_client.rb +0 -0
- data/lib/llm/message.rb +1 -1
- data/lib/llm/message_queue.rb +18 -11
- data/lib/llm/model.rb +7 -0
- data/lib/llm/provider.rb +144 -98
- data/lib/llm/providers/anthropic/error_handler.rb +1 -1
- data/lib/llm/providers/anthropic/format.rb +7 -1
- data/lib/llm/providers/anthropic/response_parser.rb +0 -0
- data/lib/llm/providers/anthropic.rb +31 -15
- data/lib/llm/providers/gemini/error_handler.rb +0 -0
- data/lib/llm/providers/gemini/format.rb +7 -1
- data/lib/llm/providers/gemini/response_parser.rb +0 -0
- data/lib/llm/providers/gemini.rb +25 -14
- data/lib/llm/providers/ollama/error_handler.rb +0 -0
- data/lib/llm/providers/ollama/format.rb +7 -1
- data/lib/llm/providers/ollama/response_parser.rb +13 -0
- data/lib/llm/providers/ollama.rb +32 -8
- data/lib/llm/providers/openai/error_handler.rb +0 -0
- data/lib/llm/providers/openai/format.rb +7 -1
- data/lib/llm/providers/openai/response_parser.rb +5 -3
- data/lib/llm/providers/openai.rb +22 -12
- data/lib/llm/providers/voyageai/error_handler.rb +32 -0
- data/lib/llm/providers/voyageai/response_parser.rb +13 -0
- data/lib/llm/providers/voyageai.rb +44 -0
- data/lib/llm/response/completion.rb +0 -0
- data/lib/llm/response/embedding.rb +0 -0
- data/lib/llm/response.rb +0 -0
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +19 -9
- data/llm.gemspec +6 -1
- data/share/llm/models/anthropic.yml +35 -0
- data/share/llm/models/gemini.yml +35 -0
- data/share/llm/models/ollama.yml +155 -0
- data/share/llm/models/openai.yml +46 -0
- data/spec/anthropic/completion_spec.rb +11 -27
- data/spec/anthropic/embedding_spec.rb +25 -0
- data/spec/gemini/completion_spec.rb +34 -29
- data/spec/gemini/embedding_spec.rb +4 -12
- data/spec/llm/conversation_spec.rb +93 -1
- data/spec/ollama/completion_spec.rb +7 -16
- data/spec/ollama/embedding_spec.rb +14 -5
- data/spec/openai/completion_spec.rb +40 -43
- data/spec/openai/embedding_spec.rb +4 -12
- data/spec/readme_spec.rb +9 -12
- data/spec/setup.rb +7 -16
- metadata +81 -4
- data/lib/llm/lazy_conversation.rb +0 -39
- data/spec/llm/lazy_conversation_spec.rb +0 -110
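The headline changes, judging from the file list and the spec diffs below: each provider now ships a YAML model registry under share/llm/models/, a VoyageAI provider was added (apparently backing embeddings), the old LLM::LazyConversation was folded into LLM::Conversation behind a #lazy toggle, and the specs moved from hand-written WebMock stubs to VCR cassettes. A rough usage sketch pieced together from those specs (the ENV variable name comes from the specs; nothing here is taken from the gem's README):

```ruby
require "llm"

# Any of the providers exercised in the specs: LLM.openai, LLM.gemini,
# LLM.anthropic, or LLM.ollama(nil, host: "...").
provider = LLM.openai(ENV["LLM_SECRET"])

# The lazy conversation (formerly LLM::LazyConversation) queues messages
# and only contacts the provider when the conversation is read.
bot = LLM::Conversation.new(provider).lazy
bot.chat "Keep your answers short and concise", :system
bot.chat "What is 5+5 ?"
puts bot.recent_message.content
```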
data/share/llm/models/ollama.yml (new file):

@@ -0,0 +1,155 @@
+---
+gemma3:1b:
+  name: Gemma
+  parameters: 1B
+  description: Lightweight version of Google's Gemma 3 language model, suitable for
+    low-resource environments
+  to_param: gemma3:1b
+gemma3:
+  name: Gemma
+  parameters: 4B
+  description: Balanced Gemma 3 model providing good accuracy with reasonable size
+  to_param: gemma3
+gemma3:12b:
+  name: Gemma
+  parameters: 12B
+  description: Larger Gemma 3 model offering improved reasoning and generation abilities
+  to_param: gemma3:12b
+gemma3:27b:
+  name: Gemma
+  parameters: 27B
+  description: High-end Gemma 3 model focused on top-tier performance and accuracy
+  to_param: gemma3:27b
+
+qwq:
+  name: QwQ
+  parameters: 32B
+  description: Large-scale model with high parameter count for complex tasks and
+    high-quality generation
+  to_param: qwq
+
+deepseek-r1:
+  name: DeepSeek-R1
+  parameters: 7B
+  description: Compact DeepSeek model optimized for research and experimentation
+  to_param: deepseek-r1
+deepseek-r1:671b:
+  name: DeepSeek-R1
+  parameters: 671B
+  description: Massive-scale DeepSeek model focused on advanced AI reasoning and
+    capabilities
+  to_param: deepseek-r1:671b
+deepseek-coder:
+  name: DeepSeek-Coder
+  parameters: 1.3B
+  description: Lightweight code generation model trained on 2T tokens of code and natural language
+  to_param: deepseek-coder
+deepseek-coder:6.7b:
+  name: DeepSeek-Coder
+  parameters: 6.7B
+  description: Mid-sized DeepSeek-Coder model offering a strong balance between speed and capability for code-related tasks
+  to_param: deepseek-coder:6.7b
+deepseek-coder:33b:
+  name: DeepSeek-Coder
+  parameters: 33B
+  description: Large DeepSeek-Coder model with high performance for code generation, understanding, and multilingual coding tasks
+  to_param: deepseek-coder:33b
+
+llama3.3:
+  name: Llama
+  parameters: 70B
+  description: Latest large Llama model designed for high-end performance in reasoning
+    and language tasks
+  to_param: llama3.3
+llama3.2:
+  name: Llama
+  parameters: 3B
+  description: Small but capable version of Llama 3.2 for lightweight applications
+  to_param: llama3.2
+llama3.2:1b:
+  name: Llama
+  parameters: 1B
+  description: Tiny version of Llama 3.2, extremely lightweight and fast
+  to_param: llama3.2:1b
+llama3.2-vision:
+  name: Llama Vision
+  parameters: 11B
+  description: Multimodal Llama 3.2 model with vision capabilities (images + text)
+  to_param: llama3.2-vision
+llama3.2-vision:90b:
+  name: Llama Vision
+  parameters: 90B
+  description: Large-scale vision-capable Llama model for advanced multimodal tasks
+  to_param: llama3.2-vision:90b
+llama3.1:
+  name: Llama
+  parameters: 8B
+  description: General-purpose Llama model designed for good accuracy and performance
+    balance
+  to_param: llama3.1
+llama3.1:405b:
+  name: Llama
+  parameters: 405B
+  description: Extremely large-scale version of Llama 3.1, suitable for advanced tasks
+  to_param: llama3.1:405b
+
+phi4:
+  name: Phi
+  parameters: 14B
+  description: Phi 4 is known for compact size and competitive performance in general
+    tasks
+  to_param: phi4
+phi4-mini:
+  name: Phi Mini
+  parameters: 3.8B
+  description: Lightweight variant of Phi 4 ideal for quick inference on constrained systems
+  to_param: phi4-mini
+
+mistral:
+  name: Mistral
+  parameters: 7B
+  description: Popular and versatile open model for general language tasks
+  to_param: mistral
+
+moondream:
+  name: Moondream
+  parameters: 1.4B
+  description: Compact vision-enabled model with strong general performance
+  to_param: moondream
+
+neural-chat:
+  name: Neural Chat
+  parameters: 7B
+  description: Chat-focused model fine-tuned for natural conversations
+  to_param: neural-chat
+
+starling-lm:
+  name: Starling
+  parameters: 7B
+  description: Model focused on instruction-following and conversational performance
+  to_param: starling-lm
+
+codellama:
+  name: Code Llama
+  parameters: 7B
+  description: Llama model variant fine-tuned specifically for code understanding
+    and generation
+  to_param: codellama
+
+llama2-uncensored:
+  name: Llama 2 Uncensored
+  parameters: 7B
+  description: Unfiltered version of Llama 2 for unrestricted language modeling
+  to_param: llama2-uncensored
+
+llava:
+  name: LLaVA
+  parameters: 7B
+  description: Multimodal model combining vision and language understanding
+  to_param: llava
+
+granite3.2:
+  name: Granite
+  parameters: 8B
+  description: IBM’s Granite model for enterprise-grade language applications
+  to_param: granite3.2
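Each entry in these model files records a display name, a parameter count, a short description, and a to_param value that is sent to the provider. The specs further down look entries up via provider.models["..."]; purely as an illustration of the data shape (this is not the gem's own LLM::Model loader), an entry can be read with Ruby's YAML stdlib:

```ruby
require "yaml"

# Illustrative only: path and lookup key taken from the file above.
models = YAML.load_file("share/llm/models/ollama.yml")
entry  = models["llama3.2-vision"]
entry["name"]        # => "Llama Vision"
entry["parameters"]  # => "11B"
entry["to_param"]    # => "llama3.2-vision", the value passed to the Ollama API
```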
data/share/llm/models/openai.yml (new file):

@@ -0,0 +1,46 @@
+---
+o3-mini:
+  name: OpenAI o3-mini
+  parameters: Unknown
+  description: Fast, flexible, intelligent reasoning model
+  to_param: o3-mini
+o1:
+  name: OpenAI o1
+  parameters: Unknown
+  description: High-intelligence reasoning model
+  to_param: o1
+o1-mini:
+  name: OpenAI o1-mini
+  parameters: Unknown
+  description: Faster, more affordable reasoning model than o1
+  to_param: o1-mini
+o1-pro:
+  name: OpenAI o1-pro
+  parameters: Unknown
+  description: More compute than o1 for better responses
+  to_param: o1-pro
+gpt-4.5-preview:
+  name: GPT-4.5 Preview
+  parameters: Unknown
+  description: Largest and most capable GPT model
+  to_param: gpt-4.5-preview
+gpt-4o:
+  name: GPT-4o
+  parameters: Unknown
+  description: Fast, intelligent, flexible GPT model
+  to_param: gpt-4o
+gpt-4o-mini:
+  name: GPT-4o Mini
+  parameters: Mini
+  description: Fast, affordable small model for focused tasks
+  to_param: gpt-4o-mini
+gpt-4o-realtime-preview:
+  name: GPT-4o Realtime
+  parameters: Unknown
+  description: Realtime model for text and audio inputs/outputs
+  to_param: gpt-4o-realtime-preview
+gpt-3.5-turbo:
+  name: GPT-3.5 Turbo
+  parameters: Unknown
+  description: Legacy GPT model for cheaper chat and non-chat tasks
+  to_param: gpt-3.5-turbo
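The conversation spec below pins one of these models for the duration of a conversation by looking it up in the registry and passing it to the constructor. A minimal sketch of that pattern, with every name taken from the spec:

```ruby
provider = LLM.openai(ENV["LLM_SECRET"])
bot = LLM::Conversation.new(provider, model: provider.models["o3-mini"]).lazy
bot.chat "Keep your answers short and concise", :system
bot.chat "What is 5+5?"
# In the recorded cassette the completion reports "o3-mini-2025-01-31"
# on every turn, i.e. the chosen model sticks for the whole conversation.
bot.recent_message.extra[:completion].model
```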
data/spec/anthropic/completion_spec.rb:

@@ -3,29 +3,11 @@
 require "setup"

 RSpec.describe "LLM::Anthropic: completions" do
-  subject(:anthropic) { LLM.anthropic(
+  subject(:anthropic) { LLM.anthropic(token) }
+  let(:token) { ENV["LLM_SECRET"] || "TOKEN" }

-  before(:each, :success) do
-    stub_request(:post, "https://api.anthropic.com/v1/messages")
-      .with(headers: {"Content-Type" => "application/json"})
-      .to_return(
-        status: 200,
-        body: fixture("anthropic/completions/ok_completion.json"),
-        headers: {"Content-Type" => "application/json"}
-      )
-  end
-
-  before(:each, :unauthorized) do
-    stub_request(:post, "https://api.anthropic.com/v1/messages")
-      .with(headers: {"Content-Type" => "application/json"})
-      .to_return(
-        status: 403,
-        body: fixture("anthropic/completions/unauthorized_completion.json"),
-        headers: {"Content-Type" => "application/json"}
-      )
-  end
-
-  context "when given a successful response", :success do
+  context "when given a successful response",
+    vcr: {cassette_name: "anthropic/completions/successful_response"} do
     subject(:response) { anthropic.complete("Hello, world", :user) }

     it "returns a completion" do
@@ -38,9 +20,9 @@ RSpec.describe "LLM::Anthropic: completions" do

     it "includes token usage" do
       expect(response).to have_attributes(
-        prompt_tokens:
-        completion_tokens:
-        total_tokens:
+        prompt_tokens: 10,
+        completion_tokens: 30,
+        total_tokens: 40
       )
     end

@@ -50,7 +32,7 @@ RSpec.describe "LLM::Anthropic: completions" do
     it "has choices" do
       expect(choice).to have_attributes(
         role: "assistant",
-        content: "
+        content: "Hello! How can I assist you today? Feel free to ask me any questions or let me know if you need help with anything."
       )
     end

@@ -60,8 +42,10 @@ RSpec.describe "LLM::Anthropic: completions" do
     end
   end

-  context "when given an unauthorized response",
+  context "when given an unauthorized response",
+    vcr: {cassette_name: "anthropic/completions/unauthorized_response"} do
     subject(:response) { anthropic.complete("Hello", :user) }
+    let(:token) { "BADTOKEN" }

     it "raises an error" do
       expect { response }.to raise_error(LLM::Error::Unauthorized)
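All of the spec rewrites in this release follow the pattern seen above: the hand-written WebMock stubs are dropped in favour of VCR cassettes selected through vcr: {cassette_name: ...} metadata, with the real key read from LLM_SECRET and a dummy "TOKEN"/"BADTOKEN" otherwise. That metadata style needs RSpec integration in the shared test setup; spec/setup.rb only appears in the file list (+7 -16), so the following is a hypothetical sketch of the kind of configuration involved, not the gem's actual file:

```ruby
# Hypothetical spec/setup.rb excerpt (the real file is not shown in this diff)
require "vcr"
require "webmock/rspec"

VCR.configure do |config|
  config.cassette_library_dir = "spec/fixtures/cassettes"     # assumed path
  config.hook_into :webmock
  config.configure_rspec_metadata!                            # enables vcr: {cassette_name: ...}
  config.filter_sensitive_data("TOKEN") { ENV["LLM_SECRET"] } # keep the key out of cassettes
end
```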
data/spec/anthropic/embedding_spec.rb (new file):

@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+require "setup"
+
+RSpec.describe "LLM::Anthropic: embeddings" do
+  let(:anthropic) { LLM.anthropic(token) }
+  let(:token) { ENV["LLM_SECRET"] || "TOKEN" }
+
+  context "when given a successful response",
+    vcr: {cassette_name: "anthropic/embeddings/successful_response"} do
+    subject(:response) { anthropic.embed("Hello, world", token:) }
+
+    it "returns an embedding" do
+      expect(response).to be_instance_of(LLM::Response::Embedding)
+    end
+
+    it "returns a model" do
+      expect(response.model).to eq("voyage-2")
+    end
+
+    it "has embeddings" do
+      expect(response.embeddings).to be_instance_of(Array)
+    end
+  end
+end
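This new spec expects the embedding response to report the model "voyage-2", which lines up with the VoyageAI provider files added in this release: Anthropic's own API does not offer embeddings, so they appear to be delegated to VoyageAI. Usage as far as the spec shows it, with the second key name being a guess (the diff does not say what token: refers to beyond the embedding backend):

```ruby
anthropic = LLM.anthropic(ENV["LLM_SECRET"])
# token: presumably carries the VoyageAI key; the spec simply passes `token:`.
response = anthropic.embed("Hello, world", token: ENV["VOYAGEAI_SECRET"]) # hypothetical ENV name
response.model      # => "voyage-2" in the recorded cassette
response.embeddings # => Array of embedding vectors
```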
data/spec/gemini/completion_spec.rb:

@@ -3,44 +3,26 @@
 require "setup"

 RSpec.describe "LLM::Gemini: completions" do
-  subject(:gemini) { LLM.gemini(
+  subject(:gemini) { LLM.gemini(token) }
+  let(:token) { ENV["LLM_SECRET"] || "TOKEN" }

-  before(:each, :success) do
-    stub_request(:post, "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=")
-      .with(headers: {"Content-Type" => "application/json"})
-      .to_return(
-        status: 200,
-        body: fixture("gemini/completions/ok_completion.json"),
-        headers: {"Content-Type" => "application/json"}
-      )
-  end
-
-  before(:each, :unauthorized) do
-    stub_request(:post, "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=")
-      .with(headers: {"Content-Type" => "application/json"})
-      .to_return(
-        status: 400,
-        body: fixture("gemini/completions/unauthorized_completion.json"),
-        headers: {"Content-Type" => "application/json"}
-      )
-  end
-
-  context "when given a successful response", :success do
-    subject(:response) { gemini.complete(LLM::Message.new("user", "Hello!")) }
+  context "when given a successful response",
+    vcr: {cassette_name: "gemini/completions/successful_response"} do
+    subject(:response) { gemini.complete("Hello!", :user) }

     it "returns a completion" do
       expect(response).to be_a(LLM::Response::Completion)
     end

     it "returns a model" do
-      expect(response.model).to eq("gemini-1.5-flash
+      expect(response.model).to eq("gemini-1.5-flash")
     end

     it "includes token usage" do
       expect(response).to have_attributes(
         prompt_tokens: 2,
-        completion_tokens:
-        total_tokens:
+        completion_tokens: 11,
+        total_tokens: 13
       )
     end

@@ -52,7 +34,7 @@ RSpec.describe "LLM::Gemini: completions" do
       choices: [
         have_attributes(
           role: "model",
-          content: "Hello! How can I help you today
+          content: "Hello there! How can I help you today?\n"
         )
       ]
     )
@@ -64,8 +46,31 @@ RSpec.describe "LLM::Gemini: completions" do
     end
   end

-  context "when given
-
+  context "when given a thread of messages",
+    vcr: {cassette_name: "gemini/completions/successful_response_thread"} do
+    subject(:response) do
+      gemini.complete "What is your name? What age are you?", :user, messages: [
+        {role: "user", content: "Answer all of my questions"},
+        {role: "user", content: "Your name is Pablo, you are 25 years old and you are my amigo"}
+      ]
+    end
+
+    it "has choices" do
+      expect(response).to have_attributes(
+        choices: [
+          have_attributes(
+            role: "model",
+            content: "My name is Pablo, and I am 25 years old. ¡Amigo!\n"
+          )
+        ]
+      )
+    end
+  end
+
+  context "when given an unauthorized response",
+    vcr: {cassette_name: "gemini/completions/unauthorized_response"} do
+    subject(:response) { gemini.complete("Hello!", :user) }
+    let(:token) { "BADTOKEN" }

     it "raises an error" do
       expect { response }.to raise_error(LLM::Error::Unauthorized)
data/spec/gemini/embedding_spec.rb:

@@ -3,19 +3,11 @@
 require "setup"

 RSpec.describe "LLM::OpenAI: embeddings" do
-  let(:gemini) { LLM.gemini(
+  let(:gemini) { LLM.gemini(token) }
+  let(:token) { ENV["LLM_SECRET"] || "TOKEN" }

-
-
-      .with(headers: {"Content-Type" => "application/json"})
-      .to_return(
-        status: 200,
-        body: fixture("gemini/embeddings/hello_world_embedding.json"),
-        headers: {"Content-Type" => "application/json"}
-      )
-  end
-
-  context "when given a successful response", :success do
+  context "when given a successful response",
+    vcr: {cassette_name: "gemini/embeddings/successful_response"} do
     subject(:response) { gemini.embed("Hello, world") }

     it "returns an embedding" do
data/spec/llm/conversation_spec.rb:

@@ -1,6 +1,8 @@
 # frozen_string_literal: true

-RSpec.describe LLM::Conversation do
+require "setup"
+
+RSpec.describe "LLM::Conversation: non-lazy" do
   shared_examples "a multi-turn conversation" do
     context "when given a thread of messages" do
       let(:inputs) do
@@ -54,3 +56,93 @@ RSpec.describe LLM::Conversation do
     include_examples "a multi-turn conversation"
   end
 end
+
+RSpec.describe "LLM::Conversation: lazy" do
+  let(:described_class) { LLM::Conversation }
+  let(:token) { ENV["LLM_SECRET"] || "TOKEN" }
+  let(:prompt) { "Keep your answers short and concise, and provide three answers to the three questions" }
+
+  context "with gemini",
+    vcr: {cassette_name: "gemini/lazy_conversation/successful_response"} do
+    let(:provider) { LLM.gemini(token) }
+    let(:conversation) { described_class.new(provider).lazy }
+
+    context "when given a thread of messages" do
+      subject(:message) { conversation.messages.to_a[-1] }
+
+      before do
+        conversation.chat prompt
+        conversation.chat "What is 3+2 ?"
+        conversation.chat "What is 5+5 ?"
+        conversation.chat "What is 5+7 ?"
+      end
+
+      it "maintains a conversation" do
+        is_expected.to have_attributes(
+          role: "model",
+          content: "5\n10\n12\n"
+        )
+      end
+    end
+  end
+
+  context "with openai" do
+    let(:provider) { LLM.openai(token) }
+    let(:conversation) { described_class.new(provider).lazy }
+
+    context "when given a thread of messages",
+      vcr: {cassette_name: "openai/lazy_conversation/successful_response"} do
+      subject(:message) { conversation.recent_message }
+
+      before do
+        conversation.chat prompt, :system
+        conversation.chat "What is 3+2 ?"
+        conversation.chat "What is 5+5 ?"
+        conversation.chat "What is 5+7 ?"
+      end
+
+      it "maintains a conversation" do
+        is_expected.to have_attributes(
+          role: "assistant",
+          content: "1. 5 \n2. 10 \n3. 12 "
+        )
+      end
+    end
+
+    context "when given a specific model",
+      vcr: {cassette_name: "openai/lazy_conversation/successful_response_o3_mini"} do
+      let(:conversation) { described_class.new(provider, model: provider.models["o3-mini"]).lazy }
+
+      it "maintains the model throughout a conversation" do
+        conversation.chat(prompt, :system)
+        expect(conversation.recent_message.extra[:completion].model).to eq("o3-mini-2025-01-31")
+        conversation.chat("What is 5+5?")
+        expect(conversation.recent_message.extra[:completion].model).to eq("o3-mini-2025-01-31")
+      end
+    end
+  end
+
+  context "with ollama",
+    vcr: {cassette_name: "ollama/lazy_conversation/successful_response"} do
+    let(:provider) { LLM.ollama(nil, host: "eel.home.network") }
+    let(:conversation) { described_class.new(provider).lazy }
+
+    context "when given a thread of messages" do
+      subject(:message) { conversation.recent_message }
+
+      before do
+        conversation.chat prompt, :system
+        conversation.chat "What is 3+2 ?"
+        conversation.chat "What is 5+5 ?"
+        conversation.chat "What is 5+7 ?"
+      end
+
+      it "maintains a conversation" do
+        is_expected.to have_attributes(
+          role: "assistant",
+          content: "Here are the calculations:\n\n1. 3 + 2 = 5\n2. 5 + 5 = 10\n3. 5 + 7 = 12"
+        )
+      end
+    end
+  end
+end
data/spec/ollama/completion_spec.rb:

@@ -3,19 +3,10 @@
 require "setup"

 RSpec.describe "LLM::Ollama: completions" do
-
-
-  before(:each, :success) do
-    stub_request(:post, "localhost:11434/api/chat")
-      .with(headers: {"Content-Type" => "application/json"})
-      .to_return(
-        status: 200,
-        body: fixture("ollama/completions/ok_completion.json"),
-        headers: {"Content-Type" => "application/json"}
-      )
-  end
+  let(:ollama) { LLM.ollama(nil, host: "eel.home.network") }

-  context "when given a successful response",
+  context "when given a successful response",
+    vcr: {cassette_name: "ollama/completions/successful_response"} do
     subject(:response) { ollama.complete("Hello!", :user) }

     it "returns a completion" do
@@ -28,9 +19,9 @@ RSpec.describe "LLM::Ollama: completions" do

     it "includes token usage" do
       expect(response).to have_attributes(
-        prompt_tokens:
-        completion_tokens:
-        total_tokens:
+        prompt_tokens: 27,
+        completion_tokens: 26,
+        total_tokens: 53
       )
     end

@@ -40,7 +31,7 @@ RSpec.describe "LLM::Ollama: completions" do
     it "has choices" do
       expect(choice).to have_attributes(
         role: "assistant",
-        content: "Hello!
+        content: "Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?"
       )
     end

data/spec/ollama/embedding_spec.rb:

@@ -3,13 +3,22 @@
 require "setup"

 RSpec.describe "LLM::Ollama: embeddings" do
-  let(:ollama) { LLM.ollama("") }
+  let(:ollama) { LLM.ollama(nil, host: "eel.home.network") }

-  context "when given a successful response",
-
+  context "when given a successful response",
+    vcr: {cassette_name: "ollama/embeddings/successful_response"} do
+    subject(:response) { ollama.embed(["This is a paragraph", "This is another one"]) }

-    it "
-      expect
+    it "returns an embedding" do
+      expect(response).to be_instance_of(LLM::Response::Embedding)
+    end
+
+    it "returns a model" do
+      expect(response.model).to eq("llama3.2")
+    end
+
+    it "has embeddings" do
+      expect(response.embeddings.size).to eq(2)
     end
   end
 end
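For the Ollama provider the key argument is nil and the server is chosen with the host: keyword; embed accepts an array of inputs and returns one vector per input. A usage sketch mirroring the spec (the host in the spec is the author's own machine, so substitute your own Ollama server):

```ruby
ollama = LLM.ollama(nil, host: "localhost")  # the spec records against host: "eel.home.network"
res = ollama.embed(["This is a paragraph", "This is another one"])
res.model            # => "llama3.2" in the recorded cassette
res.embeddings.size  # => 2, one embedding per input string
```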