llm.rb 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/README.md +83 -22
  3. data/lib/llm/conversation.rb +14 -2
  4. data/lib/llm/core_ext/ostruct.rb +0 -0
  5. data/lib/llm/error.rb +0 -0
  6. data/lib/llm/file.rb +0 -0
  7. data/lib/llm/http_client.rb +0 -0
  8. data/lib/llm/lazy_conversation.rb +14 -2
  9. data/lib/llm/message.rb +1 -1
  10. data/lib/llm/message_queue.rb +0 -0
  11. data/lib/llm/model.rb +7 -0
  12. data/lib/llm/provider.rb +117 -98
  13. data/lib/llm/providers/anthropic/error_handler.rb +1 -1
  14. data/lib/llm/providers/anthropic/format.rb +0 -0
  15. data/lib/llm/providers/anthropic/response_parser.rb +0 -0
  16. data/lib/llm/providers/anthropic.rb +31 -15
  17. data/lib/llm/providers/gemini/error_handler.rb +0 -0
  18. data/lib/llm/providers/gemini/format.rb +0 -0
  19. data/lib/llm/providers/gemini/response_parser.rb +0 -0
  20. data/lib/llm/providers/gemini.rb +25 -14
  21. data/lib/llm/providers/ollama/error_handler.rb +0 -0
  22. data/lib/llm/providers/ollama/format.rb +0 -0
  23. data/lib/llm/providers/ollama/response_parser.rb +13 -0
  24. data/lib/llm/providers/ollama.rb +32 -8
  25. data/lib/llm/providers/openai/error_handler.rb +0 -0
  26. data/lib/llm/providers/openai/format.rb +0 -0
  27. data/lib/llm/providers/openai/response_parser.rb +5 -3
  28. data/lib/llm/providers/openai.rb +22 -12
  29. data/lib/llm/providers/voyageai/error_handler.rb +32 -0
  30. data/lib/llm/providers/voyageai/response_parser.rb +13 -0
  31. data/lib/llm/providers/voyageai.rb +44 -0
  32. data/lib/llm/response/completion.rb +0 -0
  33. data/lib/llm/response/embedding.rb +0 -0
  34. data/lib/llm/response.rb +0 -0
  35. data/lib/llm/version.rb +1 -1
  36. data/lib/llm.rb +18 -8
  37. data/llm.gemspec +6 -1
  38. data/share/llm/models/anthropic.yml +35 -0
  39. data/share/llm/models/gemini.yml +35 -0
  40. data/share/llm/models/ollama.yml +155 -0
  41. data/share/llm/models/openai.yml +46 -0
  42. data/spec/anthropic/completion_spec.rb +11 -27
  43. data/spec/anthropic/embedding_spec.rb +25 -0
  44. data/spec/gemini/completion_spec.rb +13 -29
  45. data/spec/gemini/embedding_spec.rb +4 -12
  46. data/spec/llm/lazy_conversation_spec.rb +45 -63
  47. data/spec/ollama/completion_spec.rb +7 -16
  48. data/spec/ollama/embedding_spec.rb +14 -5
  49. data/spec/openai/completion_spec.rb +19 -43
  50. data/spec/openai/embedding_spec.rb +4 -12
  51. data/spec/readme_spec.rb +9 -12
  52. data/spec/setup.rb +7 -16
  53. metadata +81 -2
data/lib/llm/providers/ollama/response_parser.rb CHANGED
@@ -2,6 +2,19 @@
 
 class LLM::Ollama
   module ResponseParser
+    ##
+    # @param [Hash] body
+    #  The response body from the LLM provider
+    # @return [Hash]
+    def parse_embedding(body)
+      {
+        model: body["model"],
+        embeddings: body["data"].map { _1["embedding"] },
+        prompt_tokens: body.dig("usage", "prompt_tokens"),
+        total_tokens: body.dig("usage", "total_tokens")
+      }
+    end
+
     ##
     # @param [Hash] body
     #  The response body from the LLM provider
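For orientation (not part of the diff): the new parser turns an OpenAI-compatible embeddings body into a plain Hash. A minimal sketch with an invented sample body; the extend-onto-Object trick stands in for how the library extends the parser onto a Response:

require "llm"
require "llm/providers/ollama"  # the provider is normally loaded lazily via LLM.ollama

# Invented body, shaped like Ollama's OpenAI-compatible /v1/embeddings response.
body = {
  "model" => "llama3.2",
  "data" => [{"embedding" => [0.1, 0.2, 0.3]}],
  "usage" => {"prompt_tokens" => 5, "total_tokens" => 5}
}
parser = Object.new.extend(LLM::Ollama::ResponseParser)
parser.parse_embedding(body)
# => {model: "llama3.2", embeddings: [[0.1, 0.2, 0.3]], prompt_tokens: 5, total_tokens: 5}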
data/lib/llm/providers/ollama.rb CHANGED
@@ -11,7 +11,6 @@ module LLM
     include Format
 
     HOST = "localhost"
-    DEFAULT_PARAMS = {model: "llama3.2", stream: false}.freeze
 
     ##
     # @param secret (see LLM::Provider#initialize)
@@ -19,25 +18,50 @@ module LLM
       super(secret, host: HOST, port: 11434, ssl: false, **)
     end
 
+    ##
+    # @param input (see LLM::Provider#embed)
+    # @return (see LLM::Provider#embed)
+    def embed(input, **params)
+      params = {model: "llama3.2"}.merge!(params)
+      req = Net::HTTP::Post.new("/v1/embeddings", headers)
+      req.body = JSON.dump({input:}.merge!(params))
+      res = request(@http, req)
+      Response::Embedding.new(res).extend(response_parser)
+    end
+
     ##
     # @see https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion Ollama docs
     # @param prompt (see LLM::Provider#complete)
     # @param role (see LLM::Provider#complete)
     # @return (see LLM::Provider#complete)
     def complete(prompt, role = :user, **params)
-      req = Net::HTTP::Post.new ["/api", "chat"].join("/")
+      params = {model: "llama3.2", stream: false}.merge!(params)
+      req = Net::HTTP::Post.new("/api/chat", headers)
       messages = [*(params.delete(:messages) || []), LLM::Message.new(role, prompt)]
-      params = DEFAULT_PARAMS.merge(params)
-      body = {messages: messages.map(&:to_h)}.merge!(params)
-      req = preflight(req, body)
-      res = request(@http, req)
+      req.body = JSON.dump({messages: messages.map(&:to_h)}.merge!(params))
+      res = request(@http, req)
       Response::Completion.new(res).extend(response_parser)
     end
 
+    ##
+    # @return (see LLM::Provider#assistant_role)
+    def assistant_role
+      "assistant"
+    end
+
+    ##
+    # @return (see LLM::Provider#models)
+    def models
+      @models ||= load_models!("ollama")
+    end
+
    private
 
-    def auth(req)
-      req["Authorization"] = "Bearer #{@secret}"
+    def headers
+      {
+        "Content-Type" => "application/json",
+        "Authorization" => "Bearer #{@secret}"
+      }
     end
 
     def response_parser
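Taken together, the Ollama provider now speaks both chat and embeddings, with defaults applied per call instead of via the removed DEFAULT_PARAMS constant. A hedged usage sketch (not part of the diff; assumes a local Ollama server on port 11434 with llama3.2 pulled):

require "llm"

llm = LLM.ollama("")                                 # no real secret needed for a local server
llm.embed("Hello from llm.rb")                       # POST /v1/embeddings, defaults to llama3.2
llm.complete("Say hi in one word", :user)            # POST /api/chat, defaults to llama3.2
llm.complete("Say hi", :user, model: "llama3.2:1b")  # per-call model override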
data/lib/llm/providers/openai/error_handler.rb CHANGED
File without changes
data/lib/llm/providers/openai/format.rb CHANGED
File without changes
data/lib/llm/providers/openai/response_parser.rb CHANGED
@@ -2,12 +2,14 @@
 
 class LLM::OpenAI
   module ResponseParser
+    ##
+    # @param [Hash] body
+    #  The response body from the LLM provider
+    # @return [Hash]
     def parse_embedding(body)
       {
         model: body["model"],
-        embeddings: body.dig("data").map do |data|
-          data["embedding"]
-        end,
+        embeddings: body["data"].map { _1["embedding"] },
         prompt_tokens: body.dig("usage", "prompt_tokens"),
         total_tokens: body.dig("usage", "total_tokens")
       }
data/lib/llm/providers/openai.rb CHANGED
@@ -11,7 +11,6 @@ module LLM
     include Format
 
     HOST = "api.openai.com"
-    DEFAULT_PARAMS = {model: "gpt-4o-mini"}.freeze
 
     ##
     # @param secret (see LLM::Provider#initialize)
@@ -23,10 +22,9 @@ module LLM
     # @param input (see LLM::Provider#embed)
     # @return (see LLM::Provider#embed)
     def embed(input, **params)
-      req = Net::HTTP::Post.new ["/v1", "embeddings"].join("/")
-      body = {input:, model: "text-embedding-3-small"}.merge!(params)
-      req = preflight(req, body)
-      res = request @http, req
+      req = Net::HTTP::Post.new("/v1/embeddings", headers)
+      req.body = JSON.dump({input:, model: "text-embedding-3-small"}.merge!(params))
+      res = request(@http, req)
       Response::Embedding.new(res).extend(response_parser)
     end
 
@@ -36,19 +34,31 @@ module LLM
     # @param role (see LLM::Provider#complete)
     # @return (see LLM::Provider#complete)
     def complete(prompt, role = :user, **params)
-      req = Net::HTTP::Post.new ["/v1", "chat", "completions"].join("/")
+      params = {model: "gpt-4o-mini"}.merge!(params)
+      req = Net::HTTP::Post.new("/v1/chat/completions", headers)
       messages = [*(params.delete(:messages) || []), Message.new(role, prompt)]
-      params = DEFAULT_PARAMS.merge(params)
-      body = {messages: format(messages)}.merge!(params)
-      req = preflight(req, body)
-      res = request(@http, req)
+      req.body = JSON.dump({messages: format(messages)}.merge!(params))
+      res = request(@http, req)
       Response::Completion.new(res).extend(response_parser)
     end
 
+    ##
+    # @return (see LLM::Provider#assistant_role)
+    def assistant_role
+      "assistant"
+    end
+
+    def models
+      @models ||= load_models!("openai")
+    end
+
    private
 
-    def auth(req)
-      req["Authorization"] = "Bearer #{@secret}"
+    def headers
+      {
+        "Content-Type" => "application/json",
+        "Authorization" => "Bearer #{@secret}"
+      }
     end
 
     def response_parser
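As with Ollama, OpenAI's defaults moved out of the frozen DEFAULT_PARAMS constant into a per-call merge, so the model can be overridden per request. A short sketch (not part of the diff; assumes OPENAI_SECRET is set):

require "llm"

llm = LLM.openai(ENV["OPENAI_SECRET"])
llm.complete("Hello", :user)                   # defaults to gpt-4o-mini
llm.complete("Hello", :user, model: "gpt-4o")  # per-call override
llm.embed("Hello")                             # defaults to text-embedding-3-small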
data/lib/llm/providers/voyageai/error_handler.rb ADDED
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+class LLM::VoyageAI
+  class ErrorHandler
+    ##
+    # @return [Net::HTTPResponse]
+    #  Non-2XX response from the server
+    attr_reader :res
+
+    ##
+    # @param [Net::HTTPResponse] res
+    #  The response from the server
+    # @return [LLM::OpenAI::ErrorHandler]
+    def initialize(res)
+      @res = res
+    end
+
+    ##
+    # @raise [LLM::Error]
+    #  Raises a subclass of {LLM::Error LLM::Error}
+    def raise_error!
+      case res
+      when Net::HTTPUnauthorized
+        raise LLM::Error::Unauthorized.new { _1.response = res }, "Authentication error"
+      when Net::HTTPTooManyRequests
+        raise LLM::Error::RateLimit.new { _1.response = res }, "Too many requests"
+      else
+        raise LLM::Error::BadResponse.new { _1.response = res }, "Unexpected response"
+      end
+    end
+  end
+end
data/lib/llm/providers/voyageai/response_parser.rb ADDED
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+class LLM::VoyageAI
+  module ResponseParser
+    def parse_embedding(body)
+      {
+        model: body["model"],
+        embeddings: body["data"].map { _1["embedding"] },
+        total_tokens: body.dig("usage", "total_tokens")
+      }
+    end
+  end
+end
data/lib/llm/providers/voyageai.rb ADDED
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+module LLM
+  class VoyageAI < Provider
+    require_relative "voyageai/error_handler"
+    require_relative "voyageai/response_parser"
+    HOST = "api.voyageai.com"
+
+    ##
+    # @param secret (see LLM::Provider#initialize)
+    def initialize(secret, **)
+      super(secret, host: HOST, **)
+    end
+
+    ##
+    # Provides an embedding via VoyageAI per
+    # [Anthropic's recommendation](https://docs.anthropic.com/en/docs/build-with-claude/embeddings)
+    # @param input (see LLM::Provider#embed)
+    # @return (see LLM::Provider#embed)
+    def embed(input, **params)
+      req = Net::HTTP::Post.new("/v1/embeddings", headers)
+      req.body = JSON.dump({input:, model: "voyage-2"}.merge!(params))
+      res = request(@http, req)
+      Response::Embedding.new(res).extend(response_parser)
+    end
+
+    private
+
+    def headers
+      {
+        "Content-Type" => "application/json",
+        "Authorization" => "Bearer #{@secret}"
+      }
+    end
+
+    def response_parser
+      LLM::VoyageAI::ResponseParser
+    end
+
+    def error_handler
+      LLM::VoyageAI::ErrorHandler
+    end
+  end
+end
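Anthropic has no embeddings endpoint of its own, which is why the new VoyageAI provider exists and why LLM.anthropic also requires it (see the data/lib/llm.rb diff below). A hedged sketch of direct use (not part of the diff; assumes a valid Voyage API key in VOYAGE_SECRET):

require "llm"

llm = LLM.voyageai(ENV["VOYAGE_SECRET"])
llm.embed("The quick brown fox")                     # defaults to voyage-2
llm.embed("The quick brown fox", model: "voyage-3")  # hypothetical override via params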
data/lib/llm/response/completion.rb CHANGED
File without changes
data/lib/llm/response/embedding.rb CHANGED
File without changes
data/lib/llm/response.rb CHANGED
File without changes
data/lib/llm/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module LLM
-  VERSION = "0.1.0"
+  VERSION = "0.2.0"
 end
data/lib/llm.rb CHANGED
@@ -6,6 +6,7 @@ module LLM
   require_relative "llm/message"
   require_relative "llm/response"
   require_relative "llm/file"
+  require_relative "llm/model"
   require_relative "llm/provider"
   require_relative "llm/conversation"
   require_relative "llm/lazy_conversation"
@@ -16,32 +17,41 @@ module LLM
   ##
   # @param secret (see LLM::Anthropic#initialize)
   # @return (see LLM::Anthropic#initialize)
-  def anthropic(secret, **)
+  def anthropic(secret, options = {})
     require_relative "llm/providers/anthropic" unless defined?(LLM::Anthropic)
-    LLM::Anthropic.new(secret, **)
+    require_relative "llm/providers/voyageai" unless defined?(LLM::VoyageAI)
+    LLM::Anthropic.new(secret, **options)
+  end
+
+  ##
+  # @param secret (see LLM::VoyageAI#initialize)
+  # @return (see LLM::VoyageAI#initialize)
+  def voyageai(secret, options = {})
+    require_relative "llm/providers/voyageai" unless defined?(LLM::VoyageAI)
+    LLM::VoyageAI.new(secret, **options)
   end
 
   ##
   # @param secret (see LLM::Gemini#initialize)
   # @return (see LLM::Gemini#initialize)
-  def gemini(secret, **)
+  def gemini(secret, options = {})
     require_relative "llm/providers/gemini" unless defined?(LLM::Gemini)
-    LLM::Gemini.new(secret, **)
+    LLM::Gemini.new(secret, **options)
   end
 
   ##
   # @param host (see LLM::Ollama#initialize)
   # @return (see LLM::Ollama#initialize)
-  def ollama(secret)
+  def ollama(secret, options = {})
     require_relative "llm/providers/ollama" unless defined?(LLM::Ollama)
-    LLM::Ollama.new(secret)
+    LLM::Ollama.new(secret, **options)
   end
 
   ##
   # @param secret (see LLM::OpenAI#initialize)
   # @return (see LLM::OpenAI#initialize)
-  def openai(secret, **)
+  def openai(secret, options = {})
     require_relative "llm/providers/openai" unless defined?(LLM::OpenAI)
-    LLM::OpenAI.new(secret, **)
+    LLM::OpenAI.new(secret, **options)
   end
 end
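Note the signature change across every entry point: bare keyword splats became an explicit options hash, and LLM.ollama now forwards options too. Since these methods accept no keyword arguments, Ruby 3 converts trailing keywords into the positional hash, so call sites can keep keyword syntax. A sketch (not part of the diff; values are illustrative):

require "llm"

LLM.openai("sk-...", host: "api.openai.com")  # keywords collected into options = {}
LLM.ollama(nil, host: "127.0.0.1")            # ollama previously accepted no options at all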
data/llm.gemspec CHANGED
@@ -25,16 +25,21 @@ Gem::Specification.new do |spec|
     "README.md", "LICENSE.txt",
     "lib/*.rb", "lib/**/*.rb",
     "spec/*.rb", "spec/**/*.rb",
-    "llm.gemspec"
+    "share/llm/models/*.yml", "llm.gemspec"
   ]
   spec.require_paths = ["lib"]
 
   spec.add_runtime_dependency "net-http", "~> 0.6.0"
   spec.add_runtime_dependency "json"
+  spec.add_runtime_dependency "yaml"
 
   spec.add_development_dependency "webmock", "~> 3.24.0"
   spec.add_development_dependency "yard", "~> 0.9.37"
   spec.add_development_dependency "kramdown", "~> 2.4"
   spec.add_development_dependency "webrick", "~> 1.8"
   spec.add_development_dependency "test-cmd.rb", "~> 0.12.0"
+  spec.add_development_dependency "rake", "~> 13.0"
+  spec.add_development_dependency "rspec", "~> 3.0"
+  spec.add_development_dependency "standard", "~> 1.40"
+  spec.add_development_dependency "vcr", "~> 6.0"
 end
data/share/llm/models/anthropic.yml ADDED
@@ -0,0 +1,35 @@
+claude-3-7-sonnet-20250219:
+  name: Claude 3.7 Sonnet
+  parameters: Unknown
+  description: Most intelligent Claude model with extended thinking and high capability
+  to_param: claude-3-7-sonnet-20250219
+
+claude-3-5-sonnet-20241022:
+  name: Claude 3.5 Sonnet (v2)
+  parameters: Unknown
+  description: High intelligence and capability; upgraded from previous Sonnet
+  to_param: claude-3-5-sonnet-20241022
+
+claude-3-5-sonnet-20240620:
+  name: Claude 3.5 Sonnet
+  parameters: Unknown
+  description: Intelligent and capable general-purpose model
+  to_param: claude-3-5-sonnet-20240620
+
+claude-3-5-haiku-20241022:
+  name: Claude 3.5 Haiku
+  parameters: Unknown
+  description: Blazing fast model for low-latency text generation
+  to_param: claude-3-5-haiku-20241022
+
+claude-3-opus-20240229:
+  name: Claude 3 Opus
+  parameters: Unknown
+  description: Top-level intelligence, fluency, and reasoning for complex tasks
+  to_param: claude-3-opus-20240229
+
+claude-3-haiku-20240307:
+  name: Claude 3 Haiku
+  parameters: Unknown
+  description: Fastest and most compact Claude model for near-instant responsiveness
+  to_param: claude-3-haiku-20240307
data/share/llm/models/gemini.yml ADDED
@@ -0,0 +1,35 @@
+gemini-2.5-pro-exp-03-25:
+  name: Gemini
+  parameters: Unknown
+  description: Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more
+  to_param: gemini-2.5-pro-exp-03-25
+
+gemini-2.0-flash:
+  name: Gemini
+  parameters: Unknown
+  description: Next generation features, speed, thinking, realtime streaming, and multimodal generation
+  to_param: gemini-2.0-flash
+
+gemini-2.0-flash-lite:
+  name: Gemini
+  parameters: Unknown
+  description: Cost efficiency and low latency
+  to_param: gemini-2.0-flash-lite
+
+gemini-1.5-flash:
+  name: Gemini
+  parameters: Unknown
+  description: Fast and versatile performance across a diverse variety of tasks
+  to_param: gemini-1.5-flash
+
+gemini-1.5-flash-8b:
+  name: Gemini
+  parameters: 8B
+  description: High volume and lower intelligence tasks
+  to_param: gemini-1.5-flash-8b
+
+gemini-1.5-pro:
+  name: Gemini
+  parameters: Unknown
+  description: Complex reasoning tasks requiring more intelligence
+  to_param: gemini-1.5-pro
data/share/llm/models/ollama.yml ADDED
@@ -0,0 +1,155 @@
+---
+gemma3:1b:
+  name: Gemma
+  parameters: 1B
+  description: Lightweight version of Google's Gemma 3 language model, suitable for
+    low-resource environments
+  to_param: gemma3:1b
+gemma3:
+  name: Gemma
+  parameters: 4B
+  description: Balanced Gemma 3 model providing good accuracy with reasonable size
+  to_param: gemma3
+gemma3:12b:
+  name: Gemma
+  parameters: 12B
+  description: Larger Gemma 3 model offering improved reasoning and generation abilities
+  to_param: gemma3:12b
+gemma3:27b:
+  name: Gemma
+  parameters: 27B
+  description: High-end Gemma 3 model focused on top-tier performance and accuracy
+  to_param: gemma3:27b
+
+qwq:
+  name: QwQ
+  parameters: 32B
+  description: Large-scale model with high parameter count for complex tasks and
+    high-quality generation
+  to_param: qwq
+
+deepseek-r1:
+  name: DeepSeek-R1
+  parameters: 7B
+  description: Compact DeepSeek model optimized for research and experimentation
+  to_param: deepseek-r1
+deepseek-r1:671b:
+  name: DeepSeek-R1
+  parameters: 671B
+  description: Massive-scale DeepSeek model focused on advanced AI reasoning and
+    capabilities
+  to_param: deepseek-r1:671b
+deepseek-coder:
+  name: DeepSeek-Coder
+  parameters: 1.3B
+  description: Lightweight code generation model trained on 2T tokens of code and natural language
+  to_param: deepseek-coder
+deepseek-coder:6.7b:
+  name: DeepSeek-Coder
+  parameters: 6.7B
+  description: Mid-sized DeepSeek-Coder model offering a strong balance between speed and capability for code-related tasks
+  to_param: deepseek-coder:6.7b
+deepseek-coder:33b:
+  name: DeepSeek-Coder
+  parameters: 33B
+  description: Large DeepSeek-Coder model with high performance for code generation, understanding, and multilingual coding tasks
+  to_param: deepseek-coder:33b
+
+llama3.3:
+  name: Llama
+  parameters: 70B
+  description: Latest large Llama model designed for high-end performance in reasoning
+    and language tasks
+  to_param: llama3.3
+llama3.2:
+  name: Llama
+  parameters: 3B
+  description: Small but capable version of Llama 3.2 for lightweight applications
+  to_param: llama3.2
+llama3.2:1b:
+  name: Llama
+  parameters: 1B
+  description: Tiny version of Llama 3.2, extremely lightweight and fast
+  to_param: llama3.2:1b
+llama3.2-vision:
+  name: Llama Vision
+  parameters: 11B
+  description: Multimodal Llama 3.2 model with vision capabilities (images + text)
+  to_param: llama3.2-vision
+llama3.2-vision:90b:
+  name: Llama Vision
+  parameters: 90B
+  description: Large-scale vision-capable Llama model for advanced multimodal tasks
+  to_param: llama3.2-vision:90b
+llama3.1:
+  name: Llama
+  parameters: 8B
+  description: General-purpose Llama model designed for good accuracy and performance
+    balance
+  to_param: llama3.1
+llama3.1:405b:
+  name: Llama
+  parameters: 405B
+  description: Extremely large-scale version of Llama 3.1, suitable for advanced tasks
+  to_param: llama3.1:405b
+
+phi4:
+  name: Phi
+  parameters: 14B
+  description: Phi 4 is known for compact size and competitive performance in general
+    tasks
+  to_param: phi4
+phi4-mini:
+  name: Phi Mini
+  parameters: 3.8B
+  description: Lightweight variant of Phi 4 ideal for quick inference on constrained systems
+  to_param: phi4-mini
+
+mistral:
+  name: Mistral
+  parameters: 7B
+  description: Popular and versatile open model for general language tasks
+  to_param: mistral
+
+moondream:
+  name: Moondream
+  parameters: 1.4B
+  description: Compact vision-enabled model with strong general performance
+  to_param: moondream
+
+neural-chat:
+  name: Neural Chat
+  parameters: 7B
+  description: Chat-focused model fine-tuned for natural conversations
+  to_param: neural-chat
+
+starling-lm:
+  name: Starling
+  parameters: 7B
+  description: Model focused on instruction-following and conversational performance
+  to_param: starling-lm
+
+codellama:
+  name: Code Llama
+  parameters: 7B
+  description: Llama model variant fine-tuned specifically for code understanding
+    and generation
+  to_param: codellama
+
+llama2-uncensored:
+  name: Llama 2 Uncensored
+  parameters: 7B
+  description: Unfiltered version of Llama 2 for unrestricted language modeling
+  to_param: llama2-uncensored
+
+llava:
+  name: LLaVA
+  parameters: 7B
+  description: Multimodal model combining vision and language understanding
+  to_param: llava
+
+granite3.2:
+  name: Granite
+  parameters: 8B
+  description: IBM’s Granite model for enterprise-grade language applications
+  to_param: granite3.2
data/share/llm/models/openai.yml ADDED
@@ -0,0 +1,46 @@
+---
+o3-mini:
+  name: OpenAI o3-mini
+  parameters: Unknown
+  description: Fast, flexible, intelligent reasoning model
+  to_param: o3-mini
+o1:
+  name: OpenAI o1
+  parameters: Unknown
+  description: High-intelligence reasoning model
+  to_param: o1
+o1-mini:
+  name: OpenAI o1-mini
+  parameters: Unknown
+  description: Faster, more affordable reasoning model than o1
+  to_param: o1-mini
+o1-pro:
+  name: OpenAI o1-pro
+  parameters: Unknown
+  description: More compute than o1 for better responses
+  to_param: o1-pro
+gpt-4.5-preview:
+  name: GPT-4.5 Preview
+  parameters: Unknown
+  description: Largest and most capable GPT model
+  to_param: gpt-4.5-preview
+gpt-4o:
+  name: GPT-4o
+  parameters: Unknown
+  description: Fast, intelligent, flexible GPT model
+  to_param: gpt-4o
+gpt-4o-mini:
+  name: GPT-4o Mini
+  parameters: Mini
+  description: Fast, affordable small model for focused tasks
+  to_param: gpt-4o-mini
+gpt-4o-realtime-preview:
+  name: GPT-4o Realtime
+  parameters: Unknown
+  description: Realtime model for text and audio inputs/outputs
+  to_param: gpt-4o-realtime-preview
+gpt-3.5-turbo:
+  name: GPT-3.5 Turbo
+  parameters: Unknown
+  description: Legacy GPT model for cheaper chat and non-chat tasks
+  to_param: gpt-3.5-turbo
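The four YAML manifests above feed the new models accessors (load_models! is presumably defined in LLM::Provider, whose diff is not shown here). A minimal stdlib sketch of reading one directly (not part of the diff; the relative path assumes the gem's source tree, and YAML.safe_load_file requires Ruby 3.1+):

require "yaml"

models = YAML.safe_load_file("share/llm/models/openai.yml")
models.each do |key, model|
  # Each entry carries name, parameters, description, and to_param fields.
  puts "#{key}: #{model["name"]} (#{model["parameters"]})"
end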