llm.rb 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +83 -22
- data/lib/llm/conversation.rb +14 -2
- data/lib/llm/core_ext/ostruct.rb +0 -0
- data/lib/llm/error.rb +0 -0
- data/lib/llm/file.rb +0 -0
- data/lib/llm/http_client.rb +0 -0
- data/lib/llm/lazy_conversation.rb +14 -2
- data/lib/llm/message.rb +1 -1
- data/lib/llm/message_queue.rb +0 -0
- data/lib/llm/model.rb +7 -0
- data/lib/llm/provider.rb +117 -98
- data/lib/llm/providers/anthropic/error_handler.rb +1 -1
- data/lib/llm/providers/anthropic/format.rb +0 -0
- data/lib/llm/providers/anthropic/response_parser.rb +0 -0
- data/lib/llm/providers/anthropic.rb +31 -15
- data/lib/llm/providers/gemini/error_handler.rb +0 -0
- data/lib/llm/providers/gemini/format.rb +0 -0
- data/lib/llm/providers/gemini/response_parser.rb +0 -0
- data/lib/llm/providers/gemini.rb +25 -14
- data/lib/llm/providers/ollama/error_handler.rb +0 -0
- data/lib/llm/providers/ollama/format.rb +0 -0
- data/lib/llm/providers/ollama/response_parser.rb +13 -0
- data/lib/llm/providers/ollama.rb +32 -8
- data/lib/llm/providers/openai/error_handler.rb +0 -0
- data/lib/llm/providers/openai/format.rb +0 -0
- data/lib/llm/providers/openai/response_parser.rb +5 -3
- data/lib/llm/providers/openai.rb +22 -12
- data/lib/llm/providers/voyageai/error_handler.rb +32 -0
- data/lib/llm/providers/voyageai/response_parser.rb +13 -0
- data/lib/llm/providers/voyageai.rb +44 -0
- data/lib/llm/response/completion.rb +0 -0
- data/lib/llm/response/embedding.rb +0 -0
- data/lib/llm/response.rb +0 -0
- data/lib/llm/version.rb +1 -1
- data/lib/llm.rb +18 -8
- data/llm.gemspec +6 -1
- data/share/llm/models/anthropic.yml +35 -0
- data/share/llm/models/gemini.yml +35 -0
- data/share/llm/models/ollama.yml +155 -0
- data/share/llm/models/openai.yml +46 -0
- data/spec/anthropic/completion_spec.rb +11 -27
- data/spec/anthropic/embedding_spec.rb +25 -0
- data/spec/gemini/completion_spec.rb +13 -29
- data/spec/gemini/embedding_spec.rb +4 -12
- data/spec/llm/lazy_conversation_spec.rb +45 -63
- data/spec/ollama/completion_spec.rb +7 -16
- data/spec/ollama/embedding_spec.rb +14 -5
- data/spec/openai/completion_spec.rb +19 -43
- data/spec/openai/embedding_spec.rb +4 -12
- data/spec/readme_spec.rb +9 -12
- data/spec/setup.rb +7 -16
- metadata +81 -2
data/lib/llm/providers/ollama/response_parser.rb
CHANGED
@@ -2,6 +2,19 @@
 
 class LLM::Ollama
   module ResponseParser
+    ##
+    # @param [Hash] body
+    #  The response body from the LLM provider
+    # @return [Hash]
+    def parse_embedding(body)
+      {
+        model: body["model"],
+        embeddings: body["data"].map { _1["embedding"] },
+        prompt_tokens: body.dig("usage", "prompt_tokens"),
+        total_tokens: body.dig("usage", "total_tokens")
+      }
+    end
+
     ##
     # @param [Hash] body
     #  The response body from the LLM provider
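For reference, a standalone Ruby sketch (not part of the diff) of what the new parse_embedding mapping produces for a /v1/embeddings-style response body; the sample values are illustrative:

    # Replicates the parse_embedding mapping shown above on a sample body.
    body = {
      "model" => "llama3.2",
      "data" => [{"embedding" => [0.1, 0.2]}, {"embedding" => [0.3, 0.4]}],
      "usage" => {"prompt_tokens" => 5, "total_tokens" => 5}
    }
    p({
      model: body["model"],
      embeddings: body["data"].map { _1["embedding"] },
      prompt_tokens: body.dig("usage", "prompt_tokens"),
      total_tokens: body.dig("usage", "total_tokens")
    })
    # => {:model=>"llama3.2", :embeddings=>[[0.1, 0.2], [0.3, 0.4]], :prompt_tokens=>5, :total_tokens=>5}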
data/lib/llm/providers/ollama.rb
CHANGED
@@ -11,7 +11,6 @@ module LLM
     include Format
 
     HOST = "localhost"
-    DEFAULT_PARAMS = {model: "llama3.2", stream: false}.freeze
 
     ##
     # @param secret (see LLM::Provider#initialize)
@@ -19,25 +18,50 @@ module LLM
       super(secret, host: HOST, port: 11434, ssl: false, **)
     end
 
+    ##
+    # @param input (see LLM::Provider#embed)
+    # @return (see LLM::Provider#embed)
+    def embed(input, **params)
+      params = {model: "llama3.2"}.merge!(params)
+      req = Net::HTTP::Post.new("/v1/embeddings", headers)
+      req.body = JSON.dump({input:}.merge!(params))
+      res = request(@http, req)
+      Response::Embedding.new(res).extend(response_parser)
+    end
+
     ##
     # @see https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion Ollama docs
     # @param prompt (see LLM::Provider#complete)
     # @param role (see LLM::Provider#complete)
     # @return (see LLM::Provider#complete)
     def complete(prompt, role = :user, **params)
-
+      params = {model: "llama3.2", stream: false}.merge!(params)
+      req = Net::HTTP::Post.new("/api/chat", headers)
       messages = [*(params.delete(:messages) || []), LLM::Message.new(role, prompt)]
-
-
-      req = preflight(req, body)
-      res = request(@http, req)
+      req.body = JSON.dump({messages: messages.map(&:to_h)}.merge!(params))
+      res = request(@http, req)
       Response::Completion.new(res).extend(response_parser)
     end
 
+    ##
+    # @return (see LLM::Provider#assistant_role)
+    def assistant_role
+      "assistant"
+    end
+
+    ##
+    # @return (see LLM::Provider#models)
+    def models
+      @models ||= load_models!("ollama")
+    end
+
     private
 
-    def
-
+    def headers
+      {
+        "Content-Type" => "application/json",
+        "Authorization" => "Bearer #{@secret}"
+      }
     end
 
     def response_parser
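Taken together, these changes replace the frozen DEFAULT_PARAMS constant with per-call defaults, so a caller-supplied :model now wins the merge. A hedged usage sketch (the prompts and the "mistral" override are illustrative, and an Ollama server must be listening on localhost:11434):

    require "llm"

    llm = LLM.ollama(nil)                           # local server, so no real key is needed
    llm.embed("Hello, world")                       # POST /v1/embeddings, defaults to model: "llama3.2"
    llm.complete("Hello", :user)                    # POST /api/chat, stream: false by default
    llm.complete("Hello", :user, model: "mistral")  # per-call override replaces the old constant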
data/lib/llm/providers/openai/error_handler.rb
File without changes

data/lib/llm/providers/openai/format.rb
File without changes
data/lib/llm/providers/openai/response_parser.rb
CHANGED
@@ -2,12 +2,14 @@
 
 class LLM::OpenAI
   module ResponseParser
+    ##
+    # @param [Hash] body
+    #  The response body from the LLM provider
+    # @return [Hash]
     def parse_embedding(body)
       {
         model: body["model"],
-        embeddings: body["data"].map do |data|
-          data["embedding"]
-        end,
+        embeddings: body["data"].map { _1["embedding"] },
         prompt_tokens: body.dig("usage", "prompt_tokens"),
         total_tokens: body.dig("usage", "total_tokens")
       }
data/lib/llm/providers/openai.rb
CHANGED
@@ -11,7 +11,6 @@ module LLM
     include Format
 
     HOST = "api.openai.com"
-    DEFAULT_PARAMS = {model: "gpt-4o-mini"}.freeze
 
     ##
     # @param secret (see LLM::Provider#initialize)
@@ -23,10 +22,9 @@ module LLM
     # @param input (see LLM::Provider#embed)
     # @return (see LLM::Provider#embed)
     def embed(input, **params)
-      req = Net::HTTP::Post.new
-      body = {input:, model: "text-embedding-3-small"}.merge!(params)
-
-      res = request @http, req
+      req = Net::HTTP::Post.new("/v1/embeddings", headers)
+      req.body = JSON.dump({input:, model: "text-embedding-3-small"}.merge!(params))
+      res = request(@http, req)
       Response::Embedding.new(res).extend(response_parser)
     end
 
@@ -36,19 +34,31 @@ module LLM
     # @param role (see LLM::Provider#complete)
     # @return (see LLM::Provider#complete)
     def complete(prompt, role = :user, **params)
-
+      params = {model: "gpt-4o-mini"}.merge!(params)
+      req = Net::HTTP::Post.new("/v1/chat/completions", headers)
       messages = [*(params.delete(:messages) || []), Message.new(role, prompt)]
-
-
-      req = preflight(req, body)
-      res = request(@http, req)
+      req.body = JSON.dump({messages: format(messages)}.merge!(params))
+      res = request(@http, req)
       Response::Completion.new(res).extend(response_parser)
     end
 
+    ##
+    # @return (see LLM::Provider#assistant_role)
+    def assistant_role
+      "assistant"
+    end
+
+    def models
+      @models ||= load_models!("openai")
+    end
+
     private
 
-    def
-
+    def headers
+      {
+        "Content-Type" => "application/json",
+        "Authorization" => "Bearer #{@secret}"
+      }
     end
 
     def response_parser
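The OpenAI provider gets the same treatment: each method now builds its request fully (path, headers, JSON body) instead of delegating to preflight, and defaults are merged per call. A sketch assuming OPENAI_SECRET holds a valid API key (the env var name is illustrative):

    require "llm"

    llm = LLM.openai(ENV["OPENAI_SECRET"])
    llm.complete("Hello", :user)                    # defaults to model: "gpt-4o-mini"
    llm.complete("Hello", :user, model: "o1-mini")  # caller-supplied :model wins the merge
    llm.embed(["first input", "second input"])      # defaults to model: "text-embedding-3-small"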
data/lib/llm/providers/voyageai/error_handler.rb
ADDED
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+class LLM::VoyageAI
+  class ErrorHandler
+    ##
+    # @return [Net::HTTPResponse]
+    #  Non-2XX response from the server
+    attr_reader :res
+
+    ##
+    # @param [Net::HTTPResponse] res
+    #  The response from the server
+    # @return [LLM::VoyageAI::ErrorHandler]
+    def initialize(res)
+      @res = res
+    end
+
+    ##
+    # @raise [LLM::Error]
+    #  Raises a subclass of {LLM::Error LLM::Error}
+    def raise_error!
+      case res
+      when Net::HTTPUnauthorized
+        raise LLM::Error::Unauthorized.new { _1.response = res }, "Authentication error"
+      when Net::HTTPTooManyRequests
+        raise LLM::Error::RateLimit.new { _1.response = res }, "Too many requests"
+      else
+        raise LLM::Error::BadResponse.new { _1.response = res }, "Unexpected response"
+      end
+    end
+  end
+end
data/lib/llm/providers/voyageai/response_parser.rb
ADDED
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+class LLM::VoyageAI
+  module ResponseParser
+    def parse_embedding(body)
+      {
+        model: body["model"],
+        embeddings: body["data"].map { _1["embedding"] },
+        total_tokens: body.dig("usage", "total_tokens")
+      }
+    end
+  end
+end
data/lib/llm/providers/voyageai.rb
ADDED
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+module LLM
+  class VoyageAI < Provider
+    require_relative "voyageai/error_handler"
+    require_relative "voyageai/response_parser"
+    HOST = "api.voyageai.com"
+
+    ##
+    # @param secret (see LLM::Provider#initialize)
+    def initialize(secret, **)
+      super(secret, host: HOST, **)
+    end
+
+    ##
+    # Provides an embedding via VoyageAI per
+    # [Anthropic's recommendation](https://docs.anthropic.com/en/docs/build-with-claude/embeddings)
+    # @param input (see LLM::Provider#embed)
+    # @return (see LLM::Provider#embed)
+    def embed(input, **params)
+      req = Net::HTTP::Post.new("/v1/embeddings", headers)
+      req.body = JSON.dump({input:, model: "voyage-2"}.merge!(params))
+      res = request(@http, req)
+      Response::Embedding.new(res).extend(response_parser)
+    end
+
+    private
+
+    def headers
+      {
+        "Content-Type" => "application/json",
+        "Authorization" => "Bearer #{@secret}"
+      }
+    end
+
+    def response_parser
+      LLM::VoyageAI::ResponseParser
+    end
+
+    def error_handler
+      LLM::VoyageAI::ErrorHandler
+    end
+  end
+end
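Anthropic has no first-party embeddings endpoint, so this new provider delegates that job to VoyageAI. A minimal sketch, assuming VOYAGEAI_SECRET holds a valid key (the env var name and the override below are illustrative); note the provider implements only #embed:

    require "llm"

    voyage = LLM.voyageai(ENV["VOYAGEAI_SECRET"])
    voyage.embed("The quick brown fox")            # POST /v1/embeddings, defaults to model: "voyage-2"
    voyage.embed("jumps over", model: "voyage-3")  # hypothetical model override, merged into the body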
data/lib/llm/response/completion.rb
File without changes

data/lib/llm/response/embedding.rb
File without changes

data/lib/llm/response.rb
File without changes

data/lib/llm/version.rb
CHANGED
-  VERSION = "0.1.0"
+  VERSION = "0.2.0"
data/lib/llm.rb
CHANGED
@@ -6,6 +6,7 @@ module LLM
   require_relative "llm/message"
   require_relative "llm/response"
   require_relative "llm/file"
+  require_relative "llm/model"
   require_relative "llm/provider"
   require_relative "llm/conversation"
   require_relative "llm/lazy_conversation"
@@ -16,32 +17,41 @@ module LLM
   ##
   # @param secret (see LLM::Anthropic#initialize)
   # @return (see LLM::Anthropic#initialize)
-  def anthropic(secret, **)
+  def anthropic(secret, options = {})
     require_relative "llm/providers/anthropic" unless defined?(LLM::Anthropic)
-    LLM::Anthropic.new(secret, **)
+    require_relative "llm/providers/voyageai" unless defined?(LLM::VoyageAI)
+    LLM::Anthropic.new(secret, **options)
+  end
+
+  ##
+  # @param secret (see LLM::VoyageAI#initialize)
+  # @return (see LLM::VoyageAI#initialize)
+  def voyageai(secret, options = {})
+    require_relative "llm/providers/voyageai" unless defined?(LLM::VoyageAI)
+    LLM::VoyageAI.new(secret, **options)
   end
 
   ##
   # @param secret (see LLM::Gemini#initialize)
   # @return (see LLM::Gemini#initialize)
-  def gemini(secret, **)
+  def gemini(secret, options = {})
     require_relative "llm/providers/gemini" unless defined?(LLM::Gemini)
-    LLM::Gemini.new(secret, **)
+    LLM::Gemini.new(secret, **options)
   end
 
   ##
   # @param host (see LLM::Ollama#initialize)
   # @return (see LLM::Ollama#initialize)
-  def ollama(secret)
+  def ollama(secret, options = {})
     require_relative "llm/providers/ollama" unless defined?(LLM::Ollama)
-    LLM::Ollama.new(secret)
+    LLM::Ollama.new(secret, **options)
   end
 
   ##
   # @param secret (see LLM::OpenAI#initialize)
   # @return (see LLM::OpenAI#initialize)
-  def openai(secret, **)
+  def openai(secret, options = {})
     require_relative "llm/providers/openai" unless defined?(LLM::OpenAI)
-    LLM::OpenAI.new(secret, **)
+    LLM::OpenAI.new(secret, **options)
   end
 end
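Every LLM.<provider> entry point now accepts an options hash that is splatted into the provider's constructor, so construction-time configuration flows through uniformly. A hedged sketch (the host value is a placeholder):

    require "llm"

    LLM.ollama(nil)                                  # same defaults as 0.1.0
    LLM.ollama(nil, host: "ollama.internal.example") # options reach LLM::Ollama.new(secret, **options)
    LLM.anthropic(ENV["ANTHROPIC_SECRET"])           # note: now also preloads the VoyageAI provider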
data/llm.gemspec
CHANGED
@@ -25,16 +25,21 @@ Gem::Specification.new do |spec|
     "README.md", "LICENSE.txt",
     "lib/*.rb", "lib/**/*.rb",
     "spec/*.rb", "spec/**/*.rb",
-    "llm.gemspec"
+    "share/llm/models/*.yml", "llm.gemspec"
   ]
   spec.require_paths = ["lib"]
 
   spec.add_runtime_dependency "net-http", "~> 0.6.0"
   spec.add_runtime_dependency "json"
+  spec.add_runtime_dependency "yaml"
 
   spec.add_development_dependency "webmock", "~> 3.24.0"
   spec.add_development_dependency "yard", "~> 0.9.37"
   spec.add_development_dependency "kramdown", "~> 2.4"
   spec.add_development_dependency "webrick", "~> 1.8"
   spec.add_development_dependency "test-cmd.rb", "~> 0.12.0"
+  spec.add_development_dependency "rake", "~> 13.0"
+  spec.add_development_dependency "rspec", "~> 3.0"
+  spec.add_development_dependency "standard", "~> 1.40"
+  spec.add_development_dependency "vcr", "~> 6.0"
 end
data/share/llm/models/anthropic.yml
ADDED
@@ -0,0 +1,35 @@
+claude-3-7-sonnet-20250219:
+  name: Claude 3.7 Sonnet
+  parameters: Unknown
+  description: Most intelligent Claude model with extended thinking and high capability
+  to_param: claude-3-7-sonnet-20250219
+
+claude-3-5-sonnet-20241022:
+  name: Claude 3.5 Sonnet (v2)
+  parameters: Unknown
+  description: High intelligence and capability; upgraded from previous Sonnet
+  to_param: claude-3-5-sonnet-20241022
+
+claude-3-5-sonnet-20240620:
+  name: Claude 3.5 Sonnet
+  parameters: Unknown
+  description: Intelligent and capable general-purpose model
+  to_param: claude-3-5-sonnet-20240620
+
+claude-3-5-haiku-20241022:
+  name: Claude 3.5 Haiku
+  parameters: Unknown
+  description: Blazing fast model for low-latency text generation
+  to_param: claude-3-5-haiku-20241022
+
+claude-3-opus-20240229:
+  name: Claude 3 Opus
+  parameters: Unknown
+  description: Top-level intelligence, fluency, and reasoning for complex tasks
+  to_param: claude-3-opus-20240229
+
+claude-3-haiku-20240307:
+  name: Claude 3 Haiku
+  parameters: Unknown
+  description: Fastest and most compact Claude model for near-instant responsiveness
+  to_param: claude-3-haiku-20240307
data/share/llm/models/gemini.yml
ADDED
@@ -0,0 +1,35 @@
+gemini-2.5-pro-exp-03-25:
+  name: Gemini
+  parameters: Unknown
+  description: Enhanced thinking and reasoning, multimodal understanding, advanced coding, and more
+  to_param: gemini-2.5-pro-exp-03-25
+
+gemini-2.0-flash:
+  name: Gemini
+  parameters: Unknown
+  description: Next generation features, speed, thinking, realtime streaming, and multimodal generation
+  to_param: gemini-2.0-flash
+
+gemini-2.0-flash-lite:
+  name: Gemini
+  parameters: Unknown
+  description: Cost efficiency and low latency
+  to_param: gemini-2.0-flash-lite
+
+gemini-1.5-flash:
+  name: Gemini
+  parameters: Unknown
+  description: Fast and versatile performance across a diverse variety of tasks
+  to_param: gemini-1.5-flash
+
+gemini-1.5-flash-8b:
+  name: Gemini
+  parameters: 8B
+  description: High volume and lower intelligence tasks
+  to_param: gemini-1.5-flash-8b
+
+gemini-1.5-pro:
+  name: Gemini
+  parameters: Unknown
+  description: Complex reasoning tasks requiring more intelligence
+  to_param: gemini-1.5-pro
data/share/llm/models/ollama.yml
ADDED
@@ -0,0 +1,155 @@
+---
+gemma3:1b:
+  name: Gemma
+  parameters: 1B
+  description: Lightweight version of Google's Gemma 3 language model, suitable for
+    low-resource environments
+  to_param: gemma3:1b
+gemma3:
+  name: Gemma
+  parameters: 4B
+  description: Balanced Gemma 3 model providing good accuracy with reasonable size
+  to_param: gemma3
+gemma3:12b:
+  name: Gemma
+  parameters: 12B
+  description: Larger Gemma 3 model offering improved reasoning and generation abilities
+  to_param: gemma3:12b
+gemma3:27b:
+  name: Gemma
+  parameters: 27B
+  description: High-end Gemma 3 model focused on top-tier performance and accuracy
+  to_param: gemma3:27b
+
+qwq:
+  name: QwQ
+  parameters: 32B
+  description: Large-scale model with high parameter count for complex tasks and
+    high-quality generation
+  to_param: qwq
+
+deepseek-r1:
+  name: DeepSeek-R1
+  parameters: 7B
+  description: Compact DeepSeek model optimized for research and experimentation
+  to_param: deepseek-r1
+deepseek-r1:671b:
+  name: DeepSeek-R1
+  parameters: 671B
+  description: Massive-scale DeepSeek model focused on advanced AI reasoning and
+    capabilities
+  to_param: deepseek-r1:671b
+deepseek-coder:
+  name: DeepSeek-Coder
+  parameters: 1.3B
+  description: Lightweight code generation model trained on 2T tokens of code and natural language
+  to_param: deepseek-coder
+deepseek-coder:6.7b:
+  name: DeepSeek-Coder
+  parameters: 6.7B
+  description: Mid-sized DeepSeek-Coder model offering a strong balance between speed and capability for code-related tasks
+  to_param: deepseek-coder:6.7b
+deepseek-coder:33b:
+  name: DeepSeek-Coder
+  parameters: 33B
+  description: Large DeepSeek-Coder model with high performance for code generation, understanding, and multilingual coding tasks
+  to_param: deepseek-coder:33b
+
+llama3.3:
+  name: Llama
+  parameters: 70B
+  description: Latest large Llama model designed for high-end performance in reasoning
+    and language tasks
+  to_param: llama3.3
+llama3.2:
+  name: Llama
+  parameters: 3B
+  description: Small but capable version of Llama 3.2 for lightweight applications
+  to_param: llama3.2
+llama3.2:1b:
+  name: Llama
+  parameters: 1B
+  description: Tiny version of Llama 3.2, extremely lightweight and fast
+  to_param: llama3.2:1b
+llama3.2-vision:
+  name: Llama Vision
+  parameters: 11B
+  description: Multimodal Llama 3.2 model with vision capabilities (images + text)
+  to_param: llama3.2-vision
+llama3.2-vision:90b:
+  name: Llama Vision
+  parameters: 90B
+  description: Large-scale vision-capable Llama model for advanced multimodal tasks
+  to_param: llama3.2-vision:90b
+llama3.1:
+  name: Llama
+  parameters: 8B
+  description: General-purpose Llama model designed for good accuracy and performance
+    balance
+  to_param: llama3.1
+llama3.1:405b:
+  name: Llama
+  parameters: 405B
+  description: Extremely large-scale version of Llama 3.1, suitable for advanced tasks
+  to_param: llama3.1:405b
+
+phi4:
+  name: Phi
+  parameters: 14B
+  description: Phi 4 is known for compact size and competitive performance in general
+    tasks
+  to_param: phi4
+phi4-mini:
+  name: Phi Mini
+  parameters: 3.8B
+  description: Lightweight variant of Phi 4 ideal for quick inference on constrained systems
+  to_param: phi4-mini
+
+mistral:
+  name: Mistral
+  parameters: 7B
+  description: Popular and versatile open model for general language tasks
+  to_param: mistral
+
+moondream:
+  name: Moondream
+  parameters: 1.4B
+  description: Compact vision-enabled model with strong general performance
+  to_param: moondream
+
+neural-chat:
+  name: Neural Chat
+  parameters: 7B
+  description: Chat-focused model fine-tuned for natural conversations
+  to_param: neural-chat
+
+starling-lm:
+  name: Starling
+  parameters: 7B
+  description: Model focused on instruction-following and conversational performance
+  to_param: starling-lm
+
+codellama:
+  name: Code Llama
+  parameters: 7B
+  description: Llama model variant fine-tuned specifically for code understanding
+    and generation
+  to_param: codellama
+
+llama2-uncensored:
+  name: Llama 2 Uncensored
+  parameters: 7B
+  description: Unfiltered version of Llama 2 for unrestricted language modeling
+  to_param: llama2-uncensored
+
+llava:
+  name: LLaVA
+  parameters: 7B
+  description: Multimodal model combining vision and language understanding
+  to_param: llava
+
+granite3.2:
+  name: Granite
+  parameters: 8B
+  description: IBM’s Granite model for enterprise-grade language applications
+  to_param: granite3.2
data/share/llm/models/openai.yml
ADDED
@@ -0,0 +1,46 @@
+---
+o3-mini:
+  name: OpenAI o3-mini
+  parameters: Unknown
+  description: Fast, flexible, intelligent reasoning model
+  to_param: o3-mini
+o1:
+  name: OpenAI o1
+  parameters: Unknown
+  description: High-intelligence reasoning model
+  to_param: o1
+o1-mini:
+  name: OpenAI o1-mini
+  parameters: Unknown
+  description: Faster, more affordable reasoning model than o1
+  to_param: o1-mini
+o1-pro:
+  name: OpenAI o1-pro
+  parameters: Unknown
+  description: More compute than o1 for better responses
+  to_param: o1-pro
+gpt-4.5-preview:
+  name: GPT-4.5 Preview
+  parameters: Unknown
+  description: Largest and most capable GPT model
+  to_param: gpt-4.5-preview
+gpt-4o:
+  name: GPT-4o
+  parameters: Unknown
+  description: Fast, intelligent, flexible GPT model
+  to_param: gpt-4o
+gpt-4o-mini:
+  name: GPT-4o Mini
+  parameters: Mini
+  description: Fast, affordable small model for focused tasks
+  to_param: gpt-4o-mini
+gpt-4o-realtime-preview:
+  name: GPT-4o Realtime
+  parameters: Unknown
+  description: Realtime model for text and audio inputs/outputs
+  to_param: gpt-4o-realtime-preview
+gpt-3.5-turbo:
+  name: GPT-3.5 Turbo
+  parameters: Unknown
+  description: Legacy GPT model for cheaper chat and non-chat tasks
+  to_param: gpt-3.5-turbo