langchainrb 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +4 -2
- data/lib/langchain/llm/ollama.rb +167 -27
- data/lib/langchain/llm/response/base_response.rb +7 -0
- data/lib/langchain/llm/response/ollama_response.rb +18 -0
- data/lib/langchain/utils/token_length/openai_validator.rb +11 -0
- data/lib/langchain/utils/token_length/token_limit_exceeded.rb +1 -1
- data/lib/langchain/vectorsearch/pinecone.rb +2 -1
- data/lib/langchain/version.rb +1 -1
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4c9d0655d58ddff57b9c9163065908dd17d91c6bffc5b146bf7fc01b4c9fb96d
+  data.tar.gz: ee82c644b7e38503fa0587ade2af0447819863303e9fa3755dce1676d68ad5f7
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 05faddd31c819e6d351ed99e05353e462341ab1744769a1b3a9932c37de4c68907b54f79bcd65f6b652d954d5301e80055c5ee8c57b66a3917256918c51cc61f
+  data.tar.gz: c2fed05da349fdc9ebd9990ea5c2d5c70a68241c491c903c631eb0584bce01da17bab7b04c59fa9fded8282547798219b2a0b951c1c5a8d1062d08f2a930062c
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,10 @@
 ## [Unreleased]
 
+## [0.9.1]
+- Add support for new OpenAI models
+- Add Ollama#chat method
+- Fix and refactor of `Langchain::LLM::Ollama`, responses can now be streamed.
+
 ## [0.9.0]
 - Introducing new `Langchain::Assistant` that will be replacing `Langchain::Conversation` and `Langchain::Agent`s.
 - `Langchain::Conversation` is deprecated.
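The new `Ollama#chat` entry above pairs with the README and `ollama.rb` changes further down. A minimal sketch of what a call might look like, assuming a locally running Ollama server (the URL and message content are placeholders, not values from this diff):

```ruby
# Illustrative only: URL and message are placeholders.
llm = Langchain::LLM::Ollama.new(url: "http://localhost:11434")

response = llm.chat(messages: [{role: "user", content: "Hey! How are you?"}])
response.chat_completion # => the assistant's reply text (see OllamaResponse#chat_completion below)
```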
data/README.md
CHANGED
@@ -42,6 +42,8 @@ If bundler is not being used to manage dependencies, install the gem by executin
 
 gem install langchainrb
 
+Additional gems may be required when loading LLM Providers. These are not included by default so you can include only what you need.
+
 ## Usage
 
 ```ruby
@@ -62,7 +64,7 @@ Langchain.rb wraps all supported LLMs in a unified interface allowing you to eas
 | [GooglePalm](https://ai.google/discover/palm2?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
 | [Google Vertex AI](https://cloud.google.com/vertex-ai?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ❌ | ✅ | |
 | [HuggingFace](https://huggingface.co/?utm_source=langchainrb&utm_medium=github) | ✅ | ❌ | ❌ | ❌ | |
-| [Ollama](https://ollama.ai/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ |
+| [Ollama](https://ollama.ai/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ❌ | |
 | [Replicate](https://replicate.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
 
 #### Using standalone LLMs:
@@ -91,7 +93,7 @@ llm.complete(prompt: "What is the meaning of life?").completion
 
 Generate a chat completion:
 ```ruby
-llm.chat(
+llm.chat(messages: [{role: "user", content: "What is the meaning of life?"}]).completion
 ```
 
 Summarize the text:
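The new README note about additional gems reflects that provider clients call `depends_on` at load time (the refactored Ollama client below requires `faraday`). A hedged Gemfile sketch for an app that only uses Ollama:

```ruby
# Gemfile sketch — the gem list is illustrative; add only what your chosen providers need.
source "https://rubygems.org"

gem "langchainrb", "~> 0.9.1"
gem "faraday" # needed by Langchain::LLM::Ollama (see `depends_on "faraday"` below)
```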
data/lib/langchain/llm/ollama.rb
CHANGED
@@ -5,21 +5,26 @@ module Langchain::LLM
   # Available models: https://ollama.ai/library
   #
   # Usage:
-  # ollama = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])
+  # ollama = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"], default_options: {})
   #
   class Ollama < Base
-    attr_reader :url
+    attr_reader :url, :defaults
 
     DEFAULTS = {
-      temperature: 0.
+      temperature: 0.8,
       completion_model_name: "llama2",
-      embeddings_model_name: "llama2"
+      embeddings_model_name: "llama2",
+      chat_completion_model_name: "llama2"
     }.freeze
 
     # Initialize the Ollama client
     # @param url [String] The URL of the Ollama instance
-
+    # @param default_options [Hash] The default options to use
+    #
+    def initialize(url:, default_options: {})
+      depends_on "faraday"
       @url = url
+      @defaults = DEFAULTS.merge(default_options)
     end
 
     #
@@ -27,32 +32,128 @@ module Langchain::LLM
     #
     # @param prompt [String] The prompt to complete
     # @param model [String] The model to use
-    #
+    # For a list of valid parameters and values, see:
+    # https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values
     # @return [Langchain::LLM::OllamaResponse] Response object
     #
-    def complete(
-
+    def complete(
+      prompt:,
+      model: defaults[:completion_model_name],
+      images: nil,
+      format: nil,
+      system: nil,
+      template: nil,
+      context: nil,
+      stream: nil,
+      raw: nil,
+      mirostat: nil,
+      mirostat_eta: nil,
+      mirostat_tau: nil,
+      num_ctx: nil,
+      num_gqa: nil,
+      num_gpu: nil,
+      num_thread: nil,
+      repeat_last_n: nil,
+      repeat_penalty: nil,
+      temperature: defaults[:temperature],
+      seed: nil,
+      stop: nil,
+      tfs_z: nil,
+      num_predict: nil,
+      top_k: nil,
+      top_p: nil,
+      stop_sequences: nil,
+      &block
+    )
+      if stop_sequences
+        stop = stop_sequences
+      end
 
-
+      parameters = {
+        prompt: prompt,
+        model: model,
+        images: images,
+        format: format,
+        system: system,
+        template: template,
+        context: context,
+        stream: stream,
+        raw: raw
+      }.compact
+
+      llm_parameters = {
+        mirostat: mirostat,
+        mirostat_eta: mirostat_eta,
+        mirostat_tau: mirostat_tau,
+        num_ctx: num_ctx,
+        num_gqa: num_gqa,
+        num_gpu: num_gpu,
+        num_thread: num_thread,
+        repeat_last_n: repeat_last_n,
+        repeat_penalty: repeat_penalty,
+        temperature: temperature,
+        seed: seed,
+        stop: stop,
+        tfs_z: tfs_z,
+        num_predict: num_predict,
+        top_k: top_k,
+        top_p: top_p
+      }
+
+      parameters[:options] = llm_parameters.compact
+
+      response = ""
 
       client.post("api/generate") do |req|
-        req.body =
-        req.body["prompt"] = prompt
-        req.body["model"] = model_name
-
-        req.body["options"] = options if options.any?
+        req.body = parameters
 
-        # TODO: Implement streaming support when a &block is passed in
         req.options.on_data = proc do |chunk, size|
           json_chunk = JSON.parse(chunk)
 
-
-
-
+          response += json_chunk.dig("response")
+
+          yield json_chunk, size if block
         end
       end
 
-      Langchain::LLM::OllamaResponse.new(response, model:
+      Langchain::LLM::OllamaResponse.new(response, model: parameters[:model])
+    end
+
+    # Generate a chat completion
+    #
+    # @param model [String] Model name
+    # @param messages [Array<Hash>] Array of messages
+    # @param format [String] Format to return a response in. Currently the only accepted value is `json`
+    # @param temperature [Float] The temperature to use
+    # @param template [String] The prompt template to use (overrides what is defined in the `Modelfile`)
+    # @param stream [Boolean] Streaming the response. If false the response will be returned as a single response object, rather than a stream of objects
+    #
+    # The message object has the following fields:
+    #   role: the role of the message, either system, user or assistant
+    #   content: the content of the message
+    #   images (optional): a list of images to include in the message (for multimodal models such as llava)
+    def chat(
+      model: defaults[:chat_completion_model_name],
+      messages: [],
+      format: nil,
+      temperature: defaults[:temperature],
+      template: nil,
+      stream: false # TODO: Fix streaming.
+    )
+      parameters = {
+        model: model,
+        messages: messages,
+        format: format,
+        temperature: temperature,
+        template: template,
+        stream: stream
+      }.compact
+
+      response = client.post("api/chat") do |req|
+        req.body = parameters
+      end
+
+      Langchain::LLM::OllamaResponse.new(response.body, model: parameters[:model])
     end
 
     #
@@ -63,18 +164,57 @@ module Langchain::LLM
     # @param options [Hash] The options to use
     # @return [Langchain::LLM::OllamaResponse] Response object
     #
-    def embed(
-
+    def embed(
+      text:,
+      model: defaults[:embeddings_model_name],
+      mirostat: nil,
+      mirostat_eta: nil,
+      mirostat_tau: nil,
+      num_ctx: nil,
+      num_gqa: nil,
+      num_gpu: nil,
+      num_thread: nil,
+      repeat_last_n: nil,
+      repeat_penalty: nil,
+      temperature: defaults[:temperature],
+      seed: nil,
+      stop: nil,
+      tfs_z: nil,
+      num_predict: nil,
+      top_k: nil,
+      top_p: nil
+    )
+      parameters = {
+        prompt: text,
+        model: model
+      }.compact
+
+      llm_parameters = {
+        mirostat: mirostat,
+        mirostat_eta: mirostat_eta,
+        mirostat_tau: mirostat_tau,
+        num_ctx: num_ctx,
+        num_gqa: num_gqa,
+        num_gpu: num_gpu,
+        num_thread: num_thread,
+        repeat_last_n: repeat_last_n,
+        repeat_penalty: repeat_penalty,
+        temperature: temperature,
+        seed: seed,
+        stop: stop,
+        tfs_z: tfs_z,
+        num_predict: num_predict,
+        top_k: top_k,
+        top_p: top_p
+      }
+
+      parameters[:options] = llm_parameters.compact
 
       response = client.post("api/embeddings") do |req|
-        req.body =
-        req.body["prompt"] = text
-        req.body["model"] = model_name
-
-        req.body["options"] = options if options.any?
+        req.body = parameters
       end
 
-      Langchain::LLM::OllamaResponse.new(response.body, model:
+      Langchain::LLM::OllamaResponse.new(response.body, model: parameters[:model])
     end
 
     private
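Given the refactored `#complete` above, a streamed call might look like the following sketch: the block receives each parsed JSON chunk and its size, and the concatenated text is wrapped in an `OllamaResponse`. The URL, options, and prompt are placeholders, not values from this diff.

```ruby
# Sketch based on the signature above; not copied from the gem's docs.
ollama = Langchain::LLM::Ollama.new(
  url: ENV["OLLAMA_URL"],
  default_options: {temperature: 0.2} # merged over DEFAULTS
)

response = ollama.complete(prompt: "Tell me a short story.") do |chunk, _size|
  # Each streamed chunk is a parsed JSON Hash, e.g. {"response" => "Once", "done" => false}
  print chunk["response"]
end

response # => Langchain::LLM::OllamaResponse wrapping the concatenated text
```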
data/lib/langchain/llm/response/ollama_response.rb
CHANGED
@@ -7,6 +7,16 @@ module Langchain::LLM
       super(raw_response, model: model)
     end
 
+    def created_at
+      if raw_response.dig("created_at")
+        Time.parse(raw_response.dig("created_at"))
+      end
+    end
+
+    def chat_completion
+      raw_response.dig("message", "content")
+    end
+
     def completion
       completions.first
     end
@@ -26,5 +36,13 @@ module Langchain::LLM
     def role
       "assistant"
     end
+
+    def prompt_tokens
+      raw_response.dig("prompt_eval_count")
+    end
+
+    def completion_tokens
+      raw_response.dig("eval_count")
+    end
   end
 end
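A small sketch of the new accessors, using a hand-built Hash shaped like Ollama's `/api/chat` output (the field values are invented for illustration):

```ruby
# The raw_response Hash below mimics an Ollama /api/chat reply; values are made up.
raw_response = {
  "created_at" => "2024-02-06T12:00:00.000000Z",
  "message" => {"role" => "assistant", "content" => "Hello!"},
  "prompt_eval_count" => 26,
  "eval_count" => 12
}

response = Langchain::LLM::OllamaResponse.new(raw_response, model: "llama2")
response.chat_completion   # => "Hello!"
response.created_at        # => Time parsed from "created_at"
response.prompt_tokens     # => 26
response.completion_tokens # => 12
```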
data/lib/langchain/utils/token_length/openai_validator.rb
CHANGED
@@ -19,10 +19,14 @@ module Langchain
           "gpt-3.5-turbo-1106" => 4096
         }
 
+        # NOTE: The gpt-4-turbo-preview is an alias that will always point to the latest GPT 4 Turbo preview
+        # the future previews may have a different token limit!
         TOKEN_LIMITS = {
           # Source:
           # https://platform.openai.com/docs/api-reference/embeddings
           # https://platform.openai.com/docs/models/gpt-4
+          "text-embedding-3-large" => 8191,
+          "text-embedding-3-small" => 8191,
           "text-embedding-ada-002" => 8191,
           "gpt-3.5-turbo" => 4096,
           "gpt-3.5-turbo-0301" => 4096,
@@ -40,6 +44,8 @@ module Langchain
           "gpt-4-32k-0314" => 32768,
           "gpt-4-32k-0613" => 32768,
           "gpt-4-1106-preview" => 128000,
+          "gpt-4-turbo-preview" => 128000,
+          "gpt-4-0125-preview" => 128000,
           "gpt-4-vision-preview" => 128000,
           "text-curie-001" => 2049,
           "text-babbage-001" => 2049,
@@ -58,6 +64,11 @@ module Langchain
         # @return [Integer] The token length of the text
         #
         def self.token_length(text, model_name, options = {})
+          # tiktoken-ruby doesn't support text-embedding-3-large or text-embedding-3-small yet
+          if ["text-embedding-3-large", "text-embedding-3-small"].include?(model_name)
+            model_name = "text-embedding-ada-002"
+          end
+
           encoder = Tiktoken.encoding_for_model(model_name)
           encoder.encode(text).length
         end
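An illustrative token count for one of the newly listed embedding models, assuming the validator is namespaced as `Langchain::Utils::TokenLength::OpenAIValidator` (per the file path); the fallback to the `text-embedding-ada-002` encoding happens inside `token_length`:

```ruby
# Sketch: counts tokens for a text-embedding-3-* model via the ada-002 encoding fallback.
Langchain::Utils::TokenLength::OpenAIValidator.token_length(
  "What is the meaning of life?",
  "text-embedding-3-small"
)
# => Integer token count
```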
data/lib/langchain/vectorsearch/pinecone.rb
CHANGED
@@ -17,12 +17,13 @@ module Langchain::Vectorsearch
     # @param api_key [String] The API key to use
     # @param index_name [String] The name of the index to use
     # @param llm [Object] The LLM client to use
-    def initialize(environment:, api_key:, index_name:, llm:)
+    def initialize(environment:, api_key:, index_name:, llm:, base_uri: nil)
       depends_on "pinecone"
 
       ::Pinecone.configure do |config|
         config.api_key = api_key
         config.environment = environment
+        config.base_uri = base_uri if base_uri
       end
 
       @client = ::Pinecone::Client.new
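A sketch of the new `base_uri:` keyword, which is only forwarded to the Pinecone client configuration when set. All credentials, the index name, the LLM choice, and the URI below are placeholders:

```ruby
# Illustrative values only; base_uri is optional and omitted unless you need a custom host.
pinecone = Langchain::Vectorsearch::Pinecone.new(
  environment: ENV["PINECONE_ENVIRONMENT"],
  api_key: ENV["PINECONE_API_KEY"],
  index_name: "recipes",
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]),
  base_uri: "https://example-index.svc.us-east-1-aws.pinecone.io"
)
```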
data/lib/langchain/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.9.
+  version: 0.9.1
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-
+date: 2024-02-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: baran
@@ -576,14 +576,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.8.
+        version: 0.8.10
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.8.
+        version: 0.8.10
 - !ruby/object:Gem::Dependency
   name: wikipedia-client
   requirement: !ruby/object:Gem::Requirement
@@ -598,6 +598,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 1.17.0
+- !ruby/object:Gem::Dependency
+  name: faraday
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: Build LLM-backed Ruby applications with Ruby's LangChain
 email:
 - andrei.bondarev13@gmail.com